From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <bofh@yhbt.net>
X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-ASN:  
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00
	shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id A13A71F619
	for <unicorn-public@yhbt.net>; Thu, 19 Mar 2020 02:28:23 +0000 (UTC)
From: Eric Wong <bofh@yhbt.net>
To: unicorn-public@yhbt.net
Subject: [PATCH] http: improve RFC 7230 conformance
Date: Thu, 19 Mar 2020 02:28:23 +0000
Message-Id: <20200319022823.32472-1-bofh@yhbt.net>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id: <unicorn-public.yhbt.net>

We need to favor "Transfer-Encoding: chunked" over
"Content-Length" in the request header if they both exist.
Furthermore, we now reject redundant chunking and cases where
"chunked" is not the final encoding.

We currently do not decode, and have no plans to decode, the "gzip",
"deflate", or "compress" encodings described by RFC 7230.
That's a job more appropriate for middleware, anyway.

cf. https://tools.ietf.org/html/rfc7230
    https://www.rfc-editor.org/errata_search.php?rfc=7230
---
 ext/unicorn_http/unicorn_http.rl | 46 ++++++++++++++++--
 lib/unicorn/http_request.rb      | 11 +++++
 test/unit/test_http_parser_ng.rb | 81 ++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+), 4 deletions(-)

diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index dfe3a63..21e09d6 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -62,7 +62,8 @@ struct http_parser {
   } len;
 };
 
-static ID id_set_backtrace;
+static ID id_set_backtrace, id_is_chunked_p;
+static VALUE cHttpParser;
 
 #ifdef HAVE_RB_HASH_CLEAR /* Ruby >= 2.0 */
 #  define my_hash_clear(h) (void)rb_hash_clear(h)
@@ -220,6 +221,19 @@ static void write_cont_value(struct http_parser *hp,
   rb_str_buf_cat(hp->cont, vptr, end + 1);
 }
 
+static int is_chunked(VALUE v) /* v: a Transfer-Encoding header value; nonzero return means chunked */
+{
+  /* common case first: the whole value is just "chunked" */
+  if (STR_CSTR_CASE_EQ(v, "chunked"))
+    return 1;
+
+  /*
+   * call Ruby function in unicorn/http_request.rb to deal with unlikely
+   * comma-delimited case; that function may raise instead of returning
+   */
+  return rb_funcall(cHttpParser, id_is_chunked_p, 1, v) != Qfalse; /* any non-false result counts as chunked */
+}
+
 static void write_value(struct http_parser *hp,
                         const char *buffer, const char *p)
 {
@@ -246,7 +260,9 @@ static void write_value(struct http_parser *hp,
     f = uncommon_field(field, flen);
   } else if (f == g_http_connection) {
     hp_keepalive_connection(hp, v);
-  } else if (f == g_content_length) {
+  } else if (f == g_content_length && !HP_FL_TEST(hp, CHUNKED)) {
+    if (hp->len.content)
+      parser_raise(eHttpParserError, "Content-Length already set");
     hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
     if (hp->len.content < 0)
       parser_raise(eHttpParserError, "invalid Content-Length");
@@ -254,9 +270,30 @@ static void write_value(struct http_parser *hp,
       HP_FL_SET(hp, HASBODY);
     hp_invalid_if_trailer(hp);
   } else if (f == g_http_transfer_encoding) {
-    if (STR_CSTR_CASE_EQ(v, "chunked")) {
+    if (is_chunked(v)) {
+      if (HP_FL_TEST(hp, CHUNKED))
+        /*
+         * RFC 7230 3.3.1:
+         * A sender MUST NOT apply chunked more than once to a message body
+         * (i.e., chunking an already chunked message is not allowed).
+         */
+        parser_raise(eHttpParserError, "Transfer-Encoding double chunked");
+
       HP_FL_SET(hp, CHUNKED);
       HP_FL_SET(hp, HASBODY);
+
+      /* RFC 7230 3.3.3, 3: favor chunked if Content-Length exists */
+      hp->len.content = 0;
+    } else if (HP_FL_TEST(hp, CHUNKED)) {
+      /*
+       * RFC 7230 3.3.3, point 3 states:
+       * If a Transfer-Encoding header field is present in a request and
+       * the chunked transfer coding is not the final encoding, the
+       * message body length cannot be determined reliably; the server
+       * MUST respond with the 400 (Bad Request) status code and then
+       * close the connection.
+       */
+      parser_raise(eHttpParserError, "invalid Transfer-Encoding");
     }
     hp_invalid_if_trailer(hp);
   } else if (f == g_http_trailer) {
@@ -931,7 +968,7 @@ static VALUE HttpParser_rssget(VALUE self)
 
 void Init_unicorn_http(void)
 {
-  VALUE mUnicorn, cHttpParser;
+  VALUE mUnicorn;
 
   mUnicorn = rb_define_module("Unicorn");
   cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject);
@@ -991,5 +1028,6 @@ void Init_unicorn_http(void)
 #ifndef HAVE_RB_HASH_CLEAR
   id_clear = rb_intern("clear");
 #endif
+  id_is_chunked_p = rb_intern("is_chunked?");
 }
 #undef SET_GLOBAL
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index bcc1f2d..6ca4592 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -188,4 +188,15 @@ def write_http_header(socket) # :nodoc:
       HTTP_RESPONSE_START.each { |c| socket.write(c) }
     end
   end
+
+  # called by ext/unicorn_http/unicorn_http.rl via rb_funcall; true only if "chunked" is the last coding
+  def self.is_chunked?(v) # :nodoc:
+    vals = v.split(/[ \t]*,[ \t]*/).map!(&:downcase) # comma-separated codings, lowercased for comparison
+    if vals.pop == 'chunked'.freeze # pop removes the final coding from vals
+      return true unless vals.include?('chunked'.freeze)
+      raise Unicorn::HttpParserError, 'double chunked', [] # "chunked" also appears earlier in the list
+    end
+    return false unless vals.include?('chunked'.freeze) # no "chunked" coding anywhere
+    raise Unicorn::HttpParserError, 'chunked not last', [] # third argument: empty backtrace
+  end
 end
diff --git a/test/unit/test_http_parser_ng.rb b/test/unit/test_http_parser_ng.rb
index d186f5a..425d5ad 100644
--- a/test/unit/test_http_parser_ng.rb
+++ b/test/unit/test_http_parser_ng.rb
@@ -11,6 +11,20 @@ def setup
     @parser = HttpParser.new
   end
 
+  # RFC 7230 allows gzip/deflate/compress Transfer-Encoding,
+  # but "chunked" must be last if used (exercises HttpParser.is_chunked? directly)
+  def test_is_chunked
+    [ 'chunked,chunked', 'chunked,gzip', 'chunked,gzip,chunked' ].each do |x|
+      assert_raise(HttpParserError) { HttpParser.is_chunked?(x) } # repeated or non-final "chunked"
+    end
+    [ 'gzip, chunked', 'gzip,chunked', 'gzip ,chunked' ].each do |x|
+      assert HttpParser.is_chunked?(x) # "chunked" last, with or without blanks around the comma
+    end
+    [ 'gzip', 'xhunked', 'xchunked' ].each do |x|
+      assert !HttpParser.is_chunked?(x) # no exact "chunked" token present
+    end
+  end
+
   def test_parser_max_len
     assert_raises(RangeError) do
       HttpParser.max_header_len = 0xffffffff + 1
@@ -566,6 +580,73 @@ def test_invalid_content_length
     end
   end
 
+  def test_duplicate_content_length # two Content-Length headers, no Transfer-Encoding
+    str = "PUT / HTTP/1.1\r\n" \
+          "Content-Length: 1\r\n" \
+          "Content-Length: 9\r\n" \
+          "\r\n"
+    assert_raises(HttpParserError) { @parser.headers({}, str) } # the second Content-Length must be rejected
+  end
+
+  def test_chunked_overrides_content_length # chunked wins regardless of header order
+    order = [ 'Transfer-Encoding: chunked', 'Content-Length: 666' ]
+    %w(a b).each do |x| # two passes, one per header order
+      str = "PUT /#{x} HTTP/1.1\r\n" \
+            "#{order.join("\r\n")}" \
+            "\r\n\r\na\r\nhelloworld\r\n0\r\n\r\n"
+      order.reverse! # flip the header order for the next pass
+      env = @parser.headers({}, str)
+      assert_nil @parser.content_length
+      assert_equal 'chunked', env['HTTP_TRANSFER_ENCODING']
+      assert_equal '666', env['CONTENT_LENGTH'],
+        'Content-Length logged so the app can log a possible client bug/attack'
+      @parser.filter_body(dst = '', str)
+      assert_equal 'helloworld', dst # the chunk body, not 666 bytes, is what gets decoded
+      @parser.parse # handle the non-existent trailer
+      assert @parser.next?
+    end
+  end
+
+  def test_chunked_order_good # "chunked" as the final coding is accepted
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: gzip\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "\r\n"
+    env = @parser.headers({}, str)
+    assert_equal 'gzip,chunked', env['HTTP_TRANSFER_ENCODING'] # repeated headers arrive joined by a comma
+    assert_nil @parser.content_length
+
+    @parser.clear # next: the same codings in a single comma-separated header
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: gzip, chunked\r\n" \
+          "\r\n"
+    env = @parser.headers({}, str)
+    assert_equal 'gzip, chunked', env['HTTP_TRANSFER_ENCODING'] # value kept as the client sent it
+    assert_nil @parser.content_length
+  end
+
+  def test_chunked_order_bad # a coding listed after "chunked" must be rejected
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "Transfer-Encoding: gzip\r\n" \
+          "\r\n"
+    assert_raise(HttpParserError) { @parser.headers({}, str) } # "chunked" is not the final coding here
+  end
+
+  def test_double_chunked # "chunked" applied twice: across two headers, then within one
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "Transfer-Encoding: chunked\r\n" \
+          "\r\n"
+    assert_raise(HttpParserError) { @parser.headers({}, str) }
+
+    @parser.clear # next: both "chunked" tokens in one comma-separated header
+    str = "PUT /x HTTP/1.1\r\n" \
+          "Transfer-Encoding: chunked,chunked\r\n" \
+          "\r\n"
+    assert_raise(HttpParserError) { @parser.headers({}, str) }
+  end
+
   def test_backtrace_is_empty
     begin
       @parser.headers({}, "AAADFSFDSFD\r\n\r\n")