kcar RubyGem user+dev discussion/patches/pulls/bugs/help
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: kcar-public@bogomips.org
Subject: [PATCH 07/11] flesh out filter_body for request parsing
Date: Sat,  1 Dec 2018 13:31:21 +0000	[thread overview]
Message-ID: <20181201133125.5524-8-e@80x24.org> (raw)
In-Reply-To: <20181201133125.5524-1-e@80x24.org>

Relying on Kcar::Parser#body_bytes_left= is still more
efficient, but allowing filter_body to work for identity
request bodies should simplify server implementations
at the moment.

When parsing responses, identity request bodies will still fail
on filter_body, at least for now. It's always been that way, and
it might be easier-to-write backwards compatible code in case
people want to stick to an older version of kcar.
---
 ext/kcar/kcar.rl            |  41 +++-
 test/test_request_parser.rb | 362 +++++++++++++++++++++++++++++++++++-
 2 files changed, 399 insertions(+), 4 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index ea3dacc..6330910 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -201,6 +201,13 @@ static void finalize_header(struct http_parser *hp, VALUE hdr)
   if (hp->has_trailer && !hp->chunked)
     rb_raise(eParserError, "trailer but not chunked");
   if (hp->is_request) {
+    if (hp->chunked) {
+      if (hp->len.chunk >= 0)
+        rb_raise(eParserError, "Content-Length set with chunked encoding");
+      else
+        hp->len.chunk = 0;
+    }
+
     if (!hp->has_query)
       rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
     if (hp->has_header) {
@@ -831,6 +838,8 @@ static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
   if (hp->chunked)
     rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
   hp->len.content = NUM2OFFT(bytes);
+  if (hp->len.content == 0)
+      hp->body_eof_seen = 1;
   return bytes;
 }
 
@@ -973,8 +982,12 @@ static VALUE keepalive(VALUE self)
 
   if (hp->persistent) {
     if (hp->has_header && hp->has_body) {
-      if (hp->chunked || (hp->len.content >= 0))
-        return Qtrue;
+      if (hp->chunked || (hp->len.content >= 0)) {
+        if (!hp->is_request)
+          return Qtrue;
+        else
+          return hp->body_eof_seen ? Qtrue : Qfalse;
+      }
 
       /* unknown Content-Length and not chunked, we must assume close */
       return Qfalse;
@@ -1012,12 +1025,34 @@ static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
 
   StringValue(dst);
   rb_str_modify(dst);
-  rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
   OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */
 
+  /*
+   * for now, only support filter_body for identity requests,
+   * not responses; it's rather inefficient to blindly memcpy
+   * giant request bodies; on the other hand, it simplifies
+   * server-side code.
+   */
+  if (hp->is_request && !hp->chunked) {
+    /* no need to enter the Ragel machine for unchunked transfers */
+    assert(hp->len.content >= 0 && "negative Content-Length");
+    if (hp->len.content > 0) {
+      long nr = MIN(slen, hp->len.content);
+
+      rb_str_resize(dst, nr);
+      memcpy(RSTRING_PTR(dst), sptr, nr);
+      hp->len.content -= nr;
+      if (hp->len.content == 0)
+        hp->body_eof_seen = 1;
+      advance_str(src, nr);
+    }
+    return dst;
+  }
+
   if (!hp->chunked)
     rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
 
+  rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
   if (!chunked_eof(hp)) {
     hp->s.dest_offset = 0;
     http_parser_execute(hp, dst, sptr, slen);
diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb
index 5e97a0e..7654e62 100644
--- a/test/test_request_parser.rb
+++ b/test/test_request_parser.rb
@@ -732,7 +732,7 @@ class TestRequestParser < Test::Unit::TestCase
     req = @hp.request(@env, str)
     assert_equal 'Content-MD5,Content-SHA1', req['HTTP_TRAILER']
     assert_equal "1\r\na\r\n2\r\n..\r\n0\r\n", str
-    assert @hp.keepalive?
+    assert_not_predicate @hp, :keepalive?
   end
 
   def test_http_09
@@ -859,4 +859,364 @@ class TestRequestParser < Test::Unit::TestCase
     assert_equal expect, env2
     assert_equal '', buf
   end
+
+  def test_identity_byte_headers
+    str = "PUT / HTTP/1.1\r\n"
+    str << "Content-Length: 123\r\n"
+    str << "\r"
+    buf = ''
+    str.each_byte { |byte|
+      buf << byte.chr
+      assert_nil @hp.request(@env, str)
+    }
+    buf << "\n"
+    req = @hp.request(@env, buf)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_predicate buf, :empty?
+    assert ! @hp.keepalive?
+    assert_equal 123, @hp.body_bytes_left
+    dst = ""
+    buf = '.' * 123
+    @hp.filter_body(dst, buf)
+    assert_equal '.' * 123, dst
+    assert_equal "", buf
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_step_headers
+    str = "PUT / HTTP/1.1\r\n"
+    assert_nil @hp.request(@env, str)
+    str << "Content-Length: 123\r\n"
+    assert_nil @hp.request(@env, str)
+    str << "\r\n"
+    req = @hp.request(@env, str)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_equal 0, str.size
+    assert_not_predicate @hp, :keepalive?
+    dst = ""
+    buf = '.' * 123
+    @hp.filter_body(dst, buf)
+    assert_equal '.' * 123, dst
+    assert_equal "", buf
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_oneshot_header
+    str = "PUT / HTTP/1.1\r\nContent-Length: 123\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_equal 0, str.size
+    assert_not_predicate @hp, :keepalive?
+    dst = ""
+    buf = '.' * 123
+    @hp.filter_body(dst, buf)
+    assert_equal '.' * 123, dst
+    assert_equal "", buf
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_oneshot_header_with_body
+    body = ('a' * 123).freeze
+    str = "PUT / HTTP/1.1\r\n" \
+           "Content-Length: #{body.length}\r\n" \
+           "\r\n#{body}"
+    req = @hp.request(@env, str)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_equal 123, str.size
+    assert_equal body, str
+    tmp = ''
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal 0, str.size
+    assert_equal tmp, body
+    assert_equal body, @hp.filter_body(tmp, str)
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_oneshot_header_with_body_partial
+    str = "PUT / HTTP/1.1\r\nContent-Length: 123\r\n\r\na"
+    req = @hp.request(@env, str)
+    assert_equal 1, str.size
+    assert_equal 'a', str
+    tmp = ''
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal "", str
+    assert_equal "a", tmp
+    str << ' ' * 122
+    rv = @hp.filter_body(tmp, str)
+    assert_equal 122, tmp.size
+    assert_same tmp, rv
+    assert_equal "", str
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_predicate @hp, :keepalive?
+    assert_equal '123', req['CONTENT_LENGTH']
+  end
+
+  def test_identity_oneshot_header_with_body_slop
+    str = "PUT / HTTP/1.1\r\nContent-Length: 1\r\n\r\naG"
+    req = @hp.request(@env, str)
+    assert_equal 2, str.size
+    assert_equal 'aG', str
+    tmp = ''
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal "G", str
+    assert_equal "a", @hp.filter_body(tmp, str)
+    assert_equal 1, tmp.size
+    assert_equal "a", tmp
+    assert_predicate @hp, :keepalive?
+    assert_equal '1', req['CONTENT_LENGTH']
+  end
+
+  def test_chunked
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 0, str.size
+    tmp = ""
+    assert_predicate @hp, :chunked?
+    assert_nil @hp.filter_body(tmp, str << "6")
+    assert_equal 0, tmp.size
+    assert_nil @hp.filter_body(tmp, str << "\r\n")
+    assert_equal 0, str.size
+    assert_equal 0, tmp.size
+    tmp = ""
+    assert_nil @hp.filter_body(tmp, str << "..")
+    assert_equal "..", tmp
+    assert_nil @hp.filter_body(tmp, str << "abcd\r\n0\r\n")
+    assert_equal "abcd", tmp
+    assert_same tmp, @hp.filter_body(tmp, str << "PUT")
+    assert_equal "PUT", str
+    assert_not_predicate @hp, :keepalive?
+    str << "TY: FOO\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal "FOO", req["HTTP_PUTTY"]
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_chunked_empty
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 0, str.size
+    tmp = ""
+    assert_same tmp, @hp.filter_body(tmp, str << "0\r\n\r\n")
+    assert_predicate @hp, :body_eof?
+    assert_equal "", tmp
+    assert_same req, @hp.request(@env, str)
+    assert_equal "", str
+  end
+
+  def test_two_chunks
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    assert_equal 0, str.size
+    tmp = ""
+    assert_nil @hp.filter_body(tmp, str << "6")
+    assert_equal 0, tmp.size
+    assert_nil @hp.filter_body(tmp, str << "\r\n")
+    assert_equal "", str
+    assert_equal 0, tmp.size
+    tmp = ""
+    assert_nil @hp.filter_body(tmp, str << "..")
+    assert_equal 2, tmp.size
+    assert_equal "..", tmp
+    assert_nil @hp.filter_body(tmp, str << "abcd\r\n1")
+    assert_equal "abcd", tmp
+    assert_nil @hp.filter_body(tmp, str << "\r")
+    assert_equal "", tmp
+    assert_nil @hp.filter_body(tmp, str << "\n")
+    assert_equal "", tmp
+    assert_nil @hp.filter_body(tmp, str << "z")
+    assert_equal "z", tmp
+    assert_nil @hp.filter_body(tmp, str << "\r\n")
+    assert_nil @hp.filter_body(tmp, str << "0")
+    assert_nil @hp.filter_body(tmp, str << "\r")
+    rv = @hp.filter_body(tmp, str << "\nGET")
+    assert_equal "GET", str
+    assert_same tmp, rv
+    assert_equal '', tmp
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_big_chunk
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n" \
+          "4000\r\nabcd"
+    req = @hp.request(@env, str)
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal '', str
+    str << ' ' * 16300
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal '', str
+    str << ' ' * 80
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal '', str
+    assert_not_predicate @hp, :body_eof?
+    assert_same tmp, @hp.filter_body(tmp, str << "\r\n0\r\n")
+    assert_equal "", tmp
+    assert_predicate @hp, :body_eof?
+    str << "\r\n"
+    assert_same req, @hp.request(@env, str)
+    assert_equal "", str
+    assert_predicate @hp, :body_eof?
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_two_chunks_oneshot
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_chunks_bytewise
+    chunked = "10\r\nabcdefghijklmnop\r\n11\r\n0123456789abcdefg\r\n0\r\n"
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    buf = str.dup
+    req = @hp.request(@env, buf)
+    assert_same req, @env
+    assert_equal "", buf
+    tmp = ''
+    body = ''
+    str = chunked[0..-2]
+    str.each_byte { |byte|
+      assert_nil @hp.filter_body(tmp, buf << byte.chr)
+      body << tmp
+    }
+    assert_equal 'abcdefghijklmnop0123456789abcdefg', body
+    assert_same tmp, @hp.filter_body(tmp, buf << "\n")
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_trailers
+    str = "PUT / HTTP/1.1\r\n" \
+           "Trailer: Content-MD5\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    assert_equal 'Content-MD5', req['HTTP_TRAILER']
+    assert_nil req['HTTP_CONTENT_MD5']
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    md5_b64 = [ Digest::MD5.digest(tmp) ].pack('m').strip.freeze
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal '', str
+    md5_hdr = "Content-MD5: #{md5_b64}\r\n".freeze
+    str << md5_hdr
+    assert_nil @hp.request(@env, str)
+    assert_equal md5_b64, req['HTTP_CONTENT_MD5']
+    assert_equal "CONTENT_MD5: #{md5_b64}\r\n", str
+    str << "\r"
+    assert_nil @hp.request(@env, str)
+    str << "\nGET / "
+    req = @hp.request(@env, str)
+    assert_equal "GET / ", str
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_trailers_slowly
+    str = "PUT / HTTP/1.1\r\n" \
+           "Trailer: Content-MD5\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 'Content-MD5', req['HTTP_TRAILER']
+    assert_nil req['HTTP_CONTENT_MD5']
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    md5_b64 = [ Digest::MD5.digest(tmp) ].pack('m').strip.freeze
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal '', str
+    assert_nil @hp.request(req, str)
+    md5_hdr = "Content-MD5: #{md5_b64}\r\n".freeze
+    md5_hdr.each_byte { |byte|
+      str << byte.chr
+      assert_nil @hp.request(req, str)
+    }
+    assert_equal md5_b64, req['HTTP_CONTENT_MD5']
+    assert_equal "CONTENT_MD5: #{md5_b64}\r\n", str
+    str << "\r"
+    assert_nil @hp.request(@env, str)
+    str << "\n"
+    assert_same req, @hp.request(@env, str)
+  end
+
+  def test_max_chunk
+    assert_operator Kcar::Parser::CHUNK_MAX, :>=, 0xffff
+    str = "PUT / HTTP/1.1\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "#{Kcar::Parser::CHUNK_MAX.to_s(16)}\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    assert_nil @hp.body_bytes_left
+    assert_nil @hp.filter_body('', str)
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_max_body
+    n = Kcar::Parser::LENGTH_MAX
+    buf = "PUT / HTTP/1.1\r\nContent-Length: #{n}\r\n\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal n, req['CONTENT_LENGTH'].to_i
+    # connection can only be persistent when body is drained:
+    assert_not_predicate @hp, :keepalive?
+    @hp.body_bytes_left = 1
+    assert_not_predicate @hp, :keepalive?
+    @hp.body_bytes_left = 0
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_overflow_chunk
+    n = Kcar::Parser::CHUNK_MAX + 1
+    str = "PUT / HTTP/1.1\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "#{n.to_s(16)}\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_nil @hp.body_bytes_left
+    assert_equal 'chunked', req['HTTP_TRANSFER_ENCODING']
+    assert_raise(Kcar::ParserError) { @hp.filter_body('', str) }
+  end
+
+  def test_overflow_content_length
+    n = Kcar::Parser::LENGTH_MAX + 1
+    buf ="PUT / HTTP/1.1\r\nContent-Length: #{n}\r\n\r\n"
+    assert_raise(Kcar::ParserError) do
+      @hp.request(@env, buf)
+    end
+  end
+
+  def test_bad_chunk
+    buf = "PUT / HTTP/1.1\r\n" \
+          "transfer-Encoding: chunked\r\n\r\n" \
+          "#zzz\r\na\r\n2\r\n..\r\n0\r\n"
+    @hp.request(@env, buf)
+    assert_nil @hp.body_bytes_left
+    assert_raise(Kcar::ParserError) { @hp.filter_body("", buf) }
+  end
+
+  def test_bad_content_length
+    buf = "PUT / HTTP/1.1\r\nContent-Length: 7ff\r\n\r\n"
+    assert_raise(Kcar::ParserError) { @hp.request(@env, buf) }
+  end
+
+  def test_bad_trailers
+    str = "PUT / HTTP/1.1\r\n" \
+           "Trailer: Transfer-Encoding\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 'Transfer-Encoding', req['HTTP_TRAILER']
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    assert_equal '', str
+    str << "Transfer-Encoding: identity\r\n\r\n"
+    assert_raise(Kcar::ParserError) { @hp.request(@env, str) }
+  end
 end

  parent reply	other threads:[~2018-12-01 13:31 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
2018-12-01 13:31 ` [PATCH 02/11] begin implementing request parsing Eric Wong
2018-12-01 13:31 ` [PATCH 03/11] favor bitfields instead flags + macros Eric Wong
2018-12-01 13:31 ` [PATCH 04/11] implement request parsing with tests Eric Wong
2018-12-01 13:31 ` [PATCH 05/11] pkg.mk: enable warnings by default for tests Eric Wong
2018-12-01 13:31 ` [PATCH 06/11] filter_body: rename variables to be like memcpy(3) Eric Wong
2018-12-01 13:31 ` Eric Wong [this message]
2018-12-01 13:31 ` [PATCH 08/11] do not assume SERVER_PORT Eric Wong
2018-12-01 13:31 ` [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests Eric Wong
2018-12-01 13:31 ` [PATCH 10/11] always set non-negative Content-Length for requests Eric Wong
2018-12-01 13:31 ` [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6 Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://yhbt.net/kcar/

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181201133125.5524-8-e@80x24.org \
    --to=e@80x24.org \
    --cc=kcar-public@bogomips.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://yhbt.net/kcar.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).