kcar RubyGem user+dev discussion/patches/pulls/bugs/help
* [PATCH v2] request parsing bits
@ 2018-12-01 13:31 Eric Wong
  2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
                   ` (10 more replies)
  0 siblings, 11 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Most of these patches were done back in April 2017, but here they are
again: refined, cleaned up, and with an additional patch on top for a
nice rb_hash_aset optimization in Ruby 2.6.



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 01/11] introduce new str_new_dd_freeze internal function
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 02/11] begin implementing request parsing Eric Wong
                   ` (9 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

This seems like it will be a common pattern: create a string and
immediately dedupe it.  In the future, we may be able to save some
memory by eliding the temporary object creation or by releasing heap
memory via rb_str_resize(..., 0).
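
For context, this is the same dedupe behavior the extconf.rb check
below probes; a minimal Ruby sketch (deduplication needs String#-@
from Ruby 2.5+, older versions only freeze):

    a = -(%w(t e s t).join)
    b = -(%w(t e s t).join)
    a.frozen?    # => true
    a.equal?(b)  # => true on 2.5+ (one shared, interned object)
                 # => false on 2.3/2.4 (frozen, but not deduplicated)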
---
 ext/kcar/extconf.rb | 15 ++++++++++-----
 ext/kcar/kcar.rl    |  9 +++++++--
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/ext/kcar/extconf.rb b/ext/kcar/extconf.rb
index 89ac586..b65846a 100644
--- a/ext/kcar/extconf.rb
+++ b/ext/kcar/extconf.rb
@@ -6,15 +6,20 @@ dir_config("kcar")
 have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h")
 have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h")
 
-uminus_dedupe = false
+message('checking if String#-@ (str_uminus) dedupes... ')
 begin
-  # oddly, opt_str_freeze is not always effective:
-  # https://bugs.ruby-lang.org/issues/13282
   a = -(%w(t e s t).join)
   b = -(%w(t e s t).join)
-  uminus_dedupe = a.object_id == b.object_id
+  if a.equal?(b)
+    $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=1 "
+    message("yes\n")
+  else
+    $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
+    message("no, needs Ruby 2.5+\n")
+  end
 rescue NoMethodError
+  $CPPFLAGS += " -DSTR_UMINUS_DEDUPE=0 "
+  message("no, String#-@ not available\n")
 end
-$CFLAGS += " -DSTR_UMINUS_DEDUPE=#{uminus_dedupe ? 1 : 0}"
 
 create_makefile("kcar_ext")
diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index cbcfa97..79f65db 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -106,6 +106,11 @@ static VALUE str_dd_freeze(VALUE str)
   return str;
 }
 
+static VALUE str_new_dd_freeze(const char *ptr, long len)
+{
+  return str_dd_freeze(rb_str_new(ptr, len));
+}
+
 static VALUE stripped_str_new(const char *str, long len)
 {
   long end;
@@ -161,7 +166,7 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
 {
   long nr;
 
-  hp->status = str_dd_freeze(rb_str_new(ptr, len));
+  hp->status = str_new_dd_freeze(ptr, len);
 
   /* RSTRING_PTR is null terminated, ptr is not */
   nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
@@ -238,7 +243,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   vlen = LEN(mark, p);
   VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
   VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
-  f = str_dd_freeze(rb_str_new(fptr, (long)flen));
+  f = str_new_dd_freeze(fptr, (long)flen);
   v = stripped_str_new(vptr, (long)vlen);
 
   /* needs more tests for error-checking here */

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 02/11] begin implementing request parsing
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
  2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 03/11] favor bitfields instead of flags + macros Eric Wong
                   ` (8 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Not wired up yet, but for now everything compiles and existing
tests run.
---
 ext/kcar/kcar.rl             | 107 ++++++++++++++++++++++++++++++++++-
 ext/kcar/kcar_http_common.rl |  36 +++++++++++-
 2 files changed, 138 insertions(+), 5 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 79f65db..033e6ea 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -15,6 +15,10 @@
 
 static VALUE eParserError;
 static ID id_uminus, id_sq, id_sq_set;
+static VALUE g_rack_url_scheme,
+  g_HOST, g_PATH_INFO, g_QUERY_STRING,
+  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI;
+static VALUE e413, e414;
 
 /** Defines common length and error messages for input length validation. */
 #define DEF_MAX_LENGTH(N, length) \
@@ -31,10 +35,20 @@ static ID id_uminus, id_sq, id_sq_set;
     rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
 } while (0)
 
+#define VALIDATE_MAX_URI_LENGTH(len, N) do { \
+  if (len > MAX_##N##_LENGTH) \
+    rb_raise(e414, MAX_##N##_LENGTH_ERR); \
+} while (0)
+
 /* Defines the maximum allowed lengths for various input elements.*/
 DEF_MAX_LENGTH(FIELD_NAME, 256);
 DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
 DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
+DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15);
+DEF_MAX_LENGTH(FRAGMENT, 1024); /* just in case (stolen from Mongrel) */
+DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
+DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
+
 
 #define UH_FL_CHUNKED  0x1
 #define UH_FL_HASBODY  0x2
@@ -90,6 +104,13 @@ static unsigned int ulong2uint(unsigned long n)
 #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
 #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
 
+/* Downcases a single ASCII character.  Locale-agnostic. */
+static void downcase_char(char *c)
+{
+  if (*c >= 'A' && *c <= 'Z')
+    *c |= 0x20;
+}
+
 static int is_lws(char c)
 {
   return (c == ' ' || c == '\t');
@@ -153,7 +174,54 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
 }
 
 static void
-http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
+request_method(VALUE env, const char *ptr, size_t len)
+{
+  rb_hash_aset(env, g_REQUEST_METHOD, str_new_dd_freeze(ptr, len));
+}
+
+static void
+url_scheme(VALUE env, const char *ptr, size_t len)
+{
+  rb_hash_aset(env, g_rack_url_scheme, str_new_dd_freeze(ptr, len));
+}
+
+static void
+request_host(VALUE env, const char *ptr, size_t len)
+{
+  rb_hash_aset(env, g_HOST, str_new_dd_freeze(ptr, len));
+}
+
+static void
+request_uri(VALUE env, const char *ptr, size_t len)
+{
+  VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
+  rb_hash_aset(env, g_REQUEST_URI, rb_str_new(ptr, len));
+}
+
+static void
+query_string(VALUE env, const char *ptr, size_t len)
+{
+  VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
+  rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
+}
+
+static void
+request_path(VALUE env, const char *ptr, size_t len)
+{
+  VALUE val;
+
+  VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
+  val = rb_hash_aset(env, g_REQUEST_PATH, rb_str_new(ptr, len));
+
+  /* rack says PATH_INFO must start with "/" or be empty */
+  if (CONST_MEM_EQ("*", ptr, len))
+    val = rb_str_new(NULL, 0);
+
+  rb_hash_aset(env, g_PATH_INFO, val);
+}
+
+static void
+http_version(struct http_parser *hp, const char *ptr, size_t len)
 {
   if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
     /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
@@ -328,12 +396,24 @@ static void write_value(VALUE hdr, struct http_parser *hp,
 
   action mark {MARK(mark, fpc); }
 
+  action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
+  action downcase_char { downcase_char(deconst(fpc)); }
+  action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action url_scheme { url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+
+  action start_query { MARK(start.query, fpc); }
+  action query_string {
+    query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc));
+  }
+  action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action start_field { MARK(start.field, fpc); }
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
   action write_value { write_value(hdr, hp, buffer, fpc); }
   action write_cont_value { write_cont_value(hp, buffer, fpc); }
-  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); }
   action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
 
   action add_to_chunk_size {
@@ -720,6 +800,7 @@ static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
 
 void Init_kcar_ext(void)
 {
+  static VALUE globals;
   VALUE mKcar = rb_define_module("Kcar");
   VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
 
@@ -729,6 +810,10 @@ void Init_kcar_ext(void)
    * This is raised if there are parsing errors.
    */
   eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
+  e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError",
+                               eParserError);
+  e414 = rb_define_class_under(mKcar, "RequestURITooLongError",
+                               eParserError);
 
   rb_define_alloc_func(cParser, kcar_alloc);
   rb_define_method(cParser, "initialize", initialize, 0);
@@ -759,4 +844,22 @@ void Init_kcar_ext(void)
   id_sq = rb_intern("[]");
   id_sq_set = rb_intern("[]=");
   id_uminus = rb_intern("-@");
+
+  /* TODO: gperf to make a perfect hash of common strings */
+#define C(ary, var, cstr) do { \
+  var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \
+  rb_ary_push((ary), (var)); \
+} while (0);
+
+  globals = rb_ary_new();
+  rb_global_variable(&globals);
+  C(globals, g_HOST, "HOST");
+  C(globals, g_PATH_INFO, "PATH_INFO");
+  C(globals, g_QUERY_STRING, "QUERY_STRING");
+  C(globals, g_REQUEST_METHOD, "REQUEST_METHOD");
+  C(globals, g_REQUEST_PATH, "REQUEST_PATH");
+  C(globals, g_REQUEST_URI, "REQUEST_URI");
+  C(globals, g_rack_url_scheme, "rack.url_scheme");
+  OBJ_FREEZE(globals);
+#undef C
 }
diff --git a/ext/kcar/kcar_http_common.rl b/ext/kcar/kcar_http_common.rl
index cb89248..0c596bc 100644
--- a/ext/kcar/kcar_http_common.rl
+++ b/ext/kcar/kcar_http_common.rl
@@ -25,10 +25,38 @@
 
 # elements
   token = (ascii -- (CTL | tspecials));
+
+# URI schemes and absolute paths
+  scheme = ( "http"i ("s"i)? ) $downcase_char >mark %url_scheme;
+  hostname = ((alnum | "-" | "." | "_")+ | ("[" (":" | xdigit)+ "]"));
+  host_with_port = (hostname (":" digit*)?) >mark %host;
+  userinfo = ((unreserved | escape | ";" | ":" | "&" | "=" | "+")+ "@")*;
+
+  path = ( pchar+ ( "/" pchar* )* ) ;
+  query = ( uchar | reserved )* %query_string ;
+  param = ( pchar | "/" )* ;
+  params = ( param ( ";" param )* ) ;
+  rel_path = (path? (";" params)? %request_path) ("?" %start_query query)?;
+  absolute_path = ( "/"+ rel_path );
+  path_uri = absolute_path > mark %request_uri;
+  Absolute_URI = (scheme "://" userinfo host_with_port path_uri);
+
+  Request_URI = ((absolute_path | "*") >mark %request_uri) | Absolute_URI;
+
+  # let's not waste cycles setting fragment in the request;
+  # valid clients do not send it, and we will just silently ignore it.
+  Fragment = ( uchar | reserved )*;
+
+  Method = (token){1,20} >mark %request_method;
+  GetOnly = "GET" >mark %request_method;
+
+  http_number = ( digit+ "." digit+ ) ;
+  HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ;
+  Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " "
+                   HTTP_Version CRLF ) ;
+
   phrase = (any -- CRLF)+;
   Status_Phrase = (digit+ (" "+ phrase)?) >mark %status_phrase ;
-  http_number = (digit+ "." digit+) ;
-  HTTP_Version = ("HTTP/" http_number) >mark %http_version ;
   Status_Line = HTTP_Version " "+ Status_Phrase :> CRLF;
 
   field_name = ( token -- ":" )+ >start_field %write_field;
@@ -51,7 +79,9 @@
   Trailers := (message_header)* CRLF @end_trailers;
 
   FullResponse = Status_Line (message_header)* CRLF @header_done;
+  FullRequest = Request_Line (message_header)* CRLF @header_done;
+  SimpleRequest = GetOnly " " Request_URI ("#"Fragment){0,1} CRLF @header_done;
 
-main := FullResponse;
+main := FullResponse | FullRequest | SimpleRequest;
 
 }%%

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 03/11] favor bitfields instead of flags + macros
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
  2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
  2018-12-01 13:31 ` [PATCH 02/11] begin implementing request parsing Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 04/11] implement request parsing with tests Eric Wong
                   ` (7 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

This (IMHO) improves readability of code in the face of future
changes.  At least on my system(*), binary size is even reduced:

	 text      data     bss     dec     hex filename
	59233      1088     136   60457    ec29 before.so
	58945      1088     136   60169    eb09 after.so

(*) x86-64 Debian gcc 4.9.2-10
---
 ext/kcar/kcar.rl    | 87 +++++++++++++++++++++------------------------
 test/test_parser.rb |  1 +
 2 files changed, 41 insertions(+), 47 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 033e6ea..c0e8bbc 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -49,19 +49,17 @@ DEF_MAX_LENGTH(FRAGMENT, 1024); /* just in case (stolen from Mongrel) */
 DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */
 DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
 
-
-#define UH_FL_CHUNKED  0x1
-#define UH_FL_HASBODY  0x2
-#define UH_FL_INBODY   0x4
-#define UH_FL_HASTRAILER   0x8
-#define UH_FL_INTRAILER 0x10
-#define UH_FL_INCHUNK  0x20
-#define UH_FL_KEEPALIVE 0x40
-#define UH_FL_HASHEADER 0x80
-
 struct http_parser {
   int cs; /* Ragel internal state */
-  unsigned int flags;
+  unsigned int chunked:1;
+  unsigned int has_body:1;
+  unsigned int in_body:1;
+  unsigned int has_trailer:1;
+  unsigned int in_trailer:1;
+  unsigned int in_chunk:1;
+  unsigned int persistent:1;
+  unsigned int has_header:1;
+  unsigned int padding:24;
   unsigned int mark;
   unsigned int offset;
   union { /* these 2 fields don't nest */
@@ -99,11 +97,6 @@ static unsigned int ulong2uint(unsigned long n)
 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
 #define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC))
 
-#define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
-#define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
-#define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
-#define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
-
 /* Downcases a single ASCII character.  Locale-agnostic. */
 static void downcase_char(char *c)
 {
@@ -143,7 +136,7 @@ static VALUE stripped_str_new(const char *str, long len)
 
 static void finalize_header(struct http_parser *hp)
 {
-  if ((HP_FL_TEST(hp, HASTRAILER) && ! HP_FL_TEST(hp, CHUNKED)))
+  if (hp->has_trailer && !hp->chunked)
     rb_raise(eParserError, "trailer but not chunked");
 }
 
@@ -157,17 +150,17 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
   /* REQUEST_METHOD is always set before any headers */
   if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
     /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
-    HP_FL_SET(hp, KEEPALIVE);
+    hp->persistent = 1;
   } else if (STR_CSTR_CASE_EQ(val, "close")) {
     /*
      * it doesn't matter what HTTP version or request method we have,
      * if a server says "Connection: close", we disable keepalive
      */
-    HP_FL_UNSET(hp, KEEPALIVE);
+    hp->persistent = 0;
   } else {
     /*
      * server could've sent anything, ignore it for now.  Maybe
-     * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
+     * "hp->persistent = 0;" just in case?
      * Raising an exception might be too mean...
      */
   }
@@ -225,7 +218,7 @@ http_version(struct http_parser *hp, const char *ptr, size_t len)
 {
   if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
     /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
-    HP_FL_SET(hp, KEEPALIVE);
+    hp->persistent = 1;
   }
 }
 
@@ -243,12 +236,12 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
     rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
 
   if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
-    HP_FL_SET(hp, HASBODY);
+    hp->has_body = 1;
 }
 
 static inline void invalid_if_trailer(struct http_parser *hp)
 {
-  if (HP_FL_TEST(hp, INTRAILER))
+  if (hp->in_trailer)
     rb_raise(eParserError, "invalid Trailer");
 }
 
@@ -299,7 +292,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   const char *vptr;
   size_t vlen;
 
-  HP_FL_SET(hp, HASHEADER);
+  hp->has_header = 1;
 
   /* Rack does not like Status headers, so we never send them */
   if (CSTR_CASE_EQ(fptr, flen, "status")) {
@@ -323,9 +316,9 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   if (STR_CSTR_CASE_EQ(f, "connection")) {
     hp_keepalive_connection(hp, v);
   } else if (STR_CSTR_CASE_EQ(f, "content-length")) {
-    if (! HP_FL_TEST(hp, HASBODY))
+    if (!hp->has_body)
       rb_raise(eParserError, "Content-Length with no body expected");
-    if (HP_FL_TEST(hp, CHUNKED))
+    if (hp->chunked)
       rb_raise(eParserError,
                "Content-Length when chunked Transfer-Encoding is set");
     hp->len.content = parse_length(vptr, vlen);
@@ -336,7 +329,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
     invalid_if_trailer(hp);
   } else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
     if (STR_CSTR_CASE_EQ(v, "chunked")) {
-      if (! HP_FL_TEST(hp, HASBODY))
+      if (!hp->has_body)
         rb_raise(eParserError,
                  "chunked Transfer-Encoding with no body expected");
       if (hp->len.content >= 0)
@@ -344,13 +337,13 @@ static void write_value(VALUE hdr, struct http_parser *hp,
                  "chunked Transfer-Encoding when Content-Length is set");
 
       hp->len.chunk = 0;
-      HP_FL_SET(hp, CHUNKED);
+      hp->chunked = 1;
     }
     invalid_if_trailer(hp);
   } else if (STR_CSTR_CASE_EQ(f, "trailer")) {
-    if (! HP_FL_TEST(hp, HASBODY))
+    if (!hp->has_body)
       rb_raise(eParserError, "trailer with no body");
-    HP_FL_SET(hp, HASTRAILER);
+    hp->has_trailer = 1;
     invalid_if_trailer(hp);
   }
 
@@ -425,7 +418,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
     finalize_header(hp);
     cs = http_parser_first_final;
 
-    if (HP_FL_TEST(hp, CHUNKED))
+    if (hp->chunked)
       cs = http_parser_en_ChunkedBody;
 
     /*
@@ -441,7 +434,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   }
 
   action end_chunked_body {
-    HP_FL_SET(hp, INTRAILER);
+    hp->in_trailer = 1;
     cs = http_parser_en_Trailers;
     ++p;
     assert(p <= pe && "buffer overflow after chunked body");
@@ -457,7 +450,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
     p += nr;
     assert(hp->len.chunk >= 0 && "negative chunk length");
     if ((size_t)hp->len.chunk > REMAINING) {
-      HP_FL_SET(hp, INCHUNK);
+      hp->in_chunk = 1;
       goto post_exec;
     } else {
       fhold;
@@ -501,8 +494,8 @@ static void http_parser_execute(struct http_parser *hp,
   assert((void *)(pe - p) == (void *)(len - off) &&
          "pointers aren't same distance");
 
-  if (HP_FL_TEST(hp, INCHUNK)) {
-    HP_FL_UNSET(hp, INCHUNK);
+  if (hp->in_chunk) {
+    hp->in_chunk = 0;
     goto skip_chunk_data_hack;
   }
   %% write exec;
@@ -603,7 +596,7 @@ static VALUE body_bytes_left(VALUE self)
 {
   struct http_parser *hp = data_get(self);
 
-  if (HP_FL_TEST(hp, CHUNKED))
+  if (hp->chunked)
     return Qnil;
   if (hp->len.content >= 0)
     return OFFT2NUM(hp->len.content);
@@ -623,7 +616,7 @@ static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
 {
   struct http_parser *hp = data_get(self);
 
-  if (HP_FL_TEST(hp, CHUNKED))
+  if (hp->chunked)
     rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
   hp->len.content = NUM2OFFT(bytes);
   return bytes;
@@ -640,7 +633,7 @@ static VALUE chunked(VALUE self)
 {
   struct http_parser *hp = data_get(self);
 
-  return HP_FL_TEST(hp, CHUNKED) ? Qtrue : Qfalse;
+  return hp->chunked ? Qtrue : Qfalse;
 }
 
 static void check_buffer_size(long dlen)
@@ -674,7 +667,7 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
       hp->cs == http_parser_en_ChunkedBody) {
     advance_str(data, hp->offset + 1);
     hp->offset = 0;
-    if (HP_FL_TEST(hp, INTRAILER))
+    if (hp->in_trailer)
       return hdr;
     else
       return rb_ary_new3(2, hp->status, hdr);
@@ -688,7 +681,7 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
 
 static int chunked_eof(struct http_parser *hp)
 {
-  return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
+  return ((hp->cs == http_parser_first_final) || hp->in_trailer);
 }
 
 /**
@@ -702,13 +695,13 @@ static VALUE body_eof(VALUE self)
 {
   struct http_parser *hp = data_get(self);
 
-  if (!HP_FL_TEST(hp, HASHEADER) && HP_FL_ALL(hp, KEEPALIVE))
+  if (!hp->has_header && hp->persistent)
     return Qtrue;
 
-  if (HP_FL_TEST(hp, CHUNKED))
+  if (hp->chunked)
     return chunked_eof(hp) ? Qtrue : Qfalse;
 
-  if (! HP_FL_TEST(hp, HASBODY))
+  if (!hp->has_body)
     return Qtrue;
 
   return hp->len.content == 0 ? Qtrue : Qfalse;
@@ -730,9 +723,9 @@ static VALUE keepalive(VALUE self)
 {
   struct http_parser *hp = data_get(self);
 
-  if (HP_FL_ALL(hp, KEEPALIVE)) {
-    if (HP_FL_TEST(hp, HASHEADER) && HP_FL_TEST(hp, HASBODY) ) {
-      if (HP_FL_TEST(hp, CHUNKED) || (hp->len.content >= 0))
+  if (hp->persistent) {
+    if (hp->has_header && hp->has_body) {
+      if (hp->chunked || (hp->len.content >= 0))
         return Qtrue;
 
       /* unknown Content-Length and not chunked, we must assume close */
@@ -774,7 +767,7 @@ static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
   rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
   OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
 
-  if (!HP_FL_TEST(hp, CHUNKED))
+  if (!hp->chunked)
     rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
 
   if (!chunked_eof(hp)) {
diff --git a/test/test_parser.rb b/test/test_parser.rb
index 3768808..84e609d 100644
--- a/test/test_parser.rb
+++ b/test/test_parser.rb
@@ -300,6 +300,7 @@ class TestParser < Test::Unit::TestCase
     require 'objspace'
     n = ObjectSpace.memsize_of(@hp)
     assert_kind_of Integer, n
+    warn "memsize: #{n}\n" if $DEBUG
   rescue LoadError
     warn 'ObjectSpace not available'
   end

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 04/11] implement request parsing with tests
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (2 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 03/11] favor bitfields instead of flags + macros Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 05/11] pkg.mk: enable warnings by default for tests Eric Wong
                   ` (6 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Most of these tests are ported over from a Mongrel-derived
server.  Chunked body parsing is not implemented yet.
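
For context, the new Kcar::Parser#request API these tests exercise
fills a Rack-style env hash in place and returns it once the request
head is fully parsed (nil while incomplete); parsed bytes are consumed
from the buffer.  A minimal usage sketch based on the tests below:

    require 'kcar'

    hp = Kcar::Parser.new
    env = {}
    buf = "GET /hello?q=1 HTTP/1.1\r\nHost: example.com\r\n\r\n"

    # returns env when the request head is complete, nil if more data is needed
    if hp.request(env, buf)
      env['REQUEST_METHOD'] # => "GET"
      env['PATH_INFO']      # => "/hello"
      env['QUERY_STRING']   # => "q=1"
      env['HTTP_HOST']      # => "example.com"
      env['SERVER_NAME']    # => "example.com"
      env['SERVER_PORT']    # => "80"
      buf                   # => "" (request head consumed)
    end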
---
 ext/kcar/kcar.rl            | 338 ++++++++++++--
 test/test_request_parser.rb | 862 ++++++++++++++++++++++++++++++++++++
 2 files changed, 1162 insertions(+), 38 deletions(-)
 create mode 100644 test/test_request_parser.rb

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index c0e8bbc..01e4c76 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -16,8 +16,13 @@
 static VALUE eParserError;
 static ID id_uminus, id_sq, id_sq_set;
 static VALUE g_rack_url_scheme,
-  g_HOST, g_PATH_INFO, g_QUERY_STRING,
-  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI;
+  g_80, g_443, g_http, g_https,
+  g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING,
+  g_HTTP_VERSION,
+  g_CONTENT_LENGTH, g_CONTENT_TYPE,
+  g_PATH_INFO, g_QUERY_STRING,
+  g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI,
+  g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL;
 static VALUE e413, e414;
 
 /** Defines common length and error messages for input length validation. */
@@ -51,6 +56,9 @@ DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10));
 
 struct http_parser {
   int cs; /* Ragel internal state */
+  unsigned int is_request:1;
+  unsigned int has_query:1;
+  unsigned int has_scheme:1;
   unsigned int chunked:1;
   unsigned int has_body:1;
   unsigned int in_body:1;
@@ -59,7 +67,9 @@ struct http_parser {
   unsigned int in_chunk:1;
   unsigned int persistent:1;
   unsigned int has_header:1;
-  unsigned int padding:24;
+  unsigned int body_eof_seen:1;
+  unsigned int is_https:1;
+  unsigned int padding:19;
   unsigned int mark;
   unsigned int offset;
   union { /* these 2 fields don't nest */
@@ -71,7 +81,11 @@ struct http_parser {
     unsigned int dest_offset; /* only used during body processing */
   } s;
   VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
-  VALUE status; /* String or Qnil */
+  union {
+    /* String or Qnil */
+    VALUE status; /* status string for responses */
+    VALUE host; /* Host: header for requests */
+  } v;
   union {
     off_t content;
     off_t chunk;
@@ -134,10 +148,69 @@ static VALUE stripped_str_new(const char *str, long len)
   return rb_str_new(str, end + 1);
 }
 
-static void finalize_header(struct http_parser *hp)
+static VALUE request_host_val(struct http_parser *hp)
+{
+  assert(hp->is_request == 1 && "not a request");
+  return NIL_P(hp->v.host) ? Qfalse : hp->v.host;
+}
+
+static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
+{
+  char *host_ptr = RSTRING_PTR(host);
+  long host_len = RSTRING_LEN(host);
+  char *colon;
+  VALUE server_name = host;
+  VALUE server_port = hp->is_https ? g_443 : g_80;
+
+  if (*host_ptr == '[') { /* ipv6 address format */
+    char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);
+
+    if (rbracket)
+      colon = (rbracket[1] == ':') ? rbracket + 1 : NULL;
+    else
+      colon = memchr(host_ptr + 1, ':', host_len - 1);
+  } else {
+    colon = memchr(host_ptr, ':', host_len);
+  }
+
+  if (colon) {
+    long port_start = colon - host_ptr + 1;
+    long port_len = host_len - port_start;
+
+    server_name = rb_str_substr(host, 0, colon - host_ptr);
+    server_name = str_dd_freeze(server_name);
+    if (port_len > 0) {
+      server_port = rb_str_substr(host, port_start, port_len);
+      server_port = str_dd_freeze(server_port);
+    }
+  }
+  rb_hash_aset(env, g_SERVER_NAME, server_name);
+  rb_hash_aset(env, g_SERVER_PORT, server_port);
+}
+
+static void http_09_request(VALUE env)
+{
+  VALUE v = str_new_dd_freeze("HTTP/0.9", 8);
+
+  rb_hash_aset(env, g_SERVER_PROTOCOL, v);
+  rb_hash_aset(env, g_HTTP_VERSION, v);
+}
+
+static void finalize_header(struct http_parser *hp, VALUE hdr)
 {
   if (hp->has_trailer && !hp->chunked)
     rb_raise(eParserError, "trailer but not chunked");
+  if (hp->is_request) {
+    if (!hp->has_query)
+      rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
+    if (hp->has_header) {
+      VALUE host = request_host_val(hp);
+      if (host != Qfalse)
+        set_server_vars(hp, hdr, host);
+    } else {
+      http_09_request(hdr);
+    }
+  }
 }
 
 /*
@@ -173,28 +246,58 @@ request_method(VALUE env, const char *ptr, size_t len)
 }
 
 static void
-url_scheme(VALUE env, const char *ptr, size_t len)
+url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
-  rb_hash_aset(env, g_rack_url_scheme, str_new_dd_freeze(ptr, len));
+  VALUE val;
+
+  hp->has_scheme = 1;
+  /* Ragel machine downcases and enforces this as "http" or "https" */
+  if (len == 5) {
+    hp->is_https = 1;
+    assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'");
+    val = g_https;
+  } else {
+    assert(CONST_MEM_EQ("http", ptr, len) && "len != 4 but not 'http'");
+    val = g_http;
+  }
+  rb_hash_aset(env, g_rack_url_scheme, val);
 }
 
 static void
-request_host(VALUE env, const char *ptr, size_t len)
+request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
-  rb_hash_aset(env, g_HOST, str_new_dd_freeze(ptr, len));
+  VALUE val = rb_str_new(ptr, len);
+
+  rb_hash_aset(env, g_HTTP_HOST, val);
+  hp->v.host = val;
 }
 
 static void
 request_uri(VALUE env, const char *ptr, size_t len)
 {
+  VALUE val;
+
   VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI);
-  rb_hash_aset(env, g_REQUEST_URI, rb_str_new(ptr, len));
+  val = rb_str_new(ptr, len);
+  rb_hash_aset(env, g_REQUEST_URI, val);
+
+  /*
+   * rack says PATH_INFO must start with "/" or be empty,
+   * but "OPTIONS *" is a valid request
+   */
+  if (CONST_MEM_EQ("*", ptr, len)) {
+    val = rb_str_new(NULL, 0);
+    rb_hash_aset(env, g_PATH_INFO, val);
+    rb_hash_aset(env, g_REQUEST_PATH, val);
+  }
 }
 
 static void
-query_string(VALUE env, const char *ptr, size_t len)
+query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
   VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING);
+
+  hp->has_query = 1;
   rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len));
 }
 
@@ -204,22 +307,26 @@ request_path(VALUE env, const char *ptr, size_t len)
   VALUE val;
 
   VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH);
-  val = rb_hash_aset(env, g_REQUEST_PATH, rb_str_new(ptr, len));
-
-  /* rack says PATH_INFO must start with "/" or be empty */
-  if (CONST_MEM_EQ("*", ptr, len))
-    val = rb_str_new(NULL, 0);
+  val = rb_str_new(ptr, len);
 
+  rb_hash_aset(env, g_REQUEST_PATH, val);
   rb_hash_aset(env, g_PATH_INFO, val);
 }
 
 static void
-http_version(struct http_parser *hp, const char *ptr, size_t len)
+http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len)
 {
   if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
     /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
     hp->persistent = 1;
   }
+  if (hp->is_request) {
+    VALUE v = str_new_dd_freeze(ptr, len);
+    hp->has_header = 1;
+
+    rb_hash_aset(env, g_SERVER_PROTOCOL, v);
+    rb_hash_aset(env, g_HTTP_VERSION, v);
+  }
 }
 
 static void
@@ -227,13 +334,13 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
 {
   long nr;
 
-  hp->status = str_new_dd_freeze(ptr, len);
+  hp->v.status = str_new_dd_freeze(ptr, len);
 
   /* RSTRING_PTR is null terminated, ptr is not */
-  nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
+  nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10);
 
   if (nr < 100 || nr > 999)
-    rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
+    rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status));
 
   if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
     hp->has_body = 1;
@@ -282,8 +389,8 @@ static void write_cont_value(struct http_parser *hp,
   rb_str_buf_cat(hp->cont, vptr, end + 1);
 }
 
-static void write_value(VALUE hdr, struct http_parser *hp,
-                        const char *buffer, const char *p)
+static void write_response_value(struct http_parser *hp, VALUE hdr,
+                          const char *buffer, const char *p)
 {
   VALUE f, v;
   VALUE hclass;
@@ -382,6 +489,111 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   }
 }
 
+/* TODO cache */
+static VALUE req_field(const char *ptr, size_t len)
+{
+  size_t pfxlen = sizeof("HTTP_") - 1;
+  VALUE str = rb_str_new(NULL, pfxlen + len);
+  char *dst = RSTRING_PTR(str);
+
+  memcpy(dst, "HTTP_", pfxlen);
+  memcpy(dst + pfxlen, ptr, len);
+  assert(*(dst + RSTRING_LEN(str)) == '\0' &&
+         "string didn't end with \\0"); /* paranoia */
+
+  return str_dd_freeze(str);
+}
+
+static void snake_upcase(char *ptr, size_t len)
+{
+  char *c;
+
+  for (c = ptr; len--; c++) {
+    if (*c >= 'a' && *c <= 'z')
+      *c &= ~0x20;
+    else if (*c == '-')
+      *c = '_';
+  }
+}
+
+static void write_request_value(struct http_parser *hp, VALUE env,
+                            char *buffer, const char *p)
+{
+  char *fptr = PTR_TO(start.field);
+  size_t flen = hp->s.field_len;
+  char *vptr = PTR_TO(mark);
+  size_t vlen = LEN(mark, p);
+  VALUE key, val;
+  VALUE existing;
+
+  VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
+  VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE);
+  snake_upcase(fptr, flen);
+
+  /*
+   * ignore "Version" headers since they conflict with the HTTP_VERSION
+   * rack env variable.
+   */
+  if (CONST_MEM_EQ("VERSION", fptr, flen)) {
+    hp->cont = Qnil;
+    return;
+  }
+  val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen);
+
+  if (CONST_MEM_EQ("CONNECTION", fptr, flen)) {
+    key = g_HTTP_CONNECTION;
+    hp_keepalive_connection(hp, val);
+  } else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) {
+    key = g_CONTENT_LENGTH;
+    hp->len.content = parse_length(vptr, vlen);
+    if (hp->len.content < 0)
+      rb_raise(eParserError, "invalid Content-Length");
+    if (hp->len.content != 0)
+      hp->has_body = 1;
+    invalid_if_trailer(hp);
+  } else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) {
+    key = g_CONTENT_TYPE;
+  } else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) {
+    key = g_HTTP_TRANSFER_ENCODING;
+    if (STR_CSTR_CASE_EQ(val, "chunked")) {
+      hp->chunked = 1;
+      hp->has_body = 1;
+    }
+    invalid_if_trailer(hp);
+  } else if (CONST_MEM_EQ("TRAILER", fptr, flen)) {
+    key = g_HTTP_TRAILER;
+    hp->has_trailer = 1;
+    invalid_if_trailer(hp);
+  } else if (CONST_MEM_EQ("HOST", fptr, flen)) {
+    key = g_HTTP_HOST;
+    if (NIL_P(hp->v.host))
+      hp->v.host = val;
+  } else {
+    key = req_field(fptr, flen);
+  }
+  existing = rb_hash_aref(env, key);
+  if (NIL_P(existing)) {
+    hp->cont = rb_hash_aset(env, key, val);
+  /*
+   * Ignore repeated Host headers and favor host set by absolute URIs.
+   * absoluteURI Request-URI takes precedence over
+   * the Host: header (ref: rfc 2616, section 5.2.1)
+   */
+  } else if (key == g_HTTP_HOST) {
+     hp->cont = Qnil;
+  } else {
+    rb_str_buf_cat(existing, ",", 1);
+    hp->cont = rb_str_buf_append(existing, val);
+  }
+}
+
+static void write_value(struct http_parser *hp, VALUE hdr,
+                        char *buf, const char *p)
+{
+  hp->is_request ? write_request_value(hp, hdr, buf, p) :
+                   write_response_value(hp, hdr, buf, p);
+}
+
 /** Machine **/
 
 %%{
@@ -392,21 +604,21 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   action snake_upcase_field { snake_upcase_char(deconst(fpc)); }
   action downcase_char { downcase_char(deconst(fpc)); }
   action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); }
-  action url_scheme { url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); }
-  action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); }
 
   action start_query { MARK(start.query, fpc); }
   action query_string {
-    query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc));
+    query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc));
   }
   action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action start_field { MARK(start.field, fpc); }
   action write_field { hp->s.field_len = LEN(start.field, fpc); }
   action start_value { MARK(mark, fpc); }
-  action write_value { write_value(hdr, hp, buffer, fpc); }
+  action write_value { write_value(hp, hdr, buffer, fpc); }
   action write_cont_value { write_cont_value(hp, buffer, fpc); }
-  action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); }
+  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
   action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
 
   action add_to_chunk_size {
@@ -415,7 +627,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
       rb_raise(eParserError, "invalid chunk size");
   }
   action header_done {
-    finalize_header(hp);
+    finalize_header(hp, hdr);
     cs = http_parser_first_final;
 
     if (hp->chunked)
@@ -469,7 +681,7 @@ static void http_parser_init(struct http_parser *hp)
   int cs = 0;
   memset(hp, 0, sizeof(struct http_parser));
   hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
-  hp->status = Qnil;
+  hp->v.status = Qnil;
   hp->len.content = -1;
   %% write init;
   hp->cs = cs;
@@ -513,7 +725,7 @@ static void kcar_mark(void *ptr)
   struct http_parser *hp = ptr;
 
   rb_gc_mark(hp->cont);
-  rb_gc_mark(hp->status);
+  rb_gc_mark(hp->v.status);
 }
 
 static size_t kcar_memsize(const void *ptr)
@@ -642,6 +854,23 @@ static void check_buffer_size(long dlen)
     rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen);
 }
 
+static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf)
+{
+  char *ptr;
+  long len;
+
+  Check_Type(buf, T_STRING);
+  rb_str_modify(buf);
+  ptr = RSTRING_PTR(buf);
+  len = RSTRING_LEN(buf);
+  check_buffer_size(len);
+
+  http_parser_execute(hp, hdr, ptr, len);
+
+  if (hp->cs == http_parser_error)
+    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
+}
+
 /**
  * Document-method: headers
  * call-seq:
@@ -657,10 +886,11 @@ static void check_buffer_size(long dlen)
 static VALUE headers(VALUE self, VALUE hdr, VALUE data)
 {
   struct http_parser *hp = data_get(self);
-  long dlen = RSTRING_LEN(data);
 
-  check_buffer_size(dlen);
-  http_parser_execute(hp, hdr, RSTRING_PTR(data), dlen);
+  if (hp->is_request)
+    rb_raise(rb_eRuntimeError, "parser is handling a request, not response");
+
+  parser_execute(hp, hdr, data);
   VALIDATE_MAX_LENGTH(hp->offset, HEADER);
 
   if (hp->cs == http_parser_first_final ||
@@ -670,15 +900,33 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data)
     if (hp->in_trailer)
       return hdr;
     else
-      return rb_ary_new3(2, hp->status, hdr);
+      return rb_ary_new3(2, hp->v.status, hdr);
   }
 
-  if (hp->cs == http_parser_error)
-    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
-
   return Qnil;
 }
 
+static VALUE request(VALUE self, VALUE env, VALUE buf)
+{
+  struct http_parser *hp = data_get(self);
+
+  hp->is_request = 1;
+  Check_Type(buf, T_STRING);
+  parser_execute(hp, env, buf);
+
+  if (hp->cs == http_parser_first_final ||
+      hp->cs == http_parser_en_ChunkedBody) {
+    advance_str(buf, hp->offset + 1);
+    hp->offset = 0;
+    if (hp->in_trailer)
+      hp->body_eof_seen = 1;
+
+    return env;
+  }
+  return Qnil; /* incomplete */
+}
+
+
 static int chunked_eof(struct http_parser *hp)
 {
   return ((hp->cs == http_parser_first_final) || hp->in_trailer);
@@ -811,6 +1059,7 @@ void Init_kcar_ext(void)
   rb_define_alloc_func(cParser, kcar_alloc);
   rb_define_method(cParser, "initialize", initialize, 0);
   rb_define_method(cParser, "reset", initialize, 0);
+  rb_define_method(cParser, "request", request, 2);
   rb_define_method(cParser, "headers", headers, 2);
   rb_define_method(cParser, "trailers", headers, 2);
   rb_define_method(cParser, "filter_body", filter_body, 2);
@@ -846,13 +1095,26 @@ void Init_kcar_ext(void)
 
   globals = rb_ary_new();
   rb_global_variable(&globals);
-  C(globals, g_HOST, "HOST");
+  C(globals, g_CONTENT_LENGTH, "CONTENT_LENGTH");
+  C(globals, g_CONTENT_TYPE, "CONTENT_TYPE");
+  C(globals, g_HTTP_HOST, "HTTP_HOST");
+  C(globals, g_HTTP_CONNECTION, "HTTP_CONNECTION");
+  C(globals, g_HTTP_TRAILER, "HTTP_TRAILER");
+  C(globals, g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING");
+  C(globals, g_HTTP_VERSION, "HTTP_VERSION");
   C(globals, g_PATH_INFO, "PATH_INFO");
   C(globals, g_QUERY_STRING, "QUERY_STRING");
   C(globals, g_REQUEST_METHOD, "REQUEST_METHOD");
   C(globals, g_REQUEST_PATH, "REQUEST_PATH");
   C(globals, g_REQUEST_URI, "REQUEST_URI");
+  C(globals, g_SERVER_NAME, "SERVER_NAME");
+  C(globals, g_SERVER_PORT, "SERVER_PORT");
+  C(globals, g_SERVER_PROTOCOL, "SERVER_PROTOCOL");
   C(globals, g_rack_url_scheme, "rack.url_scheme");
+  C(globals, g_http, "http");
+  C(globals, g_https, "https");
+  C(globals, g_80, "80");
+  C(globals, g_443, "443");
   OBJ_FREEZE(globals);
 #undef C
 }
diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb
new file mode 100644
index 0000000..5e97a0e
--- /dev/null
+++ b/test/test_request_parser.rb
@@ -0,0 +1,862 @@
+# -*- encoding: binary -*-
+## Copyright (c) 2005 Zed A. Shaw
+# You can redistribute it and/or modify it under the same terms as Ruby 1.8
+# or GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+#
+# Additional work donated by contributors.  See git history of
+# unicorn for more information: git clone https://bogomips.org/unicorn.git
+
+require 'test/unit'
+require 'digest'
+require 'kcar'
+require 'uri'
+
+class TestRequestParser < Test::Unit::TestCase
+  def setup
+    @hp = Kcar::Parser.new
+    @env = {}
+  end
+
+  def test_parse_oneshot_simple
+    buf = "GET / HTTP/1.1\r\n\r\n"
+    http = 'http'.freeze
+    @env['rack.url_scheme'] = http
+    env = @hp.request(@env, buf.dup)
+    assert_same env, @env
+    exp = {
+      'SERVER_PROTOCOL' => 'HTTP/1.1',
+      'HTTP_VERSION' => 'HTTP/1.1',
+      'REQUEST_PATH' => '/',
+      'PATH_INFO' => '/',
+      'REQUEST_URI' => '/',
+      'REQUEST_METHOD' => 'GET',
+      'QUERY_STRING' => '',
+      'rack.url_scheme' => 'http',
+    }
+    assert_equal exp, env
+    assert_same env['HTTP_VERSION'], env['SERVER_PROTOCOL']
+    assert_predicate env['HTTP_VERSION'], :frozen?
+    assert_predicate env['REQUEST_METHOD'], :frozen?
+    assert_same http, env['rack.url_scheme']
+    assert_predicate @hp, :keepalive?
+
+    @hp.reset
+
+    buf = "G"
+    assert_nil @hp.request(@env, buf)
+    # try parsing again to ensure we were reset correctly
+    buf << "ET /hello-world HTTP/1.1\r\n\r\n"
+    assert_same env, @hp.request(env, buf)
+
+    assert_equal 'HTTP/1.1', env['SERVER_PROTOCOL']
+    assert_equal '/hello-world', env['REQUEST_PATH']
+    assert_equal 'HTTP/1.1', env['HTTP_VERSION']
+    assert_equal '/hello-world', env['REQUEST_URI']
+    assert_equal 'GET', env['REQUEST_METHOD']
+    assert_equal '', env['QUERY_STRING']
+    assert @hp.keepalive?
+  end
+
+  def test_tab_lws
+    @hp.request(@env, "GET / HTTP/1.1\r\nHost:\tfoo.bar\r\n\r\n")
+    assert_equal "foo.bar", @env['HTTP_HOST']
+  end
+
+  def test_connection_close_no_ka
+    @hp.request(@env = {}, "GET / HTTP/1.1\r\nConnection: close\r\n\r\n")
+    assert_equal 'GET', @env['REQUEST_METHOD']
+    assert ! @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_ka
+    @hp.request(@env, "HEAD / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n")
+    assert @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_no_body
+    r = @hp.request(@env, "POST / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n")
+    assert_same @env, r
+    assert @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_no_body_empty
+    buf = "POST / HTTP/1.1\r\n" \
+          "Content-Length: 0\r\n" \
+          "Connection: keep-alive\r\n\r\n"
+    assert_same @env, @hp.request(@env, buf)
+    assert @hp.keepalive?
+  end
+
+  def test_connection_keep_alive_ka_bad_version
+    @hp.request(@env, "GET / HTTP/1.0\r\nConnection: keep-alive\r\n\r\n")
+    assert @hp.keepalive?
+  end
+
+  def test_parse_server_host_default_port
+    buf = "GET / HTTP/1.1\r\nHost: foo\r\n\r\n"
+    assert_same @env, @hp.request(@env, buf)
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '80', @env['SERVER_PORT']
+    assert_equal '', buf
+    assert @hp.keepalive?
+  end
+
+  def test_parse_server_host_alt_port
+    buf = "GET / HTTP/1.1\r\nHost: foo:999\r\n\r\n"
+    @hp.request(@env, buf)
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '999', @env['SERVER_PORT']
+    assert_equal '', buf
+    assert @hp.keepalive?
+  end
+
+  def test_parse_server_host_empty_port
+    @hp.request(@env, "GET / HTTP/1.1\r\nHost: foo:\r\n\r\n")
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '80', @env['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_parse_host_cont
+    @hp.request(@env, "GET / HTTP/1.1\r\nHost:\r\n foo\r\n\r\n")
+    assert_equal 'foo', @env['SERVER_NAME']
+    assert_equal '80', @env['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_preserve_existing_server_vars
+    @env = {
+      'SERVER_NAME' => 'example.com',
+      'SERVER_PORT' => '1234',
+      'rack.url_scheme' => 'https'
+    }
+    @hp.request(@env, "GET / HTTP/1.0\r\n\r\n")
+    assert_equal 'example.com', @env['SERVER_NAME']
+    assert_equal '1234', @env['SERVER_PORT']
+    assert_equal 'https', @env['rack.url_scheme']
+  end
+
+  def test_parse_strange_headers
+    should_be_good = "GET / HTTP/1.1\r\naaaaaaaaaaaaa:++++++++++\r\n\r\n"
+    req = @hp.request(@env, should_be_good)
+    assert_same req, @env
+    assert_equal '', should_be_good
+    assert_predicate @hp, :keepalive?
+    assert_equal '++++++++++', @env['HTTP_AAAAAAAAAAAAA']
+  end
+
+  # legacy test case from Mongrel
+  # I still consider Pound irrelevant, unfortunately stupid clients that
+  # send extremely big headers do exist and they've managed to find us...
+  def test_nasty_pound_header
+    nasty_pound_header = "GET / HTTP/1.1\r\n" \
+"X-SSL-Bullshit:   -----BEGIN CERTIFICATE-----\r\n" \
+"\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n" \
+"\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n" \
+"\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n" \
+"\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n" \
+"\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n" \
+"\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n" \
+"\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n" \
+"\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n" \
+"\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n" \
+"\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n" \
+"\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n" \
+"\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n" \
+"\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n" \
+"\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n" \
+"\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n" \
+"\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n" \
+"\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n" \
+"\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n" \
+"\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n" \
+"\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n" \
+"\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n" \
+"\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n" \
+"\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n" \
+"\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n" \
+"\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n" \
+"\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n" \
+"\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n" \
+"\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n" \
+"\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n" \
+"\tRA==\r\n" \
+"\t-----END CERTIFICATE-----\r\n" \
+"\r\n"
+    ok = (/(-----BEGIN .*--END CERTIFICATE-----)/m =~ nasty_pound_header)
+    expect = $1.dup
+    assert ok, 'end certificate matched'
+    expect.gsub!(/\r\n\t/, ' ')
+    req = @hp.request(@env, nasty_pound_header.dup)
+    assert_equal expect, req['HTTP_X_SSL_BULLSHIT']
+  end
+
+  def test_multiline_header_0d0a
+    req = @hp.request(@env, "GET / HTTP/1.0\r\n" \
+      "X-Multiline-Header: foo bar\r\n\tcha cha\r\n\tzha zha\r\n\r\n")
+    assert_same req, @env
+    assert_equal 'foo bar cha cha zha zha', req['HTTP_X_MULTILINE_HEADER']
+  end
+
+  def test_multiline_header_0a
+    req = @hp.request(@env, "GET / HTTP/1.0\n" \
+      "X-Multiline-Header: foo bar\n\tcha cha\n\tzha zha\n\n")
+    assert_same req, @env
+    assert_equal 'foo bar cha cha zha zha', req['HTTP_X_MULTILINE_HEADER']
+  end
+
+  def test_continuation_eats_leading_spaces
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "\t\r\n" \
+             "    \r\n" \
+             "  ASDF\r\n\r\n"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal '', header
+    assert_equal 'ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_eats_scattered_leading_spaces
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:   hi\r\n" \
+             "    y\r\n" \
+             "\t\r\n" \
+             "       x\r\n" \
+             "  ASDF\r\n\r\n"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal '', header
+    assert_equal 'hi y x ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_eats_trailing_spaces
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "\t\r\n" \
+             "  b  \r\n" \
+             "  ASDF\r\n\r\nZ"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal 'Z', header
+    assert_equal 'b ASDF', req['HTTP_X_ASDF']
+  end
+
+  def test_continuation_with_absolute_uri_and_ignored_host_header
+    header = "GET http://example.com/ HTTP/1.1\r\n" \
+             "Host: \r\n" \
+             "    example.org\r\n" \
+             "\r\n"
+    req = @hp.request(@env, header)
+    assert_same req, @env
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/', req['PATH_INFO']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  # this may seem to be testing more of an implementation detail, but
+  # it also helps ensure we're safe in the presence of multiple parsers
+  # in case we ever go multithreaded/evented...
+  def test_resumable_continuations
+    nr = 1000
+    header = "GET / HTTP/1.1\r\n" \
+             "X-ASDF:      \r\n" \
+             "  hello\r\n"
+    tmp = []
+    nr.times { |i|
+      hp = Kcar::Parser.new
+      env = {}
+      assert_nil hp.request(env, buf = "#{header} #{i}\r\n")
+      asdf = env['HTTP_X_ASDF']
+      assert_equal "hello #{i}", asdf
+      tmp << [ hp, asdf, env, buf ]
+    }
+    tmp.each_with_index { |(hp, asdf, env, buf), i|
+      buf << " .\r\n\r\n"
+      assert_same env, hp.request(env, buf)
+      assert_equal "hello #{i} .", asdf
+    }
+  end
+
+  def test_invalid_continuation
+    header = "GET / HTTP/1.1\r\n" \
+             "    y\r\n" \
+             "Host: hello\r\n" \
+             "\r\n"
+    buf = header.dup
+    assert_raises(Kcar::ParserError) do
+      @hp.request(@env, buf)
+    end
+    assert_equal header, buf, 'no modification on invalid'
+  end
+
+  def test_parse_ie6_urls
+    %w(/some/random/path"
+       /some/random/path>
+       /some/random/path<
+       /we/love/you/ie6?q=<"">
+       /url?<="&>="
+       /mal"formed"?
+    ).each do |path|
+      sorta_safe = %(GET #{path} HTTP/1.1\r\n\r\n)
+      assert_same @env, @hp.request(@env, sorta_safe)
+      assert_equal path, @env['REQUEST_URI']
+      assert_equal '', sorta_safe
+      assert @hp.keepalive?
+      @hp.reset
+    end
+  end
+
+  def test_parse_error
+    bad_http = "GET / SsUTF/1.1"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, bad_http) }
+
+    # make sure we can recover
+    @env.clear
+    @hp.reset
+    assert_equal @env, @hp.request(@env, "GET / HTTP/1.0\r\n\r\n")
+    assert ! @hp.keepalive?
+  end
+
+  def test_piecemeal
+    http = "GET"
+    req = @env
+    assert_nil @hp.request(@env, http)
+    assert_nil @hp.request(@env, http)
+    assert_nil @hp.request(@env, http << " / HTTP/1.0")
+    assert_equal '/', req['REQUEST_PATH']
+    assert_equal '/', req['REQUEST_URI']
+    assert_equal 'GET', req['REQUEST_METHOD']
+    assert_nil @hp.request(req, http << "\r\n")
+    assert_equal 'HTTP/1.0', req['HTTP_VERSION']
+    assert_nil @hp.request(req, http << "\r")
+    assert_same req, @hp.request(req, http << "\n")
+    assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL']
+    assert_nil req['FRAGMENT']
+    assert_equal '', req['QUERY_STRING']
+    assert_equal "", http
+    assert ! @hp.keepalive?
+  end
+
+  # not common, but underscores do appear in practice
+  def test_absolute_uri_underscores
+    http = "GET https://under_score.example.com/foo?q=bar HTTP/1.0\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_same req, @env
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'under_score.example.com', req['HTTP_HOST']
+    assert_equal 'under_score.example.com', req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert_equal "", http
+    assert ! @hp.keepalive?
+  end
+
+  # some dumb clients add users because they're stupid
+  def test_absolute_uri_w_user
+    http = "GET http://user%20space@example.com/foo?q=bar HTTP/1.0\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_same req, @env
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "", http
+    assert ! @hp.keepalive?
+  end
+
+  # since Mongrel supported anything URI.parse supported, we're stuck
+  # supporting everything URI.parse supports
+  def test_absolute_uri_uri_parse
+    require 'uri'
+    "#{URI::REGEXP::PATTERN::UNRESERVED};:&=+$,".split(//).each do |char|
+      http = "GET http://#{char}@example.com/ HTTP/1.0\r\n\r\n"
+      req = @hp.request(@env, http)
+      assert_equal 'http', req['rack.url_scheme']
+      assert_equal '/', req['REQUEST_URI']
+      assert_equal '/', req['REQUEST_PATH']
+      assert_equal '', req['QUERY_STRING']
+
+      assert_equal 'example.com', req['HTTP_HOST']
+      assert_equal 'example.com', req['SERVER_NAME']
+      assert_equal '80', req['SERVER_PORT']
+      assert_equal "", http
+      assert ! @hp.keepalive?
+      @hp.reset
+    end
+  end
+
+  def test_absolute_uri
+    req = @hp.request(@env, "GET http://example.com/foo?q=bar HTTP/1.0\r\n\r\n")
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert ! @hp.keepalive?
+  end
+
+  def test_absolute_uri_https
+    http = "GET https://example.com/foo?q=bar HTTP/1.1\r\n" \
+           "X-Foo: bar\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert_equal "", http
+    assert @hp.keepalive?
+  end
+
+  # Host: header should be ignored for absolute URIs
+  def test_absolute_uri_with_port
+    req = @hp.request(@env,"GET http://example.com:8080/foo?q=bar HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n")
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com:8080', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '8080', req['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_absolute_uri_with_empty_port
+    req = @hp.request(@env, "GET https://example.com:/foo?q=bar HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n")
+    assert_same req, @env
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+    assert_equal 'example.com:', req['HTTP_HOST']
+    assert_equal 'example.com', req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert @hp.keepalive?
+  end
+
+  def test_absolute_ipv6_uri
+    url = "http://[::1]/foo?q=bar"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_same req, @env
+    assert_equal 'http', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    uri = URI.parse(url)
+    assert_equal "[::1]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::1]", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "", http
+  end
+
+  def test_absolute_ipv6_uri_alpha
+    url = "http://[::a]/"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'http', req['rack.url_scheme']
+    uri = URI.parse(url)
+    assert_equal "[::a]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::a]", req['HTTP_HOST']
+    assert_equal "[::a]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  def test_absolute_ipv6_uri_alpha_2
+    url = "http://[::B]/"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'http', req['rack.url_scheme']
+
+    uri = URI.parse(url)
+    assert_equal "[::B]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::B]", req['HTTP_HOST']
+    assert_equal "[::B]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  def test_absolute_ipv6_uri_with_empty_port
+    url = "https://[::1]:/foo?q=bar"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    uri = URI.parse(url)
+    assert_equal "[::1]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::1]:", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '443', req['SERVER_PORT']
+    assert_equal "", http
+  end
+
+  def test_absolute_ipv6_uri_with_port
+    url = "https://[::1]:666/foo?q=bar"
+    http = "GET #{url} HTTP/1.1\r\n" \
+           "Host: bad.example.com\r\n\r\n"
+    req = @hp.request(@env, http)
+    assert_equal 'https', req['rack.url_scheme']
+    assert_equal '/foo?q=bar', req['REQUEST_URI']
+    assert_equal '/foo', req['REQUEST_PATH']
+    assert_equal 'q=bar', req['QUERY_STRING']
+
+    uri = URI.parse(url)
+    assert_equal "[::1]", uri.host,
+                 "URI.parse changed upstream for #{url}? host=#{uri.host}"
+    assert_equal "[::1]:666", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '666', req['SERVER_PORT']
+    assert_equal "", http
+  end
+
+  def test_ipv6_host_header
+    buf = "GET / HTTP/1.1\r\n" \
+          "Host: [::1]\r\n\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal "[::1]", req['HTTP_HOST']
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+  end
+
+  def test_ipv6_host_header_with_port
+    req = @hp.request(@env, "GET / HTTP/1.1\r\n" \
+                  "Host: [::1]:666\r\n\r\n")
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '666', req['SERVER_PORT']
+    assert_equal "[::1]:666", req['HTTP_HOST']
+  end
+
+  def test_ipv6_host_header_with_empty_port
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1]:\r\n\r\n")
+    assert_equal "[::1]", req['SERVER_NAME']
+    assert_equal '80', req['SERVER_PORT']
+    assert_equal "[::1]:", req['HTTP_HOST']
+  end
+
+  # XXX Highly unlikely..., just make sure we don't segfault or assert on it
+  def test_broken_ipv6_host_header
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1:\r\n\r\n")
+    assert_equal "[", req['SERVER_NAME']
+    assert_equal ':1:', req['SERVER_PORT']
+    assert_equal "[::1:", req['HTTP_HOST']
+  end
+
+  def test_put_body_oneshot
+    buf = "PUT / HTTP/1.0\r\nContent-Length: 5\r\n\r\nabcde"
+    req = @hp.request(@env, buf)
+    assert_equal '/', req['REQUEST_PATH']
+    assert_equal '/', req['REQUEST_URI']
+    assert_equal 'PUT', req['REQUEST_METHOD']
+    assert_equal 'HTTP/1.0', req['HTTP_VERSION']
+    assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL']
+    assert_equal "abcde", buf
+  end
+
+  def test_put_body_later
+    buf = "PUT /l HTTP/1.0\r\nContent-Length: 5\r\n\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal '/l', req['REQUEST_PATH']
+    assert_equal '/l', req['REQUEST_URI']
+    assert_equal 'PUT', req['REQUEST_METHOD']
+    assert_equal 'HTTP/1.0', req['HTTP_VERSION']
+    assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL']
+    assert_equal "", buf
+  end
+
+  def test_unknown_methods
+    %w(GETT HEADR XGET XHEAD).each { |m|
+      s = "#{m} /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"
+      req = @hp.request(@env, s)
+      assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI']
+      assert_nil req['FRAGMENT']
+      assert_equal 'page=1', req['QUERY_STRING']
+      assert_equal "", s
+      assert_equal m, req['REQUEST_METHOD']
+      @hp.reset
+    }
+  end
+
+  def test_fragment_in_uri
+    get = "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI']
+    assert_nil req['FRAGMENT']
+    assert_equal 'page=1', req['QUERY_STRING']
+    assert_equal '', get
+  end
+
+  # lame random garbage maker
+  def rand_data(min, max, readable=true)
+    count = min + ((rand(max)+1) *10).to_i
+    res = count.to_s + "/"
+
+    if readable
+      res << Digest::SHA1.hexdigest(rand(count * 100).to_s) * (count / 40)
+    else
+      res << Digest::SHA1.digest(rand(count * 100).to_s) * (count / 20)
+    end
+
+    return res
+  end
+
+  def test_horrible_queries
+    # first check that large header names are caught
+    10.times do |c|
+      get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-#{rand_data(1024, 1024+(c*1024))}: Test\r\n\r\n"
+      assert_raises(Kcar::ParserError, Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+        @hp.clear
+      end
+    end
+
+    # then that large mangled field values are caught
+    10.times do |c|
+      get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-Test: #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n"
+      assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+        @hp.clear
+      end
+    end
+
+    # then that very large header blocks are rejected (FIXME: not supported yet)
+    if false
+      get = "GET /#{rand_data(10,120)} HTTP/1.1\r\n"
+      get << "X-Test: test\r\n" * (80 * 1024)
+      @hp.reset
+      assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+      end
+    end
+
+    # finally, that random garbage is always blocked
+    10.times do |c|
+      get = "GET #{rand_data(1024, 1024+(c*1024), false)} #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n"
+      @hp.reset
+      assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do
+        @hp.request(@env, get)
+      end
+    end
+  end
+
+  def test_leading_tab
+    get = "GET / HTTP/1.1\r\nHost:\texample.com\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_trailing_whitespace
+    get = "GET / HTTP/1.1\r\nHost: example.com \r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_trailing_tab
+    get = "GET / HTTP/1.1\r\nHost: example.com\t\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_trailing_multiple_linear_whitespace
+    get = "GET / HTTP/1.1\r\nHost: example.com\t \t \t\r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal 'example.com', req['HTTP_HOST']
+  end
+
+  def test_embedded_linear_whitespace_ok
+    get = "GET / HTTP/1.1\r\nX-Space: hello\t world\t \r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal "hello\t world", req["HTTP_X_SPACE"]
+  end
+
+  def test_null_byte_header
+    get = "GET / HTTP/1.1\r\nHost: \0\r\n\r\n"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, get) }
+  end
+
+  def test_null_byte_in_middle
+    get = "GET / HTTP/1.1\r\nHost: hello\0world\r\n\r\n"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, get) }
+  end
+
+  def test_null_byte_at_end
+    get = "GET / HTTP/1.1\r\nHost: hello\0\r\n\r\n"
+    assert_raises(Kcar::ParserError) { @hp.request(@env, get) }
+  end
+
+  def test_empty_header
+    get = "GET / HTTP/1.1\r\nHost:  \r\n\r\n"
+    req = @hp.request(@env, get)
+    assert_equal '', req['HTTP_HOST']
+  end
+
+  def test_connection_TE
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: example.com\r\n" \
+                            "Connection: TE\r\n" \
+                            "TE: trailers\r\n\r\n")
+    assert_predicate @hp, :keepalive?
+    assert_equal 'TE', req['HTTP_CONNECTION']
+  end
+
+  def test_repeat_headers
+    str = "PUT / HTTP/1.1\r\n" \
+          "Trailer: Content-MD5\r\n" \
+          "Trailer: Content-SHA1\r\n" \
+          "transfer-Encoding: chunked\r\n\r\n" \
+          "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 'Content-MD5,Content-SHA1', req['HTTP_TRAILER']
+    assert_equal "1\r\na\r\n2\r\n..\r\n0\r\n", str
+    assert @hp.keepalive?
+  end
+
+  def test_http_09
+    buf = "GET /read-rfc1945-if-you-dont-believe-me\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal '', buf
+    expect = {
+      "REQUEST_PATH" => "/read-rfc1945-if-you-dont-believe-me",
+      "PATH_INFO" => "/read-rfc1945-if-you-dont-believe-me",
+      "REQUEST_URI" => "/read-rfc1945-if-you-dont-believe-me",
+      "SERVER_PROTOCOL" => "HTTP/0.9",
+      "HTTP_VERSION" => "HTTP/0.9",
+      "REQUEST_METHOD" => "GET",
+      "QUERY_STRING" => ""
+    }
+    assert_equal expect, req
+  end
+
+  def test_path_info_semicolon
+    qs = "QUERY_STRING"
+    pi = "PATH_INFO"
+    req = {}
+    str = "GET %s HTTP/1.1\r\nHost: example.com\r\n\r\n"
+    {
+      "/1;a=b?c=d&e=f" => { qs => "c=d&e=f", pi => "/1;a=b" },
+      "/1?c=d&e=f" => { qs => "c=d&e=f", pi => "/1" },
+      "/1;a=b" => { qs => "", pi => "/1;a=b" },
+      "/1;a=b?" => { qs => "", pi => "/1;a=b" },
+      "/1?a=b;c=d&e=f" => { qs => "a=b;c=d&e=f", pi => "/1" },
+      "*" => { qs => "", pi => "" },
+    }.each do |uri,expect|
+      @env.clear
+      @hp.reset
+      buf = str % [ uri ]
+      req = @hp.request(@env, buf)
+      assert_equal uri, req["REQUEST_URI"], "REQUEST_URI mismatch"
+      assert_equal expect[qs], req[qs], "#{qs} mismatch"
+      assert_equal expect[pi], req[pi], "#{pi} mismatch"
+      next if uri == "*"
+      uri = URI.parse("http://example.com#{uri}")
+      assert_equal uri.query.to_s, req[qs], "#{qs} mismatch URI.parse disagrees"
+      assert_equal uri.path, req[pi], "#{pi} mismatch URI.parse disagrees"
+    end
+  end
+
+  def test_path_info_semicolon_absolute
+    qs = "QUERY_STRING"
+    pi = "PATH_INFO"
+    str = "GET http://example.com%s HTTP/1.1\r\nHost: www.example.com\r\n\r\n"
+    {
+      "/1;a=b?c=d&e=f" => { qs => "c=d&e=f", pi => "/1;a=b" },
+      "/1?c=d&e=f" => { qs => "c=d&e=f", pi => "/1" },
+      "/1;a=b" => { qs => "", pi => "/1;a=b" },
+      "/1;a=b?" => { qs => "", pi => "/1;a=b" },
+      "/1?a=b;c=d&e=f" => { qs => "a=b;c=d&e=f", pi => "/1" },
+    }.each do |uri,expect|
+      @hp.reset
+      @env.clear
+      buf = str % [ uri ]
+      req = @hp.request(@env, buf)
+      assert_equal uri, req["REQUEST_URI"], "REQUEST_URI mismatch"
+      assert_equal "example.com", req["HTTP_HOST"], "Host: mismatch"
+      assert_equal expect[qs], req[qs], "#{qs} mismatch"
+      assert_equal expect[pi], req[pi], "#{pi} mismatch"
+    end
+  end
+
+  def test_negative_content_length
+    str = "PUT / HTTP/1.1\r\n" \
+          "Content-Length: -1\r\n" \
+          "\r\n"
+    assert_raises(Kcar::ParserError) do
+      @hp.request(@env, str)
+    end
+  end
+
+  def test_invalid_content_length
+    str = "PUT / HTTP/1.1\r\n" \
+          "Content-Length: zzzzz\r\n" \
+          "\r\n"
+    assert_raises(Kcar::ParserError) do
+      @hp.request(@env, str)
+    end
+  end
+
+  def test_ignore_version_header
+    req = @hp.request(@env, "GET / HTTP/1.1\r\nVersion: hello\r\n\r\n")
+    expect = {
+      'REQUEST_PATH' => '/',
+      'SERVER_PROTOCOL' => 'HTTP/1.1',
+      'PATH_INFO' => '/',
+      'HTTP_VERSION' => 'HTTP/1.1',
+      'REQUEST_URI' => '/',
+      'REQUEST_METHOD' => 'GET',
+      'QUERY_STRING' => ''
+    }
+    assert_equal expect, req
+  end
+
+  def test_pipelined_requests
+    expect = {
+      'HTTP_HOST' => 'example.com',
+      'SERVER_NAME' => 'example.com',
+      'SERVER_PORT' => '80',
+      'REQUEST_PATH' => '/',
+      'SERVER_PROTOCOL' => 'HTTP/1.1',
+      'PATH_INFO' => '/',
+      'HTTP_VERSION' => 'HTTP/1.1',
+      'REQUEST_URI' => '/',
+      'REQUEST_METHOD' => 'GET',
+      'QUERY_STRING' => ''
+    }
+    req1 = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
+    req2 = "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n"
+    buf = req1 + req2
+    env1 = @hp.request(@env, buf)
+    assert_equal expect, env1
+    assert_equal req2, buf
+    assert_predicate @hp, :keepalive?
+    @env.clear
+    @hp.reset
+    env2 = @hp.request(@env, buf)
+    expect['HTTP_HOST'] = expect['SERVER_NAME'] = 'www.example.com'
+    assert_equal expect, env2
+    assert_equal '', buf
+  end
+end

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 05/11] pkg.mk: enable warnings by default for tests
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (3 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 04/11] implement request parsing with tests Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 06/11] filter_body: rename variables to be like memcpy(3) Eric Wong
                   ` (5 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

---
 pkg.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg.mk b/pkg.mk
index 325b8eb..df8cfec 100644
--- a/pkg.mk
+++ b/pkg.mk
@@ -119,7 +119,7 @@ test: check
 check: test-unit
 test-unit: $(test_units)
 $(test_units): build
-	$(VALGRIND) $(RUBY) -I $(lib) $@ $(RUBY_TEST_OPTS)
+	$(VALGRIND) $(RUBY) -w -I $(lib) $@ $(RUBY_TEST_OPTS)
 
 # this requires GNU coreutils variants
 ifneq ($(RSYNC_DEST),)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 06/11] filter_body: rename variables to be like memcpy(3)
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (4 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 05/11] pkg.mk: enable warnings by default for tests Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 07/11] flesh out filter_body for request parsing Eric Wong
                   ` (4 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

This makes the naming more consistent with terminology used for
memcpy, since this method is intended to act like memcpy(3).

No actual functionality change.
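
As a rough usage sketch of the dst/src roles (assuming +hp+ is a
Kcar::Parser which has already seen the headers of a chunked message,
and +sock+ is the IO the body arrives on; those names and the
16384-byte reads are assumptions, not part of this change):

  src  = ''.dup   # raw, still-chunked bytes off the wire
  dst  = ''.dup   # scratch buffer filter_body copies into
  body = ''.dup   # accumulated, dechunked body
  loop do
    src << sock.readpartial(16384)
    hp.filter_body(dst, src) # memcpy-like: src -> dst, minus the chunking
    body << dst              # dst only holds the bytes copied by this call
    break if hp.body_eof?    # terminating 0-chunk has been parsed
  end
  # whatever remains in +src+ is trailer or pipelined data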
---
 ext/kcar/kcar.rl | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 01e4c76..ea3dacc 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -988,55 +988,55 @@ static VALUE keepalive(VALUE self)
 
 /**
  * call-seq:
- *    parser.filter_body(buf, data) => nil/data
+ *    parser.filter_body(dst, src) => nil/dst
  *
- * Takes a String of +data+, will modify data if dechunking is done.
- * Returns +nil+ if there is more data left to process.  Returns
- * +data+ if body processing is complete. When returning +data+,
- * it may modify +data+ so the start of the string points to where
+ * Takes a String of +src+, will modify src if dechunking is done.
+ * Returns +nil+ if there is more +src+ left to process.  Returns
+ * +dst+ if body processing is complete. When returning +dst+,
+ * it may modify +src+ so the start of the string points to where
  * the body ended so that trailer processing can begin.
  *
  * Raises ParserError if there are dechunking errors.
- * Basically this is a glorified memcpy(3) that copies +data+
- * into +buf+ while filtering it through the dechunker.
+ * Basically this is a glorified memcpy(3) that copies +src+
+ * into +dst+ while filtering it through the dechunker.
  */
-static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
+static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
 {
   struct http_parser *hp = data_get(self);
-  char *dptr;
-  long dlen;
+  char *sptr;
+  long slen;
 
-  dptr = RSTRING_PTR(data);
-  dlen = RSTRING_LEN(data);
-  check_buffer_size(dlen);
+  sptr = RSTRING_PTR(src);
+  slen = RSTRING_LEN(src);
+  check_buffer_size(slen);
 
-  StringValue(buf);
-  rb_str_modify(buf);
-  rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
-  OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
+  StringValue(dst);
+  rb_str_modify(dst);
+  rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
+  OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */
 
   if (!hp->chunked)
     rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
 
   if (!chunked_eof(hp)) {
     hp->s.dest_offset = 0;
-    http_parser_execute(hp, buf, dptr, dlen);
+    http_parser_execute(hp, dst, sptr, slen);
     if (hp->cs == http_parser_error)
       rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
 
     assert(hp->s.dest_offset <= hp->offset &&
            "destination buffer overflow");
-    advance_str(data, hp->offset);
-    rb_str_set_len(buf, hp->s.dest_offset);
+    advance_str(src, hp->offset);
+    rb_str_set_len(dst, hp->s.dest_offset);
 
-    if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
+    if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) {
       assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
     } else {
-      data = Qnil;
+      dst = Qnil;
     }
   }
   hp->offset = 0; /* for trailer parsing */
-  return data;
+  return dst;
 }
 
 void Init_kcar_ext(void)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 07/11] flesh out filter_body for request parsing
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (5 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 06/11] filter_body: rename variables to be like memcpy(3) Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 08/11] do not assume SERVER_PORT Eric Wong
                   ` (3 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Relying on Kcar::Parser#body_bytes_left= is still more
efficient, but allowing filter_body to work for identity
request bodies should simplify server implementations
at the moment.

When parsing responses, identity bodies will still fail on
filter_body, at least for now. It's always been that way, and
keeping it that way may make it easier to write backwards-compatible
code for anyone who wants to stick with an older version of kcar.
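
As a loose sketch of what this buys a server for identity bodies
(+sock+, the read size, and buffering the whole body in memory are
assumptions for illustration only):

  hp  = Kcar::Parser.new
  env = {}
  buf = sock.readpartial(16384)
  buf << sock.readpartial(16384) until hp.request(env, buf)
  left = env['CONTENT_LENGTH'].to_i # hp.body_bytes_left tracks the same count
  body, dst = ''.dup, ''.dup
  while left > 0
    buf << sock.readpartial(16384) if buf.empty?
    hp.filter_body(dst, buf)        # copies at most +left+ bytes out of +buf+
    body << dst
    left -= dst.size
  end
  # anything still sitting in +buf+ belongs to the next (pipelined) request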
---
 ext/kcar/kcar.rl            |  41 +++-
 test/test_request_parser.rb | 362 +++++++++++++++++++++++++++++++++++-
 2 files changed, 399 insertions(+), 4 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index ea3dacc..6330910 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -201,6 +201,13 @@ static void finalize_header(struct http_parser *hp, VALUE hdr)
   if (hp->has_trailer && !hp->chunked)
     rb_raise(eParserError, "trailer but not chunked");
   if (hp->is_request) {
+    if (hp->chunked) {
+      if (hp->len.chunk >= 0)
+        rb_raise(eParserError, "Content-Length set with chunked encoding");
+      else
+        hp->len.chunk = 0;
+    }
+
     if (!hp->has_query)
       rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0));
     if (hp->has_header) {
@@ -831,6 +838,8 @@ static VALUE body_bytes_left_set(VALUE self, VALUE bytes)
   if (hp->chunked)
     rb_raise(rb_eRuntimeError, "body_bytes_left= is not for chunked bodies");
   hp->len.content = NUM2OFFT(bytes);
+  if (hp->len.content == 0)
+      hp->body_eof_seen = 1;
   return bytes;
 }
 
@@ -973,8 +982,12 @@ static VALUE keepalive(VALUE self)
 
   if (hp->persistent) {
     if (hp->has_header && hp->has_body) {
-      if (hp->chunked || (hp->len.content >= 0))
-        return Qtrue;
+      if (hp->chunked || (hp->len.content >= 0)) {
+        if (!hp->is_request)
+          return Qtrue;
+        else
+          return hp->body_eof_seen ? Qtrue : Qfalse;
+      }
 
       /* unknown Content-Length and not chunked, we must assume close */
       return Qfalse;
@@ -1012,12 +1025,34 @@ static VALUE filter_body(VALUE self, VALUE dst, VALUE src)
 
   StringValue(dst);
   rb_str_modify(dst);
-  rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
   OBJ_TAINT(dst); /* keep weirdo $SAFE users happy */
 
+  /*
+   * for now, only support filter_body for identity requests,
+   * not responses; it's rather inefficient to blindly memcpy
+   * giant request bodies; on the other hand, it simplifies
+   * server-side code.
+   */
+  if (hp->is_request && !hp->chunked) {
+    /* no need to enter the Ragel machine for unchunked transfers */
+    assert(hp->len.content >= 0 && "negative Content-Length");
+    if (hp->len.content > 0) {
+      long nr = MIN(slen, hp->len.content);
+
+      rb_str_resize(dst, nr);
+      memcpy(RSTRING_PTR(dst), sptr, nr);
+      hp->len.content -= nr;
+      if (hp->len.content == 0)
+        hp->body_eof_seen = 1;
+      advance_str(src, nr);
+    }
+    return dst;
+  }
+
   if (!hp->chunked)
     rb_raise(rb_eRuntimeError, "filter_body is only for chunked bodies");
 
+  rb_str_resize(dst, slen); /* we can never copy more than slen bytes */
   if (!chunked_eof(hp)) {
     hp->s.dest_offset = 0;
     http_parser_execute(hp, dst, sptr, slen);
diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb
index 5e97a0e..7654e62 100644
--- a/test/test_request_parser.rb
+++ b/test/test_request_parser.rb
@@ -732,7 +732,7 @@ class TestRequestParser < Test::Unit::TestCase
     req = @hp.request(@env, str)
     assert_equal 'Content-MD5,Content-SHA1', req['HTTP_TRAILER']
     assert_equal "1\r\na\r\n2\r\n..\r\n0\r\n", str
-    assert @hp.keepalive?
+    assert_not_predicate @hp, :keepalive?
   end
 
   def test_http_09
@@ -859,4 +859,364 @@ class TestRequestParser < Test::Unit::TestCase
     assert_equal expect, env2
     assert_equal '', buf
   end
+
+  def test_identity_byte_headers
+    str = "PUT / HTTP/1.1\r\n"
+    str << "Content-Length: 123\r\n"
+    str << "\r"
+    buf = ''
+    str.each_byte { |byte|
+      buf << byte.chr
+      assert_nil @hp.request(@env, buf)
+    }
+    buf << "\n"
+    req = @hp.request(@env, buf)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_predicate buf, :empty?
+    assert ! @hp.keepalive?
+    assert_equal 123, @hp.body_bytes_left
+    dst = ""
+    buf = '.' * 123
+    @hp.filter_body(dst, buf)
+    assert_equal '.' * 123, dst
+    assert_equal "", buf
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_step_headers
+    str = "PUT / HTTP/1.1\r\n"
+    assert_nil @hp.request(@env, str)
+    str << "Content-Length: 123\r\n"
+    assert_nil @hp.request(@env, str)
+    str << "\r\n"
+    req = @hp.request(@env, str)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_equal 0, str.size
+    assert_not_predicate @hp, :keepalive?
+    dst = ""
+    buf = '.' * 123
+    @hp.filter_body(dst, buf)
+    assert_equal '.' * 123, dst
+    assert_equal "", buf
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_oneshot_header
+    str = "PUT / HTTP/1.1\r\nContent-Length: 123\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_equal 0, str.size
+    assert_not_predicate @hp, :keepalive?
+    dst = ""
+    buf = '.' * 123
+    @hp.filter_body(dst, buf)
+    assert_equal '.' * 123, dst
+    assert_equal "", buf
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_oneshot_header_with_body
+    body = ('a' * 123).freeze
+    str = "PUT / HTTP/1.1\r\n" \
+           "Content-Length: #{body.length}\r\n" \
+           "\r\n#{body}"
+    req = @hp.request(@env, str)
+    assert_equal '123', req['CONTENT_LENGTH']
+    assert_equal 123, str.size
+    assert_equal body, str
+    tmp = ''
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal 0, str.size
+    assert_equal tmp, body
+    assert_equal body, @hp.filter_body(tmp, str)
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_identity_oneshot_header_with_body_partial
+    str = "PUT / HTTP/1.1\r\nContent-Length: 123\r\n\r\na"
+    req = @hp.request(@env, str)
+    assert_equal 1, str.size
+    assert_equal 'a', str
+    tmp = ''
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal "", str
+    assert_equal "a", tmp
+    str << ' ' * 122
+    rv = @hp.filter_body(tmp, str)
+    assert_equal 122, tmp.size
+    assert_same tmp, rv
+    assert_equal "", str
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_predicate @hp, :keepalive?
+    assert_equal '123', req['CONTENT_LENGTH']
+  end
+
+  def test_identity_oneshot_header_with_body_slop
+    str = "PUT / HTTP/1.1\r\nContent-Length: 1\r\n\r\naG"
+    req = @hp.request(@env, str)
+    assert_equal 2, str.size
+    assert_equal 'aG', str
+    tmp = ''
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal "G", str
+    assert_equal "a", @hp.filter_body(tmp, str)
+    assert_equal 1, tmp.size
+    assert_equal "a", tmp
+    assert_predicate @hp, :keepalive?
+    assert_equal '1', req['CONTENT_LENGTH']
+  end
+
+  def test_chunked
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 0, str.size
+    tmp = ""
+    assert_predicate @hp, :chunked?
+    assert_nil @hp.filter_body(tmp, str << "6")
+    assert_equal 0, tmp.size
+    assert_nil @hp.filter_body(tmp, str << "\r\n")
+    assert_equal 0, str.size
+    assert_equal 0, tmp.size
+    tmp = ""
+    assert_nil @hp.filter_body(tmp, str << "..")
+    assert_equal "..", tmp
+    assert_nil @hp.filter_body(tmp, str << "abcd\r\n0\r\n")
+    assert_equal "abcd", tmp
+    assert_same tmp, @hp.filter_body(tmp, str << "PUT")
+    assert_equal "PUT", str
+    assert_not_predicate @hp, :keepalive?
+    str << "TY: FOO\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal "FOO", req["HTTP_PUTTY"]
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_chunked_empty
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 0, str.size
+    tmp = ""
+    assert_same tmp, @hp.filter_body(tmp, str << "0\r\n\r\n")
+    assert_predicate @hp, :body_eof?
+    assert_equal "", tmp
+    assert_same req, @hp.request(@env, str)
+    assert_equal "", str
+  end
+
+  def test_two_chunks
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    assert_equal 0, str.size
+    tmp = ""
+    assert_nil @hp.filter_body(tmp, str << "6")
+    assert_equal 0, tmp.size
+    assert_nil @hp.filter_body(tmp, str << "\r\n")
+    assert_equal "", str
+    assert_equal 0, tmp.size
+    tmp = ""
+    assert_nil @hp.filter_body(tmp, str << "..")
+    assert_equal 2, tmp.size
+    assert_equal "..", tmp
+    assert_nil @hp.filter_body(tmp, str << "abcd\r\n1")
+    assert_equal "abcd", tmp
+    assert_nil @hp.filter_body(tmp, str << "\r")
+    assert_equal "", tmp
+    assert_nil @hp.filter_body(tmp, str << "\n")
+    assert_equal "", tmp
+    assert_nil @hp.filter_body(tmp, str << "z")
+    assert_equal "z", tmp
+    assert_nil @hp.filter_body(tmp, str << "\r\n")
+    assert_nil @hp.filter_body(tmp, str << "0")
+    assert_nil @hp.filter_body(tmp, str << "\r")
+    rv = @hp.filter_body(tmp, str << "\nGET")
+    assert_equal "GET", str
+    assert_same tmp, rv
+    assert_equal '', tmp
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_big_chunk
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n" \
+          "4000\r\nabcd"
+    req = @hp.request(@env, str)
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal '', str
+    str << ' ' * 16300
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal '', str
+    str << ' ' * 80
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal '', str
+    assert_not_predicate @hp, :body_eof?
+    assert_same tmp, @hp.filter_body(tmp, str << "\r\n0\r\n")
+    assert_equal "", tmp
+    assert_predicate @hp, :body_eof?
+    str << "\r\n"
+    assert_same req, @hp.request(@env, str)
+    assert_equal "", str
+    assert_predicate @hp, :body_eof?
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_two_chunks_oneshot
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_chunks_bytewise
+    chunked = "10\r\nabcdefghijklmnop\r\n11\r\n0123456789abcdefg\r\n0\r\n"
+    str = "PUT / HTTP/1.1\r\ntransfer-Encoding: chunked\r\n\r\n"
+    buf = str.dup
+    req = @hp.request(@env, buf)
+    assert_same req, @env
+    assert_equal "", buf
+    tmp = ''
+    body = ''
+    str = chunked[0..-2]
+    str.each_byte { |byte|
+      assert_nil @hp.filter_body(tmp, buf << byte.chr)
+      body << tmp
+    }
+    assert_equal 'abcdefghijklmnop0123456789abcdefg', body
+    assert_same tmp, @hp.filter_body(tmp, buf << "\n")
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_trailers
+    str = "PUT / HTTP/1.1\r\n" \
+           "Trailer: Content-MD5\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    assert_equal 'Content-MD5', req['HTTP_TRAILER']
+    assert_nil req['HTTP_CONTENT_MD5']
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    md5_b64 = [ Digest::MD5.digest(tmp) ].pack('m').strip.freeze
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal '', str
+    md5_hdr = "Content-MD5: #{md5_b64}\r\n".freeze
+    str << md5_hdr
+    assert_nil @hp.request(@env, str)
+    assert_equal md5_b64, req['HTTP_CONTENT_MD5']
+    assert_equal "CONTENT_MD5: #{md5_b64}\r\n", str
+    str << "\r"
+    assert_nil @hp.request(@env, str)
+    str << "\nGET / "
+    req = @hp.request(@env, str)
+    assert_equal "GET / ", str
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_trailers_slowly
+    str = "PUT / HTTP/1.1\r\n" \
+           "Trailer: Content-MD5\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 'Content-MD5', req['HTTP_TRAILER']
+    assert_nil req['HTTP_CONTENT_MD5']
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    md5_b64 = [ Digest::MD5.digest(tmp) ].pack('m').strip.freeze
+    assert_same tmp, @hp.filter_body(tmp, str)
+    assert_equal '', str
+    assert_nil @hp.request(req, str)
+    md5_hdr = "Content-MD5: #{md5_b64}\r\n".freeze
+    md5_hdr.each_byte { |byte|
+      str << byte.chr
+      assert_nil @hp.request(req, str)
+    }
+    assert_equal md5_b64, req['HTTP_CONTENT_MD5']
+    assert_equal "CONTENT_MD5: #{md5_b64}\r\n", str
+    str << "\r"
+    assert_nil @hp.request(@env, str)
+    str << "\n"
+    assert_same req, @hp.request(@env, str)
+  end
+
+  def test_max_chunk
+    assert_operator Kcar::Parser::CHUNK_MAX, :>=, 0xffff
+    str = "PUT / HTTP/1.1\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "#{Kcar::Parser::CHUNK_MAX.to_s(16)}\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_same req, @env
+    assert_nil @hp.body_bytes_left
+    assert_nil @hp.filter_body('', str)
+    assert_not_predicate @hp, :keepalive?
+  end
+
+  def test_max_body
+    n = Kcar::Parser::LENGTH_MAX
+    buf = "PUT / HTTP/1.1\r\nContent-Length: #{n}\r\n\r\n"
+    req = @hp.request(@env, buf)
+    assert_equal n, req['CONTENT_LENGTH'].to_i
+    # connection can only be persistent when body is drained:
+    assert_not_predicate @hp, :keepalive?
+    @hp.body_bytes_left = 1
+    assert_not_predicate @hp, :keepalive?
+    @hp.body_bytes_left = 0
+    assert_predicate @hp, :keepalive?
+  end
+
+  def test_overflow_chunk
+    n = Kcar::Parser::CHUNK_MAX + 1
+    str = "PUT / HTTP/1.1\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "#{n.to_s(16)}\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_nil @hp.body_bytes_left
+    assert_equal 'chunked', req['HTTP_TRANSFER_ENCODING']
+    assert_raise(Kcar::ParserError) { @hp.filter_body('', str) }
+  end
+
+  def test_overflow_content_length
+    n = Kcar::Parser::LENGTH_MAX + 1
+    buf ="PUT / HTTP/1.1\r\nContent-Length: #{n}\r\n\r\n"
+    assert_raise(Kcar::ParserError) do
+      @hp.request(@env, buf)
+    end
+  end
+
+  def test_bad_chunk
+    buf = "PUT / HTTP/1.1\r\n" \
+          "transfer-Encoding: chunked\r\n\r\n" \
+          "#zzz\r\na\r\n2\r\n..\r\n0\r\n"
+    @hp.request(@env, buf)
+    assert_nil @hp.body_bytes_left
+    assert_raise(Kcar::ParserError) { @hp.filter_body("", buf) }
+  end
+
+  def test_bad_content_length
+    buf = "PUT / HTTP/1.1\r\nContent-Length: 7ff\r\n\r\n"
+    assert_raise(Kcar::ParserError) { @hp.request(@env, buf) }
+  end
+
+  def test_bad_trailers
+    str = "PUT / HTTP/1.1\r\n" \
+           "Trailer: Transfer-Encoding\r\n" \
+           "transfer-Encoding: chunked\r\n\r\n" \
+           "1\r\na\r\n2\r\n..\r\n0\r\n"
+    req = @hp.request(@env, str)
+    assert_equal 'Transfer-Encoding', req['HTTP_TRAILER']
+    tmp = ''
+    assert_nil @hp.filter_body(tmp, str)
+    assert_equal 'a..', tmp
+    assert_equal '', str
+    str << "Transfer-Encoding: identity\r\n\r\n"
+    assert_raise(Kcar::ParserError) { @hp.request(@env, str) }
+  end
 end

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 08/11] do not assume SERVER_PORT
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (6 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 07/11] flesh out filter_body for request parsing Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests Eric Wong
                   ` (2 subsequent siblings)
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Without X-Forwarded-{Proto,Ssl} or Forwarded header parsing,
it is impossible for us to know which default port
is intended for non-absolute-URI requests.

We will only set SERVER_PORT for the rare absolute-URI requests
and for Host: headers which carry an explicit port.
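
Servers which must always present a SERVER_PORT (e.g. for Rack) can
supply their own fallback after parsing; a small sketch where the
X-Forwarded-* interpretation and default ports are server policy
assumptions, not kcar behavior (+env+ assumed already filled in by
Kcar::Parser#request):

  unless env.key?('SERVER_PORT')
    https = env['HTTP_X_FORWARDED_PROTO'] == 'https' ||
            env['HTTP_X_FORWARDED_SSL'] == 'on'
    env['SERVER_PORT'] = https ? '443' : '80'
  end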
---
 ext/kcar/kcar.rl            |  5 +++--
 test/test_request_parser.rb | 11 +++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 6330910..e1445aa 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -160,7 +160,7 @@ static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
   long host_len = RSTRING_LEN(host);
   char *colon;
   VALUE server_name = host;
-  VALUE server_port = hp->is_https ? g_443 : g_80;
+  VALUE server_port = hp->has_scheme ? (hp->is_https ? g_443 : g_80) : Qfalse;
 
   if (*host_ptr == '[') { /* ipv6 address format */
     char *rbracket = memchr(host_ptr + 1, ']', host_len - 1);
@@ -185,7 +185,8 @@ static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
     }
   }
   rb_hash_aset(env, g_SERVER_NAME, server_name);
-  rb_hash_aset(env, g_SERVER_PORT, server_port);
+  if (server_port != Qfalse)
+    rb_hash_aset(env, g_SERVER_PORT, server_port);
 }
 
 static void http_09_request(VALUE env)
diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb
index 7654e62..c4fefc8 100644
--- a/test/test_request_parser.rb
+++ b/test/test_request_parser.rb
@@ -96,7 +96,7 @@ class TestRequestParser < Test::Unit::TestCase
     buf = "GET / HTTP/1.1\r\nHost: foo\r\n\r\n"
     assert_same @env, @hp.request(@env, buf)
     assert_equal 'foo', @env['SERVER_NAME']
-    assert_equal '80', @env['SERVER_PORT']
+    assert_nil @env['SERVER_PORT']
     assert_equal '', buf
     assert @hp.keepalive?
   end
@@ -113,14 +113,14 @@ class TestRequestParser < Test::Unit::TestCase
   def test_parse_server_host_empty_port
     @hp.request(@env, "GET / HTTP/1.1\r\nHost: foo:\r\n\r\n")
     assert_equal 'foo', @env['SERVER_NAME']
-    assert_equal '80', @env['SERVER_PORT']
+    assert_nil @env['SERVER_PORT']
     assert @hp.keepalive?
   end
 
   def test_parse_host_cont
     @hp.request(@env, "GET / HTTP/1.1\r\nHost:\r\n foo\r\n\r\n")
     assert_equal 'foo', @env['SERVER_NAME']
-    assert_equal '80', @env['SERVER_PORT']
+    assert_nil @env['SERVER_PORT']
     assert @hp.keepalive?
   end
 
@@ -541,7 +541,7 @@ class TestRequestParser < Test::Unit::TestCase
     req = @hp.request(@env, buf)
     assert_equal "[::1]", req['HTTP_HOST']
     assert_equal "[::1]", req['SERVER_NAME']
-    assert_equal '80', req['SERVER_PORT']
+    assert_nil req['SERVER_PORT']
   end
 
   def test_ipv6_host_header_with_port
@@ -555,7 +555,7 @@ class TestRequestParser < Test::Unit::TestCase
   def test_ipv6_host_header_with_empty_port
     req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1]:\r\n\r\n")
     assert_equal "[::1]", req['SERVER_NAME']
-    assert_equal '80', req['SERVER_PORT']
+    assert_nil req['SERVER_PORT']
     assert_equal "[::1]:", req['HTTP_HOST']
   end
 
@@ -836,7 +836,6 @@ class TestRequestParser < Test::Unit::TestCase
     expect = {
       'HTTP_HOST' => 'example.com',
       'SERVER_NAME' => 'example.com',
-      'SERVER_PORT' => '80',
       'REQUEST_PATH' => '/',
       'SERVER_PROTOCOL' => 'HTTP/1.1',
       'PATH_INFO' => '/',

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (7 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 08/11] do not assume SERVER_PORT Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 10/11] always set non-negative Content-Length for requests Eric Wong
  2018-12-01 13:31 ` [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6 Eric Wong
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Since "HTTP/0.9" is an informal name, we'll omit it to avoid
confusing any applications which may see them.
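
Applications which still want those keys can fill them in themselves;
a tiny consumer-side sketch (defaulting to "HTTP/1.0" is an assumption
of this sketch, not something kcar does):

  env = hp.request({}, buf)
  if env && !env.key?('SERVER_PROTOCOL') # pre-1.0 request line, no version
    env['SERVER_PROTOCOL'] = env['HTTP_VERSION'] = 'HTTP/1.0'
  end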
---
 ext/kcar/kcar.rl            | 10 ----------
 test/test_request_parser.rb |  2 --
 2 files changed, 12 deletions(-)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index e1445aa..c85eb3c 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -189,14 +189,6 @@ static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host)
     rb_hash_aset(env, g_SERVER_PORT, server_port);
 }
 
-static void http_09_request(VALUE env)
-{
-  VALUE v = str_new_dd_freeze("HTTP/0.9", 8);
-
-  rb_hash_aset(env, g_SERVER_PROTOCOL, v);
-  rb_hash_aset(env, g_HTTP_VERSION, v);
-}
-
 static void finalize_header(struct http_parser *hp, VALUE hdr)
 {
   if (hp->has_trailer && !hp->chunked)
@@ -215,8 +207,6 @@ static void finalize_header(struct http_parser *hp, VALUE hdr)
       VALUE host = request_host_val(hp);
       if (host != Qfalse)
         set_server_vars(hp, hdr, host);
-    } else {
-      http_09_request(hdr);
     }
   }
 }
diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb
index c4fefc8..895a948 100644
--- a/test/test_request_parser.rb
+++ b/test/test_request_parser.rb
@@ -743,8 +743,6 @@ class TestRequestParser < Test::Unit::TestCase
       "REQUEST_PATH" => "/read-rfc1945-if-you-dont-believe-me",
       "PATH_INFO" => "/read-rfc1945-if-you-dont-believe-me",
       "REQUEST_URI" => "/read-rfc1945-if-you-dont-believe-me",
-      "SERVER_PROTOCOL" => "HTTP/0.9",
-      "HTTP_VERSION" => "HTTP/0.9",
       "REQUEST_METHOD" => "GET",
       "QUERY_STRING" => ""
     }

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 10/11] always set non-negative Content-Length for requests
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (8 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  2018-12-01 13:31 ` [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6 Eric Wong
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Negative len.content is used to denote chunked responses
on #body_bytes_left, but we don't use that for request
parsing.
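
For reference, Kcar::Parser#body_bytes_left is where the distinction
shows up; both behaviors below are exercised by the request tests in
this series:

  hp = Kcar::Parser.new
  hp.request({}, "PUT / HTTP/1.1\r\nContent-Length: 5\r\n\r\n".dup)
  hp.body_bytes_left # => 5
  hp.reset
  hp.request({}, "PUT / HTTP/1.1\r\nTransfer-Encoding: chunked\r\n\r\n".dup)
  hp.body_bytes_left # => nil for chunked bodies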
---
 ext/kcar/kcar.rl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index c85eb3c..336af55 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -199,6 +199,8 @@ static void finalize_header(struct http_parser *hp, VALUE hdr)
         rb_raise(eParserError, "Content-Length set with chunked encoding");
       else
         hp->len.chunk = 0;
+    } else if (hp->len.content < 0) {
+        hp->len.content = 0;
     }
 
     if (!hp->has_query)

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6
  2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
                   ` (9 preceding siblings ...)
  2018-12-01 13:31 ` [PATCH 10/11] always set non-negative Content-Length for requests Eric Wong
@ 2018-12-01 13:31 ` Eric Wong
  10 siblings, 0 replies; 12+ messages in thread
From: Eric Wong @ 2018-12-01 13:31 UTC (permalink / raw)
  To: kcar-public

Ruby 2.6 unconditionally dedupes string hash keys in rb_hash_aset,
thanks to Anmol Chopra: https://bugs.ruby-lang.org/issues/15251
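
A quick way to see the 2.6 difference this relies on (an irb-style
probe, not part of the patch):

  a, b = {}, {}
  a[%w(f o o).join] = 1
  b[%w(f o o).join] = 2
  a.keys[0].equal?(b.keys[0]) # => true on 2.6+: both keys share one fstring
                              # => false on <= 2.5: each hash froze its own copy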
---
 ext/kcar/extconf.rb | 11 +++++++++++
 ext/kcar/kcar.rl    |  5 +++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ext/kcar/extconf.rb b/ext/kcar/extconf.rb
index b65846a..ba69c3c 100644
--- a/ext/kcar/extconf.rb
+++ b/ext/kcar/extconf.rb
@@ -22,4 +22,15 @@ rescue NoMethodError
   message("no, String#-@ not available\n")
 end
 
+message('checking if Hash#[]= (rb_hash_aset) dedupes... ')
+h = {}
+h[%w(m k m f).join('')] = :foo
+if 'mkmf'.freeze.equal?(h.keys[0])
+  $CPPFLAGS += ' -DHASH_ASET_DEDUPE=1 '
+  message("yes\n")
+else
+  $CPPFLAGS += ' -DHASH_ASET_DEDUPE=0 '
+  message("no, needs Ruby 2.6+\n")
+end
+
 create_makefile("kcar_ext")
diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 336af55..9d04dd8 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -489,7 +489,6 @@ static void write_response_value(struct http_parser *hp, VALUE hdr,
   }
 }
 
-/* TODO cache */
 static VALUE req_field(const char *ptr, size_t len)
 {
   size_t pfxlen = sizeof("HTTP_") - 1;
@@ -501,7 +500,7 @@ static VALUE req_field(const char *ptr, size_t len)
   assert(*(dst + RSTRING_LEN(str)) == '\0' &&
          "string didn't end with \\0"); /* paranoia */
 
-  return str_dd_freeze(str);
+  return str;
 }
 
 static void snake_upcase(char *ptr, size_t len)
@@ -570,6 +569,8 @@ static void write_request_value(struct http_parser *hp, VALUE env,
       hp->v.host = val;
   } else {
     key = req_field(fptr, flen);
+    if (!HASH_ASET_DEDUPE)
+      key = str_dd_freeze(key);
   }
   existing = rb_hash_aref(env, key);
   if (NIL_P(existing)) {

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2018-12-01 13:31 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-01 13:31 [PATCH v2] request parsing bits Eric Wong
2018-12-01 13:31 ` [PATCH 01/11] introduce new str_new_dd_freeze internal function Eric Wong
2018-12-01 13:31 ` [PATCH 02/11] begin implementing request parsing Eric Wong
2018-12-01 13:31 ` [PATCH 03/11] favor bitfields instead flags + macros Eric Wong
2018-12-01 13:31 ` [PATCH 04/11] implement request parsing with tests Eric Wong
2018-12-01 13:31 ` [PATCH 05/11] pkg.mk: enable warnings by default for tests Eric Wong
2018-12-01 13:31 ` [PATCH 06/11] filter_body: rename variables to be like memcpy(3) Eric Wong
2018-12-01 13:31 ` [PATCH 07/11] flesh out filter_body for request parsing Eric Wong
2018-12-01 13:31 ` [PATCH 08/11] do not assume SERVER_PORT Eric Wong
2018-12-01 13:31 ` [PATCH 09/11] do not set "HTTP/0.9" for pre-1.0 requests Eric Wong
2018-12-01 13:31 ` [PATCH 10/11] always set non-negative Content-Length for requests Eric Wong
2018-12-01 13:31 ` [PATCH 11/11] avoid String#-@ call on request parsing under Ruby 2.6 Eric Wong

Code repositories for project(s) associated with this public inbox

	https://yhbt.net/kcar.git/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).