about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <normalperson@yhbt.net> 2011-05-23 17:11:29 -0700
committer: Eric Wong <normalperson@yhbt.net> 2011-05-23 17:11:29 -0700
commit: c73e5023a056e28543869722203d9f62d75e3949 (patch)
tree: 99fa296c41f8e570a95389ef199348872bb6a2f3
parent: 71bb4732573c45b91b88922df3e13160af6f65c6 (diff)
download: kcar-c73e5023a056e28543869722203d9f62d75e3949.tar.gz
RFC 2616, section 4.2:
> The field-content does not include any leading or trailing LWS:
> linear white space occurring before the first non-whitespace
> character of the field-value or after the last non-whitespace
> character of the field-value. Such leading or trailing LWS MAY be
> removed without changing the semantics of the field value. Any LWS
> that occurs between field-content MAY be replaced with a single SP
> before interpreting the field value or forwarding the message
> downstream.
-rw-r--r-- ext/kcar/kcar.rl | 33
-rw-r--r-- ext/kcar/kcar_http_common.rl | 2
-rw-r--r-- test/test_parser.rb | 35
3 files changed, 64 insertions, 6 deletions
diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
index 5b620ac..78437e7 100644
--- a/ext/kcar/kcar.rl
+++ b/ext/kcar/kcar.rl
@@ -70,12 +70,27 @@ struct http_parser {
 #define MARK(M,FPC) (hp->M = (FPC) - buffer)
 #define PTR_TO(F) (buffer + hp->F)
 #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
+#define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC))
 
 #define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
 #define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
 #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
 #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
 
+static int is_lws(char c)
+{
+  return (c == ' ' || c == '\t');
+}
+
+static VALUE stripped_str_new(const char *str, long len)
+{
+  long end;
+
+  for (end = len - 1; end >= 0 && is_lws(str[end]); end--);
+
+  return rb_str_new(str, end + 1);
+}
+
 static void finalize_header(struct http_parser *hp)
 {
   if ((HP_FL_TEST(hp, HASTRAILER) && ! HP_FL_TEST(hp, CHUNKED)))
@@ -144,6 +159,9 @@ static void write_cont_value(struct http_parser *hp,
                              char *buffer, const char *p)
 {
   char *vptr;
+  long end;
+  long len = LEN(mark, p);
+  long cont_len;
 
   if (hp->cont == Qfalse)
     rb_raise(eParserError, "invalid continuation line");
@@ -154,19 +172,24 @@ static void write_cont_value(struct http_parser *hp,
   assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string");
   assert(hp->mark > 0 && "impossible continuation line offset");
 
-  if (LEN(mark, p) == 0)
+  if (len == 0)
     return;
 
-  if (RSTRING_LEN(hp->cont) > 0)
+  cont_len = RSTRING_LEN(hp->cont);
+  if (cont_len > 0) {
     --hp->mark;
+    len = LEN(mark, p);
+  }
 
   vptr = PTR_TO(mark);
 
-  if (RSTRING_LEN(hp->cont) > 0) {
+  /* normalize tab to space */
+  if (cont_len > 0) {
     assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
     *vptr = ' ';
   }
-  rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
+  for (end = len - 1; end >= 0 && is_lws(vptr[end]); end--);
+  rb_str_buf_cat(hp->cont, vptr, end + 1);
 }
 
 static void write_value(VALUE hdr, struct http_parser *hp,
@@ -192,7 +215,7 @@ static void write_value(VALUE hdr, struct http_parser *hp,
   VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
   VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
   f = rb_str_new(fptr, (long)flen);
-  v = rb_str_new(vptr, (long)vlen);
+  v = stripped_str_new(vptr, (long)vlen);
 
   /* needs more tests for error-checking here */
   /*
diff --git a/ext/kcar/kcar_http_common.rl b/ext/kcar/kcar_http_common.rl
index 54206ed..36752b0 100644
--- a/ext/kcar/kcar_http_common.rl
+++ b/ext/kcar/kcar_http_common.rl
@@ -36,7 +36,7 @@
 
   value_cont = lws+ any* >start_value %write_cont_value;
 
-  message_header = ((field_name ":" " "* field_value)|value_cont) :> CRLF;
+  message_header = ((field_name ":" lws* field_value)|value_cont) :> CRLF;
   chunk_ext_val = token*;
   chunk_ext_name = token*;
   chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
diff --git a/test/test_parser.rb b/test/test_parser.rb
index 7eab684..dab2e77 100644
--- a/test/test_parser.rb
+++ b/test/test_parser.rb
@@ -254,4 +254,39 @@ class TestParser < Test::Unit::TestCase
     end
   end
 
+  def test_leading_tab
+    resp = "HTTP/1.1 200 OK\r\nHost:\texample.com\r\n\r\n"
+    assert @hp.headers(env = {}, resp)
+    assert_equal 'example.com', env['Host']
+  end
+
+  def test_trailing_whitespace
+    resp = "HTTP/1.1 200 OK\r\nHost: example.com \r\n\r\n"
+    assert @hp.headers(env = {}, resp)
+    assert_equal 'example.com', env['Host']
+  end
+
+  def test_trailing_tab
+    resp = "HTTP/1.1 200 OK\r\nHost: example.com\t\r\n\r\n"
+    assert @hp.headers(env = {}, resp)
+    assert_equal 'example.com', env['Host']
+  end
+
+  def test_trailing_multiple_linear_whitespace
+    resp = "HTTP/1.1 200 OK\r\nHost: example.com\t \t \t\r\n\r\n"
+    assert @hp.headers(env = {}, resp)
+    assert_equal 'example.com', env['Host']
+  end
+
+  def test_embedded_linear_whitespace_ok
+    resp = "HTTP/1.1 200 OK\r\nX-Space: hello\t world\t \r\n\r\n"
+    assert @hp.headers(env = {}, resp)
+    assert_equal "hello\t world", env["X-Space"]
+  end
+
+  def test_empty_header
+    resp = "HTTP/1.1 200 OK\r\nHost:  \r\n\r\n"
+    assert @hp.headers(env = {}, resp)
+    assert_equal '', env['Host']
+  end
 end