From 1b31c40997ff8b932a457275e9a2f219de1d32c8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 23 May 2011 21:04:56 +0000 Subject: strip trailing and leading linear whitespace in headers RFC 2616, section 4.2: > The field-content does not include any leading or trailing LWS: > linear white space occurring before the first non-whitespace > character of the field-value or after the last non-whitespace > character of the field-value. Such leading or trailing LWS MAY be > removed without changing the semantics of the field value. Any LWS > that occurs between field-content MAY be replaced with a single SP > before interpreting the field value or forwarding the message > downstream. --- ext/unicorn_http/unicorn_http.rl | 35 ++++++++++++++++++++----- test/unit/test_http_parser.rb | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 6 deletions(-) diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl index 7a6e031..116b2b2 100644 --- a/ext/unicorn_http/unicorn_http.rl +++ b/ext/unicorn_http/unicorn_http.rl @@ -132,12 +132,27 @@ static void parser_raise(VALUE klass, const char *msg) #define MARK(M,FPC) (hp->M = (FPC) - buffer) #define PTR_TO(F) (buffer + hp->F) #define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC)) +#define STRIPPED_STR_NEW(M,FPC) stripped_str_new(PTR_TO(M), LEN(M, FPC)) #define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl)) #define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl)) #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl)) #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl)) +static int is_lws(char c) +{ + return (c == ' ' || c == '\t'); +} + +static VALUE stripped_str_new(const char *str, long len) +{ + long end; + + for (end = len - 1; end >= 0 && is_lws(str[end]); end--); + + return rb_str_new(str, end + 1); +} + /* * handles values of the "Connection:" header, keepalive is implied * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0 @@ -201,6 +216,9 @@ static void write_cont_value(struct http_parser *hp, char *buffer, const char *p) { char *vptr; + long end; + long len = LEN(mark, p); + long cont_len; if (hp->cont == Qfalse) parser_raise(eHttpParserError, "invalid continuation line"); @@ -210,19 +228,24 @@ static void write_cont_value(struct http_parser *hp, assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string"); assert(hp->mark > 0 && "impossible continuation line offset"); - if (LEN(mark, p) == 0) + if (len == 0) return; - if (RSTRING_LEN(hp->cont) > 0) + cont_len = RSTRING_LEN(hp->cont); + if (cont_len > 0) { --hp->mark; - + len = LEN(mark, p); + } vptr = PTR_TO(mark); - if (RSTRING_LEN(hp->cont) > 0) { + /* normalize tab to space */ + if (cont_len > 0) { assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space"); *vptr = ' '; } - rb_str_buf_cat(hp->cont, vptr, LEN(mark, p)); + + for (end = len - 1; end >= 0 && is_lws(vptr[end]); end--); + rb_str_buf_cat(hp->cont, vptr, end + 1); } static void write_value(struct http_parser *hp, @@ -233,7 +256,7 @@ static void write_value(struct http_parser *hp, VALUE e; VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE); - v = LEN(mark, p) == 0 ? rb_str_buf_new(128) : STR_NEW(mark, p); + v = LEN(mark, p) == 0 ? rb_str_buf_new(128) : STRIPPED_STR_NEW(mark, p); if (NIL_P(f)) { const char *field = PTR_TO(start.field); size_t flen = hp->s.field_len; diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb index dc1aab7..70bb789 100644 --- a/test/unit/test_http_parser.rb +++ b/test/unit/test_http_parser.rb @@ -258,6 +258,20 @@ class HttpParserTest < Test::Unit::TestCase assert_equal 'hi y x ASDF', req['HTTP_X_ASDF'] end + def test_continuation_eats_trailing_spaces + parser = HttpParser.new + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: \r\n" \ + "\t\r\n" \ + " b \r\n" \ + " ASDF\r\n\r\n" + parser.buf << header + req = parser.env + assert_equal req, parser.parse + assert_equal '', parser.buf + assert_equal 'b ASDF', req['HTTP_X_ASDF'] + end + def test_continuation_with_absolute_uri_and_ignored_host_header parser = HttpParser.new header = "GET http://example.com/ HTTP/1.1\r\n" \ @@ -764,6 +778,48 @@ class HttpParserTest < Test::Unit::TestCase end + def test_leading_tab + parser = HttpParser.new + get = "GET / HTTP/1.1\r\nHost:\texample.com\r\n\r\n" + assert parser.add_parse(get) + assert_equal 'example.com', parser.env['HTTP_HOST'] + end + + def test_trailing_whitespace + parser = HttpParser.new + get = "GET / HTTP/1.1\r\nHost: example.com \r\n\r\n" + assert parser.add_parse(get) + assert_equal 'example.com', parser.env['HTTP_HOST'] + end + + def test_trailing_tab + parser = HttpParser.new + get = "GET / HTTP/1.1\r\nHost: example.com\t\r\n\r\n" + assert parser.add_parse(get) + assert_equal 'example.com', parser.env['HTTP_HOST'] + end + + def test_trailing_multiple_linear_whitespace + parser = HttpParser.new + get = "GET / HTTP/1.1\r\nHost: example.com\t \t \t\r\n\r\n" + assert parser.add_parse(get) + assert_equal 'example.com', parser.env['HTTP_HOST'] + end + + def test_embedded_linear_whitespace_ok + parser = HttpParser.new + get = "GET / HTTP/1.1\r\nX-Space: hello\t world\t \r\n\r\n" + assert parser.add_parse(get) + assert_equal "hello\t world", parser.env["HTTP_X_SPACE"] + end + + def test_empty_header + parser = HttpParser.new + get = "GET / HTTP/1.1\r\nHost: \r\n\r\n" + assert parser.add_parse(get) + assert_equal '', parser.env['HTTP_HOST'] + end + # so we don't care about the portability of this test # if it doesn't leak on Linux, it won't leak anywhere else # unless your C compiler or platform is otherwise broken -- cgit v1.2.3-24-ge0c7