From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.3 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, HTTP_ESCAPED_HOST shortcircuit=no autolearn=no autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4F86E211B7 for ; Sat, 1 Dec 2018 13:31:27 +0000 (UTC) From: Eric Wong To: kcar-public@bogomips.org Subject: [PATCH 04/11] implement request parsing with tests Date: Sat, 1 Dec 2018 13:31:18 +0000 Message-Id: <20181201133125.5524-5-e@80x24.org> In-Reply-To: <20181201133125.5524-1-e@80x24.org> References: <20181201133125.5524-1-e@80x24.org> List-Id: Most of these tests are ported over from a Mongrel-derived server. Chunked body parsing is not implemented, yet. --- ext/kcar/kcar.rl | 338 ++++++++++++-- test/test_request_parser.rb | 862 ++++++++++++++++++++++++++++++++++++ 2 files changed, 1162 insertions(+), 38 deletions(-) create mode 100644 test/test_request_parser.rb diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl index c0e8bbc..01e4c76 100644 --- a/ext/kcar/kcar.rl +++ b/ext/kcar/kcar.rl @@ -16,8 +16,13 @@ static VALUE eParserError; static ID id_uminus, id_sq, id_sq_set; static VALUE g_rack_url_scheme, - g_HOST, g_PATH_INFO, g_QUERY_STRING, - g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI; + g_80, g_443, g_http, g_https, + g_HTTP_HOST, g_HTTP_CONNECTION, g_HTTP_TRAILER, g_HTTP_TRANSFER_ENCODING, + g_HTTP_VERSION, + g_CONTENT_LENGTH, g_CONTENT_TYPE, + g_PATH_INFO, g_QUERY_STRING, + g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI, + g_SERVER_NAME, g_SERVER_PORT, g_SERVER_PROTOCOL; static VALUE e413, e414; /** Defines common length and error messages for input length validation. */ @@ -51,6 +56,9 @@ DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10)); struct http_parser { int cs; /* Ragel internal state */ + unsigned int is_request:1; + unsigned int has_query:1; + unsigned int has_scheme:1; unsigned int chunked:1; unsigned int has_body:1; unsigned int in_body:1; @@ -59,7 +67,9 @@ struct http_parser { unsigned int in_chunk:1; unsigned int persistent:1; unsigned int has_header:1; - unsigned int padding:24; + unsigned int body_eof_seen:1; + unsigned int is_https:1; + unsigned int padding:19; unsigned int mark; unsigned int offset; union { /* these 2 fields don't nest */ @@ -71,7 +81,11 @@ struct http_parser { unsigned int dest_offset; /* only used during body processing */ } s; VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */ - VALUE status; /* String or Qnil */ + union { + /* String or Qnil */ + VALUE status; /* status string for responses */ + VALUE host; /* Host: header for requests */ + } v; union { off_t content; off_t chunk; @@ -134,10 +148,69 @@ static VALUE stripped_str_new(const char *str, long len) return rb_str_new(str, end + 1); } -static void finalize_header(struct http_parser *hp) +static VALUE request_host_val(struct http_parser *hp) +{ + assert(hp->is_request == 1 && "not a request"); + return NIL_P(hp->v.host) ? Qfalse : hp->v.host; +} + +static void set_server_vars(struct http_parser *hp, VALUE env, VALUE host) +{ + char *host_ptr = RSTRING_PTR(host); + long host_len = RSTRING_LEN(host); + char *colon; + VALUE server_name = host; + VALUE server_port = hp->is_https ? g_443 : g_80; + + if (*host_ptr == '[') { /* ipv6 address format */ + char *rbracket = memchr(host_ptr + 1, ']', host_len - 1); + + if (rbracket) + colon = (rbracket[1] == ':') ? rbracket + 1 : NULL; + else + colon = memchr(host_ptr + 1, ':', host_len - 1); + } else { + colon = memchr(host_ptr, ':', host_len); + } + + if (colon) { + long port_start = colon - host_ptr + 1; + long port_len = host_len - port_start; + + server_name = rb_str_substr(host, 0, colon - host_ptr); + server_name = str_dd_freeze(server_name); + if (port_len > 0) { + server_port = rb_str_substr(host, port_start, port_len); + server_port = str_dd_freeze(server_port); + } + } + rb_hash_aset(env, g_SERVER_NAME, server_name); + rb_hash_aset(env, g_SERVER_PORT, server_port); +} + +static void http_09_request(VALUE env) +{ + VALUE v = str_new_dd_freeze("HTTP/0.9", 8); + + rb_hash_aset(env, g_SERVER_PROTOCOL, v); + rb_hash_aset(env, g_HTTP_VERSION, v); +} + +static void finalize_header(struct http_parser *hp, VALUE hdr) { if (hp->has_trailer && !hp->chunked) rb_raise(eParserError, "trailer but not chunked"); + if (hp->is_request) { + if (!hp->has_query) + rb_hash_aset(hdr, g_QUERY_STRING, rb_str_new(NULL, 0)); + if (hp->has_header) { + VALUE host = request_host_val(hp); + if (host != Qfalse) + set_server_vars(hp, hdr, host); + } else { + http_09_request(hdr); + } + } } /* @@ -173,28 +246,58 @@ request_method(VALUE env, const char *ptr, size_t len) } static void -url_scheme(VALUE env, const char *ptr, size_t len) +url_scheme(struct http_parser *hp, VALUE env, const char *ptr, size_t len) { - rb_hash_aset(env, g_rack_url_scheme, str_new_dd_freeze(ptr, len)); + VALUE val; + + hp->has_scheme = 1; + /* Ragel machine downcases and enforces this as "http" or "https" */ + if (len == 5) { + hp->is_https = 1; + assert(CONST_MEM_EQ("https", ptr, len) && "len == 5 but not 'https'"); + val = g_https; + } else { + assert(CONST_MEM_EQ("http", ptr, len) && "len != 4 but not 'http'"); + val = g_http; + } + rb_hash_aset(env, g_rack_url_scheme, val); } static void -request_host(VALUE env, const char *ptr, size_t len) +request_host(struct http_parser *hp, VALUE env, const char *ptr, size_t len) { - rb_hash_aset(env, g_HOST, str_new_dd_freeze(ptr, len)); + VALUE val = rb_str_new(ptr, len); + + rb_hash_aset(env, g_HTTP_HOST, val); + hp->v.host = val; } static void request_uri(VALUE env, const char *ptr, size_t len) { + VALUE val; + VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI); - rb_hash_aset(env, g_REQUEST_URI, rb_str_new(ptr, len)); + val = rb_str_new(ptr, len); + rb_hash_aset(env, g_REQUEST_URI, val); + + /* + * rack says PATH_INFO must start with "/" or be empty, + * but "OPTIONS *" is a valid request + */ + if (CONST_MEM_EQ("*", ptr, len)) { + val = rb_str_new(NULL, 0); + rb_hash_aset(env, g_PATH_INFO, val); + rb_hash_aset(env, g_REQUEST_PATH, val); + } } static void -query_string(VALUE env, const char *ptr, size_t len) +query_string(struct http_parser *hp, VALUE env, const char *ptr, size_t len) { VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING); + + hp->has_query = 1; rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len)); } @@ -204,22 +307,26 @@ request_path(VALUE env, const char *ptr, size_t len) VALUE val; VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH); - val = rb_hash_aset(env, g_REQUEST_PATH, rb_str_new(ptr, len)); - - /* rack says PATH_INFO must start with "/" or be empty */ - if (CONST_MEM_EQ("*", ptr, len)) - val = rb_str_new(NULL, 0); + val = rb_str_new(ptr, len); + rb_hash_aset(env, g_REQUEST_PATH, val); rb_hash_aset(env, g_PATH_INFO, val); } static void -http_version(struct http_parser *hp, const char *ptr, size_t len) +http_version(struct http_parser *hp, VALUE env, const char *ptr, size_t len) { if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) { /* HTTP/1.1 implies keepalive unless "Connection: close" is set */ hp->persistent = 1; } + if (hp->is_request) { + VALUE v = str_new_dd_freeze(ptr, len); + hp->has_header = 1; + + rb_hash_aset(env, g_SERVER_PROTOCOL, v); + rb_hash_aset(env, g_HTTP_VERSION, v); + } } static void @@ -227,13 +334,13 @@ status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len) { long nr; - hp->status = str_new_dd_freeze(ptr, len); + hp->v.status = str_new_dd_freeze(ptr, len); /* RSTRING_PTR is null terminated, ptr is not */ - nr = strtol(RSTRING_PTR(hp->status), NULL, 10); + nr = strtol(RSTRING_PTR(hp->v.status), NULL, 10); if (nr < 100 || nr > 999) - rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status)); + rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->v.status)); if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) ) hp->has_body = 1; @@ -282,8 +389,8 @@ static void write_cont_value(struct http_parser *hp, rb_str_buf_cat(hp->cont, vptr, end + 1); } -static void write_value(VALUE hdr, struct http_parser *hp, - const char *buffer, const char *p) +static void write_response_value(struct http_parser *hp, VALUE hdr, + const char *buffer, const char *p) { VALUE f, v; VALUE hclass; @@ -382,6 +489,111 @@ static void write_value(VALUE hdr, struct http_parser *hp, } } +/* TODO cache */ +static VALUE req_field(const char *ptr, size_t len) +{ + size_t pfxlen = sizeof("HTTP_") - 1; + VALUE str = rb_str_new(NULL, pfxlen + len); + char *dst = RSTRING_PTR(str); + + memcpy(dst, "HTTP_", pfxlen); + memcpy(dst + pfxlen, ptr, len); + assert(*(dst + RSTRING_LEN(str)) == '\0' && + "string didn't end with \\0"); /* paranoia */ + + return str_dd_freeze(str); +} + +static void snake_upcase(char *ptr, size_t len) +{ + char *c; + + for (c = ptr; len--; c++) { + if (*c >= 'a' && *c <= 'z') + *c &= ~0x20; + else if (*c == '-') + *c = '_'; + } +} + +static void write_request_value(struct http_parser *hp, VALUE env, + char *buffer, const char *p) +{ + char *fptr = PTR_TO(start.field); + size_t flen = hp->s.field_len; + char *vptr = PTR_TO(mark); + size_t vlen = LEN(mark, p); + VALUE key, val; + VALUE existing; + + VALIDATE_MAX_LENGTH(flen, FIELD_NAME); + VALIDATE_MAX_LENGTH(LEN(mark, p), FIELD_VALUE); + snake_upcase(fptr, flen); + + /* + * ignore "Version" headers since they conflict with the HTTP_VERSION + * rack env variable. + */ + if (CONST_MEM_EQ("VERSION", fptr, flen)) { + hp->cont = Qnil; + return; + } + val = vlen == 0 ? rb_str_new(0, 0) : stripped_str_new(vptr, vlen); + + if (CONST_MEM_EQ("CONNECTION", fptr, flen)) { + key = g_HTTP_CONNECTION; + hp_keepalive_connection(hp, val); + } else if (CONST_MEM_EQ("CONTENT_LENGTH", fptr, flen)) { + key = g_CONTENT_LENGTH; + hp->len.content = parse_length(vptr, vlen); + if (hp->len.content < 0) + rb_raise(eParserError, "invalid Content-Length"); + if (hp->len.content != 0) + hp->has_body = 1; + invalid_if_trailer(hp); + } else if (CONST_MEM_EQ("CONTENT_TYPE", fptr, flen)) { + key = g_CONTENT_TYPE; + } else if (CONST_MEM_EQ("TRANSFER_ENCODING", fptr, flen)) { + key = g_HTTP_TRANSFER_ENCODING; + if (STR_CSTR_CASE_EQ(val, "chunked")) { + hp->chunked = 1; + hp->has_body = 1; + } + invalid_if_trailer(hp); + } else if (CONST_MEM_EQ("TRAILER", fptr, flen)) { + key = g_HTTP_TRAILER; + hp->has_trailer = 1; + invalid_if_trailer(hp); + } else if (CONST_MEM_EQ("HOST", fptr, flen)) { + key = g_HTTP_HOST; + if (NIL_P(hp->v.host)) + hp->v.host = val; + } else { + key = req_field(fptr, flen); + } + existing = rb_hash_aref(env, key); + if (NIL_P(existing)) { + hp->cont = rb_hash_aset(env, key, val); + /* + * Ignore repeated Host headers and favor host set by absolute URIs. + * absoluteURI Request-URI takes precedence over + * the Host: header (ref: rfc 2616, section 5.2.1) + */ + } else if (key == g_HTTP_HOST) { + hp->cont = Qnil; + } else { + rb_str_buf_cat(existing, ",", 1); + hp->cont = rb_str_buf_append(existing, val); + } +} + +static void write_value(struct http_parser *hp, VALUE hdr, + char *buf, const char *p) +{ + hp->is_request ? write_request_value(hp, hdr, buf, p) : + write_response_value(hp, hdr, buf, p); +} + /** Machine **/ %%{ @@ -392,21 +604,21 @@ static void write_value(VALUE hdr, struct http_parser *hp, action snake_upcase_field { snake_upcase_char(deconst(fpc)); } action downcase_char { downcase_char(deconst(fpc)); } action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); } - action url_scheme { url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); } - action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); } + action url_scheme { url_scheme(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } + action host { request_host(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); } action start_query { MARK(start.query, fpc); } action query_string { - query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc)); + query_string(hp, hdr, PTR_TO(start.query), LEN(start.query, fpc)); } action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); } action start_field { MARK(start.field, fpc); } action write_field { hp->s.field_len = LEN(start.field, fpc); } action start_value { MARK(mark, fpc); } - action write_value { write_value(hdr, hp, buffer, fpc); } + action write_value { write_value(hp, hdr, buffer, fpc); } action write_cont_value { write_cont_value(hp, buffer, fpc); } - action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); } + action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } action add_to_chunk_size { @@ -415,7 +627,7 @@ static void write_value(VALUE hdr, struct http_parser *hp, rb_raise(eParserError, "invalid chunk size"); } action header_done { - finalize_header(hp); + finalize_header(hp, hdr); cs = http_parser_first_final; if (hp->chunked) @@ -469,7 +681,7 @@ static void http_parser_init(struct http_parser *hp) int cs = 0; memset(hp, 0, sizeof(struct http_parser)); hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */ - hp->status = Qnil; + hp->v.status = Qnil; hp->len.content = -1; %% write init; hp->cs = cs; @@ -513,7 +725,7 @@ static void kcar_mark(void *ptr) struct http_parser *hp = ptr; rb_gc_mark(hp->cont); - rb_gc_mark(hp->status); + rb_gc_mark(hp->v.status); } static size_t kcar_memsize(const void *ptr) @@ -642,6 +854,23 @@ static void check_buffer_size(long dlen) rb_raise(rb_eRangeError, "headers too large to process (%ld bytes)", dlen); } +static void parser_execute(struct http_parser *hp, VALUE hdr, VALUE buf) +{ + char *ptr; + long len; + + Check_Type(buf, T_STRING); + rb_str_modify(buf); + ptr = RSTRING_PTR(buf); + len = RSTRING_LEN(buf); + check_buffer_size(len); + + http_parser_execute(hp, hdr, ptr, len); + + if (hp->cs == http_parser_error) + rb_raise(eParserError, "Invalid HTTP format, parsing fails."); +} + /** * Document-method: headers * call-seq: @@ -657,10 +886,11 @@ static void check_buffer_size(long dlen) static VALUE headers(VALUE self, VALUE hdr, VALUE data) { struct http_parser *hp = data_get(self); - long dlen = RSTRING_LEN(data); - check_buffer_size(dlen); - http_parser_execute(hp, hdr, RSTRING_PTR(data), dlen); + if (hp->is_request) + rb_raise(rb_eRuntimeError, "parser is handling a request, not response"); + + parser_execute(hp, hdr, data); VALIDATE_MAX_LENGTH(hp->offset, HEADER); if (hp->cs == http_parser_first_final || @@ -670,15 +900,33 @@ static VALUE headers(VALUE self, VALUE hdr, VALUE data) if (hp->in_trailer) return hdr; else - return rb_ary_new3(2, hp->status, hdr); + return rb_ary_new3(2, hp->v.status, hdr); } - if (hp->cs == http_parser_error) - rb_raise(eParserError, "Invalid HTTP format, parsing fails."); - return Qnil; } +static VALUE request(VALUE self, VALUE env, VALUE buf) +{ + struct http_parser *hp = data_get(self); + + hp->is_request = 1; + Check_Type(buf, T_STRING); + parser_execute(hp, env, buf); + + if (hp->cs == http_parser_first_final || + hp->cs == http_parser_en_ChunkedBody) { + advance_str(buf, hp->offset + 1); + hp->offset = 0; + if (hp->in_trailer) + hp->body_eof_seen = 1; + + return env; + } + return Qnil; /* incomplete */ +} + + static int chunked_eof(struct http_parser *hp) { return ((hp->cs == http_parser_first_final) || hp->in_trailer); @@ -811,6 +1059,7 @@ void Init_kcar_ext(void) rb_define_alloc_func(cParser, kcar_alloc); rb_define_method(cParser, "initialize", initialize, 0); rb_define_method(cParser, "reset", initialize, 0); + rb_define_method(cParser, "request", request, 2); rb_define_method(cParser, "headers", headers, 2); rb_define_method(cParser, "trailers", headers, 2); rb_define_method(cParser, "filter_body", filter_body, 2); @@ -846,13 +1095,26 @@ void Init_kcar_ext(void) globals = rb_ary_new(); rb_global_variable(&globals); - C(globals, g_HOST, "HOST"); + C(globals, g_CONTENT_LENGTH, "CONTENT_LENGTH"); + C(globals, g_CONTENT_TYPE, "CONTENT_TYPE"); + C(globals, g_HTTP_HOST, "HTTP_HOST"); + C(globals, g_HTTP_CONNECTION, "HTTP_CONNECTION"); + C(globals, g_HTTP_TRAILER, "HTTP_TRAILER"); + C(globals, g_HTTP_TRANSFER_ENCODING, "HTTP_TRANSFER_ENCODING"); + C(globals, g_HTTP_VERSION, "HTTP_VERSION"); C(globals, g_PATH_INFO, "PATH_INFO"); C(globals, g_QUERY_STRING, "QUERY_STRING"); C(globals, g_REQUEST_METHOD, "REQUEST_METHOD"); C(globals, g_REQUEST_PATH, "REQUEST_PATH"); C(globals, g_REQUEST_URI, "REQUEST_URI"); + C(globals, g_SERVER_NAME, "SERVER_NAME"); + C(globals, g_SERVER_PORT, "SERVER_PORT"); + C(globals, g_SERVER_PROTOCOL, "SERVER_PROTOCOL"); C(globals, g_rack_url_scheme, "rack.url_scheme"); + C(globals, g_http, "http"); + C(globals, g_https, "https"); + C(globals, g_80, "80"); + C(globals, g_443, "443"); OBJ_FREEZE(globals); #undef C } diff --git a/test/test_request_parser.rb b/test/test_request_parser.rb new file mode 100644 index 0000000..5e97a0e --- /dev/null +++ b/test/test_request_parser.rb @@ -0,0 +1,862 @@ +# -*- encoding: binary -*- +## Copyright (c) 2005 Zed A. Shaw +# You can redistribute it and/or modify it under the same terms as Ruby 1.8 +# or GPL-2.0+ +# +# Additional work donated by contributors. See git history of +# unicorn for more information: git clone https://bogomips.org/unicorn.git + +require 'test/unit' +require 'digest' +require 'kcar' +require 'uri' + +class TestRequestParser < Test::Unit::TestCase + def setup + @hp = Kcar::Parser.new + @env = {} + end + + def test_parse_oneshot_simple + buf = "GET / HTTP/1.1\r\n\r\n" + http = 'http'.freeze + @env['rack.url_scheme'] = http + env = @hp.request(@env, buf.dup) + assert_same env, @env + exp = { + 'SERVER_PROTOCOL' => 'HTTP/1.1', + 'HTTP_VERSION' => 'HTTP/1.1', + 'REQUEST_PATH' => '/', + 'PATH_INFO' => '/', + 'REQUEST_URI' => '/', + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => '', + 'rack.url_scheme' => 'http', + } + assert_equal exp, env + assert_same env['HTTP_VERSION'], env['SERVER_PROTOCOL'] + assert_predicate env['HTTP_VERSION'], :frozen? + assert_predicate env['REQUEST_METHOD'], :frozen? + assert_same http, env['rack.url_scheme'] + assert_predicate @hp, :keepalive? + + @hp.reset + + buf = "G" + assert_nil @hp.request(@env, buf) + # try parsing again to ensure we were reset correctly + buf << "ET /hello-world HTTP/1.1\r\n\r\n" + assert_same env, @hp.request(env, buf) + + assert_equal 'HTTP/1.1', env['SERVER_PROTOCOL'] + assert_equal '/hello-world', env['REQUEST_PATH'] + assert_equal 'HTTP/1.1', env['HTTP_VERSION'] + assert_equal '/hello-world', env['REQUEST_URI'] + assert_equal 'GET', env['REQUEST_METHOD'] + assert_equal '', env['QUERY_STRING'] + assert @hp.keepalive? + end + + def test_tab_lws + @hp.request(@env, "GET / HTTP/1.1\r\nHost:\tfoo.bar\r\n\r\n") + assert_equal "foo.bar", @env['HTTP_HOST'] + end + + def test_connection_close_no_ka + @hp.request(@env = {}, "GET / HTTP/1.1\r\nConnection: close\r\n\r\n") + assert_equal 'GET', @env['REQUEST_METHOD'] + assert ! @hp.keepalive? + end + + def test_connection_keep_alive_ka + @hp.request(@env, "HEAD / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n") + assert @hp.keepalive? + end + + def test_connection_keep_alive_no_body + r = @hp.request(@env, "POST / HTTP/1.1\r\nConnection: keep-alive\r\n\r\n") + assert_same @env, r + assert @hp.keepalive? + end + + def test_connection_keep_alive_no_body_empty + buf = "POST / HTTP/1.1\r\n" \ + "Content-Length: 0\r\n" \ + "Connection: keep-alive\r\n\r\n" + assert_same @env, @hp.request(@env, buf) + assert @hp.keepalive? + end + + def test_connection_keep_alive_ka_bad_version + @hp.request(@env, "GET / HTTP/1.0\r\nConnection: keep-alive\r\n\r\n") + assert @hp.keepalive? + end + + def test_parse_server_host_default_port + buf = "GET / HTTP/1.1\r\nHost: foo\r\n\r\n" + assert_same @env, @hp.request(@env, buf) + assert_equal 'foo', @env['SERVER_NAME'] + assert_equal '80', @env['SERVER_PORT'] + assert_equal '', buf + assert @hp.keepalive? + end + + def test_parse_server_host_alt_port + buf = "GET / HTTP/1.1\r\nHost: foo:999\r\n\r\n" + @hp.request(@env, buf) + assert_equal 'foo', @env['SERVER_NAME'] + assert_equal '999', @env['SERVER_PORT'] + assert_equal '', buf + assert @hp.keepalive? + end + + def test_parse_server_host_empty_port + @hp.request(@env, "GET / HTTP/1.1\r\nHost: foo:\r\n\r\n") + assert_equal 'foo', @env['SERVER_NAME'] + assert_equal '80', @env['SERVER_PORT'] + assert @hp.keepalive? + end + + def test_parse_host_cont + @hp.request(@env, "GET / HTTP/1.1\r\nHost:\r\n foo\r\n\r\n") + assert_equal 'foo', @env['SERVER_NAME'] + assert_equal '80', @env['SERVER_PORT'] + assert @hp.keepalive? + end + + def test_preserve_existing_server_vars + @env = { + 'SERVER_NAME' => 'example.com', + 'SERVER_PORT' => '1234', + 'rack.url_scheme' => 'https' + } + @hp.request(@env, "GET / HTTP/1.0\r\n\r\n") + assert_equal 'example.com', @env['SERVER_NAME'] + assert_equal '1234', @env['SERVER_PORT'] + assert_equal 'https', @env['rack.url_scheme'] + end + + def test_parse_strange_headers + should_be_good = "GET / HTTP/1.1\r\naaaaaaaaaaaaa:++++++++++\r\n\r\n" + req = @hp.request(@env, should_be_good) + assert_same req, @env + assert_equal '', should_be_good + assert_predicate @hp, :keepalive? + assert_equal '++++++++++', @env['HTTP_AAAAAAAAAAAAA'] + end + + # legacy test case from Mongrel + # I still consider Pound irrelevant, unfortunately stupid clients that + # send extremely big headers do exist and they've managed to find us... + def test_nasty_pound_header + nasty_pound_header = "GET / HTTP/1.1\r\n" \ +"X-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n" \ +"\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n" \ +"\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n" \ +"\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n" \ +"\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n" \ +"\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n" \ +"\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n" \ +"\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n" \ +"\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n" \ +"\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n" \ +"\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n" \ +"\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n" \ +"\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n" \ +"\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgEBBAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n" \ +"\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n" \ +"\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n" \ +"\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n" \ +"\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n" \ +"\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n" \ +"\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n" \ +"\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n" \ +"\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n" \ +"\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n" \ +"\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n" \ +"\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n" \ +"\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n" \ +"\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n" \ +"\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n" \ +"\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n" \ +"\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n" \ +"\tRA==\r\n" \ +"\t-----END CERTIFICATE-----\r\n" \ +"\r\n" + ok = (/(-----BEGIN .*--END CERTIFICATE-----)/m =~ nasty_pound_header) + expect = $1.dup + assert ok, 'end certificate matched' + expect.gsub!(/\r\n\t/, ' ') + req = @hp.request(@env, nasty_pound_header.dup) + assert_equal expect, req['HTTP_X_SSL_BULLSHIT'] + end + + def test_multiline_header_0d0a + req = @hp.request(@env, "GET / HTTP/1.0\r\n" \ + "X-Multiline-Header: foo bar\r\n\tcha cha\r\n\tzha zha\r\n\r\n") + assert_same req, @env + assert_equal 'foo bar cha cha zha zha', req['HTTP_X_MULTILINE_HEADER'] + end + + def test_multiline_header_0a + req = @hp.request(@env, "GET / HTTP/1.0\n" \ + "X-Multiline-Header: foo bar\n\tcha cha\n\tzha zha\n\n") + assert_same req, @env + assert_equal 'foo bar cha cha zha zha', req['HTTP_X_MULTILINE_HEADER'] + end + + def test_continuation_eats_leading_spaces + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: \r\n" \ + "\t\r\n" \ + " \r\n" \ + " ASDF\r\n\r\n" + req = @hp.request(@env, header) + assert_same req, @env + assert_equal '', header + assert_equal 'ASDF', req['HTTP_X_ASDF'] + end + + def test_continuation_eats_scattered_leading_spaces + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: hi\r\n" \ + " y\r\n" \ + "\t\r\n" \ + " x\r\n" \ + " ASDF\r\n\r\n" + req = @hp.request(@env, header) + assert_same req, @env + assert_equal '', header + assert_equal 'hi y x ASDF', req['HTTP_X_ASDF'] + end + + def test_continuation_eats_trailing_spaces + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: \r\n" \ + "\t\r\n" \ + " b \r\n" \ + " ASDF\r\n\r\nZ" + req = @hp.request(@env, header) + assert_same req, @env + assert_equal 'Z', header + assert_equal 'b ASDF', req['HTTP_X_ASDF'] + end + + def test_continuation_with_absolute_uri_and_ignored_host_header + header = "GET http://example.com/ HTTP/1.1\r\n" \ + "Host: \r\n" \ + " example.org\r\n" \ + "\r\n" + req = @hp.request(@env, header) + assert_same req, @env + assert_equal 'example.com', req['HTTP_HOST'] + assert_equal 'http', req['rack.url_scheme'] + assert_equal '/', req['PATH_INFO'] + assert_equal 'example.com', req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + end + + # this may seem to be testing more of an implementation detail, but + # it also helps ensure we're safe in the presence of multiple parsers + # in case we ever go multithreaded/evented... + def test_resumable_continuations + nr = 1000 + header = "GET / HTTP/1.1\r\n" \ + "X-ASDF: \r\n" \ + " hello\r\n" + tmp = [] + nr.times { |i| + hp = Kcar::Parser.new + env = {} + assert_nil hp.request(env, buf = "#{header} #{i}\r\n") + asdf = env['HTTP_X_ASDF'] + assert_equal "hello #{i}", asdf + tmp << [ hp, asdf, env, buf ] + } + tmp.each_with_index { |(hp, asdf, env, buf), i| + buf << " .\r\n\r\n" + assert_same env, hp.request(env, buf) + assert_equal "hello #{i} .", asdf + } + end + + def test_invalid_continuation + header = "GET / HTTP/1.1\r\n" \ + " y\r\n" \ + "Host: hello\r\n" \ + "\r\n" + buf = header.dup + assert_raises(Kcar::ParserError) do + @hp.request(@env, buf) + end + assert_equal header, buf, 'no modification on invalid' + end + + def test_parse_ie6_urls + %w(/some/random/path" + /some/random/path> + /some/random/path< + /we/love/you/ie6?q=<""> + /url?<="&>=" + /mal"formed"? + ).each do |path| + sorta_safe = %(GET #{path} HTTP/1.1\r\n\r\n) + assert_same @env, @hp.request(@env, sorta_safe) + assert_equal path, @env['REQUEST_URI'] + assert_equal '', sorta_safe + assert @hp.keepalive? + @hp.reset + end + end + + def test_parse_error + bad_http = "GET / SsUTF/1.1" + assert_raises(Kcar::ParserError) { @hp.request(@env, bad_http) } + + # make sure we can recover + @env.clear + @hp.reset + assert_equal @env, @hp.request(@env, "GET / HTTP/1.0\r\n\r\n") + assert ! @hp.keepalive? + end + + def test_piecemeal + http = "GET" + req = @env + assert_nil @hp.request(@env, http) + assert_nil @hp.request(@env, http) + assert_nil @hp.request(@env, http << " / HTTP/1.0") + assert_equal '/', req['REQUEST_PATH'] + assert_equal '/', req['REQUEST_URI'] + assert_equal 'GET', req['REQUEST_METHOD'] + assert_nil @hp.request(req, http << "\r\n") + assert_equal 'HTTP/1.0', req['HTTP_VERSION'] + assert_nil @hp.request(req, http << "\r") + assert_same req, @hp.request(req, http << "\n") + assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL'] + assert_nil req['FRAGMENT'] + assert_equal '', req['QUERY_STRING'] + assert_equal "", http + assert ! @hp.keepalive? + end + + # not common, but underscores do appear in practice + def test_absolute_uri_underscores + http = "GET https://under_score.example.com/foo?q=bar HTTP/1.0\r\n\r\n" + req = @hp.request(@env, http) + assert_same req, @env + assert_equal 'https', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + assert_equal 'under_score.example.com', req['HTTP_HOST'] + assert_equal 'under_score.example.com', req['SERVER_NAME'] + assert_equal '443', req['SERVER_PORT'] + assert_equal "", http + assert ! @hp.keepalive? + end + + # some dumb clients add users because they're stupid + def test_absolute_uri_w_user + http = "GET http://user%20space@example.com/foo?q=bar HTTP/1.0\r\n\r\n" + req = @hp.request(@env, http) + assert_same req, @env + assert_equal 'http', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + assert_equal 'example.com', req['HTTP_HOST'] + assert_equal 'example.com', req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + assert_equal "", http + assert ! @hp.keepalive? + end + + # since Mongrel supported anything URI.parse supported, we're stuck + # supporting everything URI.parse supports + def test_absolute_uri_uri_parse + require 'uri' + "#{URI::REGEXP::PATTERN::UNRESERVED};:&=+$,".split(//).each do |char| + http = "GET http://#{char}@example.com/ HTTP/1.0\r\n\r\n" + req = @hp.request(@env, http) + assert_equal 'http', req['rack.url_scheme'] + assert_equal '/', req['REQUEST_URI'] + assert_equal '/', req['REQUEST_PATH'] + assert_equal '', req['QUERY_STRING'] + + assert_equal 'example.com', req['HTTP_HOST'] + assert_equal 'example.com', req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + assert_equal "", http + assert ! @hp.keepalive? + @hp.reset + end + end + + def test_absolute_uri + req = @hp.request(@env, "GET http://example.com/foo?q=bar HTTP/1.0\r\n\r\n") + assert_equal 'http', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + assert_equal '80', req['SERVER_PORT'] + assert_equal 'example.com', req['HTTP_HOST'] + assert_equal 'example.com', req['SERVER_NAME'] + assert ! @hp.keepalive? + end + + def test_absolute_uri_https + http = "GET https://example.com/foo?q=bar HTTP/1.1\r\n" \ + "X-Foo: bar\n\r\n" + req = @hp.request(@env, http) + assert_equal 'https', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + assert_equal 'example.com', req['HTTP_HOST'] + assert_equal 'example.com', req['SERVER_NAME'] + assert_equal '443', req['SERVER_PORT'] + assert_equal "", http + assert @hp.keepalive? + end + + # Host: header should be ignored for absolute URIs + def test_absolute_uri_with_port + req = @hp.request(@env,"GET http://example.com:8080/foo?q=bar HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n") + assert_equal 'http', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + assert_equal 'example.com:8080', req['HTTP_HOST'] + assert_equal 'example.com', req['SERVER_NAME'] + assert_equal '8080', req['SERVER_PORT'] + assert @hp.keepalive? + end + + def test_absolute_uri_with_empty_port + req = @hp.request(@env, "GET https://example.com:/foo?q=bar HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n") + assert_same req, @env + assert_equal 'https', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + assert_equal 'example.com:', req['HTTP_HOST'] + assert_equal 'example.com', req['SERVER_NAME'] + assert_equal '443', req['SERVER_PORT'] + assert @hp.keepalive? + end + + def test_absolute_ipv6_uri + url = "http://[::1]/foo?q=bar" + http = "GET #{url} HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n" + req = @hp.request(@env, http) + assert_same req, @env + assert_equal 'http', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + + uri = URI.parse(url) + assert_equal "[::1]", uri.host, + "URI.parse changed upstream for #{url}? host=#{uri.host}" + assert_equal "[::1]", req['HTTP_HOST'] + assert_equal "[::1]", req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + assert_equal "", http + end + + def test_absolute_ipv6_uri_alpha + url = "http://[::a]/" + http = "GET #{url} HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n" + req = @hp.request(@env, http) + assert_equal 'http', req['rack.url_scheme'] + uri = URI.parse(url) + assert_equal "[::a]", uri.host, + "URI.parse changed upstream for #{url}? host=#{uri.host}" + assert_equal "[::a]", req['HTTP_HOST'] + assert_equal "[::a]", req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + end + + def test_absolute_ipv6_uri_alpha_2 + url = "http://[::B]/" + http = "GET #{url} HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n" + req = @hp.request(@env, http) + assert_equal 'http', req['rack.url_scheme'] + + uri = URI.parse(url) + assert_equal "[::B]", uri.host, + "URI.parse changed upstream for #{url}? host=#{uri.host}" + assert_equal "[::B]", req['HTTP_HOST'] + assert_equal "[::B]", req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + end + + def test_absolute_ipv6_uri_with_empty_port + url = "https://[::1]:/foo?q=bar" + http = "GET #{url} HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n" + req = @hp.request(@env, http) + assert_equal 'https', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + + uri = URI.parse(url) + assert_equal "[::1]", uri.host, + "URI.parse changed upstream for #{url}? host=#{uri.host}" + assert_equal "[::1]:", req['HTTP_HOST'] + assert_equal "[::1]", req['SERVER_NAME'] + assert_equal '443', req['SERVER_PORT'] + assert_equal "", http + end + + def test_absolute_ipv6_uri_with_port + url = "https://[::1]:666/foo?q=bar" + http = "GET #{url} HTTP/1.1\r\n" \ + "Host: bad.example.com\r\n\r\n" + req = @hp.request(@env, http) + assert_equal 'https', req['rack.url_scheme'] + assert_equal '/foo?q=bar', req['REQUEST_URI'] + assert_equal '/foo', req['REQUEST_PATH'] + assert_equal 'q=bar', req['QUERY_STRING'] + + uri = URI.parse(url) + assert_equal "[::1]", uri.host, + "URI.parse changed upstream for #{url}? host=#{uri.host}" + assert_equal "[::1]:666", req['HTTP_HOST'] + assert_equal "[::1]", req['SERVER_NAME'] + assert_equal '666', req['SERVER_PORT'] + assert_equal "", http + end + + def test_ipv6_host_header + buf = "GET / HTTP/1.1\r\n" \ + "Host: [::1]\r\n\r\n" + req = @hp.request(@env, buf) + assert_equal "[::1]", req['HTTP_HOST'] + assert_equal "[::1]", req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + end + + def test_ipv6_host_header_with_port + req = @hp.request(@env, "GET / HTTP/1.1\r\n" \ + "Host: [::1]:666\r\n\r\n") + assert_equal "[::1]", req['SERVER_NAME'] + assert_equal '666', req['SERVER_PORT'] + assert_equal "[::1]:666", req['HTTP_HOST'] + end + + def test_ipv6_host_header_with_empty_port + req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1]:\r\n\r\n") + assert_equal "[::1]", req['SERVER_NAME'] + assert_equal '80', req['SERVER_PORT'] + assert_equal "[::1]:", req['HTTP_HOST'] + end + + # XXX Highly unlikely..., just make sure we don't segfault or assert on it + def test_broken_ipv6_host_header + req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: [::1:\r\n\r\n") + assert_equal "[", req['SERVER_NAME'] + assert_equal ':1:', req['SERVER_PORT'] + assert_equal "[::1:", req['HTTP_HOST'] + end + + def test_put_body_oneshot + buf = "PUT / HTTP/1.0\r\nContent-Length: 5\r\n\r\nabcde" + req = @hp.request(@env, buf) + assert_equal '/', req['REQUEST_PATH'] + assert_equal '/', req['REQUEST_URI'] + assert_equal 'PUT', req['REQUEST_METHOD'] + assert_equal 'HTTP/1.0', req['HTTP_VERSION'] + assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL'] + assert_equal "abcde", buf + end + + def test_put_body_later + buf = "PUT /l HTTP/1.0\r\nContent-Length: 5\r\n\r\n" + req = @hp.request(@env, buf) + assert_equal '/l', req['REQUEST_PATH'] + assert_equal '/l', req['REQUEST_URI'] + assert_equal 'PUT', req['REQUEST_METHOD'] + assert_equal 'HTTP/1.0', req['HTTP_VERSION'] + assert_equal 'HTTP/1.0', req['SERVER_PROTOCOL'] + assert_equal "", buf + end + + def test_unknown_methods + %w(GETT HEADR XGET XHEAD).each { |m| + s = "#{m} /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n" + req = @hp.request(@env, s) + assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI'] + assert_nil req['FRAGMENT'] + assert_equal 'page=1', req['QUERY_STRING'] + assert_equal "", s + assert_equal m, req['REQUEST_METHOD'] + @hp.reset + } + end + + def test_fragment_in_uri + get = "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n\r\n" + req = @hp.request(@env, get) + assert_equal '/forums/1/topics/2375?page=1', req['REQUEST_URI'] + assert_nil req['FRAGMENT'] + assert_equal 'page=1', req['QUERY_STRING'] + assert_equal '', get + end + + # lame random garbage maker + def rand_data(min, max, readable=true) + count = min + ((rand(max)+1) *10).to_i + res = count.to_s + "/" + + if readable + res << Digest::SHA1.hexdigest(rand(count * 100).to_s) * (count / 40) + else + res << Digest::SHA1.digest(rand(count * 100).to_s) * (count / 20) + end + + return res + end + + def test_horrible_queries + # then that large header names are caught + 10.times do |c| + get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-#{rand_data(1024, 1024+(c*1024))}: Test\r\n\r\n" + assert_raises(Kcar::ParserError, Kcar::RequestURITooLongError) do + @hp.request(@env, get) + @hp.clear + end + end + + # then that large mangled field values are caught + 10.times do |c| + get = "GET /#{rand_data(10,120)} HTTP/1.1\r\nX-Test: #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n" + assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do + @hp.request(@env, get) + @hp.clear + end + end + + # then large headers are rejected too FIXME not supported, yet + if false + get = "GET /#{rand_data(10,120)} HTTP/1.1\r\n" + get << "X-Test: test\r\n" * (80 * 1024) + @hp.reset + assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do + @hp.request(@env, get) + end + end + + # finally just that random garbage gets blocked all the time + 10.times do |c| + get = "GET #{rand_data(1024, 1024+(c*1024), false)} #{rand_data(1024, 1024+(c*1024), false)}\r\n\r\n" + @hp.reset + assert_raises(Kcar::ParserError,Kcar::RequestURITooLongError) do + @hp.request(@env, get) + end + end + end + + def test_leading_tab + get = "GET / HTTP/1.1\r\nHost:\texample.com\r\n\r\n" + req = @hp.request(@env, get) + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_trailing_whitespace + get = "GET / HTTP/1.1\r\nHost: example.com \r\n\r\n" + req = @hp.request(@env, get) + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_trailing_tab + get = "GET / HTTP/1.1\r\nHost: example.com\t\r\n\r\n" + req = @hp.request(@env, get) + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_trailing_multiple_linear_whitespace + get = "GET / HTTP/1.1\r\nHost: example.com\t \t \t\r\n\r\n" + req = @hp.request(@env, get) + assert_equal 'example.com', req['HTTP_HOST'] + end + + def test_embedded_linear_whitespace_ok + get = "GET / HTTP/1.1\r\nX-Space: hello\t world\t \r\n\r\n" + req = @hp.request(@env, get) + assert_equal "hello\t world", req["HTTP_X_SPACE"] + end + + def test_null_byte_header + get = "GET / HTTP/1.1\r\nHost: \0\r\n\r\n" + assert_raises(Kcar::ParserError) { @hp.request(@env, get) } + end + + def test_null_byte_in_middle + get = "GET / HTTP/1.1\r\nHost: hello\0world\r\n\r\n" + assert_raises(Kcar::ParserError) { @hp.request(@env, get) } + end + + def test_null_byte_at_end + get = "GET / HTTP/1.1\r\nHost: hello\0\r\n\r\n" + assert_raises(Kcar::ParserError) { @hp.request(@env, get) } + end + + def test_empty_header + get = "GET / HTTP/1.1\r\nHost: \r\n\r\n" + req = @hp.request(@env, get) + assert_equal '', req['HTTP_HOST'] + end + + def test_connection_TE + req = @hp.request(@env, "GET / HTTP/1.1\r\nHost: example.com\r\n" \ + "Connection: TE\r\n" \ + "TE: trailers\r\n\r\n") + assert_predicate @hp, :keepalive? + assert_equal 'TE', req['HTTP_CONNECTION'] + end + + def test_repeat_headers + str = "PUT / HTTP/1.1\r\n" \ + "Trailer: Content-MD5\r\n" \ + "Trailer: Content-SHA1\r\n" \ + "transfer-Encoding: chunked\r\n\r\n" \ + "1\r\na\r\n2\r\n..\r\n0\r\n" + req = @hp.request(@env, str) + assert_equal 'Content-MD5,Content-SHA1', req['HTTP_TRAILER'] + assert_equal "1\r\na\r\n2\r\n..\r\n0\r\n", str + assert @hp.keepalive? + end + + def test_http_09 + buf = "GET /read-rfc1945-if-you-dont-believe-me\r\n" + req = @hp.request(@env, buf) + assert_equal '', buf + expect = { + "REQUEST_PATH" => "/read-rfc1945-if-you-dont-believe-me", + "PATH_INFO" => "/read-rfc1945-if-you-dont-believe-me", + "REQUEST_URI" => "/read-rfc1945-if-you-dont-believe-me", + "SERVER_PROTOCOL" => "HTTP/0.9", + "HTTP_VERSION" => "HTTP/0.9", + "REQUEST_METHOD" => "GET", + "QUERY_STRING" => "" + } + assert_equal expect, req + end + + def test_path_info_semicolon + qs = "QUERY_STRING" + pi = "PATH_INFO" + req = {} + str = "GET %s HTTP/1.1\r\nHost: example.com\r\n\r\n" + { + "/1;a=b?c=d&e=f" => { qs => "c=d&e=f", pi => "/1;a=b" }, + "/1?c=d&e=f" => { qs => "c=d&e=f", pi => "/1" }, + "/1;a=b" => { qs => "", pi => "/1;a=b" }, + "/1;a=b?" => { qs => "", pi => "/1;a=b" }, + "/1?a=b;c=d&e=f" => { qs => "a=b;c=d&e=f", pi => "/1" }, + "*" => { qs => "", pi => "" }, + }.each do |uri,expect| + @env.clear + @hp.reset + buf = str % [ uri ] + req = @hp.request(@env, buf) + assert_equal uri, req["REQUEST_URI"], "REQUEST_URI mismatch" + assert_equal expect[qs], req[qs], "#{qs} mismatch" + assert_equal expect[pi], req[pi], "#{pi} mismatch" + next if uri == "*" + uri = URI.parse("http://example.com#{uri}") + assert_equal uri.query.to_s, req[qs], "#{qs} mismatch URI.parse disagrees" + assert_equal uri.path, req[pi], "#{pi} mismatch URI.parse disagrees" + end + end + + def test_path_info_semicolon_absolute + qs = "QUERY_STRING" + pi = "PATH_INFO" + str = "GET http://example.com%s HTTP/1.1\r\nHost: www.example.com\r\n\r\n" + { + "/1;a=b?c=d&e=f" => { qs => "c=d&e=f", pi => "/1;a=b" }, + "/1?c=d&e=f" => { qs => "c=d&e=f", pi => "/1" }, + "/1;a=b" => { qs => "", pi => "/1;a=b" }, + "/1;a=b?" => { qs => "", pi => "/1;a=b" }, + "/1?a=b;c=d&e=f" => { qs => "a=b;c=d&e=f", pi => "/1" }, + }.each do |uri,expect| + @hp.reset + @env.clear + buf = str % [ uri ] + req = @hp.request(@env, buf) + assert_equal uri, req["REQUEST_URI"], "REQUEST_URI mismatch" + assert_equal "example.com", req["HTTP_HOST"], "Host: mismatch" + assert_equal expect[qs], req[qs], "#{qs} mismatch" + assert_equal expect[pi], req[pi], "#{pi} mismatch" + end + end + + def test_negative_content_length + str = "PUT / HTTP/1.1\r\n" \ + "Content-Length: -1\r\n" \ + "\r\n" + assert_raises(Kcar::ParserError) do + @hp.request(@env, str) + end + end + + def test_invalid_content_length + str = "PUT / HTTP/1.1\r\n" \ + "Content-Length: zzzzz\r\n" \ + "\r\n" + assert_raises(Kcar::ParserError) do + @hp.request(@env, str) + end + end + + def test_ignore_version_header + req = @hp.request(@env, "GET / HTTP/1.1\r\nVersion: hello\r\n\r\n") + expect = { + 'REQUEST_PATH' => '/', + 'SERVER_PROTOCOL' => 'HTTP/1.1', + 'PATH_INFO' => '/', + 'HTTP_VERSION' => 'HTTP/1.1', + 'REQUEST_URI' => '/', + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => '' + } + assert_equal expect, req + end + + def test_pipelined_requests + expect = { + 'HTTP_HOST' => 'example.com', + 'SERVER_NAME' => 'example.com', + 'SERVER_PORT' => '80', + 'REQUEST_PATH' => '/', + 'SERVER_PROTOCOL' => 'HTTP/1.1', + 'PATH_INFO' => '/', + 'HTTP_VERSION' => 'HTTP/1.1', + 'REQUEST_URI' => '/', + 'REQUEST_METHOD' => 'GET', + 'QUERY_STRING' => '' + } + req1 = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n" + req2 = "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n" + buf = req1 + req2 + env1 = @hp.request(@env, buf) + assert_equal expect, env1 + assert_equal req2, buf + assert_predicate @hp, :keepalive? + @env.clear + @hp.reset + env2 = @hp.request(@env, buf) + expect['HTTP_HOST'] = expect['SERVER_NAME'] = 'www.example.com' + assert_equal expect, env2 + assert_equal '', buf + end +end