From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.6 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id DDEB0211B5 for ; Sat, 1 Dec 2018 13:31:26 +0000 (UTC) From: Eric Wong To: kcar-public@bogomips.org Subject: [PATCH 02/11] begin implementing request parsing Date: Sat, 1 Dec 2018 13:31:16 +0000 Message-Id: <20181201133125.5524-3-e@80x24.org> In-Reply-To: <20181201133125.5524-1-e@80x24.org> References: <20181201133125.5524-1-e@80x24.org> List-Id: Not wired up, yet; but for now everything compiles and existing tests run. --- ext/kcar/kcar.rl | 107 ++++++++++++++++++++++++++++++++++- ext/kcar/kcar_http_common.rl | 36 +++++++++++- 2 files changed, 138 insertions(+), 5 deletions(-) diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl index 79f65db..033e6ea 100644 --- a/ext/kcar/kcar.rl +++ b/ext/kcar/kcar.rl @@ -15,6 +15,10 @@ static VALUE eParserError; static ID id_uminus, id_sq, id_sq_set; +static VALUE g_rack_url_scheme, + g_HOST, g_PATH_INFO, g_QUERY_STRING, + g_REQUEST_METHOD, g_REQUEST_PATH, g_REQUEST_URI; +static VALUE e413, e414; /** Defines common length and error messages for input length validation. 
*/ #define DEF_MAX_LENGTH(N, length) \ @@ -31,10 +35,20 @@ static ID id_uminus, id_sq, id_sq_set; rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \ } while (0) +#define VALIDATE_MAX_URI_LENGTH(len, N) do { \ + if (len > MAX_##N##_LENGTH) \ + rb_raise(e414, MAX_##N##_LENGTH_ERR); \ +} while (0) + /* Defines the maximum allowed lengths for various input elements.*/ DEF_MAX_LENGTH(FIELD_NAME, 256); DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024); DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32))); +DEF_MAX_LENGTH(REQUEST_URI, 1024 * 15); +DEF_MAX_LENGTH(FRAGMENT, 1024); /* just in case (stolen from Mongrel) */ +DEF_MAX_LENGTH(REQUEST_PATH, 4096); /* common PATH_MAX on modern systems */ +DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10)); + #define UH_FL_CHUNKED 0x1 #define UH_FL_HASBODY 0x2 @@ -90,6 +104,13 @@ static unsigned int ulong2uint(unsigned long n) #define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl)) #define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl)) +/* Downcases a single ASCII character. Locale-agnostic. 
*/ +static void downcase_char(char *c) +{ + if (*c >= 'A' && *c <= 'Z') + *c |= 0x20; +} + static int is_lws(char c) { return (c == ' ' || c == '\t'); @@ -153,7 +174,54 @@ static void hp_keepalive_connection(struct http_parser *hp, VALUE val) } static void -http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len) +request_method(VALUE env, const char *ptr, size_t len) +{ + rb_hash_aset(env, g_REQUEST_METHOD, str_new_dd_freeze(ptr, len)); +} + +static void +url_scheme(VALUE env, const char *ptr, size_t len) +{ + rb_hash_aset(env, g_rack_url_scheme, str_new_dd_freeze(ptr, len)); +} + +static void +request_host(VALUE env, const char *ptr, size_t len) +{ + rb_hash_aset(env, g_HOST, str_new_dd_freeze(ptr, len)); +} + +static void +request_uri(VALUE env, const char *ptr, size_t len) +{ + VALIDATE_MAX_URI_LENGTH(len, REQUEST_URI); + rb_hash_aset(env, g_REQUEST_URI, rb_str_new(ptr, len)); +} + +static void +query_string(VALUE env, const char *ptr, size_t len) +{ + VALIDATE_MAX_URI_LENGTH(len, QUERY_STRING); + rb_hash_aset(env, g_QUERY_STRING, rb_str_new(ptr, len)); +} + +static void +request_path(VALUE env, const char *ptr, size_t len) +{ + VALUE val; + + VALIDATE_MAX_URI_LENGTH(len, REQUEST_PATH); + val = rb_hash_aset(env, g_REQUEST_PATH, rb_str_new(ptr, len)); + + /* rack says PATH_INFO must start with "/" or be empty */ + if (CONST_MEM_EQ("*", ptr, len)) + val = rb_str_new(NULL, 0); + + rb_hash_aset(env, g_PATH_INFO, val); +} + +static void +http_version(struct http_parser *hp, const char *ptr, size_t len) { if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) { /* HTTP/1.1 implies keepalive unless "Connection: close" is set */ @@ -328,12 +396,24 @@ static void write_value(VALUE hdr, struct http_parser *hp, action mark {MARK(mark, fpc); } + action snake_upcase_field { snake_upcase_char(deconst(fpc)); } + action downcase_char { downcase_char(deconst(fpc)); } + action request_method { request_method(hdr, PTR_TO(mark), LEN(mark, fpc)); } + action url_scheme { 
url_scheme(hdr, PTR_TO(mark), LEN(mark, fpc)); } + action host { request_host(hdr, PTR_TO(mark), LEN(mark, fpc)); } + action request_uri { request_uri(hdr, PTR_TO(mark), LEN(mark, fpc)); } + + action start_query { MARK(start.query, fpc); } + action query_string { + query_string(hdr, PTR_TO(start.query), LEN(start.query, fpc)); + } + action request_path { request_path(hdr, PTR_TO(mark), LEN(mark, fpc)); } action start_field { MARK(start.field, fpc); } action write_field { hp->s.field_len = LEN(start.field, fpc); } action start_value { MARK(mark, fpc); } action write_value { write_value(hdr, hp, buffer, fpc); } action write_cont_value { write_cont_value(hp, buffer, fpc); } - action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } + action http_version { http_version(hp, PTR_TO(mark), LEN(mark, fpc)); } action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } action add_to_chunk_size { @@ -720,6 +800,7 @@ static VALUE filter_body(VALUE self, VALUE buf, VALUE data) void Init_kcar_ext(void) { + static VALUE globals; VALUE mKcar = rb_define_module("Kcar"); VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject); @@ -729,6 +810,10 @@ void Init_kcar_ext(void) * This is raised if there are parsing errors. 
*/ eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError); + e413 = rb_define_class_under(mKcar, "RequestEntityTooLargeError", + eParserError); + e414 = rb_define_class_under(mKcar, "RequestURITooLongError", + eParserError); rb_define_alloc_func(cParser, kcar_alloc); rb_define_method(cParser, "initialize", initialize, 0); @@ -759,4 +844,22 @@ void Init_kcar_ext(void) id_sq = rb_intern("[]"); id_sq_set = rb_intern("[]="); id_uminus = rb_intern("-@"); + + /* TODO: gperf to make a perfect hash of common strings */ +#define C(ary, var, cstr) do { \ + var = str_new_dd_freeze((cstr), sizeof(cstr) - 1); \ + rb_ary_push((ary), (var)); \ +} while (0); + + globals = rb_ary_new(); + rb_global_variable(&globals); + C(globals, g_HOST, "HOST"); + C(globals, g_PATH_INFO, "PATH_INFO"); + C(globals, g_QUERY_STRING, "QUERY_STRING"); + C(globals, g_REQUEST_METHOD, "REQUEST_METHOD"); + C(globals, g_REQUEST_PATH, "REQUEST_PATH"); + C(globals, g_REQUEST_URI, "REQUEST_URI"); + C(globals, g_rack_url_scheme, "rack.url_scheme"); + OBJ_FREEZE(globals); +#undef C } diff --git a/ext/kcar/kcar_http_common.rl b/ext/kcar/kcar_http_common.rl index cb89248..0c596bc 100644 --- a/ext/kcar/kcar_http_common.rl +++ b/ext/kcar/kcar_http_common.rl @@ -25,10 +25,38 @@ # elements token = (ascii -- (CTL | tspecials)); + +# URI schemes and absolute paths + scheme = ( "http"i ("s"i)? ) $downcase_char >mark %url_scheme; + hostname = ((alnum | "-" | "." | "_")+ | ("[" (":" | xdigit)+ "]")); + host_with_port = (hostname (":" digit*)?) >mark %host; + userinfo = ((unreserved | escape | ";" | ":" | "&" | "=" | "+")+ "@")*; + + path = ( pchar+ ( "/" pchar* )* ) ; + query = ( uchar | reserved )* %query_string ; + param = ( pchar | "/" )* ; + params = ( param ( ";" param )* ) ; + rel_path = (path? (";" params)? %request_path) ("?" 
%start_query query)?; + absolute_path = ( "/"+ rel_path ); + path_uri = absolute_path > mark %request_uri; + Absolute_URI = (scheme "://" userinfo host_with_port path_uri); + + Request_URI = ((absolute_path | "*") >mark %request_uri) | Absolute_URI; + + # let's not waste cycles setting fragment in the request, + # valid clients do not send it, but we will just silently ignore it. + Fragment = ( uchar | reserved )*; + + Method = (token){1,20} >mark %request_method; + GetOnly = "GET" >mark %request_method; + + http_number = ( digit+ "." digit+ ) ; + HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ; + Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " " + HTTP_Version CRLF ) ; + phrase = (any -- CRLF)+; Status_Phrase = (digit+ (" "+ phrase)?) >mark %status_phrase ; - http_number = (digit+ "." digit+) ; - HTTP_Version = ("HTTP/" http_number) >mark %http_version ; Status_Line = HTTP_Version " "+ Status_Phrase :> CRLF; field_name = ( token -- ":" )+ >start_field %write_field; @@ -51,7 +79,9 @@ Trailers := (message_header)* CRLF @end_trailers; FullResponse = Status_Line (message_header)* CRLF @header_done; + FullRequest = Request_Line (message_header)* CRLF @header_done; + SimpleRequest = GetOnly " " Request_URI ("#"Fragment){0,1} CRLF @header_done; -main := FullResponse; +main := FullResponse | FullRequest | SimpleRequest; }%%