From 094e419ce649632d4cff8be3848f2b8d395cb9ba Mon Sep 17 00:00:00 2001 From: normalperson Date: Thu, 6 Mar 2008 07:41:20 +0000 Subject: ext/http11: modify the C Ragel parser to upper-snake-case headers in-place This is based on Zed's suggestion and helps take complexity out of the hand-written C code, allowing memcpy() to be used instead. Zed Shaw wrote in <20080303044659.5a550c19.zedshaw@zedshaw.com>: > * Also, now that I think about it, if you don't care that the original > string is modified in place then you can just have ragel do all of this > as it goes. Simply modify the parser to have it do this transform on > the header chars using the existing pointer. That'd probably be > alright since people don't usually keep the input headers around when > using the mongrel parser. I don't have a working Java runtime, so I've only made the bare minimum modification to the http11_parser.java.rl file which allows Ragel to still work with it. All the other Java parts are untouched and whatever upper-casing routine was used before continues to be used now. git-svn-id: svn+ssh://rubyforge.org/var/svn/mongrel/trunk@990 19e92222-5c0b-0410-8929-a290d50e31e9 --- ext/http11/ext_help.h | 2 - ext/http11/http11.c | 10 +-- ext/http11/http11_parser.c | 172 ++++++++++++++++++++----------------- ext/http11/http11_parser.java.rl | 1 + ext/http11/http11_parser.rl | 13 +++ ext/http11/http11_parser_common.rl | 2 +- 6 files changed, 111 insertions(+), 89 deletions(-) (limited to 'ext') diff --git a/ext/http11/ext_help.h b/ext/http11/ext_help.h index 1017c64..8b4d754 100644 --- a/ext/http11/ext_help.h +++ b/ext/http11/ext_help.h @@ -4,8 +4,6 @@ #define RAISE_NOT_NULL(T) if(T == NULL) rb_raise(rb_eArgError, "NULL found for " # T " when shouldn't be."); #define DATA_GET(from,type,name) Data_Get_Struct(from,type,name); RAISE_NOT_NULL(name); #define REQUIRE_TYPE(V, T) if(TYPE(V) != T) rb_raise(rb_eTypeError, "Wrong argument type for " # V " required " # T); -#define ASCII_UPCASE_CHAR(ch) (ch & ~0x20) - #ifdef DEBUG #define TRACE() fprintf(stderr, "> %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__) diff --git a/ext/http11/http11.c b/ext/http11/http11.c index 3cb6697..1e028ee 100644 --- a/ext/http11/http11.c +++ b/ext/http11/http11.c @@ -68,8 +68,6 @@ DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32))); void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen) { - char *ch; - const char *fch; VALUE req = (VALUE)data; VALUE v = Qnil; VALUE f = Qnil; @@ -90,13 +88,7 @@ void http_field(void *data, const char *field, size_t flen, const char *value, s memcpy(RSTRING_PTR(f), RSTRING_PTR(global_http_prefix), RSTRING_LEN(global_http_prefix)); - - ch = RSTRING_PTR(f) + RSTRING_LEN(global_http_prefix); - for(fch = field; flen-- != 0; ++fch) { - *ch++ = (*fch >= 'a' && *fch <= 'z') ? - ASCII_UPCASE_CHAR(*fch) : - (*fch == '-' ? '_' : *fch); - } + memcpy(RSTRING_PTR(f) + RSTRING_LEN(global_http_prefix), field, flen); rb_hash_aset(req, f, v); } diff --git a/ext/http11/http11_parser.c b/ext/http11/http11_parser.c index 1712d29..a62cbe0 100644 --- a/ext/http11/http11_parser.c +++ b/ext/http11/http11_parser.c @@ -10,34 +10,46 @@ #include #include +/* + * capitalizes all lower-case ASCII characters, + * converts dashes to underscores. + */ +static void snake_upcase_char(char *c) +{ + if (*c >= 'a' && *c <= 'z') + *c &= ~0x20; + else if (*c == '-') + *c = '_'; +} + #define LEN(AT, FPC) (FPC - buffer - parser->AT) #define MARK(M,FPC) (parser->M = (FPC) - buffer) #define PTR_TO(F) (buffer + parser->F) /** Machine **/ -#line 74 "http11_parser.rl" +#line 87 "http11_parser.rl" /** Data **/ -#line 25 "http11_parser.c" +#line 37 "http11_parser.c" static const int http_parser_start = 1; static const int http_parser_first_final = 57; static const int http_parser_error = 0; static const int http_parser_en_main = 1; -#line 78 "http11_parser.rl" +#line 91 "http11_parser.rl" int http_parser_init(http_parser *parser) { int cs = 0; -#line 37 "http11_parser.c" +#line 49 "http11_parser.c" { cs = http_parser_start; } -#line 82 "http11_parser.rl" +#line 95 "http11_parser.rl" parser->cs = cs; parser->body_start = 0; parser->content_len = 0; @@ -65,7 +77,7 @@ size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len, -#line 69 "http11_parser.c" +#line 81 "http11_parser.c" { if ( p == pe ) goto _out; @@ -88,14 +100,14 @@ case 1: st0: goto _out0; tr0: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st2; st2: if ( ++p == pe ) goto _out2; case 2: -#line 99 "http11_parser.c" +#line 111 "http11_parser.c" switch( (*p) ) { case 32: goto tr2; case 36: goto st38; @@ -111,7 +123,7 @@ case 2: goto st38; goto st0; tr2: -#line 36 "http11_parser.rl" +#line 49 "http11_parser.rl" { if(parser->request_method != NULL) parser->request_method(parser->data, PTR_TO(mark), LEN(mark, p)); @@ -121,7 +133,7 @@ st3: if ( ++p == pe ) goto _out3; case 3: -#line 125 "http11_parser.c" +#line 137 "http11_parser.c" switch( (*p) ) { case 42: goto tr4; case 43: goto tr5; @@ -138,66 +150,66 @@ case 3: goto tr5; goto st0; tr4: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st4; st4: if ( ++p == pe ) goto _out4; case 4: -#line 149 "http11_parser.c" +#line 161 "http11_parser.c" switch( (*p) ) { case 32: goto tr8; case 35: goto tr9; } goto st0; tr8: -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr30: -#line 44 "http11_parser.rl" +#line 57 "http11_parser.rl" { if(parser->fragment != NULL) parser->fragment(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr40: -#line 60 "http11_parser.rl" +#line 73 "http11_parser.rl" { if(parser->request_path != NULL) parser->request_path(parser->data, PTR_TO(mark), LEN(mark,p)); } -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr51: -#line 49 "http11_parser.rl" +#line 62 "http11_parser.rl" {MARK(query_start, p); } -#line 50 "http11_parser.rl" +#line 63 "http11_parser.rl" { if(parser->query_string != NULL) parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st5; tr55: -#line 50 "http11_parser.rl" +#line 63 "http11_parser.rl" { if(parser->query_string != NULL) parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); @@ -207,19 +219,19 @@ st5: if ( ++p == pe ) goto _out5; case 5: -#line 211 "http11_parser.c" +#line 223 "http11_parser.c" if ( (*p) == 72 ) goto tr10; goto st0; tr10: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st6; st6: if ( ++p == pe ) goto _out6; case 6: -#line 223 "http11_parser.c" +#line 235 "http11_parser.c" if ( (*p) == 84 ) goto st7; goto st0; @@ -277,14 +289,14 @@ case 13: goto st13; goto st0; tr18: -#line 55 "http11_parser.rl" +#line 68 "http11_parser.rl" { if(parser->http_version != NULL) parser->http_version(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st14; tr26: -#line 31 "http11_parser.rl" +#line 44 "http11_parser.rl" { if(parser->http_field != NULL) { parser->http_field(parser->data, PTR_TO(field_start), parser->field_len, PTR_TO(mark), LEN(mark, p)); @@ -295,7 +307,7 @@ st14: if ( ++p == pe ) goto _out14; case 14: -#line 299 "http11_parser.c" +#line 311 "http11_parser.c" if ( (*p) == 10 ) goto st15; goto st0; @@ -335,7 +347,7 @@ case 16: goto tr22; goto st0; tr22: -#line 65 "http11_parser.rl" +#line 78 "http11_parser.rl" { parser->body_start = p - buffer + 1; if(parser->header_done != NULL) @@ -347,113 +359,119 @@ st57: if ( ++p == pe ) goto _out57; case 57: -#line 351 "http11_parser.c" +#line 363 "http11_parser.c" goto st0; tr21: -#line 25 "http11_parser.rl" +#line 37 "http11_parser.rl" { MARK(field_start, p); } +#line 38 "http11_parser.rl" + { snake_upcase_char((char *)p); } + goto st17; +tr23: +#line 38 "http11_parser.rl" + { snake_upcase_char((char *)p); } goto st17; st17: if ( ++p == pe ) goto _out17; case 17: -#line 361 "http11_parser.c" +#line 379 "http11_parser.c" switch( (*p) ) { - case 33: goto st17; + case 33: goto tr23; case 58: goto tr24; - case 124: goto st17; - case 126: goto st17; + case 124: goto tr23; + case 126: goto tr23; } if ( (*p) < 45 ) { if ( (*p) > 39 ) { if ( 42 <= (*p) && (*p) <= 43 ) - goto st17; + goto tr23; } else if ( (*p) >= 35 ) - goto st17; + goto tr23; } else if ( (*p) > 46 ) { if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st17; + goto tr23; } else if ( (*p) > 90 ) { if ( 94 <= (*p) && (*p) <= 122 ) - goto st17; + goto tr23; } else - goto st17; + goto tr23; } else - goto st17; + goto tr23; goto st0; tr24: -#line 26 "http11_parser.rl" +#line 39 "http11_parser.rl" { parser->field_len = LEN(field_start, p); } goto st18; tr27: -#line 30 "http11_parser.rl" +#line 43 "http11_parser.rl" { MARK(mark, p); } goto st18; st18: if ( ++p == pe ) goto _out18; case 18: -#line 400 "http11_parser.c" +#line 418 "http11_parser.c" switch( (*p) ) { case 13: goto tr26; case 32: goto tr27; } goto tr25; tr25: -#line 30 "http11_parser.rl" +#line 43 "http11_parser.rl" { MARK(mark, p); } goto st19; st19: if ( ++p == pe ) goto _out19; case 19: -#line 414 "http11_parser.c" +#line 432 "http11_parser.c" if ( (*p) == 13 ) goto tr26; goto st19; tr9: -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st20; tr41: -#line 60 "http11_parser.rl" +#line 73 "http11_parser.rl" { if(parser->request_path != NULL) parser->request_path(parser->data, PTR_TO(mark), LEN(mark,p)); } -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st20; tr52: -#line 49 "http11_parser.rl" +#line 62 "http11_parser.rl" {MARK(query_start, p); } -#line 50 "http11_parser.rl" +#line 63 "http11_parser.rl" { if(parser->query_string != NULL) parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); } goto st20; tr56: -#line 50 "http11_parser.rl" +#line 63 "http11_parser.rl" { if(parser->query_string != NULL) parser->query_string(parser->data, PTR_TO(query_start), LEN(query_start, p)); } -#line 40 "http11_parser.rl" +#line 53 "http11_parser.rl" { if(parser->request_uri != NULL) parser->request_uri(parser->data, PTR_TO(mark), LEN(mark, p)); @@ -463,7 +481,7 @@ st20: if ( ++p == pe ) goto _out20; case 20: -#line 467 "http11_parser.c" +#line 485 "http11_parser.c" switch( (*p) ) { case 32: goto tr30; case 37: goto tr31; @@ -478,14 +496,14 @@ case 20: goto st0; goto tr29; tr29: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st21; st21: if ( ++p == pe ) goto _out21; case 21: -#line 489 "http11_parser.c" +#line 507 "http11_parser.c" switch( (*p) ) { case 32: goto tr30; case 37: goto st22; @@ -500,14 +518,14 @@ case 21: goto st0; goto st21; tr31: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st22; st22: if ( ++p == pe ) goto _out22; case 22: -#line 511 "http11_parser.c" +#line 529 "http11_parser.c" if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st23; @@ -531,14 +549,14 @@ case 23: goto st21; goto st0; tr5: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st24; st24: if ( ++p == pe ) goto _out24; case 24: -#line 542 "http11_parser.c" +#line 560 "http11_parser.c" switch( (*p) ) { case 43: goto st24; case 58: goto st25; @@ -556,14 +574,14 @@ case 24: goto st24; goto st0; tr7: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st25; st25: if ( ++p == pe ) goto _out25; case 25: -#line 567 "http11_parser.c" +#line 585 "http11_parser.c" switch( (*p) ) { case 32: goto tr8; case 34: goto st0; @@ -603,14 +621,14 @@ case 27: goto st25; goto st0; tr6: -#line 22 "http11_parser.rl" +#line 34 "http11_parser.rl" {MARK(mark, p); } goto st28; st28: if ( ++p == pe ) goto _out28; case 28: -#line 614 "http11_parser.c" +#line 632 "http11_parser.c" switch( (*p) ) { case 32: goto tr40; case 34: goto st0; @@ -652,7 +670,7 @@ case 30: goto st28; goto st0; tr43: -#line 60 "http11_parser.rl" +#line 73 "http11_parser.rl" { if(parser->request_path != NULL) parser->request_path(parser->data, PTR_TO(mark), LEN(mark,p)); @@ -662,7 +680,7 @@ st31: if ( ++p == pe ) goto _out31; case 31: -#line 666 "http11_parser.c" +#line 684 "http11_parser.c" switch( (*p) ) { case 32: goto tr8; case 34: goto st0; @@ -703,7 +721,7 @@ case 33: goto st31; goto st0; tr44: -#line 60 "http11_parser.rl" +#line 73 "http11_parser.rl" { if(parser->request_path != NULL) parser->request_path(parser->data, PTR_TO(mark), LEN(mark,p)); @@ -713,7 +731,7 @@ st34: if ( ++p == pe ) goto _out34; case 34: -#line 717 "http11_parser.c" +#line 735 "http11_parser.c" switch( (*p) ) { case 32: goto tr51; case 34: goto st0; @@ -727,14 +745,14 @@ case 34: goto st0; goto tr50; tr50: -#line 49 "http11_parser.rl" +#line 62 "http11_parser.rl" {MARK(query_start, p); } goto st35; st35: if ( ++p == pe ) goto _out35; case 35: -#line 738 "http11_parser.c" +#line 756 "http11_parser.c" switch( (*p) ) { case 32: goto tr55; case 34: goto st0; @@ -748,14 +766,14 @@ case 35: goto st0; goto st35; tr53: -#line 49 "http11_parser.rl" +#line 62 "http11_parser.rl" {MARK(query_start, p); } goto st36; st36: if ( ++p == pe ) goto _out36; case 36: -#line 759 "http11_parser.c" +#line 777 "http11_parser.c" if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st37; @@ -1170,7 +1188,7 @@ case 56: _out: {} } -#line 109 "http11_parser.rl" +#line 122 "http11_parser.rl" parser->cs = cs; parser->nread += p - (buffer + off); @@ -1185,8 +1203,8 @@ case 56: if(parser->body_start) { /* final \r\n combo encountered so stop right here */ -#line 1189 "http11_parser.c" -#line 123 "http11_parser.rl" +#line 1207 "http11_parser.c" +#line 136 "http11_parser.rl" parser->nread++; } @@ -1198,8 +1216,8 @@ int http_parser_finish(http_parser *parser) int cs = parser->cs; -#line 1202 "http11_parser.c" -#line 134 "http11_parser.rl" +#line 1220 "http11_parser.c" +#line 147 "http11_parser.rl" parser->cs = cs; diff --git a/ext/http11/http11_parser.java.rl b/ext/http11/http11_parser.java.rl index 71f8d3c..c9be22e 100644 --- a/ext/http11/http11_parser.java.rl +++ b/ext/http11/http11_parser.java.rl @@ -13,6 +13,7 @@ public class Http11Parser { action mark {parser.mark = fpc; } action start_field { parser.field_start = fpc; } + action snake_upcase_field { /* FIXME stub */ } action write_field { parser.field_len = fpc-parser.field_start; } diff --git a/ext/http11/http11_parser.rl b/ext/http11/http11_parser.rl index fffe57a..a418605 100644 --- a/ext/http11/http11_parser.rl +++ b/ext/http11/http11_parser.rl @@ -9,6 +9,18 @@ #include #include +/* + * capitalizes all lower-case ASCII characters, + * converts dashes to underscores. + */ +static void snake_upcase_char(char *c) +{ + if (*c >= 'a' && *c <= 'z') + *c &= ~0x20; + else if (*c == '-') + *c = '_'; +} + #define LEN(AT, FPC) (FPC - buffer - parser->AT) #define MARK(M,FPC) (parser->M = (FPC) - buffer) #define PTR_TO(F) (buffer + parser->F) @@ -23,6 +35,7 @@ action start_field { MARK(field_start, fpc); } + action snake_upcase_field { snake_upcase_char((char *)fpc); } action write_field { parser->field_len = LEN(field_start, fpc); } diff --git a/ext/http11/http11_parser_common.rl b/ext/http11/http11_parser_common.rl index a70d4da..53c805f 100644 --- a/ext/http11/http11_parser_common.rl +++ b/ext/http11/http11_parser_common.rl @@ -41,7 +41,7 @@ HTTP_Version = ( "HTTP/" http_number ) >mark %http_version ; Request_Line = ( Method " " Request_URI ("#" Fragment){0,1} " " HTTP_Version CRLF ) ; - field_name = ( token -- ":" )+ >start_field %write_field; + field_name = ( token -- ":" )+ >start_field $snake_upcase_field %write_field; field_value = any* >start_value %write_value; -- cgit v1.2.3-24-ge0c7