diff options
-rw-r--r-- | ext/http11/ext_help.h | 1 | ||||
-rw-r--r-- | ext/http11/http11.c | 138 | ||||
-rw-r--r-- | test/test_http11.rb | 60 |
3 files changed, 189 insertions, 10 deletions
diff --git a/ext/http11/ext_help.h b/ext/http11/ext_help.h index 8b4d754..08c0e1e 100644 --- a/ext/http11/ext_help.h +++ b/ext/http11/ext_help.h @@ -4,6 +4,7 @@ #define RAISE_NOT_NULL(T) if(T == NULL) rb_raise(rb_eArgError, "NULL found for " # T " when shouldn't be."); #define DATA_GET(from,type,name) Data_Get_Struct(from,type,name); RAISE_NOT_NULL(name); #define REQUIRE_TYPE(V, T) if(TYPE(V) != T) rb_raise(rb_eTypeError, "Wrong argument type for " # V " required " # T); +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) #ifdef DEBUG #define TRACE() fprintf(stderr, "> %s:%d:%s\n", __FILE__, __LINE__, __FUNCTION__) diff --git a/ext/http11/http11.c b/ext/http11/http11.c index e7a8658..5af1e8c 100644 --- a/ext/http11/http11.c +++ b/ext/http11/http11.c @@ -66,6 +66,114 @@ DEF_MAX_LENGTH(REQUEST_PATH, 1024); DEF_MAX_LENGTH(QUERY_STRING, (1024 * 10)); DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32))); +struct common_field { + const signed long len; + const char *name; + VALUE value; +}; + +/* + * A list of common HTTP headers we expect to receive. + * This allows us to avoid repeatedly creating identical string + * objects to be used with rb_hash_aset(). + */ +static struct common_field common_http_fields[] = { +# define f(N) { (sizeof(N) - 1), N, Qnil } + f("ACCEPT"), + f("ACCEPT_CHARSET"), + f("ACCEPT_ENCODING"), + f("ACCEPT_LANGUAGE"), + f("ALLOW"), + f("AUTHORIZATION"), + f("CACHE_CONTROL"), + f("CONNECTION"), + f("CONTENT_ENCODING"), + f("CONTENT_LENGTH"), + f("CONTENT_TYPE"), + f("COOKIE"), + f("DATE"), + f("EXPECT"), + f("FROM"), + f("HOST"), + f("IF_MATCH"), + f("IF_MODIFIED_SINCE"), + f("IF_NONE_MATCH"), + f("IF_RANGE"), + f("IF_UNMODIFIED_SINCE"), + f("KEEP_ALIVE"), /* Firefox sends this */ + f("MAX_FORWARDS"), + f("PRAGMA"), + f("PROXY_AUTHORIZATION"), + f("RANGE"), + f("REFERER"), + f("TE"), + f("TRAILER"), + f("TRANSFER_ENCODING"), + f("UPGRADE"), + f("USER_AGENT"), + f("VIA"), + f("WARNING") +# undef f +}; + +/* + * qsort(3) and bsearch(3) improve average performance slightly, but may + * not be worth it for lack of portability to certain platforms... + */ +#if defined(HAVE_QSORT_BSEARCH) +/* sort by length, then by name if there's a tie */ +static int common_field_cmp(const void *a, const void *b) +{ + struct common_field *cfa = (struct common_field *)a; + struct common_field *cfb = (struct common_field *)b; + signed long diff = cfa->len - cfb->len; + return diff ? diff : memcmp(cfa->name, cfb->name, cfa->len); +} +#endif /* HAVE_QSORT_BSEARCH */ + +static void init_common_fields(void) +{ + int i; + struct common_field *cf = common_http_fields; + char tmp[256]; /* MAX_FIELD_NAME_LENGTH */ + memcpy(tmp, HTTP_PREFIX, HTTP_PREFIX_LEN); + + for(i = 0; i < ARRAY_SIZE(common_http_fields); cf++, i++) { + memcpy(tmp + HTTP_PREFIX_LEN, cf->name, cf->len + 1); + cf->value = rb_obj_freeze(rb_str_new(tmp, HTTP_PREFIX_LEN + cf->len)); + rb_global_variable(&cf->value); + } + +#if defined(HAVE_QSORT_BSEARCH) + qsort(common_http_fields, + ARRAY_SIZE(common_http_fields), + sizeof(struct common_field), + common_field_cmp); +#endif /* HAVE_QSORT_BSEARCH */ +} + +static VALUE find_common_field_value(const char *field, size_t flen) +{ +#if defined(HAVE_QSORT_BSEARCH) + struct common_field key; + struct common_field *found; + key.name = field; + key.len = (signed long)flen; + found = (struct common_field *)bsearch(&key, common_http_fields, + ARRAY_SIZE(common_http_fields), + sizeof(struct common_field), + common_field_cmp); + return found ? found->value : Qnil; +#else /* !HAVE_QSORT_BSEARCH */ + int i; + struct common_field *cf = common_http_fields; + for(i = 0; i < ARRAY_SIZE(common_http_fields); i++, cf++) { + if (cf->len == flen && !memcmp(cf->name, field, flen)) + return cf->value; + } + return Qnil; +#endif /* !HAVE_QSORT_BSEARCH */ +} void http_field(void *data, const char *field, size_t flen, const char *value, size_t vlen) { @@ -78,16 +186,25 @@ void http_field(void *data, const char *field, size_t flen, const char *value, s v = rb_str_new(value, vlen); - /* - * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len) - * in my testing, because: there's no minimum allocation length (and - * no check for it, either), RSTRING_LEN(f) does not need to be - * written twice, and and RSTRING_PTR(f) will already be - * null-terminated for us. - */ - f = rb_str_new(NULL, HTTP_PREFIX_LEN + flen); - memcpy(RSTRING_PTR(f), HTTP_PREFIX, HTTP_PREFIX_LEN); - memcpy(RSTRING_PTR(f) + HTTP_PREFIX_LEN, field, flen); + f = find_common_field_value(field, flen); + + if (f == Qnil) { + /* + * We got a strange header that we don't have a memoized value for. + * Fallback to creating a new string to use as a hash key. + * + * using rb_str_new(NULL, len) here is faster than rb_str_buf_new(len) + * in my testing, because: there's no minimum allocation length (and + * no check for it, either), RSTRING_LEN(f) does not need to be + * written twice, and and RSTRING_PTR(f) will already be + * null-terminated for us. + */ + f = rb_str_new(NULL, HTTP_PREFIX_LEN + flen); + memcpy(RSTRING_PTR(f), HTTP_PREFIX, HTTP_PREFIX_LEN); + memcpy(RSTRING_PTR(f) + HTTP_PREFIX_LEN, field, flen); + assert(*(RSTRING_PTR(f) + RSTRING_LEN(f)) == '\0'); /* paranoia */ + /* fprintf(stderr, "UNKNOWN HEADER <%s>\n", RSTRING_PTR(f)); */ + } rb_hash_aset(req, f, v); } @@ -405,4 +522,5 @@ void Init_http11() rb_define_method(cHttpParser, "error?", HttpParser_has_error,0); rb_define_method(cHttpParser, "finished?", HttpParser_is_finished,0); rb_define_method(cHttpParser, "nread", HttpParser_nread,0); + init_common_fields(); } diff --git a/test/test_http11.rb b/test/test_http11.rb index 81b1626..deaeaeb 100644 --- a/test/test_http11.rb +++ b/test/test_http11.rb @@ -70,6 +70,66 @@ class HttpParserTest < Test::Unit::TestCase assert parser.error?, "Parser SHOULD have error" end + def test_parse_like_optimized_header + parser = HttpParser.new + req = {} + should_be_good = "GET / HTTP/1.1\r\nAuthorizationn: zz\r\n\r\n" + nread = parser.execute(req, should_be_good, 0) + assert_equal should_be_good.length, nread + assert parser.finished? + assert !parser.error? + assert_equal "zz", req["HTTP_AUTHORIZATIONN"] + assert ! req["HTTP_AUTHORIZATION"] + end + + def test_parse_twin_lookalike_optimized_headers + parser = HttpParser.new + req = {} + should_be_good = "GET / HTTP/1.1\r\n" \ + "Accept-Encoding: abcdef\r\n" \ + "Accept-Language: zyxvut\r\n" \ + "\r\n" + nread = parser.execute(req, should_be_good, 0) + assert_equal should_be_good.length, nread + assert parser.finished? + assert !parser.error? + assert_equal "abcdef", req["HTTP_ACCEPT_ENCODING"] + assert_equal "zyxvut", req["HTTP_ACCEPT_LANGUAGE"] + end + + if RUBY_PLATFORM !~ /java/ + # as of now, the Java version does not have the same global-object + # reuse optimization the C version does + + def test_parse_optimized_headers_global_objects_used + parser = HttpParser.new + req = {} + should_be_good = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n" + nread = parser.execute(req, should_be_good, 0) + assert_equal should_be_good.length, nread + assert parser.finished? + assert !parser.error? + assert_equal "example.com", req["HTTP_HOST"] + + frozen_host_a = nil + req.each { |k,v| k == "HTTP_HOST" && frozen_host_a = k } + + parser = HttpParser.new + req = {} + should_be_good = "GET / HTTP/1.1\r\nHost: example.com\r\n\r\n" + nread = parser.execute(req, should_be_good, 0) + assert_equal should_be_good.length, nread + assert parser.finished? + assert !parser.error? + + frozen_host_b = nil + req.each { |k,v| k == "HTTP_HOST" && frozen_host_b = k } + assert_equal "HTTP_HOST", frozen_host_a + assert_equal "HTTP_HOST", frozen_host_b + assert_equal frozen_host_a.object_id, frozen_host_b.object_id + end + end + def test_fragment_in_uri parser = HttpParser.new req = {} |