From 1cf8ca2c6e57cf8cd9794d5bb6bb4f8b22711560 Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Thu, 4 Jul 2019 03:49:51 +0000
Subject: http: use gperf for common fields optimization

GNU gperf is a commonly-used tool for generating perfect hashes
and available on every platform unicorn runs on. C Ruby, gcc, glibc
all already use it.

Using a hash lookup instead of a linear scan already shows
measurable improvements when memoized header keys are all used:

* test/benchmark/http_parser.rb (no options):
100000 iterations
  user system total real
- 0.411857 0.000200 0.412057 ( 0.412070)
+ 0.397960 0.000181 0.398141 ( 0.398149)

Results which require generating a new string from an unmemoized
header is less significant, but still consistent measurable:

* test/benchmark/http_parser.rb -H 'DNT: 1'
100000 iterations
  user system total real
- 0.461416 0.000000 0.461416 ( 0.461417)
+ 0.461329 0.000000 0.461329 ( 0.461363)

Most importantly, this change allows us to memoize more keys
without worrying too much about the overhead of a O(n) scan.
---
 ext/unicorn_http/gperf.rb | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 ext/unicorn_http/gperf.rb

(limited to 'ext/unicorn_http/gperf.rb')

diff --git a/ext/unicorn_http/gperf.rb b/ext/unicorn_http/gperf.rb
new file mode 100644
index 0000000..9765f86
--- /dev/null
+++ b/ext/unicorn_http/gperf.rb
@@ -0,0 +1,27 @@
+#!/usr/bin/ruby -w
+buf = STDIN.read # output of: gperf ext/unicorn_http/common_fields.gperf
+# Rewrite buf and print it.  sub!/gsub! return nil when nothing matches, so
+# this is supposed to fail if it doesn't substitute anything:
+print buf.sub!(
+
+# make sure all functions are static ($& is the entire match)
+/\nstruct \w+ \*\n(\w+_)?lookup/) {
+  "\nstatic#$&"
+}.
+
+gsub!(
+# gperf 3.0.x used "(int)(long)", 3.1 uses "(int)(size_t)",
+# input: {(int)(size_t)&((struct cf_pool_t *)0)->cf_pool_str3},
+# output: {offsetof(struct cf_pool_t, cf_pool_str3)},
+/{\(int\)\(\w+\)\&\(\((struct \w+) *\*\)0\)->(\w+)}/) {
+  "{offsetof(#$1, #$2)}"
+}.
+
+# make sure everything is 64-bit safe and compilers don't truncate
+gsub!(/\b(?:unsigned )?int\b/, 'size_t').
+# plain gsub (not gsub!) always returns a string, so this step may be a no-op.
+# This isn't needed for %switch%, but we'll experiment with it to see
+# if it's necessary, or not.
+# don't give compilers a reason to complain, (struct foo *)->name
+# is size_t, so unused slots should be size_t:
+gsub(/\{-1\}/, '{(size_t)-1}')
-- 
cgit v1.2.3-24-ge0c7