diff options
Diffstat (limited to 'ext/unicorn_http')
-rw-r--r-- | ext/unicorn_http/c_util.h | 18 | ||||
-rw-r--r-- | ext/unicorn_http/common_field_optimization.h | 1 | ||||
-rw-r--r-- | ext/unicorn_http/epollexclusive.h | 128 | ||||
-rw-r--r-- | ext/unicorn_http/ext_help.h | 24 | ||||
-rw-r--r-- | ext/unicorn_http/extconf.rb | 11 | ||||
-rw-r--r-- | ext/unicorn_http/global_variables.h | 2 | ||||
-rw-r--r-- | ext/unicorn_http/httpdate.c | 21 | ||||
-rw-r--r-- | ext/unicorn_http/unicorn_http.rl | 87 | ||||
-rw-r--r-- | ext/unicorn_http/unicorn_http_common.rl | 2 |
9 files changed, 223 insertions, 71 deletions
diff --git a/ext/unicorn_http/c_util.h b/ext/unicorn_http/c_util.h index ab1fc0e..5774615 100644 --- a/ext/unicorn_http/c_util.h +++ b/ext/unicorn_http/c_util.h @@ -8,23 +8,15 @@ #include <unistd.h> #include <assert.h> +#include <limits.h> #define MIN(a,b) (a < b ? a : b) #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) -#ifndef SIZEOF_OFF_T -# define SIZEOF_OFF_T 4 -# warning SIZEOF_OFF_T not defined, guessing 4. Did you run extconf.rb? -#endif - -#if SIZEOF_OFF_T == 4 -# define UH_OFF_T_MAX 0x7fffffff -#elif SIZEOF_OFF_T == 8 -# if SIZEOF_LONG == 4 -# define UH_OFF_T_MAX 0x7fffffffffffffffLL -# else -# define UH_OFF_T_MAX 0x7fffffffffffffff -# endif +#if SIZEOF_OFF_T == SIZEOF_INT +# define UH_OFF_T_MAX INT_MAX +#elif SIZEOF_OFF_T == SIZEOF_LONG_LONG +# define UH_OFF_T_MAX LLONG_MAX #else # error off_t size unknown for this platform! #endif /* SIZEOF_OFF_T check */ diff --git a/ext/unicorn_http/common_field_optimization.h b/ext/unicorn_http/common_field_optimization.h index 0659fc7..250e43e 100644 --- a/ext/unicorn_http/common_field_optimization.h +++ b/ext/unicorn_http/common_field_optimization.h @@ -83,7 +83,6 @@ static void init_common_fields(void) struct common_field *cf = common_http_fields; char tmp[64]; - id_uminus = rb_intern("-@"); memcpy(tmp, HTTP_PREFIX, HTTP_PREFIX_LEN); for(i = ARRAY_SIZE(common_http_fields); --i >= 0; cf++) { diff --git a/ext/unicorn_http/epollexclusive.h b/ext/unicorn_http/epollexclusive.h new file mode 100644 index 0000000..c74a779 --- /dev/null +++ b/ext/unicorn_http/epollexclusive.h @@ -0,0 +1,128 @@ +/* + * This is only intended for use inside a unicorn worker, nowhere else. + * EPOLLEXCLUSIVE somewhat mitigates the thundering herd problem for + * mostly idle processes since we can't use blocking accept4. + * This is NOT intended for use with multi-threaded servers, nor + * single-threaded multi-client ("C10K") servers or anything advanced + * like that. 
This use of epoll is only appropriate for primitive, + * single-client, single-threaded servers like unicorn that need to + * support SIGKILL timeouts and parent death detection. + */ +#if defined(HAVE_EPOLL_CREATE1) +# include <sys/epoll.h> +# include <errno.h> +# include <ruby/io.h> +# include <ruby/thread.h> +#endif /* HAVE_EPOLL_CREATE1 */ + +#if defined(EPOLLEXCLUSIVE) && defined(HAVE_EPOLL_CREATE1) +# define USE_EPOLL (1) +#else +# define USE_EPOLL (0) +#endif + +#if USE_EPOLL +#if defined(HAVE_RB_IO_DESCRIPTOR) /* Ruby 3.1+ */ +# define my_fileno(io) rb_io_descriptor(io) +#else /* Ruby <3.1 */ +static int my_fileno(VALUE io) +{ + rb_io_t *fptr; + GetOpenFile(io, fptr); + rb_io_check_closed(fptr); + return fptr->fd; +} +#endif /* Ruby <3.1 */ + +/* + * :nodoc: + * returns IO object if EPOLLEXCLUSIVE works and arms readers + */ +static VALUE prep_readers(VALUE cls, VALUE readers) +{ + long i; + int epfd = epoll_create1(EPOLL_CLOEXEC); + VALUE epio; + + if (epfd < 0) rb_sys_fail("epoll_create1"); + + epio = rb_funcall(cls, rb_intern("for_fd"), 1, INT2NUM(epfd)); + + Check_Type(readers, T_ARRAY); + for (i = 0; i < RARRAY_LEN(readers); i++) { + int rc, fd; + struct epoll_event e; + VALUE io = rb_ary_entry(readers, i); + + e.data.u64 = i; /* the reason readers shouldn't change */ + + /* + * I wanted to use EPOLLET here, but maintaining our own + * equivalent of ep->rdllist in Ruby-space doesn't fit + * our design at all (and the kernel already has its own + * code path for doing it). So let the kernel spend + * cycles on maintaining level-triggering. 
+ */ + e.events = EPOLLEXCLUSIVE | EPOLLIN; + fd = my_fileno(rb_io_get_io(io)); + rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &e); + if (rc < 0) rb_sys_fail("epoll_ctl"); + } + return epio; +} +#endif /* USE_EPOLL */ + +#if USE_EPOLL +struct ep_wait { + struct epoll_event event; + int epfd; + int timeout_msec; +}; + +static void *do_wait(void *ptr) /* runs w/o GVL */ +{ + struct ep_wait *epw = ptr; + /* + * Linux delivers epoll events in the order received, and using + * maxevents=1 ensures we pluck one item off ep->rdllist + * at-a-time (cf. fs/eventpoll.c in linux.git, it's quite + * easy-to-understand for anybody familiar with Ruby C). + */ + return (void *)(long)epoll_wait(epw->epfd, &epw->event, 1, + epw->timeout_msec); +} + +/* :nodoc: */ +/* readers must not change between prep_readers and get_readers */ +static VALUE +get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec) +{ + struct ep_wait epw; + long n; + + Check_Type(ready, T_ARRAY); + Check_Type(readers, T_ARRAY); + + epw.epfd = my_fileno(epio); + epw.timeout_msec = NUM2INT(timeout_msec); + n = (long)rb_thread_call_without_gvl(do_wait, &epw, RUBY_UBF_IO, NULL); + if (n < 0) { + if (errno != EINTR) rb_sys_fail("epoll_wait"); + } else if (n > 0) { /* maxevents is hardcoded to 1 */ + VALUE obj = rb_ary_entry(readers, epw.event.data.u64); + + if (RTEST(obj)) + rb_ary_push(ready, obj); + } /* n == 0 : timeout */ + return Qfalse; +} +#endif /* USE_EPOLL */ + +static void init_epollexclusive(VALUE mUnicorn) +{ +#if USE_EPOLL + VALUE cWaiter = rb_define_class_under(mUnicorn, "Waiter", rb_cIO); + rb_define_singleton_method(cWaiter, "prep_readers", prep_readers, 1); + rb_define_method(cWaiter, "get_readers", get_readers, 3); +#endif +} diff --git a/ext/unicorn_http/ext_help.h b/ext/unicorn_http/ext_help.h index 747c36c..86a187e 100644 --- a/ext/unicorn_http/ext_help.h +++ b/ext/unicorn_http/ext_help.h @@ -8,30 +8,6 @@ # define assert_frozen(f) do {} while (0) #endif /* !defined(OBJ_FROZEN) */ 
-#if !defined(OFFT2NUM) -# if SIZEOF_OFF_T == SIZEOF_LONG -# define OFFT2NUM(n) LONG2NUM(n) -# else -# define OFFT2NUM(n) LL2NUM(n) -# endif -#endif /* ! defined(OFFT2NUM) */ - -#if !defined(SIZET2NUM) -# if SIZEOF_SIZE_T == SIZEOF_LONG -# define SIZET2NUM(n) ULONG2NUM(n) -# else -# define SIZET2NUM(n) ULL2NUM(n) -# endif -#endif /* ! defined(SIZET2NUM) */ - -#if !defined(NUM2SIZET) -# if SIZEOF_SIZE_T == SIZEOF_LONG -# define NUM2SIZET(n) ((size_t)NUM2ULONG(n)) -# else -# define NUM2SIZET(n) ((size_t)NUM2ULL(n)) -# endif -#endif /* ! defined(NUM2SIZET) */ - static inline int str_cstr_eq(VALUE val, const char *ptr, long len) { return (RSTRING_LEN(val) == len && !memcmp(ptr, RSTRING_PTR(val), len)); diff --git a/ext/unicorn_http/extconf.rb b/ext/unicorn_http/extconf.rb index d5f81fb..de896fe 100644 --- a/ext/unicorn_http/extconf.rb +++ b/ext/unicorn_http/extconf.rb @@ -1,12 +1,8 @@ # -*- encoding: binary -*- +# frozen_string_literal: false require 'mkmf' -have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h") -have_macro("SIZEOF_SIZE_T", "ruby.h") or check_sizeof("size_t", "sys/types.h") -have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h") -have_func("rb_str_set_len", "ruby.h") or abort 'Ruby 1.9.3+ required' -have_func("rb_hash_clear", "ruby.h") # Ruby 2.0+ -have_func("gmtime_r", "time.h") +have_func("rb_hash_clear", "ruby.h") or abort 'Ruby 2.0+ required' message('checking if String#-@ (str_uminus) dedupes... 
') begin @@ -38,4 +34,7 @@ else message("no, needs Ruby 2.6+\n") end +if have_func('epoll_create1', %w(sys/epoll.h)) + have_func('rb_io_descriptor') # Ruby 3.1+ +end create_makefile("unicorn_http") diff --git a/ext/unicorn_http/global_variables.h b/ext/unicorn_http/global_variables.h index f8e694c..c9ceebd 100644 --- a/ext/unicorn_http/global_variables.h +++ b/ext/unicorn_http/global_variables.h @@ -55,7 +55,7 @@ NORETURN(static void parser_raise(VALUE klass, const char *)); /** Defines global strings in the init method. */ #define DEF_GLOBAL(N, val) do { \ - g_##N = rb_obj_freeze(rb_str_new(val, sizeof(val) - 1)); \ + g_##N = str_new_dd_freeze(val, (long)sizeof(val) - 1); \ rb_gc_register_mark_object(g_##N); \ } while (0) diff --git a/ext/unicorn_http/httpdate.c b/ext/unicorn_http/httpdate.c index b59d038..0faf5da 100644 --- a/ext/unicorn_http/httpdate.c +++ b/ext/unicorn_http/httpdate.c @@ -1,5 +1,6 @@ #include <ruby.h> #include <time.h> +#include <sys/time.h> #include <stdio.h> static const size_t buf_capa = sizeof("Thu, 01 Jan 1970 00:00:00 GMT"); @@ -11,6 +12,7 @@ static const char months[] = "Jan\0Feb\0Mar\0Apr\0May\0Jun\0" /* for people on wonky systems only */ #ifndef HAVE_GMTIME_R +# warning using fake gmtime_r static struct tm * my_gmtime_r(time_t *now, struct tm *tm) { struct tm *global = gmtime(now); @@ -42,13 +44,24 @@ static struct tm * my_gmtime_r(time_t *now, struct tm *tm) static VALUE httpdate(VALUE self) { static time_t last; - time_t now = time(NULL); /* not a syscall on modern 64-bit systems */ + struct timeval now; struct tm tm; - if (last == now) + /* + * Favor gettimeofday(2) over time(2), as the latter can return the + * wrong value in the first 1 .. 2.5 ms of every second(!) 
+ * + * https://lore.kernel.org/git/20230320230507.3932018-1-gitster@pobox.com/ + * https://inbox.sourceware.org/libc-alpha/20230306160321.2942372-1-adhemerval.zanella@linaro.org/T/ + * https://sourceware.org/bugzilla/show_bug.cgi?id=30200 + */ + if (gettimeofday(&now, NULL)) + rb_sys_fail("gettimeofday"); + + if (last == now.tv_sec) return buf; - last = now; - gmtime_r(&now, &tm); + last = now.tv_sec; + gmtime_r(&now.tv_sec, &tm); /* we can make this thread-safe later if our Ruby loses the GVL */ snprintf(buf_ptr, buf_capa, diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl index 8ef23bc..fb5dcde 100644 --- a/ext/unicorn_http/unicorn_http.rl +++ b/ext/unicorn_http/unicorn_http.rl @@ -12,6 +12,7 @@ #include "common_field_optimization.h" #include "global_variables.h" #include "c_util.h" +#include "epollexclusive.h" void init_unicorn_httpdate(void); @@ -27,10 +28,15 @@ void init_unicorn_httpdate(void); #define UH_FL_TO_CLEAR 0x200 #define UH_FL_RESSTART 0x400 /* for check_client_connection */ #define UH_FL_HIJACK 0x800 +#define UH_FL_RES_CHUNK_VER (1U << 12) +#define UH_FL_RES_CHUNK_METHOD (1U << 13) /* all of these flags need to be set for keepalive to be supported */ #define UH_FL_KEEPALIVE (UH_FL_KAVERSION | UH_FL_REQEOF | UH_FL_HASHEADER) +/* we can only chunk responses for non-HEAD HTTP/1.1 requests */ +#define UH_FL_RES_CHUNKABLE (UH_FL_RES_CHUNK_VER | UH_FL_RES_CHUNK_METHOD) + static unsigned int MAX_HEADER_LEN = 1024 * (80 + 32); /* same as Mongrel */ /* this is only intended for use with Rainbows! 
*/ @@ -62,19 +68,8 @@ struct http_parser { } len; }; -static ID id_set_backtrace; - -#ifdef HAVE_RB_HASH_CLEAR /* Ruby >= 2.0 */ -# define my_hash_clear(h) (void)rb_hash_clear(h) -#else /* !HAVE_RB_HASH_CLEAR - Ruby <= 1.9.3 */ - -static ID id_clear; - -static void my_hash_clear(VALUE h) -{ - rb_funcall(h, id_clear, 0); -} -#endif /* HAVE_RB_HASH_CLEAR */ +static ID id_set_backtrace, id_is_chunked_p; +static VALUE cHttpParser; static void finalize_header(struct http_parser *hp); @@ -155,6 +150,9 @@ request_method(struct http_parser *hp, const char *ptr, size_t len) { VALUE v = rb_str_new(ptr, len); + if (len != 4 || memcmp(ptr, "HEAD", 4)) + HP_FL_SET(hp, RES_CHUNK_METHOD); + rb_hash_aset(hp->env, g_request_method, v); } @@ -168,6 +166,7 @@ http_version(struct http_parser *hp, const char *ptr, size_t len) if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) { /* HTTP/1.1 implies keepalive unless "Connection: close" is set */ HP_FL_SET(hp, KAVERSION); + HP_FL_SET(hp, RES_CHUNK_VER); v = g_http_11; } else if (CONST_MEM_EQ("HTTP/1.0", ptr, len)) { v = g_http_10; @@ -220,6 +219,19 @@ static void write_cont_value(struct http_parser *hp, rb_str_buf_cat(hp->cont, vptr, end + 1); } +static int is_chunked(VALUE v) +{ + /* common case first */ + if (STR_CSTR_CASE_EQ(v, "chunked")) + return 1; + + /* + * call Ruby function in unicorn/http_request.rb to deal with unlikely + * comma-delimited case + */ + return rb_funcall(cHttpParser, id_is_chunked_p, 1, v) != Qfalse; +} + static void write_value(struct http_parser *hp, const char *buffer, const char *p) { @@ -246,7 +258,9 @@ static void write_value(struct http_parser *hp, f = uncommon_field(field, flen); } else if (f == g_http_connection) { hp_keepalive_connection(hp, v); - } else if (f == g_content_length) { + } else if (f == g_content_length && !HP_FL_TEST(hp, CHUNKED)) { + if (hp->len.content) + parser_raise(eHttpParserError, "Content-Length already set"); hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v)); if 
(hp->len.content < 0) parser_raise(eHttpParserError, "invalid Content-Length"); @@ -254,9 +268,30 @@ static void write_value(struct http_parser *hp, HP_FL_SET(hp, HASBODY); hp_invalid_if_trailer(hp); } else if (f == g_http_transfer_encoding) { - if (STR_CSTR_CASE_EQ(v, "chunked")) { + if (is_chunked(v)) { + if (HP_FL_TEST(hp, CHUNKED)) + /* + * RFC 7230 3.3.1: + * A sender MUST NOT apply chunked more than once to a message body + * (i.e., chunking an already chunked message is not allowed). + */ + parser_raise(eHttpParserError, "Transfer-Encoding double chunked"); + HP_FL_SET(hp, CHUNKED); HP_FL_SET(hp, HASBODY); + + /* RFC 7230 3.3.3, 3: favor chunked if Content-Length exists */ + hp->len.content = 0; + } else if (HP_FL_TEST(hp, CHUNKED)) { + /* + * RFC 7230 3.3.3, point 3 states: + * If a Transfer-Encoding header field is present in a request and + * the chunked transfer coding is not the final encoding, the + * message body length cannot be determined reliably; the server + * MUST respond with the 400 (Bad Request) status code and then + * close the connection. + */ + parser_raise(eHttpParserError, "invalid Transfer-Encoding"); } hp_invalid_if_trailer(hp); } else if (f == g_http_trailer) { @@ -487,7 +522,7 @@ static void set_url_scheme(VALUE env, VALUE *server_port) * and X-Forwarded-Proto handling from this parser? We've had it * forever and nobody has said anything against it, either. 
* Anyways, please send comments to our public mailing list: - * unicorn-public@bogomips.org (no HTML mail, no subscription necessary) + * unicorn-public@yhbt.net (no HTML mail, no subscription necessary) */ scheme = rb_hash_aref(env, g_http_x_forwarded_ssl); if (!NIL_P(scheme) && STR_CSTR_EQ(scheme, "on")) { @@ -613,7 +648,7 @@ static VALUE HttpParser_clear(VALUE self) return HttpParser_init(self); http_parser_init(hp); - my_hash_clear(hp->env); + rb_hash_clear(hp->env); return self; } @@ -775,6 +810,14 @@ static VALUE HttpParser_keepalive(VALUE self) return HP_FL_ALL(hp, KEEPALIVE) ? Qtrue : Qfalse; } +/* :nodoc: */ +static VALUE chunkable_response_p(VALUE self) +{ + const struct http_parser *hp = data_get(self); + + return HP_FL_ALL(hp, RES_CHUNKABLE) ? Qtrue : Qfalse; +} + /** * call-seq: * parser.next? => true or false @@ -931,7 +974,7 @@ static VALUE HttpParser_rssget(VALUE self) void Init_unicorn_http(void) { - VALUE mUnicorn, cHttpParser; + VALUE mUnicorn; mUnicorn = rb_define_module("Unicorn"); cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject); @@ -942,6 +985,7 @@ void Init_unicorn_http(void) e414 = rb_define_class_under(mUnicorn, "RequestURITooLongError", eHttpParserError); + id_uminus = rb_intern("-@"); init_globals(); rb_define_alloc_func(cHttpParser, HttpParser_alloc); rb_define_method(cHttpParser, "initialize", HttpParser_init, 0); @@ -954,6 +998,7 @@ void Init_unicorn_http(void) rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0); rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0); rb_define_method(cHttpParser, "keepalive?", HttpParser_keepalive, 0); + rb_define_method(cHttpParser, "chunkable_response?", chunkable_response_p, 0); rb_define_method(cHttpParser, "headers?", HttpParser_has_headers, 0); rb_define_method(cHttpParser, "next?", HttpParser_next, 0); rb_define_method(cHttpParser, "buf", HttpParser_buf, 0); @@ -988,8 +1033,8 @@ void Init_unicorn_http(void) id_set_backtrace = 
rb_intern("set_backtrace"); init_unicorn_httpdate(); -#ifndef HAVE_RB_HASH_CLEAR - id_clear = rb_intern("clear"); -#endif + id_is_chunked_p = rb_intern("is_chunked?"); + + init_epollexclusive(mUnicorn); } #undef SET_GLOBAL diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl index 0988b54..507e570 100644 --- a/ext/unicorn_http/unicorn_http_common.rl +++ b/ext/unicorn_http/unicorn_http_common.rl @@ -4,7 +4,7 @@ #### HTTP PROTOCOL GRAMMAR # line endings - CRLF = ("\r\n" | "\n"); + CRLF = ("\r\n" | "\n"); # character types CTL = (cntrl | 127); |