about summary refs log tree commit homepage
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/unicorn_http/c_util.h18
-rw-r--r--ext/unicorn_http/common_field_optimization.h1
-rw-r--r--ext/unicorn_http/epollexclusive.h128
-rw-r--r--ext/unicorn_http/ext_help.h24
-rw-r--r--ext/unicorn_http/extconf.rb11
-rw-r--r--ext/unicorn_http/global_variables.h2
-rw-r--r--ext/unicorn_http/httpdate.c21
-rw-r--r--ext/unicorn_http/unicorn_http.rl87
-rw-r--r--ext/unicorn_http/unicorn_http_common.rl2
9 files changed, 223 insertions, 71 deletions
diff --git a/ext/unicorn_http/c_util.h b/ext/unicorn_http/c_util.h
index ab1fc0e..5774615 100644
--- a/ext/unicorn_http/c_util.h
+++ b/ext/unicorn_http/c_util.h
@@ -8,23 +8,15 @@
 
 #include <unistd.h>
 #include <assert.h>
+#include <limits.h>
 
 #define MIN(a,b) (a < b ? a : b)
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 
-#ifndef SIZEOF_OFF_T
-#  define SIZEOF_OFF_T 4
-#  warning SIZEOF_OFF_T not defined, guessing 4.  Did you run extconf.rb?
-#endif
-
-#if SIZEOF_OFF_T == 4
-#  define UH_OFF_T_MAX 0x7fffffff
-#elif SIZEOF_OFF_T == 8
-#  if SIZEOF_LONG == 4
-#    define UH_OFF_T_MAX 0x7fffffffffffffffLL
-#  else
-#    define UH_OFF_T_MAX 0x7fffffffffffffff
-#  endif
+#if SIZEOF_OFF_T == SIZEOF_INT
+#  define UH_OFF_T_MAX INT_MAX
+#elif SIZEOF_OFF_T == SIZEOF_LONG_LONG
+#  define UH_OFF_T_MAX LLONG_MAX
 #else
 #  error off_t size unknown for this platform!
 #endif /* SIZEOF_OFF_T check */
diff --git a/ext/unicorn_http/common_field_optimization.h b/ext/unicorn_http/common_field_optimization.h
index 0659fc7..250e43e 100644
--- a/ext/unicorn_http/common_field_optimization.h
+++ b/ext/unicorn_http/common_field_optimization.h
@@ -83,7 +83,6 @@ static void init_common_fields(void)
   struct common_field *cf = common_http_fields;
   char tmp[64];
 
-  id_uminus = rb_intern("-@");
   memcpy(tmp, HTTP_PREFIX, HTTP_PREFIX_LEN);
 
   for(i = ARRAY_SIZE(common_http_fields); --i >= 0; cf++) {
diff --git a/ext/unicorn_http/epollexclusive.h b/ext/unicorn_http/epollexclusive.h
new file mode 100644
index 0000000..c74a779
--- /dev/null
+++ b/ext/unicorn_http/epollexclusive.h
@@ -0,0 +1,128 @@
+/*
+ * This is only intended for use inside a unicorn worker, nowhere else.
+ * EPOLLEXCLUSIVE somewhat mitigates the thundering herd problem for
+ * mostly idle processes since we can't use blocking accept4.
+ * This is NOT intended for use with multi-threaded servers, nor
+ * single-threaded multi-client ("C10K") servers or anything advanced
+ * like that.  This use of epoll is only appropriate for a primitive,
+ * single-client, single-threaded servers like unicorn that need to
+ * support SIGKILL timeouts and parent death detection.
+ */
+#if defined(HAVE_EPOLL_CREATE1)
+#  include <sys/epoll.h>
+#  include <errno.h>
+#  include <ruby/io.h>
+#  include <ruby/thread.h>
+#endif /* __linux__ */
+
+#if defined(EPOLLEXCLUSIVE) && defined(HAVE_EPOLL_CREATE1)
+#  define USE_EPOLL (1)
+#else
+#  define USE_EPOLL (0)
+#endif
+
+#if USE_EPOLL
+#if defined(HAVE_RB_IO_DESCRIPTOR) /* Ruby 3.1+ */
+#        define my_fileno(io) rb_io_descriptor(io)
+#else /* Ruby <3.1 */
+static int my_fileno(VALUE io)
+{
+        rb_io_t *fptr;
+        GetOpenFile(io, fptr);
+        rb_io_check_closed(fptr);
+        return fptr->fd;
+}
+#endif /* Ruby <3.1 */
+
+/*
+ * :nodoc:
+ * returns IO object if EPOLLEXCLUSIVE works and arms readers
+ */
+static VALUE prep_readers(VALUE cls, VALUE readers)
+{
+        long i;
+        int epfd = epoll_create1(EPOLL_CLOEXEC);
+        VALUE epio;
+
+        if (epfd < 0) rb_sys_fail("epoll_create1");
+
+        epio = rb_funcall(cls, rb_intern("for_fd"), 1, INT2NUM(epfd));
+
+        Check_Type(readers, T_ARRAY);
+        for (i = 0; i < RARRAY_LEN(readers); i++) {
+                int rc, fd;
+                struct epoll_event e;
+                VALUE io = rb_ary_entry(readers, i);
+
+                e.data.u64 = i; /* the reason readers shouldn't change */
+
+                /*
+                 * I wanted to use EPOLLET here, but maintaining our own
+                 * equivalent of ep->rdllist in Ruby-space doesn't fit
+                 * our design at all (and the kernel already has it's own
+                 * code path for doing it).  So let the kernel spend
+                 * cycles on maintaining level-triggering.
+                 */
+                e.events = EPOLLEXCLUSIVE | EPOLLIN;
+                fd = my_fileno(rb_io_get_io(io));
+                rc = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &e);
+                if (rc < 0) rb_sys_fail("epoll_ctl");
+        }
+        return epio;
+}
+#endif /* USE_EPOLL */
+
+#if USE_EPOLL
+struct ep_wait {
+        struct epoll_event event;
+        int epfd;
+        int timeout_msec;
+};
+
+static void *do_wait(void *ptr) /* runs w/o GVL */
+{
+        struct ep_wait *epw = ptr;
+        /*
+         * Linux delivers epoll events in the order received, and using
+         * maxevents=1 ensures we pluck one item off ep->rdllist
+         * at-a-time (c.f. fs/eventpoll.c in linux.git, it's quite
+         * easy-to-understand for anybody familiar with Ruby C).
+         */
+        return (void *)(long)epoll_wait(epw->epfd, &epw->event, 1,
+                                        epw->timeout_msec);
+}
+
+/* :nodoc: */
+/* readers must not change between prepare_readers and get_readers */
+static VALUE
+get_readers(VALUE epio, VALUE ready, VALUE readers, VALUE timeout_msec)
+{
+        struct ep_wait epw;
+        long n;
+
+        Check_Type(ready, T_ARRAY);
+        Check_Type(readers, T_ARRAY);
+
+        epw.epfd = my_fileno(epio);
+        epw.timeout_msec = NUM2INT(timeout_msec);
+        n = (long)rb_thread_call_without_gvl(do_wait, &epw, RUBY_UBF_IO, NULL);
+        if (n < 0) {
+                if (errno != EINTR) rb_sys_fail("epoll_wait");
+        } else if (n > 0) { /* maxevents is hardcoded to 1 */
+                VALUE obj = rb_ary_entry(readers, epw.event.data.u64);
+
+                if (RTEST(obj))
+                        rb_ary_push(ready, obj);
+        } /* n == 0 : timeout */
+        return Qfalse;
+}
+#endif /* USE_EPOLL */
+
+static void init_epollexclusive(VALUE mUnicorn)
+{
+#if USE_EPOLL
+        VALUE cWaiter = rb_define_class_under(mUnicorn, "Waiter", rb_cIO);
+        rb_define_singleton_method(cWaiter, "prep_readers", prep_readers, 1);
+        rb_define_method(cWaiter, "get_readers", get_readers, 3);
+#endif
+}
diff --git a/ext/unicorn_http/ext_help.h b/ext/unicorn_http/ext_help.h
index 747c36c..86a187e 100644
--- a/ext/unicorn_http/ext_help.h
+++ b/ext/unicorn_http/ext_help.h
@@ -8,30 +8,6 @@
 #  define assert_frozen(f) do {} while (0)
 #endif /* !defined(OBJ_FROZEN) */
 
-#if !defined(OFFT2NUM)
-#  if SIZEOF_OFF_T == SIZEOF_LONG
-#    define OFFT2NUM(n) LONG2NUM(n)
-#  else
-#    define OFFT2NUM(n) LL2NUM(n)
-#  endif
-#endif /* ! defined(OFFT2NUM) */
-
-#if !defined(SIZET2NUM)
-#  if SIZEOF_SIZE_T == SIZEOF_LONG
-#    define SIZET2NUM(n) ULONG2NUM(n)
-#  else
-#    define SIZET2NUM(n) ULL2NUM(n)
-#  endif
-#endif /* ! defined(SIZET2NUM) */
-
-#if !defined(NUM2SIZET)
-#  if SIZEOF_SIZE_T == SIZEOF_LONG
-#    define NUM2SIZET(n) ((size_t)NUM2ULONG(n))
-#  else
-#    define NUM2SIZET(n) ((size_t)NUM2ULL(n))
-#  endif
-#endif /* ! defined(NUM2SIZET) */
-
 static inline int str_cstr_eq(VALUE val, const char *ptr, long len)
 {
   return (RSTRING_LEN(val) == len && !memcmp(ptr, RSTRING_PTR(val), len));
diff --git a/ext/unicorn_http/extconf.rb b/ext/unicorn_http/extconf.rb
index d5f81fb..de896fe 100644
--- a/ext/unicorn_http/extconf.rb
+++ b/ext/unicorn_http/extconf.rb
@@ -1,12 +1,8 @@
 # -*- encoding: binary -*-
+# frozen_string_literal: false
 require 'mkmf'
 
-have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h")
-have_macro("SIZEOF_SIZE_T", "ruby.h") or check_sizeof("size_t", "sys/types.h")
-have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h")
-have_func("rb_str_set_len", "ruby.h") or abort 'Ruby 1.9.3+ required'
-have_func("rb_hash_clear", "ruby.h") # Ruby 2.0+
-have_func("gmtime_r", "time.h")
+have_func("rb_hash_clear", "ruby.h") or abort 'Ruby 2.0+ required'
 
 message('checking if String#-@ (str_uminus) dedupes... ')
 begin
@@ -38,4 +34,7 @@ else
   message("no, needs Ruby 2.6+\n")
 end
 
+if have_func('epoll_create1', %w(sys/epoll.h))
+  have_func('rb_io_descriptor') # Ruby 3.1+
+end
 create_makefile("unicorn_http")
diff --git a/ext/unicorn_http/global_variables.h b/ext/unicorn_http/global_variables.h
index f8e694c..c9ceebd 100644
--- a/ext/unicorn_http/global_variables.h
+++ b/ext/unicorn_http/global_variables.h
@@ -55,7 +55,7 @@ NORETURN(static void parser_raise(VALUE klass, const char *));
 
 /** Defines global strings in the init method. */
 #define DEF_GLOBAL(N, val) do { \
-  g_##N = rb_obj_freeze(rb_str_new(val, sizeof(val) - 1)); \
+  g_##N = str_new_dd_freeze(val, (long)sizeof(val) - 1); \
   rb_gc_register_mark_object(g_##N); \
 } while (0)
 
diff --git a/ext/unicorn_http/httpdate.c b/ext/unicorn_http/httpdate.c
index b59d038..0faf5da 100644
--- a/ext/unicorn_http/httpdate.c
+++ b/ext/unicorn_http/httpdate.c
@@ -1,5 +1,6 @@
 #include <ruby.h>
 #include <time.h>
+#include <sys/time.h>
 #include <stdio.h>
 
 static const size_t buf_capa = sizeof("Thu, 01 Jan 1970 00:00:00 GMT");
@@ -11,6 +12,7 @@ static const char months[] = "Jan\0Feb\0Mar\0Apr\0May\0Jun\0"
 
 /* for people on wonky systems only */
 #ifndef HAVE_GMTIME_R
+# warning using fake gmtime_r
 static struct tm * my_gmtime_r(time_t *now, struct tm *tm)
 {
         struct tm *global = gmtime(now);
@@ -42,13 +44,24 @@ static struct tm * my_gmtime_r(time_t *now, struct tm *tm)
 static VALUE httpdate(VALUE self)
 {
         static time_t last;
-        time_t now = time(NULL); /* not a syscall on modern 64-bit systems */
+        struct timeval now;
         struct tm tm;
 
-        if (last == now)
+        /*
+         * Favor gettimeofday(2) over time(2), as the latter can return the
+         * wrong value in the first 1 .. 2.5 ms of every second(!)
+         *
+         * https://lore.kernel.org/git/20230320230507.3932018-1-gitster@pobox.com/
+         * https://inbox.sourceware.org/libc-alpha/20230306160321.2942372-1-adhemerval.zanella@linaro.org/T/
+         * https://sourceware.org/bugzilla/show_bug.cgi?id=30200
+         */
+        if (gettimeofday(&now, NULL))
+                rb_sys_fail("gettimeofday");
+
+        if (last == now.tv_sec)
                 return buf;
-        last = now;
-        gmtime_r(&now, &tm);
+        last = now.tv_sec;
+        gmtime_r(&now.tv_sec, &tm);
 
         /* we can make this thread-safe later if our Ruby loses the GVL */
         snprintf(buf_ptr, buf_capa,
diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl
index 8ef23bc..fb5dcde 100644
--- a/ext/unicorn_http/unicorn_http.rl
+++ b/ext/unicorn_http/unicorn_http.rl
@@ -12,6 +12,7 @@
 #include "common_field_optimization.h"
 #include "global_variables.h"
 #include "c_util.h"
+#include "epollexclusive.h"
 
 void init_unicorn_httpdate(void);
 
@@ -27,10 +28,15 @@ void init_unicorn_httpdate(void);
 #define UH_FL_TO_CLEAR 0x200
 #define UH_FL_RESSTART 0x400 /* for check_client_connection */
 #define UH_FL_HIJACK 0x800
+#define UH_FL_RES_CHUNK_VER (1U << 12)
+#define UH_FL_RES_CHUNK_METHOD (1U << 13)
 
 /* all of these flags need to be set for keepalive to be supported */
 #define UH_FL_KEEPALIVE (UH_FL_KAVERSION | UH_FL_REQEOF | UH_FL_HASHEADER)
 
+/* we can only chunk responses for non-HEAD HTTP/1.1 requests */
+#define UH_FL_RES_CHUNKABLE (UH_FL_RES_CHUNK_VER | UH_FL_RES_CHUNK_METHOD)
+
 static unsigned int MAX_HEADER_LEN = 1024 * (80 + 32); /* same as Mongrel */
 
 /* this is only intended for use with Rainbows! */
@@ -62,19 +68,8 @@ struct http_parser {
   } len;
 };
 
-static ID id_set_backtrace;
-
-#ifdef HAVE_RB_HASH_CLEAR /* Ruby >= 2.0 */
-#  define my_hash_clear(h) (void)rb_hash_clear(h)
-#else /* !HAVE_RB_HASH_CLEAR - Ruby <= 1.9.3 */
-
-static ID id_clear;
-
-static void my_hash_clear(VALUE h)
-{
-  rb_funcall(h, id_clear, 0);
-}
-#endif /* HAVE_RB_HASH_CLEAR */
+static ID id_set_backtrace, id_is_chunked_p;
+static VALUE cHttpParser;
 
 static void finalize_header(struct http_parser *hp);
 
@@ -155,6 +150,9 @@ request_method(struct http_parser *hp, const char *ptr, size_t len)
 {
   VALUE v = rb_str_new(ptr, len);
 
+  if (len != 4 || memcmp(ptr, "HEAD", 4))
+    HP_FL_SET(hp, RES_CHUNK_METHOD);
+
   rb_hash_aset(hp->env, g_request_method, v);
 }
 
@@ -168,6 +166,7 @@ http_version(struct http_parser *hp, const char *ptr, size_t len)
   if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
     /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
     HP_FL_SET(hp, KAVERSION);
+    HP_FL_SET(hp, RES_CHUNK_VER);
     v = g_http_11;
   } else if (CONST_MEM_EQ("HTTP/1.0", ptr, len)) {
     v = g_http_10;
@@ -220,6 +219,19 @@ static void write_cont_value(struct http_parser *hp,
   rb_str_buf_cat(hp->cont, vptr, end + 1);
 }
 
+static int is_chunked(VALUE v)
+{
+  /* common case first */
+  if (STR_CSTR_CASE_EQ(v, "chunked"))
+    return 1;
+
+  /*
+   * call Ruby function in unicorn/http_request.rb to deal with unlikely
+   * comma-delimited case
+   */
+  return rb_funcall(cHttpParser, id_is_chunked_p, 1, v) != Qfalse;
+}
+
 static void write_value(struct http_parser *hp,
                         const char *buffer, const char *p)
 {
@@ -246,7 +258,9 @@ static void write_value(struct http_parser *hp,
     f = uncommon_field(field, flen);
   } else if (f == g_http_connection) {
     hp_keepalive_connection(hp, v);
-  } else if (f == g_content_length) {
+  } else if (f == g_content_length && !HP_FL_TEST(hp, CHUNKED)) {
+    if (hp->len.content)
+      parser_raise(eHttpParserError, "Content-Length already set");
     hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v));
     if (hp->len.content < 0)
       parser_raise(eHttpParserError, "invalid Content-Length");
@@ -254,9 +268,30 @@ static void write_value(struct http_parser *hp,
       HP_FL_SET(hp, HASBODY);
     hp_invalid_if_trailer(hp);
   } else if (f == g_http_transfer_encoding) {
-    if (STR_CSTR_CASE_EQ(v, "chunked")) {
+    if (is_chunked(v)) {
+      if (HP_FL_TEST(hp, CHUNKED))
+        /*
+         * RFC 7230 3.3.1:
+         * A sender MUST NOT apply chunked more than once to a message body
+         * (i.e., chunking an already chunked message is not allowed).
+         */
+        parser_raise(eHttpParserError, "Transfer-Encoding double chunked");
+
       HP_FL_SET(hp, CHUNKED);
       HP_FL_SET(hp, HASBODY);
+
+      /* RFC 7230 3.3.3, 3: favor chunked if Content-Length exists */
+      hp->len.content = 0;
+    } else if (HP_FL_TEST(hp, CHUNKED)) {
+      /*
+       * RFC 7230 3.3.3, point 3 states:
+       * If a Transfer-Encoding header field is present in a request and
+       * the chunked transfer coding is not the final encoding, the
+       * message body length cannot be determined reliably; the server
+       * MUST respond with the 400 (Bad Request) status code and then
+       * close the connection.
+       */
+      parser_raise(eHttpParserError, "invalid Transfer-Encoding");
     }
     hp_invalid_if_trailer(hp);
   } else if (f == g_http_trailer) {
@@ -487,7 +522,7 @@ static void set_url_scheme(VALUE env, VALUE *server_port)
      * and X-Forwarded-Proto handling from this parser?  We've had it
      * forever and nobody has said anything against it, either.
      * Anyways, please send comments to our public mailing list:
-     * unicorn-public@bogomips.org (no HTML mail, no subscription necessary)
+     * unicorn-public@yhbt.net (no HTML mail, no subscription necessary)
      */
     scheme = rb_hash_aref(env, g_http_x_forwarded_ssl);
     if (!NIL_P(scheme) && STR_CSTR_EQ(scheme, "on")) {
@@ -613,7 +648,7 @@ static VALUE HttpParser_clear(VALUE self)
     return HttpParser_init(self);
 
   http_parser_init(hp);
-  my_hash_clear(hp->env);
+  rb_hash_clear(hp->env);
 
   return self;
 }
@@ -775,6 +810,14 @@ static VALUE HttpParser_keepalive(VALUE self)
   return HP_FL_ALL(hp, KEEPALIVE) ? Qtrue : Qfalse;
 }
 
+/* :nodoc: */
+static VALUE chunkable_response_p(VALUE self)
+{
+  const struct http_parser *hp = data_get(self);
+
+  return HP_FL_ALL(hp, RES_CHUNKABLE) ? Qtrue : Qfalse;
+}
+
 /**
  * call-seq:
  *    parser.next? => true or false
@@ -931,7 +974,7 @@ static VALUE HttpParser_rssget(VALUE self)
 
 void Init_unicorn_http(void)
 {
-  VALUE mUnicorn, cHttpParser;
+  VALUE mUnicorn;
 
   mUnicorn = rb_define_module("Unicorn");
   cHttpParser = rb_define_class_under(mUnicorn, "HttpParser", rb_cObject);
@@ -942,6 +985,7 @@ void Init_unicorn_http(void)
   e414 = rb_define_class_under(mUnicorn, "RequestURITooLongError",
                                eHttpParserError);
 
+  id_uminus = rb_intern("-@");
   init_globals();
   rb_define_alloc_func(cHttpParser, HttpParser_alloc);
   rb_define_method(cHttpParser, "initialize", HttpParser_init, 0);
@@ -954,6 +998,7 @@ void Init_unicorn_http(void)
   rb_define_method(cHttpParser, "content_length", HttpParser_content_length, 0);
   rb_define_method(cHttpParser, "body_eof?", HttpParser_body_eof, 0);
   rb_define_method(cHttpParser, "keepalive?", HttpParser_keepalive, 0);
+  rb_define_method(cHttpParser, "chunkable_response?", chunkable_response_p, 0);
   rb_define_method(cHttpParser, "headers?", HttpParser_has_headers, 0);
   rb_define_method(cHttpParser, "next?", HttpParser_next, 0);
   rb_define_method(cHttpParser, "buf", HttpParser_buf, 0);
@@ -988,8 +1033,8 @@ void Init_unicorn_http(void)
   id_set_backtrace = rb_intern("set_backtrace");
   init_unicorn_httpdate();
 
-#ifndef HAVE_RB_HASH_CLEAR
-  id_clear = rb_intern("clear");
-#endif
+  id_is_chunked_p = rb_intern("is_chunked?");
+
+  init_epollexclusive(mUnicorn);
 }
 #undef SET_GLOBAL
diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl
index 0988b54..507e570 100644
--- a/ext/unicorn_http/unicorn_http_common.rl
+++ b/ext/unicorn_http/unicorn_http_common.rl
@@ -4,7 +4,7 @@
 
 #### HTTP PROTOCOL GRAMMAR
 # line endings
-  CRLF = ("\r\n" | "\n");
+  CRLF = ("\r\n" | "\n");
 
 # character types
   CTL = (cntrl | 127);