From 158e9aad11ee2ed7dc01182da150e803f7cdbfef Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 1 Oct 2021 03:09:23 +0000 Subject: use EPOLLEXCLUSIVE on Linux 4.5+ While the capabilities of epoll cannot be fully exploited given our primitive design, avoiding thundering herd wakeups on larger SMP machines while below 100% utilization is possible with Linux 4.5+. With this change, only one worker wakes up per-connect(2) (instead of all of them via select(2)), avoiding the thundering herd effect when the system is mostly idle. Saturated instances should not notice the difference if they rarely had multiple workers sleeping in select(2). This change benefits non-saturated instances. With 2 parallel clients and 8 workers on a nominally (:P) 8-core CPU (AMD FX-8320), the uconnect.perl test script invocation showed a reduction from ~3.4s to ~2.5s when reading an 11-byte response body: echo worker_processes 8 >u.conf.rb bs=11 ruby -I lib -I test/ruby-2.5.5/ext/unicorn_http/ bin/unicorn \ test/benchmark/dd.ru -E none -l /tmp/u.sock -c u.conf.rb time perl -I lib -w test/benchmark/uconnect.perl \ -n 100000 -c 2 /tmp/u.sock Times improve less as "-c" increases for uconnect.perl (system noise and timings are inconsistent). The benefit of this change should be more noticeable on systems with more workers (and more cores). I wanted to use EPOLLET (Edge-Triggered) to further reduce syscalls, here, (similar to the old select()-avoidance bet) but that would've either added too much complexity to deduplicate wakeup sources, or run into the same starvation problem we solved in April 2020[1]. Since the kernel already has the complexity and deduplication built-in for Level-Triggered epoll support, we'll just let the kernel deal with it. Note: do NOT take this as an example of how epoll should be used in a sophisticated server. unicorn is primitive by design and cannot use threads nor handle multiple clients at once, thus it only uses epoll in this extremely limited manner. 
Linux 4.5+ users will notice a regression of one extra epoll FD per-worker and at least two epoll watches, so /proc/sys/fs/epoll/max_user_watches may need to be changed along with RLIMIT_NOFILE. This change has also been tested on Linux 3.10.x (CentOS 7.x) and FreeBSD 11.x to ensure compatibility with systems without EPOLLEXCLUSIVE. Various EPOLLEXCLUSIVE discussions over the years: https://yhbt.net/lore/lkml/?q=s:EPOLLEXCLUSIVE+d:..20211001&x=t&o=-1 [1] https://yhbt.net/unicorn-public/CAMBWrQ=Yh42MPtzJCEO7XryVknDNetRMuA87irWfqVuLdJmiBQ@mail.gmail.com/ --- lib/unicorn/http_server.rb | 17 +++++++++++++---- lib/unicorn/select_waiter.rb | 6 ++++++ 2 files changed, 19 insertions(+), 4 deletions(-) create mode 100644 lib/unicorn/select_waiter.rb (limited to 'lib/unicorn') diff --git a/lib/unicorn/http_server.rb b/lib/unicorn/http_server.rb index 7f33f98..21f2a05 100644 --- a/lib/unicorn/http_server.rb +++ b/lib/unicorn/http_server.rb @@ -685,7 +685,6 @@ class Unicorn::HttpServer LISTENERS.each { |sock| sock.close_on_exec = true } worker.user(*user) if user.kind_of?(Array) && ! worker.switched - self.timeout /= 2.0 # halve it for select() @config = nil build_app! 
unless preload_app @after_fork = @listener_opts = @orig_app = nil @@ -705,11 +704,22 @@ class Unicorn::HttpServer exit!(77) # EX_NOPERM in sysexits.h end + def prep_readers(readers) + wtr = Unicorn::Waiter.prep_readers(readers) + @timeout *= 500 # to milliseconds for epoll, but halved + wtr + rescue + require_relative 'select_waiter' + @timeout /= 2.0 # halved for IO.select + Unicorn::SelectWaiter.new + end + # runs inside each forked worker, this sits around and waits # for connections and doesn't die until the parent dies (or is # given a INT, QUIT, or TERM signal) def worker_loop(worker) readers = init_worker_process(worker) + waiter = prep_readers(readers) reopen = false # this only works immediately if the master sent us the signal @@ -722,8 +732,7 @@ class Unicorn::HttpServer begin reopen = reopen_worker_logs(worker.nr) if reopen worker.tick = time_now.to_i - tmp = ready.dup - while sock = tmp.shift + while sock = ready.shift # Unicorn::Worker#kgio_tryaccept is not like accept(2) at all, # but that will return false if client = sock.kgio_tryaccept @@ -735,7 +744,7 @@ class Unicorn::HttpServer # timeout so we can .tick and keep parent from SIGKILL-ing us worker.tick = time_now.to_i - ret = IO.select(readers, nil, nil, @timeout) and ready = ret[0] + waiter.get_readers(ready, readers, @timeout) rescue => e redo if reopen && readers[0] Unicorn.log_error(@logger, "listen loop error", e) if readers[0] diff --git a/lib/unicorn/select_waiter.rb b/lib/unicorn/select_waiter.rb new file mode 100644 index 0000000..cb84aab --- /dev/null +++ b/lib/unicorn/select_waiter.rb @@ -0,0 +1,6 @@ +# fallback for non-Linux and Linux <4.5 systems w/o EPOLLEXCLUSIVE +class Unicorn::SelectWaiter # :nodoc: + def get_readers(ready, readers, timeout) # :nodoc: + ret = IO.select(readers, nil, nil, timeout) and ready.replace(ret[0]) + end +end -- cgit v1.2.3-24-ge0c7