1 files changed, 94 insertions, 31 deletions
diff --git a/lib/unicorn.rb b/lib/unicorn.rb
index a5aaf7f..9575944 100644
--- a/lib/unicorn.rb
+++ b/lib/unicorn.rb
@@ -44,6 +44,15 @@ module Unicorn
            server.logger.info("worker=#{worker_nr} spawning...")
          },
      }
+
+    Worker = Struct.new(:nr, :tempfile) unless defined?(Worker)
+    class Worker
+      # worker objects may be compared to just plain numbers
+      def ==(other_nr)
+        self.nr == other_nr
+      end
+    end
+
      # Creates a working server on host:port (strange things happen if
      # port isn't a Number).  Use HttpServer::run to start the server and
      # HttpServer.workers.join to join the thread that's processing
@@ -117,7 +126,9 @@ module Unicorn
      end
  
      # monitors children and receives signals forever
-    # (or until a termination signal is sent)
+    # (or until a termination signal is sent).  This handles signals
+    # one-at-a-time time and we'll happily drop signals in case somebody
+    # is signalling us too often.
      def join
        %w(QUIT INT TERM USR1 USR2 HUP).each { |sig| trap_deferred(sig) }
        begin
@@ -126,6 +137,7 @@ module Unicorn
            case @mode
            when :idle
              kill_each_worker(0) # ensure they're running
+            murder_lazy_workers
              spawn_missing_workers
            when 'QUIT' # graceful shutdown
              break
@@ -148,11 +160,15 @@ module Unicorn
              @mode = :idle
            end
            reap_all_workers
-          ready = IO.select([@rd_sig], nil, nil, 1) or next
+
+          ready = begin
+            IO.select([@rd_sig], nil, nil, 1) or next
+          rescue Errno::EINTR # next
+          end
            ready[0] && ready[0][0] or next
            begin # just consume the pipe when we're awakened, @mode is set
              loop { @rd_sig.sysread(Const::CHUNK_SIZE) }
-          rescue Errno::EAGAIN
+          rescue Errno::EAGAIN, Errno::EINTR # next
            end
          end
        rescue Errno::EINTR
@@ -207,8 +223,10 @@ module Unicorn
        begin
          loop do
            pid = waitpid(-1, WNOHANG) or break
-          worker_nr = @workers.delete(pid)
-          logger.info "reaped pid=#{pid} worker=#{worker_nr || 'unknown'} " \
+          worker = @workers.delete(pid)
+          worker.tempfile.close rescue nil
+          logger.info "reaped pid=#{pid} " \
+                      "worker=#{worker && worker.nr || 'unknown'} " \
                        "status=#{$?.exitstatus}"
          end
        rescue Errno::ECHILD
@@ -248,19 +266,39 @@ module Unicorn
        end
      end
  
+    # forcibly terminate all workers that haven't checked in in @timeout
+    # seconds.  The timeout is implemented using an unlinked tempfile
+    # shared between the parent process and each worker.  The worker
+    # runs File#chmod to modify the ctime of the tempfile.  If the ctime
+    # is stale for >@timeout seconds, then we'll kill the corresponding
+    # worker.
+    def murder_lazy_workers
+      now = Time.now
+      @workers.each_pair do |pid, worker|
+        (now - worker.tempfile.ctime) <= @timeout and next
+        logger.error "worker=#{worker.nr} pid=#{pid} is too old, killing"
+        kill_worker('KILL', pid) # take no prisoners for @timeout violations
+        worker.tempfile.close rescue nil
+      end
+    end
+
      def spawn_missing_workers
        return if @workers.size == @nr_workers
        (0...@nr_workers).each do |worker_nr|
          @workers.values.include?(worker_nr) and next
-        @before_fork.call(self, worker_nr)
-        pid = fork { worker_loop(worker_nr) }
-        @workers[pid] = worker_nr
+        tempfile = Tempfile.new('unicorn_worker')
+        tempfile.unlink # don't allow other processes to find or see it
+        tempfile.sync = true
+        worker = Worker.new(worker_nr, tempfile)
+        @before_fork.call(self, worker.nr)
+        pid = fork { worker_loop(worker) }
+        @workers[pid] = worker
        end
      end
  
      # once a client is accepted, it is processed in its entirety here
      # in 3 easy steps: read request, call app, write app response
-    def process_client(client, client_nr)
+    def process_client(client)
        env = @request.read(client) or return
        app_response = @app.call(env)
        HttpResponse.write(client, app_response)
@@ -279,18 +317,29 @@ module Unicorn
        @request.reset
      end
  
-    # runs inside each forked worker, this sits around and waits
-    # for connections and doesn't die until the parent dies
-    def worker_loop(worker_nr)
+    # gets rid of stuff the worker has no business keeping track of
+    # to free some resources and drops all sig handlers.
+    # traps for USR1, USR2, and HUP may be set in the @after_fork Proc
+    # by the user.
+    def init_worker_process
+      %w(TERM INT QUIT USR1 USR2 HUP).each { |sig| trap(sig, 'IGNORE') }
        @rd_sig.close
        @wr_sig.close
-      # allow @after_fork to override these signals:
-      %w(USR1 USR2 HUP).each { |sig| trap(sig, 'IGNORE') }
-      @after_fork.call(self, worker_nr) if @after_fork
-
+      @workers.values.each { |other| other.tempfile.close rescue nil }
+      @workers.clear
+      @start_ctx.clear
+      @mode = @start_ctx = @workers = @rd_sig = @wr_sig = nil
        @listeners.each { |sock| set_cloexec(sock) }
-      nr_before = nr = 0
-      client = nil
+    end
+
+    # runs inside each forked worker, this sits around and waits
+    # for connections and doesn't die until the parent dies (or is
+    # given a INT, QUIT, or TERM signal)
+    def worker_loop(worker)
+      init_worker_process
+      @after_fork.call(self, worker.nr) if @after_fork
+      nr = 0
+      tempfile = worker.tempfile
        alive = true
        ready = @listeners
        %w(TERM INT).each { |sig| trap(sig) { exit(0) } } # instant shutdown
@@ -300,8 +349,17 @@ module Unicorn
        end
  
        while alive && @master_pid == ppid
+        # we're a goner in @timeout seconds anyways if tempfile.chmod
+        # breaks, so don't trap the exception.  Using fchmod() since
+        # futimes() is not available in base Ruby and I very strongly
+        # prefer temporary files to be unlinked for security,
+        # performance and reliability reasons, so utime is out.  No-op
+        # changes with chmod doesn't update ctime on all filesystems; so
+        # we increment our counter each and every time.
+        tempfile.chmod(nr += 1)
+
          begin
-          nr_before = nr
+          accepted = false
            ready.each do |sock|
              begin
                client = begin
@@ -309,29 +367,30 @@ module Unicorn
                rescue Errno::EAGAIN
                  next
                end
-              client.sync = true
+              accepted = client.sync = true
                client.nonblock = false
                set_client_sockopt(client) if client.class == TCPSocket
-              nr += 1
-              process_client(client, nr)
+              process_client(client)
              rescue Errno::ECONNABORTED
                # client closed the socket even before accept
                if client && !client.closed?
                  client.close rescue nil
                end
              end
+            tempfile.chmod(nr += 1)
            end
  
            # make the following bet: if we accepted clients this round,
            # we're probably reasonably busy, so avoid calling select(2)
            # and try to do a blind non-blocking accept(2) on everything
            # before we sleep again in select
-          if nr != nr_before
+          if accepted
              ready = @listeners
            else
              begin
+              tempfile.chmod(nr += 1)
                # timeout used so we can detect parent death:
-              ret = IO.select(@listeners, nil, nil, @timeout) or next
+              ret = IO.select(@listeners, nil, nil, @timeout/2.0) or next
                ready = ret[0]
              rescue Errno::EBADF => e
                exit(alive ? 1 : 0)
@@ -348,15 +407,19 @@ module Unicorn
        end
      end
  
+    # delivers a signal to a worker and fails gracefully if the worker
+    # is no longer running.
+    def kill_worker(signal, pid)
+      begin
+        Process.kill(signal, pid)
+      rescue Errno::ESRCH
+        worker = @workers.delete(pid) and worker.tempfile.close rescue nil
+      end
+    end
+
      # delivers a signal to each worker
      def kill_each_worker(signal)
-      @workers.keys.each do |pid|
-        begin
-          Process.kill(signal, pid)
-        rescue Errno::ESRCH
-          @workers.delete(pid)
-        end
-      end
+      @workers.keys.each { |pid| kill_worker(signal, pid) }
      end
  
    end