From a105f96edd9bc842170c6272b0d37b8891152824 Mon Sep 17 00:00:00 2001
From: Eric Wong <normalperson@yhbt.net>
Date: Thu, 5 Feb 2009 20:19:15 -0800
Subject: Make HttpRequest object (and temp files) persistent

This will help prevent TMPDIR from becoming bloated when
handling thousands of large uploads a day.  This is a problem in
many UNIX filesystems (including ext3): names of entries never
expire even after files are gone, and the only way to clear them is
to get rid of the directory itself.
---
 lib/unicorn/http_request.rb | 151 +++++++++++++++++++++-----------------------
 1 file changed, 73 insertions(+), 78 deletions(-)

(limited to 'lib/unicorn/http_request.rb')

diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index 7a95ee5..1e43bc9 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -6,98 +6,93 @@ module Unicorn
   # a StringIO object.  To be safe, you should assume it works like a file.
   # 
   class HttpRequest
-    attr_reader :body, :params, :logger
+    attr_reader :logger
 
     # You don't really call this.  It's made for you.
-    # Main thing it does is hook up the params, and store any remaining
-    # body data into the HttpRequest.body attribute.
-    def initialize(params, socket, logger)
-      @params = params
-      @socket = socket
+    def initialize(logger)
       @logger = logger
-      http_body = @params[Const::HTTP_BODY]
-      content_length = @params[Const::CONTENT_LENGTH].to_i
+      @tempfile = @body = nil
+    end
+
+    def reset!
+      @body.truncate(0) rescue nil
+      @body.close rescue nil
+      @body = nil
+    end
+
+    # returns an environment hash suitable for Rack if successful
+    # returns nil if the socket closed prematurely (e.g. user aborted upload)
+    def consume(params, socket)
+      http_body = params[Const::HTTP_BODY]
+      content_length = params[Const::CONTENT_LENGTH].to_i
       remain = content_length - http_body.length
 
-      # Some clients (like FF1.0) report 0 for body and then send a body.  This will probably truncate them but at least the request goes through usually.
-      if remain <= 0
-        # we've got everything, pack it up
+      # pick the body buffer based on how much data is left to read
+      if remain < Const::MAX_BODY
+        # small body, just use that
         @body = StringIO.new(http_body)
-      elsif remain > 0
-        # must read more data to complete body
-        if remain > Const::MAX_BODY
-          # huge body, put it in a tempfile
-          @body = Tempfile.new(Const::UNICORN_TMP_BASE)
-          @body.binmode
-          @body.write(http_body)
-        else
-          # small body, just use that
-          @body = StringIO.new(http_body)
-        end
-
-        read_body(remain, content_length)
+      else # huge body, put it in a tempfile
+        @tempfile ||= Tempfile.new(Const::UNICORN_TMP_BASE)
+        @body = File.open(@tempfile.path, "wb+")
+        @body.sync = true
+        @body.syswrite(http_body)
+        @body
       end
 
-      @body.rewind if @body
+      # Some clients (like FF1.0) report 0 for body and then send a body.
+      # This will probably truncate them but at least the request goes through
+      # usually.
+      if remain > 0
+        read_body(socket, remain) or return nil # fail!
+      end
+      @body.rewind
+      @body.sysseek(0) if @body.respond_to?(:sysseek)
+      rack_env(params)
     end
 
-    # Returns an environment which is rackable: http://rack.rubyforge.org/doc/files/SPEC.html
-    # Copied directly from Rack's old Unicorn handler.
-    def env
-      env = params.clone
-      env["QUERY_STRING"] ||= ''
-      env.delete "HTTP_CONTENT_TYPE"
-      env.delete "HTTP_CONTENT_LENGTH"
-      env.update({"rack.version" => [0,1],
-              "rack.input" => @body,
-              "rack.errors" => STDERR,
-
-              "rack.multithread" => false,
-              "rack.multiprocess" => true,
-              "rack.run_once" => false,
-
-              "rack.url_scheme" => "http",
-            }) 
+    # Returns an environment which is rackable:
+    # http://rack.rubyforge.org/doc/files/SPEC.html
+    # Copied directly from Rack's old Mongrel handler.
+    def rack_env(params)
+      params["QUERY_STRING"] ||= ''
+      params.delete "HTTP_CONTENT_TYPE"
+      params.delete "HTTP_CONTENT_LENGTH"
+      params.update({ "rack.version" => [0,1],
+                      "rack.input" => @body,
+                      "rack.errors" => STDERR,
+                      "rack.multithread" => false,
+                      "rack.multiprocess" => true,
+                      "rack.run_once" => false,
+                      "rack.url_scheme" => "http",
+                    })
     end
 
-    # Does the heavy lifting of properly reading the larger body requests in 
-    # small chunks.  It expects @body to be an IO object, @socket to be valid,
-    # and will set @body = nil if the request fails.  It also expects any initial
-    # part of the body that has been read to be in the @body already.
-    def read_body(remain, total)
-      begin
-        # Write the odd sized chunk first
-        buffer = read_socket(remain % Const::CHUNK_SIZE)
-        remain -= @body.write(buffer)
-
-        # Then stream out nothing but perfectly sized chunks
-        until remain <= 0 or @socket.closed?
-          # ASSUME: we are writing to a disk and these writes always write the requested amount
-          buffer = read_socket(Const::CHUNK_SIZE)
-          remain -= @body.write(buffer)
+    # Does the heavy lifting of properly reading the larger body requests in
+    # small chunks.  It expects @body to be an IO object and socket to be valid.
+    # It also expects any initial part of the body that has been read to be in
+    # the @body already.  It will return true if successful and false if not.
+    def read_body(socket, remain)
+      buf = ' ' # this string is reused for the lifetime of the loop
+      while remain > 0
+        begin
+          socket.sysread(remain, buf) # short read if it's a socket
+        rescue Errno::EINTR, Errno::EAGAIN
+          retry
         end
-      rescue Object => e
-        logger.error "Error reading HTTP body: #{e.inspect}"
-        # Any errors means we should delete the file, including if the file is dumped
-        @socket.close rescue nil
-        @body.close! if @body.class == Tempfile
-        @body = nil # signals that there was a problem
-      end
-    end
- 
-    def read_socket(len)
-      if !@socket.closed?
-        data = @socket.read(len)
-        if !data
-          raise "Socket read return nil"
-        elsif data.length != len
-          raise "Socket read returned insufficient data: #{data.length}"
-        else
-          data
-        end
-      else
-        raise "Socket already closed when reading."
+
+        # ASSUME: we are writing to a disk and these writes always write the
+        # requested amount.  This is true on Linux.
+        remain -= @body.syswrite(buf)
       end
+      true # success!
+    rescue Object => e
+      logger.error "Error reading HTTP body: #{e.inspect}"
+      socket.close rescue nil
+
+      # Any errors means we should delete the file, including if the file
+      # is dumped.  Truncate it ASAP to help avoid page flushes to disk.
+      reset!
+      false
     end
   end
 end
-- 
cgit v1.2.3-24-ge0c7