8 files changed, 305 insertions, 92 deletions
diff --git a/lib/unicorn.rb b/lib/unicorn.rb
index eefbfc1..e36cb1e 100644
--- a/lib/unicorn.rb
+++ b/lib/unicorn.rb
@@ -135,7 +135,7 @@ module Unicorn
      def listen(address)
        return if String === address && listener_names.include?(address)
  
-      if io = bind_listen(address, @backlog)
+      if io = bind_listen(address, { :backlog => @backlog })
          if Socket == io.class
            @io_purgatory << io
            io = server_cast(io)
diff --git a/lib/unicorn/app/old_rails.rb b/lib/unicorn/app/old_rails.rb
new file mode 100644
index 0000000..bb9577a
--- /dev/null
+++ b/lib/unicorn/app/old_rails.rb
@@ -0,0 +1,23 @@
+# This code is based on the original Rails handler in Mongrel
+# Copyright (c) 2005 Zed A. Shaw
+# Copyright (c) 2009 Eric Wong
+# You can redistribute it and/or modify it under the same terms as Ruby.
+# Additional work donated by contributors.  See CONTRIBUTORS for more info.
+require 'unicorn/cgi_wrapper'
+require 'dispatcher'
+
+module Unicorn; module App; end; end
+
+# Rack endpoint that hands each request to the Rails Dispatcher via a CGI shim.
+class Unicorn::App::OldRails
+
+  def call(env)
+    wrapper = Unicorn::CGIWrapper.new(env)
+    session_opts = ActionController::CgiRequest::DEFAULT_SESSION_OPTIONS
+    Dispatcher.dispatch(wrapper, session_opts, wrapper.body)
+
+    wrapper.out  # finalize the response
+    wrapper.rack_response
+  end
+
+end
diff --git a/lib/unicorn/app/old_rails/static.rb b/lib/unicorn/app/old_rails/static.rb
new file mode 100644
index 0000000..c9366d2
--- /dev/null
+++ b/lib/unicorn/app/old_rails/static.rb
@@ -0,0 +1,58 @@
+# This code is based on the original Rails handler in Mongrel
+# Copyright (c) 2005 Zed A. Shaw
+# Copyright (c) 2009 Eric Wong
+# You can redistribute it and/or modify it under the same terms as Ruby.
+
+require 'rack/file'
+
+# Static file handler for Rails < 2.3.  This handler is only provided
+# as a convenience for developers.  Performance-minded deployments should
+# use nginx (or similar) for serving static files.
+#
+# This supports page caching directly and will try to resolve a
+# request in the following order:
+#
+# * If the exact PATH_INFO exists as a file then serve it.
+# * If PATH_INFO+rest_operator+page_cache_extension exists as a file
+#   then serve that.
+#
+# This means that if you are using page caching it will actually work
+# with Unicorn and you should see a decent speed boost (but not as
+# fast as if you use a static server like nginx).
+class Unicorn::App::OldRails::Static
+  FILE_METHODS = { 'GET' => true, 'HEAD' => true }.freeze
+
+  def initialize(app) # app: the Rack app to fall back to (OldRails)
+    @app = app
+    @root = "#{::RAILS_ROOT}/public"
+    @file_server = ::Rack::File.new(@root)
+  end
+
+  def call(env)
+    # short circuit non-file methods; Const::REQUEST_METHOD no longer exists
+    FILE_METHODS.include?(env['REQUEST_METHOD']) or
+      return @app.call(env)
+
+    # first try the path as-is
+    path_info = env[Unicorn::Const::PATH_INFO].chomp("/")
+    if File.file?("#@root/#{::Rack::Utils.unescape(path_info)}")
+      # File exists as-is so serve it up
+      env[Unicorn::Const::PATH_INFO] = path_info
+      return @file_server.call(env)
+    end
+
+    # then try the cached version:
+
+    # grab the semi-colon REST operator used by old versions of Rails
+    # this is the reason we didn't just copy the new Rails::Rack::Static
+    env[Unicorn::Const::REQUEST_URI] =~ /^#{Regexp.escape(path_info)}(;[^\?]+)/
+    path_info << "#$1#{ActionController::Base.page_cache_extension}"
+
+    if File.file?("#@root/#{::Rack::Utils.unescape(path_info)}")
+      env[Unicorn::Const::PATH_INFO] = path_info
+      return @file_server.call(env)
+    end
+
+    @app.call(env) # call OldRails
+  end
+end if defined?(Unicorn::App::OldRails)
diff --git a/lib/unicorn/cgi_wrapper.rb b/lib/unicorn/cgi_wrapper.rb
new file mode 100644
index 0000000..816b0a0
--- /dev/null
+++ b/lib/unicorn/cgi_wrapper.rb
@@ -0,0 +1,139 @@
+# This code is based on the original CGIWrapper from Mongrel
+# Copyright (c) 2005 Zed A. Shaw
+# Copyright (c) 2009 Eric Wong
+# You can redistribute it and/or modify it under the same terms as Ruby.
+#
+# Additional work donated by contributors.  See CONTRIBUTORS for more info.
+
+require 'cgi'
+
+module Unicorn; end
+
+# The beginning of a complete wrapper around Unicorn's internal HTTP
+# processing system but maintaining the original Ruby CGI module.  Use
+# this only as a crutch to get existing CGI based systems working.  It
+# should handle everything, but please notify us if you see special
+# warnings.  This work is still very alpha so we need testers to help
+# work out the various corner cases.
+class Unicorn::CGIWrapper < ::CGI
+  undef_method :env_table
+  attr_reader :env_table
+  attr_reader :body
+
+  # these are stripped out of any keys passed to CGIWrapper.header function
+  NPH = 'nph'.freeze # Completely ignored, Unicorn outputs the date regardless
+  CONNECTION = 'connection'.freeze # Completely ignored. Why is CGI doing this?
+  CHARSET = 'charset'.freeze # this gets appended to Content-Type
+  COOKIE = 'cookie'.freeze # maps (Hash,Array,String) to "Set-Cookie" headers
+  STATUS = 'status'.freeze # stored as @status
+
+  # some of these are common strings, but this is the only module
+  # using them and the reason they're not in Unicorn::Const
+  SET_COOKIE = 'Set-Cookie'.freeze
+  CONTENT_TYPE = 'Content-Type'.freeze
+  CONTENT_LENGTH = 'Content-Length'.freeze # this is NOT Const::CONTENT_LENGTH
+  RACK_INPUT = 'rack.input'.freeze
+  RACK_ERRORS = 'rack.errors'.freeze
+
+  # this maps CGI header names to HTTP header names
+  HEADER_MAP = {
+    'type' => CONTENT_TYPE,
+    'server' => 'Server'.freeze,
+    'language' => 'Content-Language'.freeze,
+    'expires' => 'Expires'.freeze,
+    'length' => CONTENT_LENGTH,
+  }.freeze
+
+  # Takes a Rackable environment, plus any additional CGI.new
+  # arguments.  These are used internally to create a wrapper around the
+  # real CGI while maintaining Rack/Unicorn's view of the world.  This
+  # will NOT deal well with large responses that take up a lot of
+  # memory, but neither does the CGI nor the original CGIWrapper from
+  # Mongrel...
+  def initialize(rack_env, *args)
+    @env_table = rack_env
+    @status = 200
+    @head = { :cookies => [] }
+    @body = StringIO.new
+    super(*args)
+  end
+
+  # Finalizes the response as a Rack [ status, headers, [ body ] ] triplet
+  def rack_response
+    cookies = @head.delete(:cookies)
+    cookies.empty? or @head[SET_COOKIE] = cookies.join("\n")
+    @head[CONTENT_LENGTH] ||= @body.size.to_s # Rack header values are Strings
+
+    [ @status, @head, [ @body.string ] ]
+  end
+
+  # The header is typically called to send back the header.  In our case we
+  # collect it into a hash for later usage.  This can be called multiple
+  # times to set different cookies.  Always returns an empty String.
+  def header(options = "text/html")
+    # if they pass in a string then just write the Content-Type
+    if String === options
+      @head[CONTENT_TYPE] ||= options
+    else
+      HEADER_MAP.each_pair do |from, to|
+        from = options.delete(from) or next
+        @head[to] = from
+      end
+
+      @head[CONTENT_TYPE] ||= "text/html"
+      if charset = options.delete(CHARSET)
+        @head[CONTENT_TYPE] += "; charset=#{charset}" # no in-place mutation
+      end
+
+      # lots of ways to set cookies
+      if cookie = options.delete(COOKIE)
+        cookies = @head[:cookies]
+        case cookie
+        when Array
+          cookie.each { |c| cookies << c.to_s }
+        when Hash
+          cookie.each_value { |c| cookies << c.to_s }
+        else
+          cookies << cookie.to_s
+        end
+      end
+      (status = options.delete(STATUS)) and @status = status # override 200
+      # drop the keys we don't want anymore
+      options.delete(NPH)
+      options.delete(CONNECTION)
+
+      # finally, set the rest of the headers as-is
+      options.each_pair { |k,v| @head[k] = v }
+    end
+
+    # doing this fakes out the cgi library to think the headers are empty
+    # we then do the real headers in the out function call later
+    ""
+  end
+
+  # The dumb thing is people can call header or this or both and in
+  # any order.  So, we just reuse header and then finalize the
+  # HttpResponse the right way.  This will have no effect if called
+  # the second time if the first "outputted" anything.
+  def out(options = "text/html")
+    header(options)
+    @body.size == 0 or return
+    @body << yield if block_given? # callers may finalize without a block
+  end
+
+  # Stands in for CGI's stdinput: returns the env's "rack.input" entry.
+  def stdinput
+    env_table[RACK_INPUT]
+  end
+
+  # Nothing is expected to ask for stdoutput, so complain on rack.errors
+  # whenever it happens and hand back the response body buffer.
+  def stdoutput
+    errors = @env_table[RACK_ERRORS]
+    [ "WARNING: Your program is doing something not expected.",
+      "Please tell Eric that stdoutput was used and what software " \
+      "you are running.  Thanks." ].each { |msg| errors.puts msg }
+    @body
+  end
+
+end
diff --git a/lib/unicorn/const.rb b/lib/unicorn/const.rb
index ed7f5b1..4e78171 100644
--- a/lib/unicorn/const.rb
+++ b/lib/unicorn/const.rb
@@ -48,10 +48,6 @@ module Unicorn
    # the constant just refers to a string with the same contents.  Using these constants
    # gave about a 3% to 10% performance improvement over using the strings directly.
    # Symbols did not really improve things much compared to constants.
-  #
-  # While Unicorn does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
-  # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or 
-  # too taxing on performance.
    module Const
      DATE="Date".freeze
  
@@ -61,10 +57,7 @@ module Unicorn
      # Request body
      HTTP_BODY="HTTP_BODY".freeze
  
-    # This is the initial part that your handler is identified as by URIClassifier.
-    SCRIPT_NAME="SCRIPT_NAME".freeze
-
-    # The original URI requested by the client.  Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
+    # The original URI requested by the client.
      REQUEST_URI='REQUEST_URI'.freeze
      REQUEST_PATH='REQUEST_PATH'.freeze
      
@@ -76,14 +69,6 @@ module Unicorn
      DEFAULT_PORT = "8080".freeze    # default TCP listen port
      DEFAULT_LISTEN = "#{DEFAULT_HOST}:#{DEFAULT_PORT}".freeze
  
-    # The standard empty 404 response for bad requests.  Use Error4040Handler for custom stuff.
-    ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Unicorn #{UNICORN_VERSION}\r\n\r\nNOT FOUND".freeze
-
-    CONTENT_LENGTH="CONTENT_LENGTH".freeze
-
-    # A common header for indicating the server is too busy.  Not used yet.
-    ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
-
      # The basic max request size we'll try to read.
      CHUNK_SIZE=(16 * 1024)
  
@@ -95,22 +80,11 @@ module Unicorn
      MAX_BODY=MAX_HEADER
  
      # A frozen format for this is about 15% faster
-    CONTENT_TYPE = "Content-Type".freeze
-    LAST_MODIFIED = "Last-Modified".freeze
-    ETAG = "ETag".freeze
-    REQUEST_METHOD="REQUEST_METHOD".freeze
-    GET="GET".freeze
-    HEAD="HEAD".freeze
-    # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
-    ETAG_FORMAT="\"%x-%x-%x\"".freeze
-    LINE_END="\r\n".freeze
+    CONTENT_LENGTH="CONTENT_LENGTH".freeze
      REMOTE_ADDR="REMOTE_ADDR".freeze
      HTTP_X_FORWARDED_FOR="HTTP_X_FORWARDED_FOR".freeze
-    HTTP_IF_MODIFIED_SINCE="HTTP_IF_MODIFIED_SINCE".freeze
-    HTTP_IF_NONE_MATCH="HTTP_IF_NONE_MATCH".freeze
-    REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
-    HOST = "HOST".freeze
-    CONNECTION = "Connection".freeze
+    QUERY_STRING="QUERY_STRING".freeze
+    RACK_INPUT="rack.input".freeze
    end
  
  end
diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb
index ee407ab..7106f62 100644
--- a/lib/unicorn/http_request.rb
+++ b/lib/unicorn/http_request.rb
@@ -13,6 +13,20 @@ module Unicorn
    # 
    class HttpRequest
  
+     # default parameters we merge into the request env for Rack handlers
+     DEF_PARAMS = {
+       "rack.errors" => $stderr,
+       "rack.multiprocess" => true,
+       "rack.multithread" => false,
+       "rack.run_once" => false,
+       "rack.url_scheme" => "http",
+       "rack.version" => [0, 1],
+       "SCRIPT_NAME" => "",
+
+       # this is not in the Rack spec, but some apps may rely on it
+       "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}"
+     }.freeze
+
      def initialize(logger)
        @logger = logger
        @body = nil
@@ -29,59 +43,39 @@ module Unicorn
        @body = nil
      end
  
-    #
      # Does the majority of the IO processing.  It has been written in
-    # Ruby using about 7 different IO processing strategies and no
-    # matter how it's done the performance just does not improve.  It is
-    # currently carefully constructed to make sure that it gets the best
-    # possible performance, but anyone who thinks they can make it
-    # faster is more than welcome to take a crack at it.
+    # Ruby using about 8 different IO processing strategies.
+    #
+    # It is currently carefully constructed to make sure that it gets
+    # the best possible performance for the common case: GET requests
+    # that are fully complete after a single read(2)
+    #
+    # Anyone who thinks they can make it faster is more than welcome to
+    # take a crack at it.
      #
      # returns an environment hash suitable for Rack if successful
      # This does minimal exception trapping and it is up to the caller
      # to handle any socket errors (e.g. user aborted upload).
      def read(socket)
-      data = String.new(read_socket(socket))
-      nparsed = 0
-
-      # Assumption: nparsed will always be less since data will get
-      # filled with more after each parsing.  If it doesn't get more
-      # then there was a problem with the read operation on the client
-      # socket.  Effect is to stop processing when the socket can't
-      # fill the buffer for further parsing.
-      while nparsed < data.length
-        nparsed = @parser.execute(@params, data, nparsed)
-
-        if @parser.finished?
-          # From http://www.ietf.org/rfc/rfc3875:
-          # "Script authors should be aware that the REMOTE_ADDR and
-          #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
-          #  may not identify the ultimate source of the request.  They
-          #  identify the client for the immediate request to the server;
-          #  that client may be a proxy, gateway, or other intermediary
-          #  acting on behalf of the actual source client."
-          @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
-
-          handle_body(socket) and return rack_env # success!
-          return nil # fail
-        else
-          # Parser is not done, queue up more data to read and continue
-          # parsing
-          data << read_socket(socket)
-          if data.length >= Const::MAX_HEADER
-            raise HttpParserError.new("HEADER is longer than allowed, " \
-                                      "aborting client early.")
-          end
-        end
+      # short circuit the common case with small GET requests first
+      @parser.execute(@params, read_socket(socket)) and
+          return handle_body(socket)
+
+      data = @buffer.dup # read_socket will clobber @buffer
+
+      # Parser is not done; read more data and continue parsing.
+      # An exception raised by @parser will break us out of the loop.
+      loop do
+        data << read_socket(socket)
+        @parser.execute(@params, data) and
+            return handle_body(socket)
        end
-      nil # XXX bug?
        rescue HttpParserError => e
          @logger.error "HTTP parse error, malformed request " \
                        "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
                            socket.unicorn_peeraddr}): #{e.inspect}"
          @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
                        "PARAMS: #{@params.inspect}\n---\n"
-        socket.closed? or socket.close rescue nil
          nil
      end
  
@@ -109,7 +103,7 @@ module Unicorn
        # This will probably truncate them but at least the request goes through
        # usually.
        if remain > 0
-        read_body(socket, remain) or return false # fail!
+        read_body(socket, remain) or return nil # fail!
        end
        @body.rewind
        @body.sysseek(0) if @body.respond_to?(:sysseek)
@@ -118,29 +112,37 @@ module Unicorn
        # another request, we'll truncate it.  Again, we don't do pipelining
        # or keepalive
        @body.truncate(content_length)
-      true
+      rack_env(socket)
      end
  
      # Returns an environment which is rackable:
      # http://rack.rubyforge.org/doc/files/SPEC.html
      # Based on Rack's old Mongrel handler.
-    def rack_env
+    def rack_env(socket)
+      # I'm considering enabling "unicorn.client".  It gives
+      # applications some rope to do some "interesting" things like
+      # replacing a worker with another process that has full control
+      # over the HTTP response.
+      # @params["unicorn.client"] = socket
+
+      # From http://www.ietf.org/rfc/rfc3875:
+      # "Script authors should be aware that the REMOTE_ADDR and
+      #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
+      #  may not identify the ultimate source of the request.  They
+      #  identify the client for the immediate request to the server;
+      #  that client may be a proxy, gateway, or other intermediary
+      #  acting on behalf of the actual source client."
+      @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
+
        # It might be a dumbass full host request header
-      @params[Const::REQUEST_PATH] ||=
-                           URI.parse(@params[Const::REQUEST_URI]).path
-      raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]
-
-      @params["QUERY_STRING"] ||= ''
-      @params.update({ "rack.version" => [0,1],
-                      "rack.input" => @body,
-                      "rack.errors" => $stderr,
-                      "rack.multithread" => false,
-                      "rack.multiprocess" => true,
-                      "rack.run_once" => false,
-                      "rack.url_scheme" => "http",
-                      Const::PATH_INFO => @params[Const::REQUEST_PATH],
-                      Const::SCRIPT_NAME => "",
-                    })
+      @params[Const::PATH_INFO] = (
+          @params[Const::REQUEST_PATH] ||=
+              URI.parse(@params[Const::REQUEST_URI]).path) or
+         raise "No REQUEST_PATH"
+
+      @params[Const::QUERY_STRING] ||= ''
+      @params[Const::RACK_INPUT] = @body
+      @params.update(DEF_PARAMS)
      end
  
      # Does the heavy lifting of properly reading the larger body requests in
diff --git a/lib/unicorn/http_response.rb b/lib/unicorn/http_response.rb
index c8aa3f9..f928baa 100644
--- a/lib/unicorn/http_response.rb
+++ b/lib/unicorn/http_response.rb
@@ -35,7 +35,11 @@ module Unicorn
        # the time anyways so just hope our app knows what it's doing
        headers.each do |key, value|
          next if SKIP.include?(key.downcase)
-        value.split(/\n/).each { |v| out << "#{key}: #{v}" }
+        if value =~ /\n/
+          value.split(/\n/).each { |v| out << "#{key}: #{v}" }
+        else
+          out << "#{key}: #{value}"
+        end
        end
  
        # Rack should enforce Content-Length or chunked transfer encoding,
diff --git a/lib/unicorn/socket.rb b/lib/unicorn/socket.rb
index 9519448..4870133 100644
--- a/lib/unicorn/socket.rb
+++ b/lib/unicorn/socket.rb
@@ -62,10 +62,17 @@ module Unicorn
        end
      end
  
+    def log_buffer_sizes(sock, pfx = '') # logs SO_RCVBUF/SO_SNDBUF of sock
+      respond_to?(:logger) or return # nothing to do without a logger
+      rcvbuf = sock.getsockopt(SOL_SOCKET, SO_RCVBUF).unpack('i').first
+      sndbuf = sock.getsockopt(SOL_SOCKET, SO_SNDBUF).unpack('i').first
+      logger.info "#{pfx}#{sock_name(sock)} rcvbuf=#{rcvbuf} sndbuf=#{sndbuf}"
+    end
+
      # creates a new server, socket. address may be a HOST:PORT or
      # an absolute path to a UNIX socket.  address can even be a Socket
      # object in which case it is immediately returned
-    def bind_listen(address = '0.0.0.0:8080', backlog = 1024)
+    def bind_listen(address = '0.0.0.0:8080', opt = { :backlog => 1024 })
        return address unless String === address
  
        domain, bind_addr = if address[0..0] == "/"
@@ -95,7 +102,13 @@ module Unicorn
          sock.close rescue nil
          return nil
        end
-      sock.listen(backlog)
+      if opt[:rcvbuf] || opt[:sndbuf]
+        log_buffer_sizes(sock, "before: ")
+        sock.setsockopt(SOL_SOCKET, SO_RCVBUF, opt[:rcvbuf]) if opt[:rcvbuf]
+        sock.setsockopt(SOL_SOCKET, SO_SNDBUF, opt[:sndbuf]) if opt[:sndbuf]
+        log_buffer_sizes(sock, " after: ")
+      end
+      sock.listen(opt[:backlog] || 1024)
        set_server_sockopt(sock) if domain == AF_INET
        sock
      end