diff options
Diffstat (limited to 'lib/mongrel.rb')
-rw-r--r-- | lib/mongrel.rb | 290 |
1 files changed, 242 insertions, 48 deletions
diff --git a/lib/mongrel.rb b/lib/mongrel.rb index 4912b8a..4b59549 100644 --- a/lib/mongrel.rb +++ b/lib/mongrel.rb @@ -8,6 +8,9 @@ require 'stringio' # functionality to service web application requests fast as possible. module Mongrel + # Every standard HTTP code mapped to the appropriate message. These are + # used so frequently that they are placed directly in Mongrel for easy + # access rather than Mongrel::Const. HTTP_STATUS_CODES = { 100 => 'Continue', 101 => 'Switching Protocols', @@ -48,19 +51,89 @@ module Mongrel 505 => 'HTTP Version not supported' } + # Frequently used constants when constructing requests or responses. Many times + # the constant just refers to a string with the same contents. Using these constants + # gave about a 3% to 10% performance improvement over using the strings directly. + # Symbols did not really improve things much compared to constants. + # + # While Mongrel does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT, + # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or + # too taxing on performance. + module Const + # This is the part of the path after the SCRIPT_NAME. URIClassifier will determine this. + PATH_INFO="PATH_INFO" + # This is the intial part that your handler is identified as by URIClassifier. + SCRIPT_NAME="SCRIPT_NAME" + # The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME. + REQUEST_URI='REQUEST_URI' + + # Content length (also available as HTTP_CONTENT_LENGTH). + CONTENT_LENGTH='CONTENT_LENGTH' + + # Content length (also available as CONTENT_LENGTH). + HTTP_CONTENT_LENGTH='HTTP_CONTENT_LENGTH' + + # Content type (also available as HTTP_CONTENT_TYPE). + CONTENT_TYPE='CONTENT_TYPE' + + # Content type (also available as CONTENT_TYPE). + HTTP_CONTENT_TYPE='HTTP_CONTENT_TYPE' + + # Gateway interface key in the HttpRequest parameters. + GATEWAY_INTERFACE='GATEWAY_INTERFACE' + # We claim to support CGI/1.2. + GATEWAY_INTERFACE_VALUE='CGI/1.2' + + # Hosts remote IP address. Mongrel does not do DNS resolves since that slows + # processing down considerably. + REMOTE_ADDR='REMOTE_ADDR' + + # This is not given since Mongrel does not do DNS resolves. It is only here for + # completeness for the CGI standard. + REMOTE_HOST='REMOTE_HOST' + + # The name/host of our server as given by the HttpServer.new(host,port) call. + SERVER_NAME='SERVER_NAME' + + # The port of our server as given by the HttpServer.new(host,port) call. + SERVER_PORT='SERVER_PORT' + + # Official server protocol key in the HttpRequest parameters. + SERVER_PROTOCOL='SERVER_PROTOCOL' + # Mongrel claims to support HTTP/1.1. + SERVER_PROTOCOL_VALUE='HTTP/1.1' + + # The actual server software being used (it's Mongrel man). + SERVER_SOFTWARE='SERVER_SOFTWARE' + + # Current Mongrel version (used for SERVER_SOFTWARE and other response headers). + MONGREL_VERSION='Mongrel 0.2.2' + + # The standard empty 404 response for bad requests. Use Error4040Handler for custom stuff. + ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{MONGREL_VERSION}\r\n\r\nNOT FOUND" + + # A common header for indicating the server is too busy. Not used yet. + ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY" + + # The basic max request size we'll try to read. + CHUNK_SIZE=(16 * 1024) + + end + + # When a handler is found for a registered URI then this class is constructed # and passed to your HttpHandler::process method. You should assume that # *one* handler processes all requests. Included in the HttpReqeust is a # HttpRequest.params Hash that matches common CGI params, and a HttpRequest.body # which is a string containing the request body (raw for now). # - # Mongrel really only support small-ish request bodies right now since really + # Mongrel really only supports small-ish request bodies right now since really # huge ones have to be completely read off the wire and put into a string. # Later there will be several options for efficiently handling large file # uploads. class HttpRequest attr_reader :body, :params - + # You don't really call this. It's made for you. # Main thing it does is hook up the params, and store any remaining # body data into the HttpRequest.body attribute. @@ -68,13 +141,14 @@ module Mongrel @body = initial_body || "" @params = params @socket = socket - + # fix up the CGI requirements - params['CONTENT_LENGTH'] = params['HTTP_CONTENT_LENGTH'] || 0 + params[Const::CONTENT_LENGTH] = params[Const::HTTP_CONTENT_LENGTH] || 0 + params[Const::CONTENT_TYPE] ||= params[Const::HTTP_CONTENT_TYPE] # now, if the initial_body isn't long enough for the content length we have to fill it # TODO: adapt for big ass stuff by writing to a temp file - clen = params['HTTP_CONTENT_LENGTH'].to_i + clen = params[Const::HTTP_CONTENT_LENGTH].to_i if @body.length < clen @body << @socket.read(clen - @body.length) end @@ -82,6 +156,13 @@ module Mongrel end + # This class implements a simple way of constructing the HTTP headers dynamically + # via a Hash syntax. Think of it as a write-only Hash. Refer to HttpResponse for + # information on how this is used. + # + # One consequence of this write-only nature is that you can write multiple headers + # by just doing them twice (which is sometimes needed in HTTP), but that the normal + # semantics for Hash (where doing an insert replaces) is not there. class HeaderOut attr_reader :out @@ -89,6 +170,7 @@ module Mongrel @out = out end + # Simply writes "#{key}: #{value}" to an output buffer. def[]=(key,value) @out.write(key) @out.write(": ") @@ -97,7 +179,35 @@ module Mongrel end end - + # Writes and controls your response to the client using the HTTP/1.1 specification. + # You use it by simply doing: + # + # response.start(200) do |head,out| + # head['Content-Type'] = 'text/plain' + # out.write("hello\n") + # end + # + # The parameter to start is the response code--which Mongrel will translate for you + # based on HTTP_STATUS_CODES. The head parameter is how you write custom headers. + # The out parameter is where you write your body. The default status code for + # HttpResponse.start is 200 so the above example is redundant. + # + # As you can see, it's just like using a Hash and as you do this it writes the proper + # header to the output on the fly. You can even intermix specifying headers and + # writing content. The HttpResponse class with write the things in the proper order + # once the HttpResponse.block is ended. + # + # You may also work the HttpResponse object directly using the various attributes available + # for the raw socket, body, header, and status codes. If you do this you're on your own. + # A design decision was made to force the client to not pipeline requests. HTTP/1.1 + # pipelining really kills the performance due to how it has to be handled and how + # unclear the standard is. To fix this the HttpResponse gives a "Connection: close" + # header which forces the client to close right away. The bonus for this is that it + # gives a pretty nice speed boost to most clients since they can close their connection + # immediately. + # + # One additional caveat is that you don't have to specify the Content-length header + # as the HttpResponse will write this for you based on the out length. class HttpResponse attr_reader :socket attr_reader :body @@ -112,12 +222,25 @@ module Mongrel @header = HeaderOut.new(StringIO.new) end + # Receives a block passing it the header and body for you to work with. + # When the block is finished it writes everything you've done to + # the socket in the proper order. This lets you intermix header and + # body content as needed. def start(status=200) @status = status yield @header, @body finished end - + + # Primarily used in exception handling to reset the response output in order to write + # an alternative response. + def reset + @header.out.rewind + @body.rewind + end + + # This takes whatever has been done to header and body and then writes it in the + # proper format to make an HTTP/1.1 response. def finished @header.out.rewind @body.rewind @@ -136,29 +259,8 @@ module Mongrel # a response. Look at the HttpRequest and HttpResponse objects for how # to use them. class HttpHandler - attr_accessor :script_name - - def process(request, response) - end - end - - - # The server normally returns a 404 response if a URI is requested, but it - # also returns a lame empty message. This lets you do a 404 response - # with a custom message for special URIs. - class Error404Handler < HttpHandler - - # Sets the message to return. This is constructed once for the handler - # so it's pretty efficient. - def initialize(msg) - @response = HttpServer::ERROR_404_RESPONSE + msg - end - - # Just kicks back the standard 404 response with your special message. def process(request, response) - response.socket.write(@response) end - end @@ -183,16 +285,6 @@ module Mongrel class HttpServer attr_reader :acceptor - # The standard empty 404 response for bad requests. Use Error4040Handler for custom stuff. - ERROR_404_RESPONSE="HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Mongrel/0.2\r\n\r\nNOT FOUND" - ERROR_503_RESPONSE="HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY" - - # The basic max request size we'll try to read. - CHUNK_SIZE=(16 * 1024) - - PATH_INFO="PATH_INFO" - SCRIPT_NAME="SCRIPT_NAME" - # Creates a working server on host:port (strange things happen if port isn't a Number). # Use HttpServer::run to start the server. # @@ -210,8 +302,13 @@ module Mongrel # Future versions of Mongrel will make this more dynamic (hopefully). def initialize(host, port, num_processors=20) @socket = TCPServer.new(host, port) + @classifier = URIClassifier.new @req_queue = Queue.new + @host = host + @port = port + @num_procesors = num_processors + num_processors.times {|i| Thread.new do while client = @req_queue.deq process_client(client) @@ -223,30 +320,35 @@ module Mongrel # Does the majority of the IO processing. It has been written in Ruby using # about 7 different IO processing strategies and no matter how it's done - # the performance just does not improve. Ruby's use of select to implement - # threads means that it will most likely never improve, so the only remaining - # approach is to write all or some of this function in C. That will be the - # focus of future releases. + # the performance just does not improve. It is currently carefully constructed + # to make sure that it gets the best possible performance, but anyone who + # thinks they can make it faster is more than welcome to take a crack at it. def process_client(client) begin parser = HttpParser.new params = {} - data = client.readpartial(CHUNK_SIZE) + data = client.readpartial(Const::CHUNK_SIZE) while true nread = parser.execute(params, data) if parser.finished? - script_name, path_info, handler = @classifier.resolve(params[PATH_INFO]) + script_name, path_info, handler = @classifier.resolve(params[Const::REQUEST_URI]) if handler - params[PATH_INFO] = path_info - params[SCRIPT_NAME] = script_name + params[Const::PATH_INFO] = path_info + params[Const::SCRIPT_NAME] = script_name + params[Const::GATEWAY_INTERFACE]=Const::GATEWAY_INTERFACE_VALUE + params[Const::REMOTE_ADDR]=client.peeraddr + params[Const::SERVER_NAME]=@host + params[Const::SERVER_PORT]=@port + params[Const::SERVER_PROTOCOL]=Const::SERVER_PROTOCOL_VALUE + params[Const::SERVER_SOFTWARE]=Const::MONGREL_VERSION request = HttpRequest.new(params, data[nread ... data.length], client) response = HttpResponse.new(client) handler.process(request, response) else - client.write(ERROR_404_RESPONSE) + client.write(Const::ERROR_404_RESPONSE) end break @@ -254,7 +356,7 @@ module Mongrel # gotta stream and read again until we can get the parser to be character safe # TODO: make this more efficient since this means we're parsing a lot repeatedly parser.reset - data << client.readpartial(CHUNK_SIZE) + data << client.readpartial(Const::CHUNK_SIZE) end end rescue EOFError @@ -274,6 +376,7 @@ module Mongrel # Runs the thing. It returns the thread used so you can "join" it. You can also # access the HttpServer::acceptor attribute to get the thread later. def run + BasicSocket.do_not_reverse_lookup=true @acceptor = Thread.new do while true @req_queue << @socket.accept @@ -295,4 +398,95 @@ module Mongrel @classifier.unregister(uri) end end + + + # The server normally returns a 404 response if a URI is requested, but it + # also returns a lame empty message. This lets you do a 404 response + # with a custom message for special URIs. + class Error404Handler < HttpHandler + + # Sets the message to return. This is constructed once for the handler + # so it's pretty efficient. + def initialize(msg) + @response = HttpServer::ERROR_404_RESPONSE + msg + end + + # Just kicks back the standard 404 response with your special message. + def process(request, response) + response.socket.write(@response) + end + + end + + + # Serves the contents of a directory. You give it the path to the root + # where the files are located, and it tries to find the files based on + # the PATH_INFO inside the directory. If the requested path is a + # directory then it returns a simple directory listing. + # + # It does a simple protection against going outside it's root path by + # converting all paths to an absolute expanded path, and then making sure + # that the final expanded path includes the root path. If it doesn't + # than it simply gives a 404. + class DirHandler < HttpHandler + + def initialize(path, listing_allowed=true) + @path = File.expand_path(path) + @listing_allowed=listing_allowed + puts "DIR: #@path" + end + + def send_dir_listing(base, dir, response) + if @listing_allowed + response.start(200) do |head,out| + head['Content-Type'] = "text/html" + out << "<html><head><title>Directory Listing</title></head><body>" + Dir.entries(dir).each do |child| + out << "<a href=\"#{base}/#{child}\">#{child}</a><br/>" + end + out << "</body></html>" + end + else + response.start(403) do |head,out| + out.write("Directory listings not allowed") + end + end + end + + + def send_file(req, response) + response.start(200) do |head,out| + open(req, "r") do |f| + out.write(f.read) + end + end + end + + + def process(request, response) + req = File.expand_path("." + request.params['PATH_INFO'], @path) + puts "FIND: #{req}" + if req.index(@path) != 0 or !File.exist? req + # not found, return a 404 + response.start(404) do |head,out| + out << "File not found" + end + else + begin + if File.directory? req + send_dir_listing(request.params["REQUEST_URI"],req, response) + else + send_file(req, response) + end + rescue => details + response.reset + response.start(403) do |head,out| + out << "Error accessing file" + end + STDERR.puts "ERROR: #{details}" + end + end + end + end + end |