From a70468036d9b780bc7ec921f7feb6e1275778169 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 28 Aug 2009 20:47:43 -0700 Subject: initial import --- lib/clogger.rb | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/clogger/format.rb | 25 ++++++++++ lib/clogger/pure.rb | 126 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 lib/clogger.rb create mode 100644 lib/clogger/format.rb create mode 100644 lib/clogger/pure.rb (limited to 'lib') diff --git a/lib/clogger.rb b/lib/clogger.rb new file mode 100644 index 0000000..0e4148e --- /dev/null +++ b/lib/clogger.rb @@ -0,0 +1,133 @@ +# -*- encoding: binary -*- +class Clogger + VERSION = '0.0.1' + + OP_LITERAL = 0 + OP_REQUEST = 1 + OP_RESPONSE = 2 + OP_SPECIAL = 3 + OP_EVAL = 4 + OP_TIME_LOCAL = 5 + OP_TIME_UTC = 6 + OP_REQUEST_TIME = 7 + OP_TIME = 8 + OP_COOKIE = 9 + + # support nginx variables that are less customizable than our own + ALIASES = { + '$request_time' => '$request_time{3}', + '$time_local' => '$time_local{%d/%b/%Y:%H:%M:%S %z}', + '$msec' => '$time{3}', + '$usec' => '$time{6}', + } + + SPECIAL_VARS = { + :body_bytes_sent => 0, + :status => 1, + :request => 2, # REQUEST_METHOD PATH_INFO?QUERY_STRING HTTP_VERSION + :request_length => 3, # env['rack.input'].size + :response_length => 4, # like body_bytes_sent, except "-" instead of "0" + :ip => 5, # HTTP_X_FORWARDED_FOR || REMOTE_ADDR || - + :pid => 6, # getpid() + } + +private + + CGI_ENV = Regexp.new('\A\$(' << + %w(remote_addr remote_ident remote_user + path_info query_string script_name + server_name server_port).join('|') << ')\z').freeze + + SCAN = /([^$]*)(\$+(?:env\{\w+(?:\.[\w\.]+)?\}| + e\{[^\}]+\}| + (?:request_)?time\{\d+\}| + time_(?:utc|local)\{[^\}]+\}| + \w*))?([^$]*)/x + + def compile_format(str) + rv = [] + str.scan(SCAN).each do |pre,tok,post| + rv << [ OP_LITERAL, pre ] if pre && pre != "" + + unless tok.nil? + if tok.sub!(/\A(\$+)\$/, '$') + rv << [ OP_LITERAL, $1.dup ] + end + + compat = ALIASES[tok] and tok = compat + + case tok + when /\A(\$*)\z/ + rv << [ OP_LITERAL, $1.dup ] + when /\A\$env\{(\w+(?:\.[\w\.]+))\}\z/ + rv << [ OP_REQUEST, $1.freeze ] + when /\A\$e\{([^\}]+)\}\z/ + rv << [ OP_EVAL, $1.dup ] + when /\A\$cookie_(\w+)\z/ + rv << [ OP_COOKIE, $1.dup.freeze ] + when CGI_ENV, /\A\$(http_\w+)\z/ + rv << [ OP_REQUEST, $1.upcase.freeze ] + when /\A\$sent_http_(\w+)\z/ + rv << [ OP_RESPONSE, $1.downcase.tr('_','-').freeze ] + when /\A\$time_local\{([^\}]+)\}\z/ + rv << [ OP_TIME_LOCAL, $1.dup ] + when /\A\$time_utc\{([^\}]+)\}\z/ + rv << [ OP_TIME_UTC, $1.dup ] + when /\A\$time\{(\d+)\}\z/ + rv << [ OP_TIME, *usec_conv_pair(tok, $1.to_i) ] + when /\A\$request_time\{(\d+)\}\z/ + rv << [ OP_REQUEST_TIME, *usec_conv_pair(tok, $1.to_i) ] + else + tok_sym = tok[1..-1].to_sym + if special_code = SPECIAL_VARS[tok_sym] + rv << [ OP_SPECIAL, special_code ] + else + raise ArgumentError, "unable to make sense of token: #{tok}" + end + end + end + + rv << [ OP_LITERAL, post ] if post && post != "" + end + + # auto-append a newline + last = rv.last or return rv + op = last.first + if (op == OP_LITERAL && /\n\z/ !~ last.last) || op != OP_LITERAL + rv << [ OP_LITERAL, "\n" ] + end + + rv + end + + def usec_conv_pair(tok, prec) + if prec == 0 + [ "%d", 1 ] # stupid... + elsif prec > 6 + raise ArgumentError, "#{tok}: too high precision: #{prec} (max=6)" + else + [ "%d.%0#{prec}d", 10 ** (6 - prec) ] + end + end + + def need_response_headers?(fmt_ops) + fmt_ops.any? { |op| OP_RESPONSE == op[0] } + end + + def need_wrap_body?(fmt_ops) + fmt_ops.any? do |op| + (OP_REQUEST_TIME == op[0]) || (OP_SPECIAL == op[0] && + (SPECIAL_VARS[:body_bytes_sent] == op[1] || + SPECIAL_VARS[:response_length] == op[1])) + end + end + +end + +require 'clogger/format' + +begin + require 'clogger_ext' +rescue LoadError + require 'clogger/pure' +end diff --git a/lib/clogger/format.rb b/lib/clogger/format.rb new file mode 100644 index 0000000..9e4f59f --- /dev/null +++ b/lib/clogger/format.rb @@ -0,0 +1,25 @@ +# -*- encoding: binary -*- + +class Clogger + + # predefined log formats in wide use + module Format + # common log format used by Apache: + # http://httpd.apache.org/docs/2.2/logs.html + Common = "$remote_addr - $remote_user [$time_local] " \ + '"$request" $status $response_length'.freeze + + # combined log format used by Apache: + # http://httpd.apache.org/docs/2.2/logs.html + Combined = %Q|#{Common} "$http_referer" "$http_user_agent"|.freeze + + # combined log format used by nginx: + # http://wiki.nginx.org/NginxHttpLogModule + NginxCombined = Combined.gsub(/response_length/, 'body_bytes_sent').freeze + + # log format used by Rack 1.0 + Rack_1_0 = "$ip - $remote_user [$time_local{%d/%b/%Y %H:%M:%S}] " \ + '"$request" $status $response_length $request_time{4}'.freeze + end + +end diff --git a/lib/clogger/pure.rb b/lib/clogger/pure.rb new file mode 100644 index 0000000..11c03f4 --- /dev/null +++ b/lib/clogger/pure.rb @@ -0,0 +1,126 @@ +# -*- encoding: binary -*- +# :stopdoc: +# +# Not at all optimized for performance, this was written based on +# the original C extension code so it's not very Ruby-ish... +class Clogger + + def initialize(app, opts = {}) + @app = app + @logger = opts[:logger] + @fmt_ops = compile_format(opts[:format] || Format::Common) + @wrap_body = need_wrap_body?(@fmt_ops) + @reentrant = nil + @body_bytes_sent = 0 + end + + def call(env) + @start = Time.now + status, headers, body = @app.call(env) + if wrap_body? + @reentrant = env['rack.multithread'] + @env, @status, @headers, @body = env, status, headers, body + return [ status, headers, reentrant? ? self.dup : self ] + end + log(env, status, headers) + [ status, headers, body ] + end + + def each + @body_bytes_sent = 0 + @body.each do |part| + @body_bytes_sent += part.size + yield part + end + ensure + log(@env, @status, @headers) + end + + def close + @body.close + end + + def reentrant? + @reentrant + end + + def wrap_body? + @wrap_body + end + + def fileno + @logger.fileno rescue nil + end + +private + + def byte_xs(s) + s = s.dup + s.force_encoding(Encoding::BINARY) if defined?(Encoding::BINARY) + s.gsub!(/(['"\x00-\x1f])/) { |x| "\\x#{$1.unpack('H2').first}" } + s + end + + SPECIAL_RMAP = SPECIAL_VARS.inject([]) { |ary, (k,v)| ary[v] = k; ary } + + def special_var(special_nr, env, status, headers) + case SPECIAL_RMAP[special_nr] + when :body_bytes_sent + @body_bytes_sent.to_s + when :status + status = status.to_i + status >= 100 && status <= 999 ? ('%03d' % status) : '-' + when :request + qs = env['QUERY_STRING'] + qs.empty? or qs = "?#{byte_xs(qs)}" + "#{env['REQUEST_METHOD']} " \ + "#{byte_xs(env['PATH_INFO'])}#{qs} " \ + "#{byte_xs(env['HTTP_VERSION'])}" + when :request_length + env['rack.input'].size.to_s + when :response_length + @body_bytes_sent == 0 ? '-' : @body_bytes_sent.to_s + when :ip + xff = env['HTTP_X_FORWARDED_FOR'] and return byte_xs(xff) + env['REMOTE_ADDR'] || '-' + when :pid + $$.to_s + else + raise "EDOOFUS #{special_nr}" + end + end + + def time_format(sec, usec, format, div) + format % [ sec, usec / div ] + end + + def log(env, status, headers) + (@logger || env['rack.errors']) << @fmt_ops.map { |op| + case op[0] + when OP_LITERAL; op[1] + when OP_REQUEST; byte_xs(env[op[1]] || "-") + when OP_RESPONSE; byte_xs(get_sent_header(headers, op[1])) + when OP_SPECIAL; special_var(op[1], env, status, headers) + when OP_EVAL; eval(op[1]).to_s rescue "-" + when OP_TIME_LOCAL; Time.now.strftime(op[1]) + when OP_TIME_UTC; Time.now.utc.strftime(op[1]) + when OP_REQUEST_TIME + t = Time.now - @start + time_format(t.to_i, (t - t.to_i) * 1000000, op[1], op[2]) + when OP_TIME + t = Time.now + time_format(t.sec, t.usec, op[1], op[2]) + when OP_COOKIE + (env['rack.request.cookie_hash'][op[1]] rescue "-") || "-" + else + raise "EDOOFUS #{op.inspect}" + end + }.join('') + end + + def get_sent_header(headers, match) + headers.each { |key, value| match == key.downcase and return value } + "-" + end + +end -- cgit v1.2.3-24-ge0c7