about summary refs log tree commit homepage
path: root/ext/kcar/kcar.rl
diff options
context:
space:
mode:
Diffstat (limited to 'ext/kcar/kcar.rl')
-rw-r--r--  ext/kcar/kcar.rl  634
1 files changed, 634 insertions, 0 deletions
diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
new file mode 100644
index 0000000..0d22866
--- /dev/null
+++ b/ext/kcar/kcar.rl
@@ -0,0 +1,634 @@
+/**
+ * Copyright (c) 2009, 2010 Eric Wong (all bugs are Eric's fault)
+ * Copyright (c) 2005 Zed A. Shaw
+ * You can redistribute it and/or modify it under the same terms as Ruby.
+ */
+#include "ruby.h"
+#include "ext_help.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include "c_util.h"
+
+static VALUE eParserError;
+static ID id_sq, id_sq_set;
+
+/** Defines common length and error messages for input length validation. */
+/** Defines common length and error messages for input length validation. */
+/* Expands to a size constant MAX_<N>_LENGTH plus a matching error string. */
+#define DEF_MAX_LENGTH(N, length) \
+  static const size_t MAX_##N##_LENGTH = length; \
+  static const char MAX_##N##_LENGTH_ERR[] = \
+    "HTTP element " # N  " is longer than the " # length " allowed length."
+
+/**
+ * Validates the max length of given input and throws an ParserError
+ * exception if over.  The error string contains no conversion
+ * specifiers, so passing it as the rb_raise() format is safe.
+ */
+#define VALIDATE_MAX_LENGTH(len, N) do { \
+  if (len > MAX_##N##_LENGTH) \
+    rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
+} while (0)
+
+/* Defines the maximum allowed lengths for various input elements.*/
+DEF_MAX_LENGTH(FIELD_NAME, 256);
+DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
+DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
+DEF_MAX_LENGTH(REASON, 256);
+
+/* parser state flags stored in http_parser.flags */
+#define UH_FL_CHUNKED  0x1  /* "Transfer-Encoding: chunked" seen */
+#define UH_FL_HASBODY  0x2  /* status code implies a response body */
+#define UH_FL_INBODY   0x4  /* currently reading the body */
+#define UH_FL_INTRAILER 0x10 /* currently reading trailers after chunks */
+/* NOTE(review): 0x8 is unused here — possibly reserved; confirm upstream */
+#define UH_FL_INCHUNK  0x20 /* mid-chunk; resume via skip_chunk_data_hack */
+#define UH_FL_KEEPALIVE 0x40 /* connection may be reused after this response */
+
+/*
+ * per-parser state; all size_t fields are byte offsets into the
+ * caller-supplied buffer so parsing can resume across calls.
+ */
+struct http_parser {
+  int cs; /* Ragel internal state */
+  unsigned int flags;
+  size_t mark;   /* start offset of the token currently being scanned */
+  size_t offset; /* how far into the buffer we have consumed */
+  union { /* these 2 fields don't nest */
+    size_t field;
+    size_t query;
+  } start;
+  union {
+    size_t field_len; /* only used during header processing */
+    size_t dest_offset; /* only used during body processing */
+  } s;
+  VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
+  VALUE status; /* String or Qnil */
+  union {
+    off_t content;
+    off_t chunk;
+  } len;
+};
+
+/* these macros assume the locals buffer/p/pe of http_parser_execute() */
+#define REMAINING (unsigned long)(pe - p)
+#define LEN(AT, FPC) (FPC - buffer - hp->AT)
+#define MARK(M,FPC) (hp->M = (FPC) - buffer)
+#define PTR_TO(F) (buffer + hp->F)
+#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))
+
+/* flag test/set/clear helpers; fl is the name minus the UH_FL_ prefix */
+#define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
+#define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
+#define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
+#define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))
+
+/*
+ * handles values of the "Connection:" header, keepalive is implied
+ * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0
+ * Additionally, we require GET/HEAD requests to support keepalive.
+ * val is the (String) header value; comparison is case-insensitive.
+ */
+static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
+{
+  /* REQUEST_METHOD is always set before any headers */
+  if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
+    /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
+    HP_FL_SET(hp, KEEPALIVE);
+  } else if (STR_CSTR_CASE_EQ(val, "close")) {
+    /*
+     * it doesn't matter what HTTP version or request method we have,
+     * if a server says "Connection: close", we disable keepalive
+     */
+    HP_FL_UNSET(hp, KEEPALIVE);
+  } else {
+    /*
+     * server could've sent anything, ignore it for now.  Maybe
+     * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
+     * Raising an exception might be too mean...
+     */
+  }
+}
+
+/* called by the Ragel machine with the HTTP-Version token (not NUL-terminated) */
+static void
+http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
+{
+  if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
+    /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
+    HP_FL_SET(hp, KEEPALIVE);
+  }
+}
+
+/*
+ * stores the status line ("NNN Reason-Phrase") in hp->status and
+ * validates the leading 3-digit code; sets HASBODY for any status
+ * that may carry a body (everything but 1xx, 204 and 304).
+ */
+static void
+status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
+{
+  long nr;
+
+  hp->status = rb_str_new(ptr, len);
+
+  /* RSTRING_PTR is null terminated, ptr is not */
+  nr = strtol(RSTRING_PTR(hp->status), NULL, 10);
+
+  if (nr < 100 || nr > 999)
+    rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));
+
+  if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
+    HP_FL_SET(hp, HASBODY);
+}
+
+/* body-framing headers (Content-Length, etc.) are forbidden in trailers */
+static inline void invalid_if_trailer(struct http_parser *hp)
+{
+  if (HP_FL_TEST(hp, INTRAILER))
+    rb_raise(eParserError, "invalid Trailer");
+}
+
+/*
+ * appends a folded (continuation) header line to the last header value
+ * seen (hp->cont).  Leading whitespace of the continuation is collapsed
+ * to a single ' ' before appending.
+ */
+static void write_cont_value(struct http_parser *hp,
+                             char *buffer, const char *p)
+{
+  char *vptr;
+
+  if (hp->cont == Qfalse)
+    rb_raise(eParserError, "invalid continuation line");
+
+  if (NIL_P(hp->cont))
+    return; /* we're ignoring this header (probably Status:) */
+
+  assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string");
+  assert(hp->mark > 0 && "impossible continuation line offset");
+
+  /* empty continuation: nothing to append */
+  if (LEN(mark, p) == 0)
+    return;
+
+  /* back up one byte so the separating whitespace is included */
+  if (RSTRING_LEN(hp->cont) > 0)
+    --hp->mark;
+
+  vptr = PTR_TO(mark);
+
+  if (RSTRING_LEN(hp->cont) > 0) {
+    assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
+    *vptr = ' ';
+  }
+  rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
+}
+
+/*
+ * records one complete "Field: value" header into hdr.  hdr may be an
+ * Array (each header pushed as a [field, value] pair) or anything
+ * hash-like responding to [] / []=; duplicate keys are joined with "\n".
+ * Also interprets framing headers (Connection, Content-Length,
+ * Transfer-Encoding, Trailer) to update parser state.
+ */
+static void write_value(VALUE hdr, struct http_parser *hp,
+                        const char *buffer, const char *p)
+{
+  VALUE f, v;
+  VALUE hclass;
+  const char *fptr = PTR_TO(start.field);
+  long flen = hp->s.field_len;
+  const char *vptr;
+  long vlen;
+
+  /* Rack does not like Status headers, so we never send them */
+  if (CSTR_CASE_EQ(fptr, flen, "status")) {
+    hp->cont = Qnil;
+    return;
+  }
+
+  vptr = PTR_TO(mark);
+  vlen = LEN(mark, p);
+  VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
+  VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
+  f = rb_str_new(fptr, flen);
+  v = rb_str_new(vptr, vlen);
+
+  if (STR_CSTR_CASE_EQ(f, "connection")) {
+    hp_keepalive_connection(hp, v);
+  } else if (STR_CSTR_CASE_EQ(f, "content-length")) {
+    if (! HP_FL_TEST(hp, HASBODY))
+      rb_raise(eParserError, "Content-Length with no body");
+    hp->len.content = parse_length(vptr, vlen);
+
+    if (hp->len.content < 0)
+      rb_raise(eParserError, "invalid Content-Length");
+
+    invalid_if_trailer(hp);
+  } else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
+    if (STR_CSTR_CASE_EQ(v, "chunked")) {
+      if (! HP_FL_TEST(hp, HASBODY))
+        rb_raise(eParserError, "chunked Transfer-Encoding with no body");
+
+      hp->len.chunk = 0;
+      HP_FL_SET(hp, CHUNKED);
+    }
+    invalid_if_trailer(hp);
+  } else if (STR_CSTR_CASE_EQ(f, "trailer")) {
+      if (! HP_FL_TEST(hp, HASBODY))
+        rb_raise(eParserError, "trailer with no body");
+    invalid_if_trailer(hp);
+  }
+
+  hclass = CLASS_OF(hdr);
+  if (hclass == rb_cArray) {
+    rb_ary_push(hdr, rb_ary_new3(2, f, v));
+    hp->cont = v;
+  } else {
+    /* hash-ish, try rb_hash_* first and fall back to slow rb_funcall */
+    VALUE e;
+
+    /* try to read the existing value */
+    if (hclass == rb_cHash)
+      e = rb_hash_aref(hdr, f);
+    else
+      e = rb_funcall(hdr, id_sq, 1, f);
+
+    if (NIL_P(e)) {
+      /* freeze keys so hash lookups can avoid dup (Ruby hash behavior) */
+      OBJ_FREEZE(f);
+
+      if (hclass == rb_cHash)
+        rb_hash_aset(hdr, f, v);
+      else
+        rb_funcall(hdr, id_sq_set, 2, f, v);
+
+      hp->cont = v;
+    } else {
+      /* repeated header: join values with "\n" as Rack expects */
+      rb_str_buf_cat(e, "\n", 1);
+      hp->cont = rb_str_buf_append(e, v);
+    }
+  }
+}
+
+/** Machine **/
+
+%%{
+  machine http_parser;
+
+  # generic mark for the start of a token; LEN()/PTR_TO() use it later
+  action mark {MARK(mark, fpc); }
+
+  action start_field { MARK(start.field, fpc); }
+  action write_field { hp->s.field_len = LEN(start.field, fpc); }
+  action start_value { MARK(mark, fpc); }
+  action write_value { write_value(hdr, hp, buffer, fpc); }
+  action write_cont_value { write_cont_value(hp, buffer, fpc); }
+  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+  action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
+
+  # accumulate hex chunk-size digits; step_incr returns < 0 on overflow
+  action add_to_chunk_size {
+    hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
+    if (hp->len.chunk < 0)
+      rb_raise(eParserError, "invalid chunk size");
+  }
+  action header_done {
+    cs = http_parser_first_final;
+
+    if (HP_FL_TEST(hp, CHUNKED))
+      cs = http_parser_en_ChunkedBody;
+
+    /*
+     * go back to Ruby so we can call the Rack application, we'll reenter
+     * the parser iff the body needs to be processed.
+     */
+    goto post_exec;
+  }
+
+  action end_trailers {
+    cs = http_parser_first_final;
+    goto post_exec;
+  }
+
+  action end_chunked_body {
+    /* switch into trailer parsing; the final CRLF byte is consumed here */
+    HP_FL_SET(hp, INTRAILER);
+    cs = http_parser_en_Trailers;
+    ++p;
+    assert(p <= pe && "buffer overflow after chunked body");
+    goto post_exec;
+  }
+
+  action skip_chunk_data {
+  skip_chunk_data_hack: {
+    /* copy as much of the current chunk as this buffer holds into hdr */
+    size_t nr = MIN((size_t)hp->len.chunk, REMAINING);
+    memcpy(RSTRING_PTR(hdr) + hp->s.dest_offset, fpc, nr);
+    hp->s.dest_offset += nr;
+    hp->len.chunk -= nr;
+    p += nr;
+    assert(hp->len.chunk >= 0 && "negative chunk length");
+    if ((size_t)hp->len.chunk > REMAINING) {
+      /* chunk continues past this buffer; resume here on the next call */
+      HP_FL_SET(hp, INCHUNK);
+      goto post_exec;
+    } else {
+      fhold;
+      fgoto chunk_end;
+    }
+  }}
+
+  include kcar_http_common "kcar_http_common.rl";
+}%%
+
+/** Data **/
+%% write data;
+
+/* (re)sets a parser to its initial state; safe to call on a used parser */
+static void http_parser_init(struct http_parser *hp)
+{
+  int cs = 0;
+  memset(hp, 0, sizeof(struct http_parser));
+  hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
+  hp->status = Qnil;
+  hp->len.content = -1; /* -1 == Content-Length not (yet) given */
+  %% write init;
+  hp->cs = cs;
+}
+
+/** exec **/
+/*
+ * runs the Ragel machine over buffer[hp->offset..len).  Resumable:
+ * hp->cs and hp->offset persist across calls so parsing may continue
+ * when more data arrives.  When mid-chunk (INCHUNK), bypasses the
+ * machine and jumps straight back into the chunk-copy loop.
+ */
+static void http_parser_execute(struct http_parser *hp,
+  VALUE hdr, char *buffer, size_t len)
+{
+  const char *p, *pe;
+  int cs = hp->cs;
+  size_t off = hp->offset;
+
+  if (cs == http_parser_first_final)
+    return;
+
+  assert(off <= len && "offset past end of buffer");
+
+  p = buffer+off;
+  pe = buffer+len;
+
+  assert((void *)(pe - p) == (void *)(len - off) &&
+         "pointers aren't same distance");
+
+  if (HP_FL_TEST(hp, INCHUNK)) {
+    HP_FL_UNSET(hp, INCHUNK);
+    goto skip_chunk_data_hack;
+  }
+  %% write exec;
+post_exec: /* "_out:" also goes here */
+  /* preserve the error state so callers can detect it after return */
+  if (hp->cs != http_parser_error)
+    hp->cs = cs;
+  hp->offset = p - buffer;
+
+  assert(p <= pe && "buffer overflow after parsing execute");
+  assert(hp->offset <= len && "offset longer than length");
+}
+
+/* extracts the wrapped http_parser struct from a Kcar::Parser object */
+static struct http_parser *data_get(VALUE self)
+{
+  struct http_parser *hp;
+
+  Data_Get_Struct(self, struct http_parser, hp);
+  assert(hp && "failed to extract http_parser struct");
+  return hp;
+}
+
+/* GC mark callback: keep the VALUEs we hold alive across collections */
+static void mark(void *ptr)
+{
+  struct http_parser *hp = ptr;
+
+  rb_gc_mark(hp->cont);
+  rb_gc_mark(hp->status);
+}
+
+/* allocation function for Kcar::Parser; -1 => free with ruby_xfree */
+static VALUE alloc(VALUE klass)
+{
+  struct http_parser *hp;
+  return Data_Make_Struct(klass, struct http_parser, mark, -1, hp);
+}
+
+/**
+ * call-seq:
+ *    Kcar::Parser.new => parser
+ *
+ * Creates a new parser.
+ *
+ * Document-method: reset
+ *
+ * call-seq:
+ *    parser.reset => parser
+ *
+ * Resets the parser so it can be reused by another client
+ */
+static VALUE initialize(VALUE self)
+{
+  /* also bound as #reset: re-initializing is identical to construction */
+  http_parser_init(data_get(self));
+
+  return self;
+}
+
+/*
+ * drops the first nr bytes of str in place (like str.slice!(0, nr))
+ * so the caller's string starts where parsing left off
+ */
+static void advance_str(VALUE str, off_t nr)
+{
+  long len = RSTRING_LEN(str);
+
+  if (len == 0)
+    return;
+
+  /* trigger copy-on-write / frozen checks before mutating */
+  rb_str_modify(str);
+
+  assert(nr <= len && "trying to advance past end of buffer");
+  len -= nr;
+  if (len > 0) /* unlikely, len is usually 0 */
+    memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
+  rb_str_set_len(str, len);
+}
+
+/**
+ * call-seq:
+ *   parser.body_bytes_left => nil or Integer
+ *
+ * Returns the number of bytes left to run through Parser#filter_body.
+ * This will initially be the value of the "Content-Length" HTTP header
+ * after header parsing is complete and will decrease in value as
+ * Parser#filter_body is called for each chunk.  This should return
+ * zero for responses with no body.
+ *
+ * This will return nil on "Transfer-Encoding: chunked" responses as
+ * well as HTTP/1.0 responses where Content-Length is not set
+ */
+static VALUE body_bytes_left(VALUE self)
+{
+  struct http_parser *hp = data_get(self);
+
+  /* chunked responses have no predetermined length */
+  if (HP_FL_TEST(hp, CHUNKED))
+    return Qnil;
+  /* len.content stays -1 when no Content-Length header was seen */
+  if (hp->len.content >= 0)
+    return OFFT2NUM(hp->len.content);
+
+  return Qnil;
+}
+
+/*
+ * call-seq:
+ *    parser.chunked? => true or false
+ *
+ * Returns whether the response uses chunked Transfer-Encoding.
+ */
+static VALUE chunked(VALUE self)
+{
+  struct http_parser *hp = data_get(self);
+
+  return HP_FL_TEST(hp, CHUNKED) ? Qtrue : Qfalse;
+}
+
+/**
+ * Document-method: headers
+ * call-seq:
+ *    parser.headers(hdr, data) => hdr or nil
+ *
+ * Takes a Hash and a String of data, parses the String of data filling
+ * in the Hash returning the Hash if parsing is finished, nil otherwise
+ * When returning the hdr Hash, it may modify data to point to where
+ * body processing should begin.
+ *
+ * Raises ParserError if there are parsing errors.
+ */
+static VALUE headers(VALUE self, VALUE hdr, VALUE data)
+{
+  struct http_parser *hp = data_get(self);
+
+  /*
+   * NOTE(review): the public Ruby C API rb_str_update() takes 4 args;
+   * presumably ext_help.h defines a 1-arg compat macro (StringValue +
+   * rb_str_modify) — confirm against ext_help.h
+   */
+  rb_str_update(data);
+
+  http_parser_execute(hp, hdr, RSTRING_PTR(data), RSTRING_LEN(data));
+  VALIDATE_MAX_LENGTH(hp->offset, HEADER);
+
+  if (hp->cs == http_parser_first_final ||
+      hp->cs == http_parser_en_ChunkedBody) {
+    /* +1 skips the final LF terminating the header block */
+    advance_str(data, hp->offset + 1);
+    hp->offset = 0;
+    /* trailer parsing reuses this method (bound as #trailers) */
+    if (HP_FL_TEST(hp, INTRAILER))
+      return hdr;
+    else
+      return rb_ary_new3(2, hp->status, hdr);
+  }
+
+  if (hp->cs == http_parser_error)
+    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
+
+  return Qnil;
+}
+
+/* true once the chunked body is fully read (final state or in trailers) */
+static int chunked_eof(struct http_parser *hp)
+{
+  return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
+}
+
+/**
+ * call-seq:
+ *    parser.body_eof? => true or false
+ *
+ * Detects if we're done filtering the body or not.  This can be used
+ * to detect when to stop calling Parser#filter_body.
+ */
+static VALUE body_eof(VALUE self)
+{
+  struct http_parser *hp = data_get(self);
+
+  if (HP_FL_TEST(hp, CHUNKED))
+    return chunked_eof(hp) ? Qtrue : Qfalse;
+
+  /* no body at all (1xx/204/304) == body already done */
+  if (! HP_FL_TEST(hp, HASBODY))
+    return Qtrue;
+
+  return hp->len.content == 0 ? Qtrue : Qfalse;
+}
+
+/**
+ * call-seq:
+ *    parser.keepalive? => true or false
+ *
+ * This should be used to detect if a request can really handle
+ * keepalives and pipelining.  Currently, the rules are:
+ *
+ * 1. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive"
+ * 2. MUST NOT have "Connection: close" set
+ */
+static VALUE keepalive(VALUE self)
+{
+  struct http_parser *hp = data_get(self);
+
+  if (HP_FL_ALL(hp, KEEPALIVE)) {
+    if ( HP_FL_TEST(hp, HASBODY) ) {
+      /* the body must be self-delimited for the connection to be reused */
+      if (HP_FL_TEST(hp, CHUNKED) || (hp->len.content >= 0))
+        return Qtrue;
+      return Qfalse;
+    } else { /* 100 Continue */
+      return Qtrue;
+    }
+  }
+  return Qfalse;
+}
+
+/**
+ * call-seq:
+ *    parser.filter_body(buf, data) => nil/data
+ *
+ * Takes a String of +data+, will modify data if dechunking is done.
+ * Returns +nil+ if there is more data left to process.  Returns
+ * +data+ if body processing is complete. When returning +data+,
+ * it may modify +data+ so the start of the string points to where
+ * the body ended so that trailer processing can begin.
+ *
+ * Raises ParserError if there are dechunking errors.
+ * Basically this is a glorified memcpy(3) that copies +data+
+ * into +buf+ while filtering it through the dechunker.
+ */
+static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
+{
+  struct http_parser *hp = data_get(self);
+  char *dptr;
+  long dlen;
+
+  rb_str_update(data);
+  dptr = RSTRING_PTR(data);
+  dlen = RSTRING_LEN(data);
+
+  StringValue(buf);
+  rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
+  OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */
+
+  if (HP_FL_TEST(hp, CHUNKED)) {
+    if (!chunked_eof(hp)) {
+      /* dechunk via the Ragel machine; buf is the copy destination */
+      hp->s.dest_offset = 0;
+      http_parser_execute(hp, buf, dptr, dlen);
+      if (hp->cs == http_parser_error)
+        rb_raise(eParserError, "Invalid HTTP format, parsing fails.");
+
+      assert(hp->s.dest_offset <= hp->offset &&
+             "destination buffer overflow");
+      advance_str(data, hp->offset);
+      rb_str_set_len(buf, hp->s.dest_offset);
+
+      if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
+        assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
+      } else {
+        data = Qnil; /* more body (or more input) expected */
+      }
+    }
+  } else {
+    /* no need to enter the Ragel machine for unchunked transfers */
+    assert(hp->len.content >= 0 && "negative Content-Length");
+    if (hp->len.content > 0) {
+      long nr = MIN(dlen, hp->len.content);
+
+      memcpy(RSTRING_PTR(buf), dptr, nr);
+      hp->len.content -= nr;
+      if (hp->len.content == 0)
+        hp->cs = http_parser_first_final;
+      advance_str(data, nr);
+      rb_str_set_len(buf, nr);
+      data = Qnil;
+    }
+  }
+  hp->offset = 0; /* for trailer parsing */
+  return data;
+}
+
+/* extension entry point: defines Kcar::Parser and Kcar::ParserError */
+void Init_kcar_ext(void)
+{
+  VALUE mKcar = rb_define_module("Kcar");
+  VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);
+
+  eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);
+
+  rb_define_alloc_func(cParser, alloc);
+  rb_define_method(cParser, "initialize", initialize, 0);
+  rb_define_method(cParser, "reset", initialize, 0);
+  rb_define_method(cParser, "headers", headers, 2);
+  /* trailers are parsed with the same code path as headers */
+  rb_define_method(cParser, "trailers", headers, 2);
+  rb_define_method(cParser, "filter_body", filter_body, 2);
+  rb_define_method(cParser, "body_bytes_left", body_bytes_left, 0);
+  rb_define_method(cParser, "body_eof?", body_eof, 0);
+  rb_define_method(cParser, "keepalive?", keepalive, 0);
+  rb_define_method(cParser, "chunked?", chunked, 0);
+
+  /*
+   * The maximum size a single chunk when using chunked transfer encoding.
+   * This is only a theoretical maximum used to detect errors in clients,
+   * it is highly unlikely to encounter clients that send more than
+   * several kilobytes at once.
+   */
+  rb_define_const(cParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));
+
+  /*
+   * The maximum size of the body as specified by Content-Length.
+   * This is only a theoretical maximum, the actual limit is subject
+   * to the limits of the file system used for +Dir.tmpdir+.
+   */
+  rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
+  /* interned IDs for the hash-like fallback path in write_value() */
+  id_sq = rb_intern("[]");
+  id_sq_set = rb_intern("[]=");
+}