diff options
Diffstat (limited to 'ext/kcar/kcar.rl')
-rw-r--r-- | ext/kcar/kcar.rl | 634 |
1 files changed, 634 insertions, 0 deletions
diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl
new file mode 100644
index 0000000..0d22866
--- /dev/null
+++ b/ext/kcar/kcar.rl
@@ -0,0 +1,634 @@
/**
 * Copyright (c) 2009, 2010 Eric Wong (all bugs are Eric's fault)
 * Copyright (c) 2005 Zed A. Shaw
 * You can redistribute it and/or modify it under the same terms as Ruby.
 *
 * This is a Ragel source file (.rl): the "%%{ ... }%%" machine block and
 * the "%% write data" / "%% write exec" directives below are expanded by
 * the Ragel compiler into plain C.  The parser consumes HTTP response
 * headers, trailers and chunked bodies on behalf of Kcar::Parser.
 */
#include "ruby.h"
#include "ext_help.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include "c_util.h"

static VALUE eParserError;
/* interned "[]" and "[]=" for duck-typed (non-Hash) header stores */
static ID id_sq, id_sq_set;

/** Defines common length and error messages for input length validation. */
#define DEF_MAX_LENGTH(N, length) \
  static const size_t MAX_##N##_LENGTH = length; \
  static const char MAX_##N##_LENGTH_ERR[] = \
    "HTTP element " # N " is longer than the " # length " allowed length."

/**
 * Validates the max length of given input and throws an ParserError
 * exception if over.
 */
#define VALIDATE_MAX_LENGTH(len, N) do { \
  if (len > MAX_##N##_LENGTH) \
    rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \
} while (0)

/* Defines the maximum allowed lengths for various input elements.*/
DEF_MAX_LENGTH(FIELD_NAME, 256);
DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024);
DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32)));
DEF_MAX_LENGTH(REASON, 256);

/* bit flags for http_parser.flags (0x8 is unused here) */
#define UH_FL_CHUNKED 0x1
#define UH_FL_HASBODY 0x2
#define UH_FL_INBODY 0x4
#define UH_FL_INTRAILER 0x10
#define UH_FL_INCHUNK 0x20
#define UH_FL_KEEPALIVE 0x40

struct http_parser {
  int cs; /* Ragel internal state */
  unsigned int flags;
  size_t mark;   /* offset of the last marked position in the buffer */
  size_t offset; /* how far into the caller's buffer we have consumed */
  union { /* these 2 fields don't nest */
    size_t field;
    size_t query;
  } start;
  union {
    size_t field_len; /* only used during header processing */
    size_t dest_offset; /* only used during body processing */
  } s;
  VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */
  VALUE status; /* String or Qnil */
  union {
    off_t content;
    off_t chunk;
  } len;
};

/*
 * These macros assume the local variables of http_parser_execute()
 * (p, pe, buffer, hp) are in scope at the expansion site.
 */
#define REMAINING (unsigned long)(pe - p)
#define LEN(AT, FPC) (FPC - buffer - hp->AT)
#define MARK(M,FPC) (hp->M = (FPC) - buffer)
#define PTR_TO(F) (buffer + hp->F)
#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC))

#define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl))
#define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl))
#define HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl))
#define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl))

/*
 * handles values of the "Connection:" header, keepalive is implied
 * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0
 * Additionally, we require GET/HEAD requests to support keepalive.
 */
static void hp_keepalive_connection(struct http_parser *hp, VALUE val)
{
  /* REQUEST_METHOD is always set before any headers */
  if (STR_CSTR_CASE_EQ(val, "keep-alive")) {
    /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */
    HP_FL_SET(hp, KEEPALIVE);
  } else if (STR_CSTR_CASE_EQ(val, "close")) {
    /*
     * it doesn't matter what HTTP version or request method we have,
     * if a server says "Connection: close", we disable keepalive
     */
    HP_FL_UNSET(hp, KEEPALIVE);
  } else {
    /*
     * server could've sent anything, ignore it for now.  Maybe
     * "HP_FL_UNSET(hp, KEEPALIVE);" just in case?
     * Raising an exception might be too mean...
     */
  }
}

/*
 * Ragel action callback for the HTTP version token of the status line.
 * Only "HTTP/1.1" enables implicit keepalive; hdr is unused here.
 */
static void
http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
{
  if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) {
    /* HTTP/1.1 implies keepalive unless "Connection: close" is set */
    HP_FL_SET(hp, KEEPALIVE);
  }
}

/*
 * Ragel action callback for the status code + reason phrase.  Stores
 * the raw phrase in hp->status, validates the 3-digit code and flags
 * HASBODY unless the status is 1xx, 204 or 304 (bodyless per HTTP).
 */
static void
status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len)
{
  long nr;

  hp->status = rb_str_new(ptr, len);

  /* RSTRING_PTR is null terminated, ptr is not */
  nr = strtol(RSTRING_PTR(hp->status), NULL, 10);

  if (nr < 100 || nr > 999)
    rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status));

  if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) )
    HP_FL_SET(hp, HASBODY);
}

/* body-framing headers (Content-Length, Transfer-Encoding, Trailer)
 * are forbidden inside the trailer section itself */
static inline void invalid_if_trailer(struct http_parser *hp)
{
  if (HP_FL_TEST(hp, INTRAILER))
    rb_raise(eParserError, "invalid Trailer");
}

/*
 * Appends a folded (continuation) header line to the value string of
 * the most recently seen header (hp->cont).  Leading whitespace of the
 * continuation is collapsed to a single ' ' when appending to a
 * non-empty value.
 */
static void write_cont_value(struct http_parser *hp,
                             char *buffer, const char *p)
{
  char *vptr;

  if (hp->cont == Qfalse)
    rb_raise(eParserError, "invalid continuation line");

  if (NIL_P(hp->cont))
    return; /* we're ignoring this header (probably Status:) */

  assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string");
  assert(hp->mark > 0 && "impossible continuation line offset");

  if (LEN(mark, p) == 0)
    return;

  if (RSTRING_LEN(hp->cont) > 0)
    --hp->mark;

  vptr = PTR_TO(mark);

  if (RSTRING_LEN(hp->cont) > 0) {
    assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space");
    *vptr = ' ';
  }
  rb_str_buf_cat(hp->cont, vptr, LEN(mark, p));
}

/*
 * Stores one complete "Field: value" pair into hdr (an Array, Hash, or
 * any object responding to [] and []=), after handling the headers the
 * parser itself cares about: Connection, Content-Length,
 * Transfer-Encoding and Trailer.  Duplicate header fields in a hash-ish
 * hdr are joined with "\n".  Sets hp->cont so continuation lines can
 * append to the value just written.
 */
static void write_value(VALUE hdr, struct http_parser *hp,
                        const char *buffer, const char *p)
{
  VALUE f, v;
  VALUE hclass;
  const char *fptr = PTR_TO(start.field);
  long flen = hp->s.field_len;
  const char *vptr;
  long vlen;

  /* Rack does not like Status headers, so we never send them */
  if (CSTR_CASE_EQ(fptr, flen, "status")) {
    hp->cont = Qnil;
    return;
  }

  vptr = PTR_TO(mark);
  vlen = LEN(mark, p);

  VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE);
  VALIDATE_MAX_LENGTH(flen, FIELD_NAME);
  f = rb_str_new(fptr, flen);
  v = rb_str_new(vptr, vlen);

  if (STR_CSTR_CASE_EQ(f, "connection")) {
    hp_keepalive_connection(hp, v);
  } else if (STR_CSTR_CASE_EQ(f, "content-length")) {
    if (! HP_FL_TEST(hp, HASBODY))
      rb_raise(eParserError, "Content-Length with no body");
    hp->len.content = parse_length(vptr, vlen);

    if (hp->len.content < 0)
      rb_raise(eParserError, "invalid Content-Length");

    invalid_if_trailer(hp);
  } else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) {
    if (STR_CSTR_CASE_EQ(v, "chunked")) {
      if (! HP_FL_TEST(hp, HASBODY))
        rb_raise(eParserError, "chunked Transfer-Encoding with no body");

      hp->len.chunk = 0;
      HP_FL_SET(hp, CHUNKED);
    }
    invalid_if_trailer(hp);
  } else if (STR_CSTR_CASE_EQ(f, "trailer")) {
    if (! HP_FL_TEST(hp, HASBODY))
      rb_raise(eParserError, "trailer with no body");
    invalid_if_trailer(hp);
  }

  hclass = CLASS_OF(hdr);
  if (hclass == rb_cArray) {
    rb_ary_push(hdr, rb_ary_new3(2, f, v));
    hp->cont = v;
  } else {
    /* hash-ish, try rb_hash_* first and fall back to slow rb_funcall */
    VALUE e;

    /* try to read the existing value */
    if (hclass == rb_cHash)
      e = rb_hash_aref(hdr, f);
    else
      e = rb_funcall(hdr, id_sq, 1, f);

    if (NIL_P(e)) {
      OBJ_FREEZE(f);

      if (hclass == rb_cHash)
        rb_hash_aset(hdr, f, v);
      else
        rb_funcall(hdr, id_sq_set, 2, f, v);

      hp->cont = v;
    } else {
      /* multi-valued header: join repeated values with a newline */
      rb_str_buf_cat(e, "\n", 1);
      hp->cont = rb_str_buf_append(e, v);
    }
  }
}

/** Machine **/

%%{
  machine http_parser;

  action mark {MARK(mark, fpc); }

  action start_field { MARK(start.field, fpc); }
  action write_field { hp->s.field_len = LEN(start.field, fpc); }
  action start_value { MARK(mark, fpc); }
  action write_value { write_value(hdr, hp, buffer, fpc); }
  action write_cont_value { write_cont_value(hp, buffer, fpc); }
  action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }
  action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); }

  action add_to_chunk_size {
    /* accumulate one hex digit of the chunk-size line */
    hp->len.chunk = step_incr(hp->len.chunk, fc, 16);
    if (hp->len.chunk < 0)
      rb_raise(eParserError, "invalid chunk size");
  }
  action header_done {
    cs = http_parser_first_final;

    if (HP_FL_TEST(hp, CHUNKED))
      cs = http_parser_en_ChunkedBody;

    /*
     * go back to Ruby so we can call the Rack application, we'll reenter
     * the parser iff the body needs to be processed.
     */
    goto post_exec;
  }

  action end_trailers {
    cs = http_parser_first_final;
    goto post_exec;
  }

  action end_chunked_body {
    HP_FL_SET(hp, INTRAILER);
    cs = http_parser_en_Trailers;
    ++p;
    assert(p <= pe && "buffer overflow after chunked body");
    goto post_exec;
  }

  action skip_chunk_data {
  /* label shared with http_parser_execute() to resume a partial chunk */
  skip_chunk_data_hack: {
    size_t nr = MIN((size_t)hp->len.chunk, REMAINING);
    /* hdr doubles as the destination buffer during body filtering */
    memcpy(RSTRING_PTR(hdr) + hp->s.dest_offset, fpc, nr);
    hp->s.dest_offset += nr;
    hp->len.chunk -= nr;
    p += nr;
    assert(hp->len.chunk >= 0 && "negative chunk length");
    if ((size_t)hp->len.chunk > REMAINING) {
      HP_FL_SET(hp, INCHUNK);
      goto post_exec;
    } else {
      fhold;
      fgoto chunk_end;
    }
  }}

  include kcar_http_common "kcar_http_common.rl";
}%%

/** Data **/
%% write data;

/* (re-)initializes a parser to the pristine pre-status-line state */
static void http_parser_init(struct http_parser *hp)
{
  int cs = 0;
  memset(hp, 0, sizeof(struct http_parser));
  hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */
  hp->status = Qnil;
  hp->len.content = -1; /* -1 == Content-Length not seen (yet) */
  %% write init;
  hp->cs = cs;
}

/** exec **/
/*
 * Runs the Ragel machine over buffer[hp->offset..len).  hdr is the
 * header store during header/trailer parsing and the destination
 * String during chunked-body filtering.  Updates hp->cs and hp->offset
 * so parsing can resume where it left off on the next call.
 */
static void http_parser_execute(struct http_parser *hp,
  VALUE hdr, char *buffer, size_t len)
{
  const char *p, *pe;
  int cs = hp->cs;
  size_t off = hp->offset;

  if (cs == http_parser_first_final)
    return;

  assert(off <= len && "offset past end of buffer");

  p = buffer+off;
  pe = buffer+len;

  assert((void *)(pe - p) == (void *)(len - off) &&
         "pointers aren't same distance");

  if (HP_FL_TEST(hp, INCHUNK)) {
    /* resume copying a chunk that was cut off by the previous buffer */
    HP_FL_UNSET(hp, INCHUNK);
    goto skip_chunk_data_hack;
  }
  %% write exec;
post_exec: /* "_out:" also goes here */
  if (hp->cs != http_parser_error)
    hp->cs = cs;
  hp->offset = p - buffer;

  assert(p <= pe && "buffer overflow after parsing execute");
  assert(hp->offset <= len && "offset longer than length");
}

/* extracts the C struct wrapped by a Kcar::Parser Ruby object */
static struct http_parser *data_get(VALUE self)
{
  struct http_parser *hp;

  Data_Get_Struct(self, struct http_parser, hp);
  assert(hp && "failed to extract http_parser struct");
  return hp;
}

/* GC mark function: keep our Ruby references alive */
static void mark(void *ptr)
{
  struct http_parser *hp = ptr;

  rb_gc_mark(hp->cont);
  rb_gc_mark(hp->status);
}

/* allocation function for Kcar::Parser (initialize does the real setup) */
static VALUE alloc(VALUE klass)
{
  struct http_parser *hp;
  return Data_Make_Struct(klass, struct http_parser, mark, -1, hp);
}

/**
 * call-seq:
 *    Kcar::Parser.new => parser
 *
 * Creates a new parser.
 *
 * Document-method: reset
 *
 * call-seq:
 *    parser.reset => parser
 *
 * Resets the parser so it can be reused by another client
 */
static VALUE initialize(VALUE self)
{
  http_parser_init(data_get(self));

  return self;
}

/*
 * Drops the first nr bytes of str in place (memmove + rb_str_set_len),
 * so the caller's string starts at the unconsumed data.
 */
static void advance_str(VALUE str, off_t nr)
{
  long len = RSTRING_LEN(str);

  if (len == 0)
    return;

  rb_str_modify(str);

  assert(nr <= len && "trying to advance past end of buffer");
  len -= nr;
  if (len > 0) /* unlikely, len is usually 0 */
    memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len);
  rb_str_set_len(str, len);
}

/**
 * call-seq:
 *    parser.body_bytes_left => nil or Integer
 *
 * Returns the number of bytes left to run through Parser#filter_body.
 * This will initially be the value of the "Content-Length" HTTP header
 * after header parsing is complete and will decrease in value as
 * Parser#filter_body is called for each chunk.  This should return
 * zero for responses with no body.
 *
 * This will return nil on "Transfer-Encoding: chunked" responses as
 * well as HTTP/1.0 responses where Content-Length is not set
 */
static VALUE body_bytes_left(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, CHUNKED))
    return Qnil;
  if (hp->len.content >= 0)
    return OFFT2NUM(hp->len.content);

  return Qnil;
}

/* parser.chunked? => true if "Transfer-Encoding: chunked" was seen */
static VALUE chunked(VALUE self)
{
  struct http_parser *hp = data_get(self);

  return HP_FL_TEST(hp, CHUNKED) ? Qtrue : Qfalse;
}

/**
 * Document-method: headers
 * call-seq:
 *    parser.headers(hdr, data) => hdr or nil
 *
 * Takes a Hash and a String of data, parses the String of data filling
 * in the Hash returning the Hash if parsing is finished, nil otherwise
 * When returning the hdr Hash, it may modify data to point to where
 * body processing should begin.
 *
 * Raises ParserError if there are parsing errors.
 */
static VALUE headers(VALUE self, VALUE hdr, VALUE data)
{
  struct http_parser *hp = data_get(self);

  /* NOTE(review): one-arg rb_str_update() is presumably a compat macro
   * from ext_help.h (not the 4-arg MRI C API function) — TODO confirm */
  rb_str_update(data);

  http_parser_execute(hp, hdr, RSTRING_PTR(data), RSTRING_LEN(data));
  VALIDATE_MAX_LENGTH(hp->offset, HEADER);

  if (hp->cs == http_parser_first_final ||
      hp->cs == http_parser_en_ChunkedBody) {
    /* +1 skips the final LF of the header terminator */
    advance_str(data, hp->offset + 1);
    hp->offset = 0;
    if (HP_FL_TEST(hp, INTRAILER))
      return hdr;
    else
      return rb_ary_new3(2, hp->status, hdr);
  }

  if (hp->cs == http_parser_error)
    rb_raise(eParserError, "Invalid HTTP format, parsing fails.");

  return Qnil;
}

/* a chunked body is done once we hit the final state or the trailers */
static int chunked_eof(struct http_parser *hp)
{
  return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER));
}

/**
 * call-seq:
 *    parser.body_eof? => true or false
 *
 * Detects if we're done filtering the body or not.  This can be used
 * to detect when to stop calling Parser#filter_body.
 */
static VALUE body_eof(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_TEST(hp, CHUNKED))
    return chunked_eof(hp) ? Qtrue : Qfalse;

  if (! HP_FL_TEST(hp, HASBODY))
    return Qtrue;

  return hp->len.content == 0 ? Qtrue : Qfalse;
}

/**
 * call-seq:
 *    parser.keepalive? => true or false
 *
 * This should be used to detect if a request can really handle
 * keepalives and pipelining.  Currently, the rules are:
 *
 * 1. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive"
 * 2. MUST NOT have "Connection: close" set
 */
static VALUE keepalive(VALUE self)
{
  struct http_parser *hp = data_get(self);

  if (HP_FL_ALL(hp, KEEPALIVE)) {
    if ( HP_FL_TEST(hp, HASBODY) ) {
      /* the body must be self-delimiting for the connection to survive */
      if (HP_FL_TEST(hp, CHUNKED) || (hp->len.content >= 0))
        return Qtrue;
      return Qfalse;
    } else { /* 100 Continue */
      return Qtrue;
    }
  }
  return Qfalse;
}

/**
 * call-seq:
 *    parser.filter_body(buf, data) => nil/data
 *
 * Takes a String of +data+, will modify data if dechunking is done.
 * Returns +nil+ if there is more data left to process.  Returns
 * +data+ if body processing is complete. When returning +data+,
 * it may modify +data+ so the start of the string points to where
 * the body ended so that trailer processing can begin.
 *
 * Raises ParserError if there are dechunking errors.
 * Basically this is a glorified memcpy(3) that copies +data+
 * into +buf+ while filtering it through the dechunker.
 */
static VALUE filter_body(VALUE self, VALUE buf, VALUE data)
{
  struct http_parser *hp = data_get(self);
  char *dptr;
  long dlen;

  rb_str_update(data);
  dptr = RSTRING_PTR(data);
  dlen = RSTRING_LEN(data);

  StringValue(buf);
  rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */
  OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */

  if (HP_FL_TEST(hp, CHUNKED)) {
    if (!chunked_eof(hp)) {
      hp->s.dest_offset = 0;
      /* buf stands in for hdr here; skip_chunk_data writes into it */
      http_parser_execute(hp, buf, dptr, dlen);
      if (hp->cs == http_parser_error)
        rb_raise(eParserError, "Invalid HTTP format, parsing fails.");

      assert(hp->s.dest_offset <= hp->offset &&
             "destination buffer overflow");
      advance_str(data, hp->offset);
      rb_str_set_len(buf, hp->s.dest_offset);

      if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) {
        assert(hp->len.chunk == 0 && "chunk at EOF but more to parse");
      } else {
        data = Qnil;
      }
    }
  } else {
    /* no need to enter the Ragel machine for unchunked transfers */
    assert(hp->len.content >= 0 && "negative Content-Length");
    if (hp->len.content > 0) {
      long nr = MIN(dlen, hp->len.content);

      memcpy(RSTRING_PTR(buf), dptr, nr);
      hp->len.content -= nr;
      if (hp->len.content == 0)
        hp->cs = http_parser_first_final;
      advance_str(data, nr);
      rb_str_set_len(buf, nr);
      data = Qnil;
    }
  }
  hp->offset = 0; /* for trailer parsing */
  return data;
}

/* extension entry point: defines Kcar::Parser and Kcar::ParserError */
void Init_kcar_ext(void)
{
  VALUE mKcar = rb_define_module("Kcar");
  VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject);

  eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError);

  rb_define_alloc_func(cParser, alloc);
  rb_define_method(cParser, "initialize", initialize, 0);
  rb_define_method(cParser, "reset", initialize, 0);
  rb_define_method(cParser, "headers", headers, 2);
  rb_define_method(cParser, "trailers", headers, 2);
  rb_define_method(cParser, "filter_body", filter_body, 2);
  rb_define_method(cParser, "body_bytes_left", body_bytes_left, 0);
  rb_define_method(cParser, "body_eof?", body_eof, 0);
  rb_define_method(cParser, "keepalive?", keepalive, 0);
  rb_define_method(cParser, "chunked?", chunked, 0);

  /*
   * The maximum size a single chunk when using chunked transfer encoding.
   * This is only a theoretical maximum used to detect errors in clients,
   * it is highly unlikely to encounter clients that send more than
   * several kilobytes at once.
   */
  rb_define_const(cParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX));

  /*
   * The maximum size of the body as specified by Content-Length.
   * This is only a theoretical maximum, the actual limit is subject
   * to the limits of the file system used for +Dir.tmpdir+.
   */
  rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX));
  id_sq = rb_intern("[]");
  id_sq_set = rb_intern("[]=");
}