diff options
author | Eric Wong <normalperson@yhbt.net> | 2010-04-26 18:29:21 -0700 |
---|---|---|
committer | Eric Wong <normalperson@yhbt.net> | 2010-04-26 18:53:47 -0700 |
commit | f071a6f936ba0b22610b1f7ce68e414403f29996 (patch) | |
tree | c73efc2f68710a200758ce8d987ac90db1cada4d | |
download | kcar-f071a6f936ba0b22610b1f7ce68e414403f29996.tar.gz |
-rw-r--r-- | .gitignore | 20 | ||||
-rwxr-xr-x | GIT-VERSION-GEN | 40 | ||||
-rw-r--r-- | GNUmakefile | 180 | ||||
-rw-r--r-- | README | 11 | ||||
-rw-r--r-- | ext/kcar/c_util.h | 105 | ||||
-rw-r--r-- | ext/kcar/ext_help.h | 82 | ||||
-rw-r--r-- | ext/kcar/extconf.rb | 14 | ||||
-rw-r--r-- | ext/kcar/kcar.rl | 634 | ||||
-rw-r--r-- | ext/kcar/kcar_http_common.rl | 56 | ||||
-rw-r--r-- | lib/kcar.rb | 10 | ||||
-rw-r--r-- | lib/kcar/parser.rb | 39 | ||||
-rw-r--r-- | lib/kcar/session.rb | 130 | ||||
-rw-r--r-- | test/test_parser.rb | 257 | ||||
-rw-r--r-- | test/test_session.rb | 342 |
14 files changed, 1920 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..73ba280 --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +*.o +*.log +*.so +*.rbc +/.config +/InstalledFiles +/doc +/local.mk +/test/install-* +ext/kcar/Makefile +ext/kcar/kcar.c +log/ +pkg/ +/NEWS +/ChangeLog +/.manifest +/GIT-VERSION-FILE +/man +tags +TAGS diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN new file mode 100755 index 0000000..4e7f0e3 --- /dev/null +++ b/GIT-VERSION-GEN @@ -0,0 +1,40 @@ +#!/bin/sh + +GVF=GIT-VERSION-FILE +DEF_VER=v0.1.0.GIT + +LF=' +' + +# First see if there is a version file (included in release tarballs), +# then try git-describe, then default. +if test -f version +then + VN=$(cat version) || VN="$DEF_VER" +elif test -d .git -o -f .git && + VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + case "$VN" in + *$LF*) (exit 1) ;; + v[0-9]*) + git update-index -q --refresh + test -z "$(git diff-index --name-only HEAD --)" || + VN="$VN-dirty" ;; + esac +then + VN=$(echo "$VN" | sed -e 's/-/./g'); +else + VN="$DEF_VER" +fi + +VN=$(expr "$VN" : v*'\(.*\)') + +if test -r $GVF +then + VC=$(sed -e 's/^GIT_VERSION = //' <$GVF) +else + VC=unset +fi +test "$VN" = "$VC" || { + echo >&2 "GIT_VERSION = $VN" + echo "GIT_VERSION = $VN" >$GVF +} diff --git a/GNUmakefile b/GNUmakefile new file mode 100644 index 0000000..982c577 --- /dev/null +++ b/GNUmakefile @@ -0,0 +1,180 @@ +# use GNU Make to run tests in parallel, and without depending on RubyGems +all:: +RUBY = ruby +RAKE = rake +RAGEL = ragel +GIT_URL = git://git.bogomips.org/kcar.git +RLFLAGS = -G2 + +GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE + @./GIT-VERSION-GEN +-include GIT-VERSION-FILE +-include local.mk +ifeq ($(DLEXT),) # "so" for Linux + DLEXT := $(shell $(RUBY) -rrbconfig -e 'puts Config::CONFIG["DLEXT"]') +endif +ifeq ($(RUBY_VERSION),) + RUBY_VERSION := $(shell $(RUBY) -e 'puts RUBY_VERSION') +endif + +install: $(bins) + $(prep_setup_rb) + $(RM) -r .install-tmp + mkdir .install-tmp + cp -p bin/* .install-tmp + 
$(RUBY) setup.rb all + $(RM) $^ + mv .install-tmp/* bin/ + $(RM) -r .install-tmp + $(prep_setup_rb) + +setup_rb_files := .config InstalledFiles +# $(ext) is assigned further down and names the built extension file, +# not a directory; ":=" would expand it to nothing here, so spell out +# the extension directory the same way the clean target does. +prep_setup_rb := @-$(RM) $(setup_rb_files);$(MAKE) -C ext/kcar clean + +clean: + -$(MAKE) -C ext/kcar clean + $(RM) $(setup_rb_files) ext/kcar/Makefile + +pkg_extra := GIT-VERSION-FILE NEWS ChangeLog +manifest: $(pkg_extra) + $(RM) .manifest + $(MAKE) .manifest + +.manifest: + (git ls-files && \ + for i in $@ $(pkg_extra) $(man1_paths); \ + do echo $$i; done) | LC_ALL=C sort > $@+ + cmp $@+ $@ || mv $@+ $@ + $(RM) $@+ + +NEWS: GIT-VERSION-FILE + $(RAKE) -s news_rdoc > $@+ + mv $@+ $@ + +SINCE = +ChangeLog: LOG_VERSION = \ + $(shell git rev-parse -q "$(GIT_VERSION)" >/dev/null 2>&1 && \ + echo $(GIT_VERSION) || git describe) +ifneq ($(SINCE),) +ChangeLog: log_range = v$(SINCE)..$(LOG_VERSION) +endif +ChangeLog: GIT-VERSION-FILE + @echo "ChangeLog from $(GIT_URL) ($(log_range))" > $@+ + @echo >> $@+ + git log $(log_range) | sed -e 's/^/ /' >> $@+ + mv $@+ $@ + +news_atom := http://bogomips.org/kcar/NEWS.atom.xml +cgit_atom := http://git.bogomips.org/cgit/kcar.git/atom/?h=master +atom = <link rel="alternate" title="Atom feed" href="$(1)" \ + type="application/atom+xml"/> + +# using rdoc 2.4.1+ +doc: .document NEWS ChangeLog + for i in $(man1_bins); do > $$i; done + rdoc -Na -t "$(shell sed -ne '1s/^= //p' README)" + install -m644 COPYING doc/COPYING + install -m644 $(shell grep '^[A-Z]' .document) doc/ + cd doc && for i in $(base_bins); do \ + html=$$(echo $$i | sed 's/\.rb/_rb/')_1.html; \ + sed -e '/"documentation">/r man1/'$$i'.1.html' \ + < $$html > tmp && mv tmp $$html; done + $(RUBY) -i -p -e \ + '$$_.gsub!("</title>",%q{\&$(call atom,$(cgit_atom))})' \ + doc/ChangeLog.html + $(RUBY) -i -p -e \ + '$$_.gsub!("</title>",%q{\&$(call atom,$(news_atom))})' \ + doc/NEWS.html doc/README.html + $(RAKE) -s news_atom > doc/NEWS.atom.xml + cd doc && ln README.html tmp && mv tmp index.html + $(RM) $(man1_bins) + +ifneq ($(VERSION),) 
+rfproject := rainbows +rfpackage := kcar +pkggem := pkg/$(rfpackage)-$(VERSION).gem +pkgtgz := pkg/$(rfpackage)-$(VERSION).tgz +release_notes := release_notes-$(VERSION) +release_changes := release_changes-$(VERSION) + +release-notes: $(release_notes) +release-changes: $(release_changes) +$(release_changes): + $(RAKE) -s release_changes > $@+ + $(VISUAL) $@+ && test -s $@+ && mv $@+ $@ +$(release_notes): + GIT_URL=$(GIT_URL) $(RAKE) -s release_notes > $@+ + $(VISUAL) $@+ && test -s $@+ && mv $@+ $@ + +# ensures we're actually on the tagged $(VERSION), only used for release +verify: + test x"$(shell umask)" = x0022 + git rev-parse --verify refs/tags/v$(VERSION)^{} + git diff-index --quiet HEAD^0 + test `git rev-parse --verify HEAD^0` = \ + `git rev-parse --verify refs/tags/v$(VERSION)^{}` + +fix-perms: + -git ls-tree -r HEAD | awk '/^100644 / {print $$NF}' | xargs chmod 644 + -git ls-tree -r HEAD | awk '/^100755 / {print $$NF}' | xargs chmod 755 + +gem: $(pkggem) + +install-gem: $(pkggem) + gem install $(CURDIR)/$< + +$(pkggem): manifest fix-perms + gem build $(rfpackage).gemspec + mkdir -p pkg + mv $(@F) $@ + +$(pkgtgz): distdir = $(basename $@) +$(pkgtgz): HEAD = v$(VERSION) +$(pkgtgz): manifest fix-perms + @test -n "$(distdir)" + $(RM) -r $(distdir) + mkdir -p $(distdir) + tar c `cat .manifest` | (cd $(distdir) && tar x) + cd pkg && tar c $(basename $(@F)) | gzip -9 > $(@F)+ + mv $@+ $@ + +package: $(pkgtgz) $(pkggem) + +test-release: verify package $(release_notes) $(release_changes) +release: verify package $(release_notes) $(release_changes) + # make tgz release on RubyForge + rubyforge add_release -f -n $(release_notes) -a $(release_changes) \ + $(rfproject) $(rfpackage) $(VERSION) $(pkgtgz) + # push gem to Gemcutter + gem push $(pkggem) + # in case of gem downloads from RubyForge releases page + -rubyforge add_file \ + $(rfproject) $(rfpackage) $(VERSION) $(pkggem) +else +gem install-gem: GIT-VERSION-FILE + $(MAKE) $@ VERSION=$(GIT_VERSION) +endif + +ext := 
ext/kcar/kcar_ext.$(DLEXT) +hdr := $(wildcard $(addprefix ext/kcar/,*.h)) +ragel: $(ext) +ext/kcar/Makefile: ext/kcar/extconf.rb + cd $(@D) && $(RUBY) extconf.rb + +ext/kcar/kcar.c: ext/kcar/kcar.rl ext/kcar/kcar_http_common.rl + cd $(@D) && $(RAGEL) kcar.rl -C $(RLFLAGS) -o $(@F) + +$(ext): ext/kcar/kcar.c $(hdr) ext/kcar/Makefile + $(MAKE) -C $(@D) + +all:: test + +export STRESS BENCHMARK +build: $(ext) +test_units := $(wildcard test/test_*.rb) +test: test-unit +test-unit: $(test_units) +$(test_units): build + $(RUBY) -w -I lib:ext/kcar $@ + +.PHONY: .FORCE-GIT-VERSION-FILE doc manifest man test $(test_units) @@ -0,0 +1,11 @@ += kcar - retrevnoc esnopser kcaR ot maertsetyb + +== Features + +* RFC2616-compliant Ragel/C HTTP parser adapted from Unicorn and Mongrel + +* decodes chunked bodies + +* handles odd things like trailers and multiline headers + +* streaming interface for response bodies for incremental processing diff --git a/ext/kcar/c_util.h b/ext/kcar/c_util.h new file mode 100644 index 0000000..624b4b3 --- /dev/null +++ b/ext/kcar/c_util.h @@ -0,0 +1,105 @@ +/* + * Generic C functions and macros go here, there are no dependencies + * on Unicorn internal structures or the Ruby C API in here. + */ + +#ifndef UH_util_h +#define UH_util_h + +#include <unistd.h> +#include <assert.h> + +#define MIN(a,b) (a < b ? a : b) +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + +#ifndef SIZEOF_OFF_T +# define SIZEOF_OFF_T 4 +# warning SIZEOF_OFF_T not defined, guessing 4. Did you run extconf.rb? +#endif + +#if SIZEOF_OFF_T == 4 +# define UH_OFF_T_MAX 0x7fffffff +#elif SIZEOF_OFF_T == 8 +# if SIZEOF_LONG == 4 +# define UH_OFF_T_MAX 0x7fffffffffffffffLL +# else +# define UH_OFF_T_MAX 0x7fffffffffffffff +# endif +#else +# error off_t size unknown for this platform! 
+#endif /* SIZEOF_OFF_T check */ + +/* + * ragel enforces fpc as a const, and merely casting can make picky + * compilers unhappy, so we have this little helper do our dirty work + */ +static inline void *deconst(const void *in) +{ + union { const void *in; void *out; } tmp; + + tmp.in = in; + + return tmp.out; +} + +/* + * converts one hexadecimal digit character to its integer value (0..15). + * Anything outside 0-9, A-F and a-f trips the assert; if asserts are + * compiled out, such input simply yields xdigit - '0'. + */ +static int hexchar2int(int xdigit) +{ + if (xdigit >= 'A' && xdigit <= 'F') + return xdigit - 'A' + 10; + if (xdigit >= 'a' && xdigit <= 'f') + return xdigit - 'a' + 10; + + /* Ragel already does runtime range checking for us in Unicorn: */ + assert(xdigit >= '0' && xdigit <= '9' && "invalid digit character"); + + return xdigit - '0'; +} + +/* + * multiplies +i+ by +base+ and increments the result by the parsed + * integer value of +xdigit+. +xdigit+ is a character byte + * representing a number in the range of 0..(base-1) + * returns the new value of +i+ on success + * returns -1 on errors (including overflow) + */ +static off_t step_incr(off_t i, int xdigit, const int base) +{ + static const off_t max = UH_OFF_T_MAX; + const off_t next_max = (max - (max % base)) / base; + off_t offset = hexchar2int(xdigit); + + /* a negative offset can only come from a non-digit byte */ + if (offset < 0 || offset > (base - 1)) + return -1; + if (i > next_max) + return -1; + i *= base; + + /* the digit was range-checked above, only overflow is left to catch */ + if ((max - i) < offset) + return -1; + + return i + offset; +} + +/* + * parses a non-negative length according to base-10 and + * returns it as an off_t value. Returns -1 on errors + * (including overflow). 
+ */ +static off_t parse_length(const char *value, size_t length) +{ + off_t rv; + + for (rv = 0; length-- && rv >= 0; ++value) { + if (*value >= '0' && *value <= '9') + rv = step_incr(rv, *value, 10); + else + return -1; + } + + return rv; +} + +#define CONST_MEM_EQ(const_p, buf, len) \ + ((sizeof(const_p) - 1) == len && !memcmp(const_p, buf, sizeof(const_p) - 1)) + +#endif /* UH_util_h */ diff --git a/ext/kcar/ext_help.h b/ext/kcar/ext_help.h new file mode 100644 index 0000000..b6be7ce --- /dev/null +++ b/ext/kcar/ext_help.h @@ -0,0 +1,82 @@ +#ifndef ext_help_h +#define ext_help_h + +#ifndef RSTRING_PTR +#define RSTRING_PTR(s) (RSTRING(s)->ptr) +#endif /* !defined(RSTRING_PTR) */ +#ifndef RSTRING_LEN +#define RSTRING_LEN(s) (RSTRING(s)->len) +#endif /* !defined(RSTRING_LEN) */ + +#ifndef RUBINIUS +# define rb_str_update(x) do {} while (0) +# define rb_str_flush(x) do {} while (0) +#endif /* !RUBINIUS */ + +#ifndef HAVE_RB_STR_SET_LEN +# ifdef RUBINIUS +# define rb_str_set_len(str,len) rb_str_resize(str,len) +# else /* 1.8.6 optimized version */ +/* this is taken from Ruby 1.8.7, 1.8.6 may not have it */ +static void rb_18_str_set_len(VALUE str, long len) +{ + RSTRING(str)->len = len; + RSTRING(str)->ptr[len] = '\0'; + rb_str_flush(str); +} +# define rb_str_set_len(str,len) rb_18_str_set_len(str,len) +# endif /* ! RUBINIUS */ +#endif /* !defined(HAVE_RB_STR_SET_LEN) */ + +/* not all Ruby implementations support frozen objects (Rubinius does not) */ +#if defined(OBJ_FROZEN) +# define assert_frozen(f) assert(OBJ_FROZEN(f) && "unfrozen object") +#else +# define assert_frozen(f) do {} while (0) +#endif /* !defined(OBJ_FROZEN) */ + +#if !defined(OFFT2NUM) +# if SIZEOF_OFF_T == SIZEOF_LONG +# define OFFT2NUM(n) LONG2NUM(n) +# else +# define OFFT2NUM(n) LL2NUM(n) +# endif +#endif /* ! defined(OFFT2NUM) */ + +#ifndef HAVE_RB_STR_MODIFY +# define rb_str_modify(x) do {} while (0) +#endif /* ! 
defined(HAVE_RB_STR_MODIFY) */ + +static inline int str_cstr_eq(VALUE val, const char *ptr, long len) +{ + return (RSTRING_LEN(val) == len && !memcmp(ptr, RSTRING_PTR(val), len)); +} + +#define STR_CSTR_EQ(val, const_str) \ + str_cstr_eq(val, const_str, sizeof(const_str) - 1) + +static int cstr_case_eq(const char *a, long alen, const char *b, long blen) +{ + if (alen == blen) { + for (; blen--; ++a, ++b) { + if ((*a == *b) || ((*a >= 'A' && *a <= 'Z') && (*a | 0x20) == *b)) + continue; + return 0; + } + return 1; + } + return 0; +} + +/* strcasecmp isn't locale independent */ +static int str_cstr_case_eq(VALUE val, const char *ptr, long len) +{ + return cstr_case_eq(RSTRING_PTR(val), RSTRING_LEN(val), ptr, len); +} + +#define STR_CSTR_CASE_EQ(val, const_str) \ + str_cstr_case_eq(val, const_str, sizeof(const_str) - 1) +#define CSTR_CASE_EQ(ptr, len, const_str) \ + cstr_case_eq(ptr, len, const_str, sizeof(const_str) - 1) + +#endif /* ext_help_h */ diff --git a/ext/kcar/extconf.rb b/ext/kcar/extconf.rb new file mode 100644 index 0000000..4f19c4a --- /dev/null +++ b/ext/kcar/extconf.rb @@ -0,0 +1,14 @@ +# -*- encoding: binary -*- +require 'mkmf' + +dir_config("kcar") + +have_macro("SIZEOF_OFF_T", "ruby.h") or check_sizeof("off_t", "sys/types.h") +have_macro("SIZEOF_LONG", "ruby.h") or check_sizeof("long", "sys/types.h") +have_func("rb_str_set_len", "ruby.h") +have_func("rb_str_modify", "ruby.h") + +# -fPIC is needed for Rubinius, MRI already uses it regardless +with_cflags($CFLAGS + " -fPIC ") do + create_makefile("kcar_ext") +end diff --git a/ext/kcar/kcar.rl b/ext/kcar/kcar.rl new file mode 100644 index 0000000..0d22866 --- /dev/null +++ b/ext/kcar/kcar.rl @@ -0,0 +1,634 @@ +/** + * Copyright (c) 2009, 2010 Eric Wong (all bugs are Eric's fault) + * Copyright (c) 2005 Zed A. Shaw + * You can redistribute it and/or modify it under the same terms as Ruby. 
+ */ +#include "ruby.h" +#include "ext_help.h" +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include "c_util.h" + +static VALUE eParserError; +static ID id_sq, id_sq_set; + +/** Defines common length and error messages for input length validation. */ +#define DEF_MAX_LENGTH(N, length) \ + static const size_t MAX_##N##_LENGTH = length; \ + static const char MAX_##N##_LENGTH_ERR[] = \ + "HTTP element " # N " is longer than the " # length " allowed length." + +/** + * Validates the max length of given input and throws an ParserError + * exception if over. + */ +#define VALIDATE_MAX_LENGTH(len, N) do { \ + if (len > MAX_##N##_LENGTH) \ + rb_raise(eParserError, MAX_##N##_LENGTH_ERR); \ +} while (0) + +/* Defines the maximum allowed lengths for various input elements.*/ +DEF_MAX_LENGTH(FIELD_NAME, 256); +DEF_MAX_LENGTH(FIELD_VALUE, 80 * 1024); +DEF_MAX_LENGTH(HEADER, (1024 * (80 + 32))); +DEF_MAX_LENGTH(REASON, 256); + +#define UH_FL_CHUNKED 0x1 +#define UH_FL_HASBODY 0x2 +#define UH_FL_INBODY 0x4 +#define UH_FL_INTRAILER 0x10 +#define UH_FL_INCHUNK 0x20 +#define UH_FL_KEEPALIVE 0x40 + +struct http_parser { + int cs; /* Ragel internal state */ + unsigned int flags; + size_t mark; + size_t offset; + union { /* these 2 fields don't nest */ + size_t field; + size_t query; + } start; + union { + size_t field_len; /* only used during header processing */ + size_t dest_offset; /* only used during body processing */ + } s; + VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */ + VALUE status; /* String or Qnil */ + union { + off_t content; + off_t chunk; + } len; +}; + +#define REMAINING (unsigned long)(pe - p) +#define LEN(AT, FPC) (FPC - buffer - hp->AT) +#define MARK(M,FPC) (hp->M = (FPC) - buffer) +#define PTR_TO(F) (buffer + hp->F) +#define STR_NEW(M,FPC) rb_str_new(PTR_TO(M), LEN(M, FPC)) + +#define HP_FL_TEST(hp,fl) ((hp)->flags & (UH_FL_##fl)) +#define HP_FL_SET(hp,fl) ((hp)->flags |= (UH_FL_##fl)) +#define 
HP_FL_UNSET(hp,fl) ((hp)->flags &= ~(UH_FL_##fl)) +#define HP_FL_ALL(hp,fl) (HP_FL_TEST(hp, fl) == (UH_FL_##fl)) + +/* + * handles values of the "Connection:" header, keepalive is implied + * for HTTP/1.1 but needs to be explicitly enabled with HTTP/1.0 + * Additionally, we require GET/HEAD requests to support keepalive. + */ +static void hp_keepalive_connection(struct http_parser *hp, VALUE val) +{ + /* REQUEST_METHOD is always set before any headers */ + if (STR_CSTR_CASE_EQ(val, "keep-alive")) { + /* basically have HTTP/1.0 masquerade as HTTP/1.1+ */ + HP_FL_SET(hp, KEEPALIVE); + } else if (STR_CSTR_CASE_EQ(val, "close")) { + /* + * it doesn't matter what HTTP version or request method we have, + * if a server says "Connection: close", we disable keepalive + */ + HP_FL_UNSET(hp, KEEPALIVE); + } else { + /* + * server could've sent anything, ignore it for now. Maybe + * "HP_FL_UNSET(hp, KEEPALIVE);" just in case? + * Raising an exception might be too mean... + */ + } +} + +static void +http_version(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len) +{ + if (CONST_MEM_EQ("HTTP/1.1", ptr, len)) { + /* HTTP/1.1 implies keepalive unless "Connection: close" is set */ + HP_FL_SET(hp, KEEPALIVE); + } +} + +static void +status_phrase(struct http_parser *hp, VALUE hdr, const char *ptr, size_t len) +{ + long nr; + + hp->status = rb_str_new(ptr, len); + + /* RSTRING_PTR is null terminated, ptr is not */ + nr = strtol(RSTRING_PTR(hp->status), NULL, 10); + + if (nr < 100 || nr > 999) + rb_raise(eParserError, "invalid status: %s", RSTRING_PTR(hp->status)); + + if ( !((nr >= 100 && nr <= 199) || nr == 204 || nr == 304) ) + HP_FL_SET(hp, HASBODY); +} + +static inline void invalid_if_trailer(struct http_parser *hp) +{ + if (HP_FL_TEST(hp, INTRAILER)) + rb_raise(eParserError, "invalid Trailer"); +} + +static void write_cont_value(struct http_parser *hp, + char *buffer, const char *p) +{ + char *vptr; + + if (hp->cont == Qfalse) + rb_raise(eParserError, "invalid 
continuation line"); + + if (NIL_P(hp->cont)) + return; /* we're ignoring this header (probably Status:) */ + + assert(TYPE(hp->cont) == T_STRING && "continuation line is not a string"); + assert(hp->mark > 0 && "impossible continuation line offset"); + + if (LEN(mark, p) == 0) + return; + + if (RSTRING_LEN(hp->cont) > 0) + --hp->mark; + + vptr = PTR_TO(mark); + + if (RSTRING_LEN(hp->cont) > 0) { + assert((' ' == *vptr || '\t' == *vptr) && "invalid leading white space"); + *vptr = ' '; + } + rb_str_buf_cat(hp->cont, vptr, LEN(mark, p)); +} + +static void write_value(VALUE hdr, struct http_parser *hp, + const char *buffer, const char *p) +{ + VALUE f, v; + VALUE hclass; + const char *fptr = PTR_TO(start.field); + long flen = hp->s.field_len; + const char *vptr; + long vlen; + + /* Rack does not like Status headers, so we never send them */ + if (CSTR_CASE_EQ(fptr, flen, "status")) { + hp->cont = Qnil; + return; + } + + vptr = PTR_TO(mark); + vlen = LEN(mark, p); + VALIDATE_MAX_LENGTH(vlen, FIELD_VALUE); + VALIDATE_MAX_LENGTH(flen, FIELD_NAME); + f = rb_str_new(fptr, flen); + v = rb_str_new(vptr, vlen); + + if (STR_CSTR_CASE_EQ(f, "connection")) { + hp_keepalive_connection(hp, v); + } else if (STR_CSTR_CASE_EQ(f, "content-length")) { + if (! HP_FL_TEST(hp, HASBODY)) + rb_raise(eParserError, "Content-Length with no body"); + hp->len.content = parse_length(vptr, vlen); + + if (hp->len.content < 0) + rb_raise(eParserError, "invalid Content-Length"); + + invalid_if_trailer(hp); + } else if (STR_CSTR_CASE_EQ(f, "transfer-encoding")) { + if (STR_CSTR_CASE_EQ(v, "chunked")) { + if (! HP_FL_TEST(hp, HASBODY)) + rb_raise(eParserError, "chunked Transfer-Encoding with no body"); + + hp->len.chunk = 0; + HP_FL_SET(hp, CHUNKED); + } + invalid_if_trailer(hp); + } else if (STR_CSTR_CASE_EQ(f, "trailer")) { + if (! 
HP_FL_TEST(hp, HASBODY)) + rb_raise(eParserError, "trailer with no body"); + invalid_if_trailer(hp); + } + + hclass = CLASS_OF(hdr); + if (hclass == rb_cArray) { + rb_ary_push(hdr, rb_ary_new3(2, f, v)); + hp->cont = v; + } else { + /* hash-ish, try rb_hash_* first and fall back to slow rb_funcall */ + VALUE e; + + /* try to read the existing value */ + if (hclass == rb_cHash) + e = rb_hash_aref(hdr, f); + else + e = rb_funcall(hdr, id_sq, 1, f); + + if (NIL_P(e)) { + OBJ_FREEZE(f); + + if (hclass == rb_cHash) + rb_hash_aset(hdr, f, v); + else + rb_funcall(hdr, id_sq_set, 2, f, v); + + hp->cont = v; + } else { + rb_str_buf_cat(e, "\n", 1); + hp->cont = rb_str_buf_append(e, v); + } + } +} + +/** Machine **/ + +%%{ + machine http_parser; + + action mark {MARK(mark, fpc); } + + action start_field { MARK(start.field, fpc); } + action write_field { hp->s.field_len = LEN(start.field, fpc); } + action start_value { MARK(mark, fpc); } + action write_value { write_value(hdr, hp, buffer, fpc); } + action write_cont_value { write_cont_value(hp, buffer, fpc); } + action http_version { http_version(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } + action status_phrase { status_phrase(hp, hdr, PTR_TO(mark), LEN(mark, fpc)); } + + action add_to_chunk_size { + hp->len.chunk = step_incr(hp->len.chunk, fc, 16); + if (hp->len.chunk < 0) + rb_raise(eParserError, "invalid chunk size"); + } + action header_done { + cs = http_parser_first_final; + + if (HP_FL_TEST(hp, CHUNKED)) + cs = http_parser_en_ChunkedBody; + + /* + * go back to Ruby so we can call the Rack application, we'll reenter + * the parser iff the body needs to be processed. 
+ */ + goto post_exec; + } + + action end_trailers { + cs = http_parser_first_final; + goto post_exec; + } + + action end_chunked_body { + HP_FL_SET(hp, INTRAILER); + cs = http_parser_en_Trailers; + ++p; + assert(p <= pe && "buffer overflow after chunked body"); + goto post_exec; + } + + action skip_chunk_data { + skip_chunk_data_hack: { + size_t nr = MIN((size_t)hp->len.chunk, REMAINING); + memcpy(RSTRING_PTR(hdr) + hp->s.dest_offset, fpc, nr); + hp->s.dest_offset += nr; + hp->len.chunk -= nr; + p += nr; + assert(hp->len.chunk >= 0 && "negative chunk length"); + if ((size_t)hp->len.chunk > REMAINING) { + HP_FL_SET(hp, INCHUNK); + goto post_exec; + } else { + fhold; + fgoto chunk_end; + } + }} + + include kcar_http_common "kcar_http_common.rl"; +}%% + +/** Data **/ +%% write data; + +static void http_parser_init(struct http_parser *hp) +{ + int cs = 0; + memset(hp, 0, sizeof(struct http_parser)); + hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */ + hp->status = Qnil; + hp->len.content = -1; + %% write init; + hp->cs = cs; +} + +/** exec **/ +static void http_parser_execute(struct http_parser *hp, + VALUE hdr, char *buffer, size_t len) +{ + const char *p, *pe; + int cs = hp->cs; + size_t off = hp->offset; + + if (cs == http_parser_first_final) + return; + + assert(off <= len && "offset past end of buffer"); + + p = buffer+off; + pe = buffer+len; + + assert((void *)(pe - p) == (void *)(len - off) && + "pointers aren't same distance"); + + if (HP_FL_TEST(hp, INCHUNK)) { + HP_FL_UNSET(hp, INCHUNK); + goto skip_chunk_data_hack; + } + %% write exec; +post_exec: /* "_out:" also goes here */ + if (hp->cs != http_parser_error) + hp->cs = cs; + hp->offset = p - buffer; + + assert(p <= pe && "buffer overflow after parsing execute"); + assert(hp->offset <= len && "offset longer than length"); +} + +static struct http_parser *data_get(VALUE self) +{ + struct http_parser *hp; + + Data_Get_Struct(self, struct http_parser, hp); + assert(hp && "failed to 
extract http_parser struct"); + return hp; +} + +static void mark(void *ptr) +{ + struct http_parser *hp = ptr; + + rb_gc_mark(hp->cont); + rb_gc_mark(hp->status); +} + +static VALUE alloc(VALUE klass) +{ + struct http_parser *hp; + return Data_Make_Struct(klass, struct http_parser, mark, -1, hp); +} + +/** + * call-seq: + * Kcar::Parser.new => parser + * + * Creates a new parser. + * + * Document-method: reset + * + * call-seq: + * parser.reset => parser + * + * Resets the parser so it can be reused by another client + */ +static VALUE initialize(VALUE self) +{ + http_parser_init(data_get(self)); + + return self; +} + +static void advance_str(VALUE str, off_t nr) +{ + long len = RSTRING_LEN(str); + + if (len == 0) + return; + + rb_str_modify(str); + + assert(nr <= len && "trying to advance past end of buffer"); + len -= nr; + if (len > 0) /* unlikely, len is usually 0 */ + memmove(RSTRING_PTR(str), RSTRING_PTR(str) + nr, len); + rb_str_set_len(str, len); +} + +/** + * call-seq: + * parser.body_bytes_left => nil or Integer + * + * Returns the number of bytes left to run through Parser#filter_body. + * This will initially be the value of the "Content-Length" HTTP header + * after header parsing is complete and will decrease in value as + * Parser#filter_body is called for each chunk. This should return + * zero for responses with no body. + * + * This will return nil on "Transfer-Encoding: chunked" responses as + * well as HTTP/1.0 responses where Content-Length is not set + */ +static VALUE body_bytes_left(VALUE self) +{ + struct http_parser *hp = data_get(self); + + if (HP_FL_TEST(hp, CHUNKED)) + return Qnil; + if (hp->len.content >= 0) + return OFFT2NUM(hp->len.content); + + return Qnil; +} + +static VALUE chunked(VALUE self) +{ + struct http_parser *hp = data_get(self); + + return HP_FL_TEST(hp, CHUNKED) ? 
Qtrue : Qfalse; +} + +/** + * Document-method: headers + * call-seq: + * parser.headers(hdr, data) => hdr or nil + * + * Takes a Hash and a String of data, parses the String of data filling + * in the Hash returning the Hash if parsing is finished, nil otherwise + * When returning the hdr Hash, it may modify data to point to where + * body processing should begin. + * + * Raises ParserError if there are parsing errors. + */ +static VALUE headers(VALUE self, VALUE hdr, VALUE data) +{ + struct http_parser *hp = data_get(self); + + rb_str_update(data); + + http_parser_execute(hp, hdr, RSTRING_PTR(data), RSTRING_LEN(data)); + VALIDATE_MAX_LENGTH(hp->offset, HEADER); + + if (hp->cs == http_parser_first_final || + hp->cs == http_parser_en_ChunkedBody) { + advance_str(data, hp->offset + 1); + hp->offset = 0; + if (HP_FL_TEST(hp, INTRAILER)) + return hdr; + else + return rb_ary_new3(2, hp->status, hdr); + } + + if (hp->cs == http_parser_error) + rb_raise(eParserError, "Invalid HTTP format, parsing fails."); + + return Qnil; +} + +static int chunked_eof(struct http_parser *hp) +{ + return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER)); +} + +/** + * call-seq: + * parser.body_eof? => true or false + * + * Detects if we're done filtering the body or not. This can be used + * to detect when to stop calling Parser#filter_body. + */ +static VALUE body_eof(VALUE self) +{ + struct http_parser *hp = data_get(self); + + if (HP_FL_TEST(hp, CHUNKED)) + return chunked_eof(hp) ? Qtrue : Qfalse; + + if (! HP_FL_TEST(hp, HASBODY)) + return Qtrue; + + return hp->len.content == 0 ? Qtrue : Qfalse; +} + +/** + * call-seq: + * parser.keepalive? => true or false + * + * This should be used to detect if a request can really handle + * keepalives and pipelining. Currently, the rules are: + * + * 1. MUST be HTTP/1.1 +or+ HTTP/1.0 with "Connection: keep-alive" + * 2. 
MUST NOT have "Connection: close" set + */ +static VALUE keepalive(VALUE self) +{ + struct http_parser *hp = data_get(self); + + if (HP_FL_ALL(hp, KEEPALIVE)) { + if ( HP_FL_TEST(hp, HASBODY) ) { + if (HP_FL_TEST(hp, CHUNKED) || (hp->len.content >= 0)) + return Qtrue; + return Qfalse; + } else { /* 100 Continue */ + return Qtrue; + } + } + return Qfalse; +} + +/** + * call-seq: + * parser.filter_body(buf, data) => nil/data + * + * Takes a String of +data+, will modify data if dechunking is done. + * Returns +nil+ if there is more data left to process. Returns + * +data+ if body processing is complete. When returning +data+, + * it may modify +data+ so the start of the string points to where + * the body ended so that trailer processing can begin. + * + * Raises ParserError if there are dechunking errors. + * Basically this is a glorified memcpy(3) that copies +data+ + * into +buf+ while filtering it through the dechunker. + */ +static VALUE filter_body(VALUE self, VALUE buf, VALUE data) +{ + struct http_parser *hp = data_get(self); + char *dptr; + long dlen; + + rb_str_update(data); + dptr = RSTRING_PTR(data); + dlen = RSTRING_LEN(data); + + StringValue(buf); + rb_str_resize(buf, dlen); /* we can never copy more than dlen bytes */ + OBJ_TAINT(buf); /* keep weirdo $SAFE users happy */ + + if (HP_FL_TEST(hp, CHUNKED)) { + if (!chunked_eof(hp)) { + hp->s.dest_offset = 0; + http_parser_execute(hp, buf, dptr, dlen); + if (hp->cs == http_parser_error) + rb_raise(eParserError, "Invalid HTTP format, parsing fails."); + + assert(hp->s.dest_offset <= hp->offset && + "destination buffer overflow"); + advance_str(data, hp->offset); + rb_str_set_len(buf, hp->s.dest_offset); + + if (RSTRING_LEN(buf) == 0 && chunked_eof(hp)) { + assert(hp->len.chunk == 0 && "chunk at EOF but more to parse"); + } else { + data = Qnil; + } + } + } else { + /* no need to enter the Ragel machine for unchunked transfers */ + assert(hp->len.content >= 0 && "negative Content-Length"); + if 
(hp->len.content > 0) { + long nr = MIN(dlen, hp->len.content); + + memcpy(RSTRING_PTR(buf), dptr, nr); + hp->len.content -= nr; + if (hp->len.content == 0) + hp->cs = http_parser_first_final; + advance_str(data, nr); + rb_str_set_len(buf, nr); + data = Qnil; + } + } + hp->offset = 0; /* for trailer parsing */ + return data; +} + +void Init_kcar_ext(void) +{ + VALUE mKcar = rb_define_module("Kcar"); + VALUE cParser = rb_define_class_under(mKcar, "Parser", rb_cObject); + + eParserError = rb_define_class_under(mKcar, "ParserError", rb_eIOError); + + rb_define_alloc_func(cParser, alloc); + rb_define_method(cParser, "initialize", initialize, 0); + rb_define_method(cParser, "reset", initialize, 0); + rb_define_method(cParser, "headers", headers, 2); + rb_define_method(cParser, "trailers", headers, 2); + rb_define_method(cParser, "filter_body", filter_body, 2); + rb_define_method(cParser, "body_bytes_left", body_bytes_left, 0); + rb_define_method(cParser, "body_eof?", body_eof, 0); + rb_define_method(cParser, "keepalive?", keepalive, 0); + rb_define_method(cParser, "chunked?", chunked, 0); + + /* + * The maximum size a single chunk when using chunked transfer encoding. + * This is only a theoretical maximum used to detect errors in clients, + * it is highly unlikely to encounter clients that send more than + * several kilobytes at once. + */ + rb_define_const(cParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX)); + + /* + * The maximum size of the body as specified by Content-Length. + * This is only a theoretical maximum, the actual limit is subject + * to the limits of the file system used for +Dir.tmpdir+. 
+ */ + rb_define_const(cParser, "LENGTH_MAX", OFFT2NUM(UH_OFF_T_MAX)); + id_sq = rb_intern("[]"); + id_sq_set = rb_intern("[]="); +} diff --git a/ext/kcar/kcar_http_common.rl b/ext/kcar/kcar_http_common.rl new file mode 100644 index 0000000..54206ed --- /dev/null +++ b/ext/kcar/kcar_http_common.rl @@ -0,0 +1,56 @@ +%%{ + + machine kcar_http_common; + +#### HTTP PROTOCOL GRAMMAR +# line endings, some sites (notably http://news.ycombinator.com as of +# April 2010) do not send '\r', only '\n', so we need to accomodate them. + CRLF = ("\r")? "\n"; + +# character types + CTL = (cntrl | 127); + safe = ("$" | "-" | "_" | "."); + extra = ("!" | "*" | "'" | "(" | ")" | ","); + reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+"); + sorta_safe = ("\"" | "<" | ">"); + unsafe = (CTL | " " | "#" | "%" | sorta_safe); + national = any -- (alpha | digit | reserved | extra | safe | unsafe); + unreserved = (alpha | digit | safe | extra | national); + escape = ("%" xdigit xdigit); + uchar = (unreserved | escape | sorta_safe); + pchar = (uchar | ":" | "@" | "&" | "=" | "+"); + tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t"); + lws = (" " | "\t"); + +# elements + token = (ascii -- (CTL | tspecials)); + phrase = (any -- CRLF)+; + Status_Phrase = (digit+ (" "+ phrase)?) >mark %status_phrase ; + http_number = (digit+ "." digit+) ; + HTTP_Version = ("HTTP/" http_number) >mark %http_version ; + Status_Line = HTTP_Version " "+ Status_Phrase :> CRLF; + + field_name = ( token -- ":" )+ >start_field %write_field; + + field_value = any* >start_value %write_value; + + value_cont = lws+ any* >start_value %write_cont_value; + + message_header = ((field_name ":" " "* field_value)|value_cont) :> CRLF; + chunk_ext_val = token*; + chunk_ext_name = token*; + chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? 
)*; + last_chunk = "0"+ chunk_extension CRLF; + chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size; + chunk_end = CRLF; + chunk_body = any >skip_chunk_data; + chunk_begin = chunk_size chunk_extension CRLF; + chunk = chunk_begin chunk_body chunk_end; + ChunkedBody := chunk* last_chunk @end_chunked_body; + Trailers := (message_header)* CRLF @end_trailers; + + FullResponse = Status_Line (message_header)* CRLF @header_done; + +main := FullResponse; + +}%% diff --git a/lib/kcar.rb b/lib/kcar.rb new file mode 100644 index 0000000..2a4518d --- /dev/null +++ b/lib/kcar.rb @@ -0,0 +1,10 @@ +# -*- encoding: binary -*- + +require 'kcar/session' +require 'kcar/parser' +module Kcar + + VERSION = '0.1.0' + +end +require 'kcar_ext' diff --git a/lib/kcar/parser.rb b/lib/kcar/parser.rb new file mode 100644 index 0000000..dd094e6 --- /dev/null +++ b/lib/kcar/parser.rb @@ -0,0 +1,39 @@ +module Kcar +class Parser + + # extract trailers that were set in the header object as + # an array of arrays + # + # parser.extract_trailers(hdr) => + # [ [ 'Content-MD5', '1B2M2Y8AsgTpgAmY7PhCfg==' ] ] + def extract_trailers(hdr) + trailers = [] + + if hdr.kind_of?(Array) + t = {} + + # the HTTP spec (and our parser) guarantees trailers will appear + # after the "Trailer" header is inserted in the array + hdr.each do |key, value| + if key =~ %r{\ATrailer\z}i + value.split(/\s*,+\s*/).each do |key| + t[key] = true + end + elsif false == t.empty? && key =~ /\A(#{t.keys.join('|')})\z/i + k = $1 + trailers.concat(value.split(/\n+/).map! { |v| [ k, v ] }) + end + end + elsif t = hdr['Trailer'] + t.split(/\s*[,\n]+\s*/).each do |k| + value = hdr[k] or next + trailers.concat(value.split(/\n+/).map! 
{ |v| [ k, v ] }) + end + end + + trailers + end + +end + +end diff --git a/lib/kcar/session.rb b/lib/kcar/session.rb new file mode 100644 index 0000000..bb67dae --- /dev/null +++ b/lib/kcar/session.rb @@ -0,0 +1,130 @@ +# -*- encoding: binary -*- + +module Kcar +class Session < Struct.new(:sock, :unchunk, :hdr, :buf, :parser) + + LAST_CHUNK = "0\r\n" + CRLF = "\r\n" + READ_SIZE = 0x4000 + + def initialize(sock, unchunk = true, hdr = {}) + super(sock, unchunk, hdr, "", Parser.new) + end + + def read + buf << sock.readpartial(READ_SIZE) if buf.empty? + while (response = parser.headers(hdr, buf)).nil? + buf << sock.readpartial(READ_SIZE) + end + response << self + end + + def reset + parser.reset + hdr.clear + end + + # this method allows Kcar::Session to be used as a Rack response body + def each(&block) + return if parser.body_eof? + if unchunk + parser.chunked? ? each_unchunk(&block) : each_identity(&block) + else + if parser.keepalive? + parser.chunked? ? each_rechunk(&block) : each_identity(&block) + else + each_until_eof(&block) # fastest path + end + end + rescue EOFError + end + + def each_rechunk(&block) + # We have to filter_body to keep track of parser state + # (which sucks). Also, as a benefit to clients we'll rechunk + # to increase the likelyhood of network transfers being on + # chunk boundaries so we're less likely to trigger bugs in + # other people's code :) + dst = "" + begin + parser.filter_body(dst, buf) and break + size = dst.size + if size > 0 + yield("#{size.to_s(16)}\r\n") + yield(dst << CRLF) + end + break if parser.body_eof? + end while buf << sock.readpartial(READ_SIZE, dst) + + yield LAST_CHUNK + + while parser.trailers(hdr, buf).nil? + buf << sock.readpartial(READ_SIZE, dst) + end + + # since Rack does not provide a way to explicitly send trailers + # in the response, we'll just yield a stringified version to our + # server and pretend it's part of the body. + if trailers = parser.extract_trailers(hdr) + yield(trailers.map! 
{ |k,v| "#{k}: #{v}\r\n" }.join("") << "\r\n") + end + end + + # this is called by our Rack server + def close + parser.keepalive? ? reset : sock.close + end + + def each_until_eof(&block) + yield buf unless buf.empty? + # easy, just read and write everything until EOFError + dst = sock.readpartial(READ_SIZE) + begin + yield dst + end while sock.readpartial(READ_SIZE, dst) + end + + def each_identity(&block) + len = parser.body_bytes_left + if len.nil? + each_until_eof(&block) + else + dst = buf + if dst.size > 0 + # in case of keepalive we need to read the second response, + # so modify buf so that the second response is at the front + # of the buffer + if dst.size >= len + tmp = dst[len, dst.size] + dst = dst[0, len] + buf.replace(tmp) + end + + len -= dst.size + yield dst + end + while len > 0 + len -= sock.readpartial(len > READ_SIZE ? CHUNK_SIZE : len, dst).size + yield dst + end + end + end + + def each_unchunk(&block) + dst = "" + begin + parser.filter_body(dst, buf) and break + yield dst if dst.size > 0 + parser.body_eof? and break + end while buf << sock.readpartial(READ_SIZE, dst) + + # we can't pass trailers to the client since we unchunk + # the response, so just read them off the socket and + # stash them in hdr just in case... + while parser.headers(hdr, buf).nil? 
+ buf << sock.readpartial(READ_SIZE, dst) + end + end + +end # class Session +end # module Kcar diff --git a/test/test_parser.rb b/test/test_parser.rb new file mode 100644 index 0000000..40c6e01 --- /dev/null +++ b/test/test_parser.rb @@ -0,0 +1,257 @@ +# -*- encoding: binary -*- +require 'test/unit' +require 'pp' +require 'kcar' +require 'rack' + +class TestParser < Test::Unit::TestCase + + def setup + @hp = Kcar::Parser.new + end + + def test_reset + assert_nothing_raised { @hp.reset } + end + + def test_parser_status_eof + buf = "HTTP/1.0 200 OK\r\n\r\n" + hdr = [] + hdr_object_id = hdr.object_id + response = @hp.headers(hdr, buf) + assert_equal(["200 OK", hdr], response) + assert hdr.empty? + assert ! @hp.keepalive? + assert_equal hdr_object_id, hdr.object_id + assert_equal "", buf + end + + def test_parser_status_eof_one_one + buf = "HTTP/1.1 200 OK\r\n\r\n" + hdr = [] + response = @hp.headers(hdr, buf) + assert_equal(["200 OK", hdr], response) + assert hdr.empty? + assert ! @hp.keepalive? # no content-length + end + + def test_parser_status_with_content_length + buf = "HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n" + hdr = [] + response = @hp.headers(hdr, buf) + assert_equal(["200 OK", hdr], response) + assert_equal([%w(Content-Length 0)], hdr) + assert @hp.keepalive? 
+ end + + def test_parser_content_length + buf = "HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\n" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Content-Length 5) ], rv[1]) + assert_equal 2, rv.size + assert_equal "", buf + assert_equal 5, @hp.body_bytes_left + end + + def test_parser_content_length_with_body + buf = "HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nabcde" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Content-Length 5) ], rv[1]) + assert_equal "abcde", buf + assert_equal 5, @hp.body_bytes_left + end + + def test_bad_crlf + buf = "HTTP/1.1 200 OK\nContent-Length: 5\n\r\nabcde" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Content-Length 5) ], rv[1]) + assert_equal "abcde", buf + assert_equal 5, @hp.body_bytes_left + assert ! @hp.chunked? + end + + def test_chunky_bad_crlf + buf = "HTTP/1.1 200 OK\n" \ + "Transfer-Encoding: chunked\n\n" \ + "6\nabcdef\n0\n\n" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Transfer-Encoding chunked) ], rv[1]) + assert_equal "6\nabcdef\n0\n\n", buf + assert_nil @hp.body_bytes_left + assert @hp.chunked? + assert_nil @hp.filter_body(tmp = "", buf) + assert_equal "abcdef", tmp + assert @hp.keepalive? + end + + def test_chunky + buf = "HTTP/1.1 200 OK\r\n" \ + "Transfer-Encoding: chunked\r\n\r\n" \ + "6\r\nabcdef\r\n0\r\n\r\n" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Transfer-Encoding chunked) ], rv[1]) + assert_equal "6\r\nabcdef\r\n0\r\n\r\n", buf + assert_nil @hp.body_bytes_left + assert @hp.chunked? + assert_nil @hp.filter_body(tmp = "", buf) + assert_equal "abcdef", tmp + assert @hp.body_eof? + assert @hp.keepalive? 
+ end + + def test_chunky_two_step + buf = "HTTP/1.1 200 OK\r\n" \ + "Transfer-Encoding: chunked\r\n\r\n" \ + "6\r\nabcd" + buf2 = "ef\r\n0\r\n\r\n" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Transfer-Encoding chunked) ], rv[1]) + assert_equal "6\r\nabcd", buf + assert_nil @hp.body_bytes_left + assert @hp.chunked? + assert_nil @hp.filter_body(tmp = "", buf) + assert_equal "abcd", tmp + assert_equal "", buf + assert ! @hp.body_eof? + assert_nil @hp.filter_body(tmp = "", buf2) + assert_equal "ef", tmp + assert @hp.body_eof? + assert_equal({}, @hp.trailers(tmp = {}, buf2)) + assert @hp.keepalive? + assert_nothing_raised { @hp.reset } + end + + def test_trailers_ary + buf = "HTTP/1.1 200 OK\r\n" \ + "Trailer: Foo\r\n" \ + "Transfer-Encoding: chunked\r\n\r\n" \ + "6\r\nabcdef\r\n0\r\nFoo: bar\r\n\r\n" + rv = @hp.headers([], buf) + assert_equal "200 OK", rv[0] + assert_equal([ %w(Trailer Foo), %w(Transfer-Encoding chunked) ], rv[1]) + assert_equal "6\r\nabcdef\r\n0\r\nFoo: bar\r\n\r\n", buf + assert_nil @hp.body_bytes_left + assert @hp.chunked? + assert_nil @hp.filter_body(tmp = "", buf) + assert_equal "abcdef", tmp + assert @hp.body_eof? + expect = [ %w(Trailer Foo), + %w(Transfer-Encoding chunked), + %w(Foo bar) ] + assert_equal(expect, @hp.trailers(rv[1], buf)) + assert @hp.keepalive? 
+ assert_nothing_raised { @hp.reset } + end + + def test_extract_trailers_ary + tmp = [ %w(Trailer Foo), %w(Transfer-Encoding chunked), %w(Foo bar) ] + assert_equal [ %w(Foo bar) ], @hp.extract_trailers(tmp) + end + + def test_extract_trailers_hash + tmp = { + 'Trailer' => 'Foo', + 'Transfer-Encoding' => 'chunked', + 'Foo' => 'bar' + } + assert_equal [ %w(Foo bar) ], @hp.extract_trailers(tmp) + end + + def test_extract_trailers_header_hash + tmp = Rack::Utils::HeaderHash.new( + 'Trailer' => 'foo', + 'Transfer-Encoding' => 'chunked', + 'Foo' => 'bar' + ) + assert_equal [ %w(foo bar) ], @hp.extract_trailers(tmp) + end + + def test_repeated_headers_rack_hash + hdr = Rack::Utils::HeaderHash.new + buf = "HTTP/1.1 200 OK\r\nSet-Cookie: a=b\r\n" + assert_nil @hp.headers(hdr, buf) + assert_equal({ 'Set-Cookie' => 'a=b' }, hdr.to_hash) + assert_nil @hp.headers(hdr, buf << "set-cookie: c=d\r\n") + assert_equal([ "200 OK", hdr ], @hp.headers(hdr, buf << "\r\n")) + assert_equal "", buf + assert_equal({ 'Set-Cookie' => "a=b\nc=d" }, hdr.to_hash) + end + + def test_repeated_headers_plain_hash + hdr = {} + buf = "HTTP/1.1 200 OK\r\nSet-Cookie: a=b\r\n" + assert_nil @hp.headers(hdr, buf) + assert_equal({ 'Set-Cookie' => 'a=b' }, hdr) + assert_nil @hp.headers(hdr, buf << "set-cookie: c=d\r\n") + assert_equal([ "200 OK", hdr ], @hp.headers(hdr, buf << "\r\n")) + assert_equal "", buf + assert_equal({ 'Set-Cookie' => 'a=b', 'set-cookie' => 'c=d' }, hdr) + end + + def test_repeated_headers_array + hdr = [] + buf = "HTTP/1.1 200 OK\r\nSet-Cookie: a=b\r\n" + assert_nil @hp.headers(hdr, buf) + assert_equal([ %w(Set-Cookie a=b) ] , hdr) + assert_nil @hp.headers(hdr, buf << "set-cookie: c=d\r\n") + assert_equal([ "200 OK", hdr ], @hp.headers(hdr, buf << "\r\n")) + assert_equal "", buf + assert_equal([ %w(Set-Cookie a=b), %w(set-cookie c=d) ], hdr) + end + + def test_long_line_headers_array + hdr = [] + buf = "HTTP/1.1 200 OK\r\na: b\r\n" + assert_nil @hp.headers(hdr, buf) + assert_equal([ 
%w(a b) ] , hdr) + assert_nil @hp.headers(hdr, buf << " c\r\n") + assert_equal([ [ 'a', 'b c'] ], hdr) + assert_nil @hp.headers(hdr, buf << " d\n") + assert_equal([ [ 'a', 'b c d'] ], hdr) + assert_equal([ "200 OK", hdr ], @hp.headers(hdr, buf << "\r\n")) + assert_equal([ [ 'a', 'b c d'] ], hdr) + end + + def test_long_line_headers_plain_hash + hdr = {} + buf = "HTTP/1.1 200 OK\r\na: b\r\n" + assert_nil @hp.headers(hdr, buf) + assert_equal({ 'a' => 'b' }, hdr) + assert_nil @hp.headers(hdr, buf << " c\r\n") + assert_equal({ 'a' => 'b c' }, hdr) + assert_nil @hp.headers(hdr, buf << " d\r\n") + assert_equal({ 'a' => 'b c d' }, hdr) + assert_equal([ "200 OK", hdr ], @hp.headers(hdr, buf << "\r\n")) + assert_equal({ 'a' => 'b c d' }, hdr) + end + + def test_long_line_headers_rack_hash + hdr = Rack::Utils::HeaderHash.new + buf = "HTTP/1.1 200 OK\r\na: b\r\n" + assert_nil @hp.headers(hdr, buf) + assert_equal({ 'a' => 'b' }, hdr.to_hash) + assert_nil @hp.headers(hdr, buf << " c\r\n") + assert_equal({ 'a' => 'b c' }, hdr) + assert_nil @hp.headers(hdr, buf << " d\r\n") + assert_equal({ 'a' => 'b c d' }, hdr) + assert_nil @hp.headers(hdr, buf << "A: e\r\n") + assert_equal([ "200 OK", hdr ], @hp.headers(hdr, buf << "\r\n")) + assert_equal({ 'a' => "b c d\ne"}, hdr.to_hash) + end + + def test_content_length_invalid + assert_raises(Kcar::ParserError) do + @hp.headers([], "HTTP/1.1 200 OK\r\nContent-Length: 5a\r\n\r\n") + end + assert_raises(Kcar::ParserError) do + @hp.headers([], "HTTP/1.1 200 OK\r\nContent-Length: -1\r\n\r\n") + end + end + +end diff --git a/test/test_session.rb b/test/test_session.rb new file mode 100644 index 0000000..aef7d70 --- /dev/null +++ b/test/test_session.rb @@ -0,0 +1,342 @@ +# -*- encoding: binary -*- +require 'test/unit' +require 'pp' +require 'socket' +require 'kcar' + +class TestSession < Test::Unit::TestCase + def setup + @s, @c = UNIXSocket.pair + end + + def test_http_one_zero + pid = fork do + @s << "HTTP/1.0 200 OK\r\n\r\nHI" + @s.close + 
end + @s.close + @session = Kcar::Session.new(@c) + status, headers, body = @session.read + assert_equal status, "200 OK" + assert headers.empty? + tmp = [] + assert ! body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal [ "HI" ], tmp + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert @c.closed? + end + + def test_http_keepalive + pid = fork do + @s << "HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nHI" + end + @session = Kcar::Session.new(@c) + status, headers, body = @session.read + assert_equal status, "200 OK" + assert_equal({"Content-Length" => "2" }, headers) + tmp = [] + assert body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal [ "HI" ], tmp + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert ! @c.closed? + + pid = fork do + @s << "HTTP/1.1 200 OK\r\nContent-Length: 3\r\n" + @s << "Connection: close\r\n\r\nBYE" + end + status, headers, body = @session.read + assert_equal status, "200 OK" + assert_equal({ "Content-Length" => "3", "Connection" => "close" }, headers) + tmp = [] + assert ! body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal [ "BYE" ], tmp + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert @c.closed? + end + + def test_http_keepalive_chunky + @session = Kcar::Session.new(@c) + pid = fork do + @s << "HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n" + @s << "5\r\nabcde\r\n" + @s << "0\r\n\r\nHTTP/1.1 " # partial response + end + status, headers, body = @session.read + assert_equal status, "200 OK" + assert_equal({"Transfer-Encoding" => "chunked" }, headers) + tmp = [] + assert body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal [ "abcde" ], tmp + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert ! @c.closed? 
+ assert_equal "HTTP/1.1 ", @session.buf + + pid = fork do + @s << "200 OK\r\nContent-Length: 3\r\n" + @s << "Connection: close\r\n\r\nBYE" + end + status, headers, body = @session.read + assert_equal status, "200 OK" + assert_equal({ "Content-Length" => "3", "Connection" => "close" }, headers) + tmp = [] + assert ! body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal [ "BYE" ], tmp + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert @c.closed? + end + + def test_http_no_body_keepalive + pid = fork { @s << "HTTP/1.1 100 Continue\r\n\r\n" } + @session = Kcar::Session.new(@c) + status, headers, body = @session.read + assert_equal status, "100 Continue" + assert_equal({}, headers) + tmp = [] + assert body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert tmp.empty? + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert ! @c.closed? + + pid = fork { @s << "HTTP/1.1 200 OK\r\nConnection: close\r\n\r\nhello" } + @s.close + status, headers, body = @session.read + assert_equal status, "200 OK" + assert_equal({'Connection' => 'close'}, headers) + tmp = [] + assert ! body.parser.keepalive? + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal(%w(hello), tmp) + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert @c.closed? + end + + def test_trailers + pid = fork do + @s << "HTTP/1.1 200 OK\r\nTrailer: Foo\r\n" + @s << "Transfer-Encoding: chunked\r\n\r\n" + end + @session = Kcar::Session.new(@c) + status, headers, body = @session.read + assert_equal status, "200 OK" + expect = { + "Trailer" => "Foo", + "Transfer-Encoding" => "chunked", + } + assert_equal(expect, headers) + assert body.parser.keepalive? + _, status = Process.waitpid2(pid) + assert status.success? 
+ tmp = [] + pid = fork { @s << "5\r\nhello\r\n0\r\nFoo: bar\r\n\r\n" } + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal %w(hello), tmp + expect['Foo'] = 'bar' + assert_equal(expect, headers) + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert ! @c.closed? + end + + def test_trailers_pass_through + pid = fork do + @s << "HTTP/1.1 200 OK\r\nTrailer: Foo\r\n" + @s << "Transfer-Encoding: chunked\r\n\r\n" + end + @session = Kcar::Session.new(@c, false) + status, headers, body = @session.read + assert_equal status, "200 OK" + expect = { + "Trailer" => "Foo", + "Transfer-Encoding" => "chunked", + } + assert_equal(expect, headers) + assert body.parser.keepalive? + _, status = Process.waitpid2(pid) + assert status.success? + tmp = [] + pid = fork { @s << "5\r\nhello\r\n0\r\nFoo: bar\r\n\r\n" } + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal ["5\r\n", "hello\r\n", "0\r\n", "Foo: bar\r\n\r\n"], tmp + expect['Foo'] = 'bar' + assert_equal(expect, headers) + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert ! @c.closed? + end + + def test_pass_through_one_oh + pid = fork do + @s << "HTTP/1.0 200 OK\r\n" + @s << "Content-Type: text/plain\r\n\r\n" + end + @session = Kcar::Session.new(@c, false) + status, headers, body = @session.read + assert_equal status, "200 OK" + expect = { "Content-Type" => "text/plain", } + assert_equal(expect, headers) + assert ! body.parser.keepalive? + _, status = Process.waitpid2(pid) + assert status.success? + tmp = [] + pid = fork { @s << "hello" } + @s.close + assert_nothing_raised { body.each { |chunk| tmp << chunk.dup } } + assert_equal %w(hello), tmp + assert_equal(expect, headers) + _, status = Process.waitpid2(pid) + assert status.success? + body.close + assert @c.closed? 
+ end + + def test_trailers_burpy + pid = fork do + @s << "HTTP/1.1 200 OK\r\nTrailer: Foo\r\n" + @s << "Transfer-Encoding: chunked\r\n\r\n" + end + @session = Kcar::Session.new(@c) + status, headers, body = @session.read + assert_equal status, "200 OK" + expect = { + "Trailer" => "Foo", + "Transfer-Encoding" => "chunked", + } + assert_equal(expect, headers) + assert body.parser.keepalive? + _, status = Process.waitpid2(pid) + assert status.success? + tmp = [] + pid = fork { @s << "5\r\nhello\r\n0\r\nFoo: bar" } + rd, wr = IO.pipe + crlf_pid = fork do + wr.close + @s << rd.sysread(4) + end + rd.close + assert_nothing_raised do + first = true + body.each do |chunk| + tmp << chunk.dup + if first + first = false + wr.syswrite "\r\n\r\n" + end + end + end + assert_equal %w(hello), tmp + _, status = Process.waitpid2(pid) + assert status.success? + _, status = Process.waitpid2(crlf_pid) + assert status.success? + expect['Foo'] = 'bar' + assert_equal(expect, headers) + body.close + assert ! @c.closed? + end + + def test_pass_through_trailers_burpy + pid = fork do + @s << "HTTP/1.1 200 OK\r\nTrailer: Foo\r\n" + @s << "Transfer-Encoding: chunked\r\n\r\n" + end + @session = Kcar::Session.new(@c, false) + status, headers, body = @session.read + assert_equal status, "200 OK" + expect = { + "Trailer" => "Foo", + "Transfer-Encoding" => "chunked", + } + assert_equal(expect, headers) + assert body.parser.keepalive? + _, status = Process.waitpid2(pid) + assert status.success? + tmp = [] + pid = fork { @s << "5\r\nhello\r\n0\r\nFoo: bar" } + rd, wr = IO.pipe + crlf_pid = fork do + wr.close + @s << rd.sysread(4) + end + rd.close + assert_nothing_raised do + first = true + body.each do |chunk| + tmp << chunk.dup + if first + first = false + wr.syswrite "\r\n\r\n" + end + end + end + assert_equal ["5\r\n", "hello\r\n", "0\r\n", "Foo: bar\r\n\r\n"], tmp + _, status = Process.waitpid2(pid) + assert status.success? + _, status = Process.waitpid2(crlf_pid) + assert status.success? 
+ expect['Foo'] = 'bar' + assert_equal(expect, headers) + body.close + assert ! @c.closed? + end + + def test_identity_burpy + pid = fork { @s << "HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\n" } + @session = Kcar::Session.new(@c) + status, headers, body = @session.read + assert_equal status, "200 OK" + expect = { "Content-Length" => '5' } + assert_equal(expect, headers) + assert body.parser.keepalive? + _, status = Process.waitpid2(pid) + assert status.success? + tmp = [] + pid = fork { @s << "h" } + rd, wr = IO.pipe + crlf_pid = fork do + wr.close + @s << rd.sysread(4) + end + rd.close + assert_nothing_raised do + first = true + body.each do |chunk| + tmp << chunk.dup + if first + first = false + wr.syswrite "ello" + end + end + end + assert_equal %w(h ello), tmp + _, status = Process.waitpid2(pid) + assert status.success? + _, status = Process.waitpid2(crlf_pid) + assert status.success? + assert_equal(expect, headers) + body.close + assert ! @c.closed? + end + +end |