From 0af6e2d0c8fbb606db58b213ff4d0d28a1e38f4e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 22 Oct 2021 09:58:41 +0000 Subject: ext: switch filter_body to picohttpparser This should open the door for us to process chunked data faster by modifying the buffer in-place rather than copying to a destination buffer. --- ext/unicorn_http/picohttpparser.c.h | 670 ++++++++++++++++++++++++++++++++ ext/unicorn_http/picohttpparser.h | 92 +++++ ext/unicorn_http/unicorn_http.rl | 138 +++---- ext/unicorn_http/unicorn_http_common.rl | 10 - test/unit/test_http_parser.rb | 2 +- test/unit/test_http_parser_ng.rb | 13 +- 6 files changed, 828 insertions(+), 97 deletions(-) create mode 100644 ext/unicorn_http/picohttpparser.c.h create mode 100644 ext/unicorn_http/picohttpparser.h diff --git a/ext/unicorn_http/picohttpparser.c.h b/ext/unicorn_http/picohttpparser.c.h new file mode 100644 index 0000000..f4e295f --- /dev/null +++ b/ext/unicorn_http/picohttpparser.c.h @@ -0,0 +1,670 @@ +/* + * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + * Shigeo Mitsunari + * + * The software is licensed under either the MIT License (below) or the Perl + * license. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#ifdef __SSE4_2__ +#ifdef _MSC_VER +#include +#else +#include +#endif +#endif +#include "picohttpparser.h" + +#if __GNUC__ >= 3 +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + +#ifdef _MSC_VER +#define ALIGNED(n) _declspec(align(n)) +#else +#define ALIGNED(n) __attribute__((aligned(n))) +#endif + +#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) + +#define CHECK_EOF() \ + if (buf == buf_end) { \ + *ret = -2; \ + return NULL; \ + } + +#define EXPECT_CHAR_NO_CHECK(ch) \ + if (*buf++ != ch) { \ + *ret = -1; \ + return NULL; \ + } + +#define EXPECT_CHAR(ch) \ + CHECK_EOF(); \ + EXPECT_CHAR_NO_CHECK(ch); + +#define ADVANCE_TOKEN(tok, toklen) \ + do { \ + const char *tok_start = buf; \ + static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \ + int found2; \ + buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \ + if (!found2) { \ + CHECK_EOF(); \ + } \ + while (1) { \ + if (*buf == ' ') { \ + break; \ + } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ + if ((unsigned char)*buf < '\040' || *buf == '\177') { \ + *ret = -1; \ + return NULL; \ + } \ + } \ + ++buf; \ + CHECK_EOF(); \ + } \ + tok = tok_start; \ + toklen = buf - tok_start; \ + } while (0) + +static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" + "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" + "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + +static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found) +{ + *found = 0; +#if __SSE4_2__ + if (likely(buf_end - buf >= 16)) { + __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); + + size_t left = (buf_end - buf) & ~15; + do { + __m128i b16 = _mm_loadu_si128((const __m128i *)buf); + int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); + if (unlikely(r != 16)) { + buf += r; + *found = 1; + break; + } + buf += 16; + left -= 16; + } while (likely(left != 0)); + } +#else + /* suppress unused parameter warning */ + (void)buf_end; + (void)ranges; + (void)ranges_size; +#endif + return buf; +} + +static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) +{ + const char *token_start = buf; + +#ifdef __SSE4_2__ + static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */ + "\012\037" /* allow SP and up to but not including DEL */ + "\177\177"; /* allow chars w. 
MSB set */ + int found; + buf = findchar_fast(buf, buf_end, ranges1, 6, &found); + if (found) + goto FOUND_CTL; +#else + /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ + while (likely(buf_end - buf >= 8)) { +#define DOIT() \ + do { \ + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ + goto NonPrintable; \ + ++buf; \ + } while (0) + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); +#undef DOIT + continue; + NonPrintable: + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + ++buf; + } +#endif + for (;; ++buf) { + CHECK_EOF(); + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + } + } +FOUND_CTL: + if (likely(*buf == '\015')) { + ++buf; + EXPECT_CHAR('\012'); + *token_len = buf - 2 - token_start; + } else if (*buf == '\012') { + *token_len = buf - token_start; + ++buf; + } else { + *ret = -1; + return NULL; + } + *token = token_start; + + return buf; +} + +static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) +{ + int ret_cnt = 0; + buf = last_len < 3 ? buf : buf + last_len - 3; + + while (1) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + CHECK_EOF(); + EXPECT_CHAR('\012'); + ++ret_cnt; + } else if (*buf == '\012') { + ++buf; + ++ret_cnt; + } else { + ++buf; + ret_cnt = 0; + } + if (ret_cnt == 2) { + return buf; + } + } + + *ret = -2; + return NULL; +} + +#define PARSE_INT(valp_, mul_) \ + if (*buf < '0' || '9' < *buf) { \ + buf++; \ + *ret = -1; \ + return NULL; \ + } \ + *(valp_) = (mul_) * (*buf++ - '0'); + +#define PARSE_INT_3(valp_) \ + do { \ + int res_ = 0; \ + PARSE_INT(&res_, 100) \ + *valp_ = res_; \ + PARSE_INT(&res_, 10) \ + *valp_ += res_; \ + PARSE_INT(&res_, 1) \ + *valp_ += res_; \ + } while (0) + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char, + int *ret) +{ + /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128 + * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */ + static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */ + "\"\"" /* 0x22 */ + "()" /* 0x28,0x29 */ + ",," /* 0x2c */ + "//" /* 0x2f */ + ":@" /* 0x3a-0x40 */ + "[]" /* 0x5b-0x5d */ + "{\xff"; /* 0x7b-0xff */ + const char *buf_start = buf; + int found; + buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found); + if (!found) { + CHECK_EOF(); + } + while (1) { + if (*buf == next_char) { + break; + } else if (!token_char_map[(unsigned char)*buf]) { + *ret = -1; + return NULL; + } + ++buf; + CHECK_EOF(); + } + *token = buf_start; + *token_len = buf - buf_start; + return buf; +} + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) +{ + /* we want at least [HTTP/1.] 
to try to parse */ + if (buf_end - buf < 9) { + *ret = -2; + return NULL; + } + EXPECT_CHAR_NO_CHECK('H'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('P'); + EXPECT_CHAR_NO_CHECK('/'); + EXPECT_CHAR_NO_CHECK('1'); + EXPECT_CHAR_NO_CHECK('.'); + PARSE_INT(minor_version, 1); + return buf; +} + +static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, + size_t max_headers, int *ret) +{ + for (;; ++*num_headers) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + break; + } else if (*buf == '\012') { + ++buf; + break; + } + if (*num_headers == max_headers) { + *ret = -1; + return NULL; + } + if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { + /* parsing name, but do not discard SP before colon, see + * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ + if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) { + return NULL; + } + if (headers[*num_headers].name_len == 0) { + *ret = -1; + return NULL; + } + ++buf; + for (;; ++buf) { + CHECK_EOF(); + if (!(*buf == ' ' || *buf == '\t')) { + break; + } + } + } else { + headers[*num_headers].name = NULL; + headers[*num_headers].name_len = 0; + } + const char *value; + size_t value_len; + if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) { + return NULL; + } + /* remove trailing SPs and HTABs */ + const char *value_end = value + value_len; + for (; value_end != value; --value_end) { + const char c = *(value_end - 1); + if (!(c == ' ' || c == '\t')) { + break; + } + } + headers[*num_headers].value = value; + headers[*num_headers].value_len = value_end - value; + } + return buf; +} + +static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, + size_t max_headers, int *ret) +{ + /* skip first empty line (some clients add CRLF after POST content) */ + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } + + /* parse request line */ + if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) { + return NULL; + } + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + ADVANCE_TOKEN(*path, *path_len); + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + if (*method_len == 0 || *path_len == 0) { + *ret = -1; + return NULL; + } + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } else { + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +static +int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf_start + len; + size_t max_headers = *num_headers; + int r = -2; + + *method = NULL; + *method_len = 0; + *path = NULL; + *path_len = 0; + *minor_version = -1; + *num_headers = 0; + + /* if last_len != 0, check if the request is complete (a fast countermeasure + againt slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; 
+ } + + if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, + &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, + size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) +{ + /* parse "HTTP/1.x" */ + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + /* skip space */ + if (*buf != ' ') { + *ret = -1; + return NULL; + } + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + /* parse status code, we want at least [:digit:][:digit:][:digit:] to try to parse */ + if (buf_end - buf < 4) { + *ret = -2; + return NULL; + } + PARSE_INT_3(status); + + /* get message including preceding space */ + if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { + return NULL; + } + if (*msg_len == 0) { + /* ok */ + } else if (**msg == ' ') { + /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP + * before running past the end of the given buffer. */ + do { + ++*msg; + --*msg_len; + } while (**msg == ' '); + } else { + /* garbage found after status code */ + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +static +int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + int r; + + *minor_version = -1; + *status = 0; + *msg = NULL; + *msg_len = 0; + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +static +int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + int r; + + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +enum { + CHUNKED_IN_CHUNK_SIZE, + CHUNKED_IN_CHUNK_EXT, + CHUNKED_IN_CHUNK_DATA, + CHUNKED_IN_CHUNK_CRLF, + CHUNKED_IN_TRAILERS_LINE_HEAD, + CHUNKED_IN_TRAILERS_LINE_MIDDLE +}; + +static int decode_hex(int ch) +{ + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } else if ('A' <= ch && ch <= 'F') { + return ch - 'A' + 0xa; + } else if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } else { + return -1; + } +} + +static +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) +{ + size_t dst = 0, src = 0, bufsz = *_bufsz; + ssize_t ret = -2; /* incomplete */ + + while (1) { + switch (decoder->_state) { + case CHUNKED_IN_CHUNK_SIZE: + for (;; ++src) { + int v; 
+ if (src == bufsz) + goto Exit; + if ((v = decode_hex(buf[src])) == -1) { + if (decoder->_hex_count == 0) { + ret = -1; + goto Exit; + } + break; + } + if (decoder->_hex_count == sizeof(size_t) * 2) { + ret = -1; + goto Exit; + } + decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; + ++decoder->_hex_count; + } + decoder->_hex_count = 0; + decoder->_state = CHUNKED_IN_CHUNK_EXT; + /* fallthru */ + case CHUNKED_IN_CHUNK_EXT: + /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + if (decoder->bytes_left_in_chunk == 0) { + if (decoder->consume_trailer) { + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + } else { + goto Complete; + } + } + decoder->_state = CHUNKED_IN_CHUNK_DATA; + /* fallthru */ + case CHUNKED_IN_CHUNK_DATA: { + size_t avail = bufsz - src; + if (avail < decoder->bytes_left_in_chunk) { + if (dst != src) + memmove(buf + dst, buf + src, avail); + src += avail; + dst += avail; + decoder->bytes_left_in_chunk -= avail; + goto Exit; + } + if (dst != src) + memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); + src += decoder->bytes_left_in_chunk; + dst += decoder->bytes_left_in_chunk; + decoder->bytes_left_in_chunk = 0; + decoder->_state = CHUNKED_IN_CHUNK_CRLF; + } + /* fallthru */ + case CHUNKED_IN_CHUNK_CRLF: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src] != '\012') { + ret = -1; + goto Exit; + } + ++src; + decoder->_state = CHUNKED_IN_CHUNK_SIZE; + break; + case CHUNKED_IN_TRAILERS_LINE_HEAD: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src++] == '\012') + goto Complete; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; + /* fallthru */ + case CHUNKED_IN_TRAILERS_LINE_MIDDLE: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + default: + assert(!"decoder is corrupt"); + } + } + +Complete: + ret = bufsz - src; +Exit: + if (dst != src) + memmove(buf + dst, buf + src, bufsz - src); + *_bufsz = dst; + return ret; +} + +static +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) +{ + return decoder->_state == CHUNKED_IN_CHUNK_DATA; +} + +#undef CHECK_EOF +#undef EXPECT_CHAR +#undef ADVANCE_TOKEN diff --git a/ext/unicorn_http/picohttpparser.h b/ext/unicorn_http/picohttpparser.h new file mode 100644 index 0000000..3f87b64 --- /dev/null +++ b/ext/unicorn_http/picohttpparser.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + * Shigeo Mitsunari + * + * The software is licensed under either the MIT License (below) or the Perl + * license. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef picohttpparser_h +#define picohttpparser_h + +#include + +#ifdef _MSC_VER +#define ssize_t intptr_t +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* contains name and value of a header (name == NULL if is a continuing line + * of a multiline header */ +struct phr_header { + const char *name; + size_t name_len; + const char *value; + size_t value_len; +}; + +/* returns number of bytes consumed if successful, -2 if request is partial, + * -1 if failed */ +static +int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len, + int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* ditto */ +static +int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* ditto */ +static +int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* should be zero-filled before start */ +struct phr_chunked_decoder { + size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ + char consume_trailer; /* if trailing headers should be consumed */ + char _hex_count; + char _state; +}; + +/* the function rewrites the buffer given as (buf, bufsz) removing the chunked- + * encoding headers. When the function returns without an error, bufsz is + * updated to the length of the decoded data available. Applications should + * repeatedly call the function while it returns -2 (incomplete) every time + * supplying newly arrived data. If the end of the chunked-encoded data is + * found, the function returns a non-negative number indicating the number of + * octets left undecoded, that starts from the offset returned by `*bufsz`. + * Returns -1 on error. 
+ */ +static +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz); + +/* returns if the chunked decoder is in middle of chunked data */ +static +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ext/unicorn_http/unicorn_http.rl b/ext/unicorn_http/unicorn_http.rl index ba23438..ea7a453 100644 --- a/ext/unicorn_http/unicorn_http.rl +++ b/ext/unicorn_http/unicorn_http.rl @@ -13,6 +13,8 @@ #include "global_variables.h" #include "c_util.h" #include "epollexclusive.h" +#include "picohttpparser.h" +#include "picohttpparser.c.h" void init_unicorn_httpdate(void); @@ -21,7 +23,6 @@ void init_unicorn_httpdate(void); #define UH_FL_INBODY 0x4 #define UH_FL_HASTRAILER 0x8 #define UH_FL_INTRAILER 0x10 -#define UH_FL_INCHUNK 0x20 #define UH_FL_REQEOF 0x40 #define UH_FL_KAVERSION 0x80 #define UH_FL_HASHEADER 0x100 @@ -52,15 +53,14 @@ struct http_parser { } start; union { unsigned int field_len; /* only used during header processing */ - unsigned int dest_offset; /* only used during body processing */ } s; VALUE buf; VALUE env; VALUE cont; /* Qfalse: unset, Qnil: ignored header, T_STRING: append */ union { - off_t content; - off_t chunk; - } len; + off_t clen; + struct phr_chunked_decoder pcd; + } bdy; }; static ID id_set_backtrace, id_is_chunked_p; @@ -250,12 +250,12 @@ static void write_value(struct http_parser *hp, } else if (f == g_http_connection) { hp_keepalive_connection(hp, v); } else if (f == g_content_length && !HP_FL_TEST(hp, CHUNKED)) { - if (hp->len.content) + if (hp->bdy.clen) parser_raise(eHttpParserError, "Content-Length already set"); - hp->len.content = parse_length(RSTRING_PTR(v), RSTRING_LEN(v)); - if (hp->len.content < 0) + hp->bdy.clen = parse_length(RSTRING_PTR(v), RSTRING_LEN(v)); + if (hp->bdy.clen < 0) parser_raise(eHttpParserError, "invalid Content-Length"); - if (hp->len.content != 0) + if (hp->bdy.clen != 0) HP_FL_SET(hp, HASBODY); hp_invalid_if_trailer(hp); } else if (f == g_http_transfer_encoding) { @@ -272,7 +272,7 @@ static void write_value(struct http_parser *hp, HP_FL_SET(hp, HASBODY); /* RFC 7230 3.3.3, 3: favor chunked if Content-Length exists */ - hp->len.content = 0; + hp->bdy.clen = 0; } else if (HP_FL_TEST(hp, CHUNKED)) { /* * RFC 7230 3.3.3, point 3 states: @@ -362,19 +362,12 @@ static void write_value(struct http_parser *hp, if (!STR_CSTR_EQ(val, "*")) rb_hash_aset(hp->env, g_path_info, val); } - action add_to_chunk_size { - hp->len.chunk = step_incr(hp->len.chunk, fc, 16); - if (hp->len.chunk < 0) - parser_raise(eHttpParserError, "invalid chunk size"); - } action header_done { finalize_header(hp); cs = http_parser_first_final; if (HP_FL_TEST(hp, HASBODY)) { HP_FL_SET(hp, INBODY); - if (HP_FL_TEST(hp, CHUNKED)) - cs = http_parser_en_ChunkedBody; } else { HP_FL_SET(hp, REQEOF); assert(!HP_FL_TEST(hp, CHUNKED) && "chunked encoding without body!"); @@ -385,37 +378,10 @@ static void write_value(struct http_parser *hp, */ goto post_exec; } - action end_trailers { cs = http_parser_first_final; goto post_exec; } - - action end_chunked_body { - HP_FL_SET(hp, INTRAILER); - cs = http_parser_en_Trailers; - ++p; - assert(p <= pe && "buffer overflow after chunked body"); - goto post_exec; - } - - action skip_chunk_data { - skip_chunk_data_hack: { - size_t nr = MIN((size_t)hp->len.chunk, REMAINING); - memcpy(RSTRING_PTR(hp->cont) + hp->s.dest_offset, fpc, nr); - hp->s.dest_offset += nr; - hp->len.chunk -= nr; - p += nr; - assert(hp->len.chunk >= 0 && "negative 
chunk length"); - if ((size_t)hp->len.chunk > REMAINING) { - HP_FL_SET(hp, INCHUNK); - goto post_exec; - } else { - fhold; - fgoto chunk_end; - } - }} - include unicorn_http_common "unicorn_http_common.rl"; }%% @@ -430,8 +396,8 @@ static void http_parser_init(struct http_parser *hp) hp->offset = 0; hp->start.field = 0; hp->s.field_len = 0; - hp->len.content = 0; hp->cont = Qfalse; /* zero on MRI, should be optimized away by above */ + memset(&hp->bdy.pcd, 0, sizeof(hp->bdy.pcd)); %% write init; hp->cs = cs; } @@ -454,11 +420,6 @@ http_parser_execute(struct http_parser *hp, char *buffer, size_t len) assert((void *)(pe - p) == (void *)(len - off) && "pointers aren't same distance"); - - if (HP_FL_TEST(hp, INCHUNK)) { - HP_FL_UNSET(hp, INCHUNK); - goto skip_chunk_data_hack; - } %% write exec; post_exec: /* "_out:" also goes here */ if (hp->cs != http_parser_error) @@ -676,7 +637,7 @@ static VALUE HttpParser_content_length(VALUE self) { struct http_parser *hp = data_get(self); - return HP_FL_TEST(hp, CHUNKED) ? Qnil : OFFT2NUM(hp->len.content); + return HP_FL_TEST(hp, CHUNKED) ? Qnil : OFFT2NUM(hp->bdy.clen); } /** @@ -703,8 +664,7 @@ static VALUE HttpParser_parse(VALUE self) if (hp->offset > MAX_HEADER_LEN) parser_raise(e413, "HTTP header is too large"); - if (hp->cs == http_parser_first_final || - hp->cs == http_parser_en_ChunkedBody) { + if (hp->cs == http_parser_first_final) { advance_str(data, hp->offset + 1); hp->offset = 0; if (HP_FL_TEST(hp, INTRAILER)) @@ -763,7 +723,7 @@ static VALUE HttpParser_headers(VALUE self, VALUE env, VALUE buf) static int chunked_eof(struct http_parser *hp) { - return ((hp->cs == http_parser_first_final) || HP_FL_TEST(hp, INTRAILER)); + return HP_FL_TEST(hp, INTRAILER); } /** @@ -780,7 +740,7 @@ static VALUE HttpParser_body_eof(VALUE self) if (HP_FL_TEST(hp, CHUNKED)) return chunked_eof(hp) ? Qtrue : Qfalse; - return hp->len.content == 0 ? Qtrue : Qfalse; + return hp->bdy.clen == 0 ? 
Qtrue : Qfalse; } /** @@ -853,6 +813,14 @@ static VALUE HttpParser_hijacked_bang(VALUE self) return self; } +static VALUE parse_trailers(struct http_parser *hp, VALUE src) +{ + hp->cs = http_parser_en_Trailers; + hp->buf = src; + return src; + /* TODO: switch to pico, here */ +} + /** * call-seq: * parser.filter_body(dst, src) => nil/src @@ -870,42 +838,50 @@ static VALUE HttpParser_hijacked_bang(VALUE self) static VALUE HttpParser_filter_body(VALUE self, VALUE dst, VALUE src) { struct http_parser *hp = data_get(self); - char *srcptr; - long srclen; - - srcptr = RSTRING_PTR(src); - srclen = RSTRING_LEN(src); + const char *srcptr = RSTRING_PTR(src); + long srclen = RSTRING_LEN(src); StringValue(dst); if (HP_FL_TEST(hp, CHUNKED)) { if (!chunked_eof(hp)) { + size_t bufsz = srclen; + char *dstptr; + ssize_t pret; + rb_str_modify(dst); rb_str_resize(dst, srclen); /* we can never copy more than srclen bytes */ - - hp->s.dest_offset = 0; - hp->cont = dst; + dstptr = RSTRING_PTR(dst); + memcpy(dstptr, srcptr, srclen); hp->buf = src; - http_parser_execute(hp, srcptr, srclen); - if (hp->cs == http_parser_error) - parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); - - assert(hp->s.dest_offset <= hp->offset && - "destination buffer overflow"); - advance_str(src, hp->offset); - rb_str_set_len(dst, hp->s.dest_offset); - - if (RSTRING_LEN(dst) == 0 && chunked_eof(hp)) { - assert(hp->len.chunk == 0 && "chunk at EOF but more to parse"); + pret = phr_decode_chunked(&hp->bdy.pcd, dstptr, &bufsz); + if (pret >= 0) { + rb_str_modify(src); + if (pret) + memcpy(RSTRING_PTR(src), dstptr + bufsz, pret); + rb_str_set_len(src, (long)pret); + rb_str_set_len(dst, (long)bufsz); + HP_FL_SET(hp, INTRAILER); } else { - src = Qnil; + switch (pret) { + case -2: /* incomplete */ + rb_str_set_len(dst, (long)bufsz); + rb_str_set_len(src, 0); + return Qnil; + case -1: + parser_raise(eHttpParserError, "Invalid HTTP format, parsing fails."); + default: + assert(pret >= 0 && "phr_decode_chunked returned < -2"); + } } } + assert(HP_FL_TEST(hp, INTRAILER) && "INTRAILER not set"); + return parse_trailers(hp, src); } else { /* no need to enter the Ragel machine for unchunked transfers */ - assert(hp->len.content >= 0 && "negative Content-Length"); - if (hp->len.content > 0) { - long nr = MIN(srclen, hp->len.content); + assert(hp->bdy.clen >= 0 && "negative Content-Length"); + if (hp->bdy.clen > 0) { + long nr = MIN(srclen, hp->bdy.clen); rb_str_modify(dst); rb_str_resize(dst, nr); @@ -918,8 +894,8 @@ static VALUE HttpParser_filter_body(VALUE self, VALUE dst, VALUE src) */ hp->buf = src; memcpy(RSTRING_PTR(dst), srcptr, nr); - hp->len.content -= nr; - if (hp->len.content == 0) { + hp->bdy.clen -= nr; + if (hp->bdy.clen == 0) { HP_FL_SET(hp, REQEOF); hp->cs = http_parser_first_final; } @@ -995,7 +971,7 @@ void Init_unicorn_http(void) * it is highly unlikely to encounter clients that send more than * several kilobytes at once. */ - rb_define_const(cHttpParser, "CHUNK_MAX", OFFT2NUM(UH_OFF_T_MAX)); + rb_define_const(cHttpParser, "CHUNK_MAX", SIZET2NUM(SIZE_MAX)); /* * The maximum size of the body as specified by Content-Length. 
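The rewritten chunked branch of HttpParser_filter_body above is the heart of this change: instead of walking the Ragel chunk machine and copying at a destination offset, it copies src into dst once, lets phr_decode_chunked strip the chunk framing from dst in place, and hands whatever the decoder leaves undecoded back to src as the start of the trailer section. A minimal C sketch of that flow follows; the function and parameter names (filter_chunked_sketch, decoded_len, trailer_len) are illustrative only and not part of unicorn's API:

#include <string.h>
#include <sys/types.h>          /* ssize_t */
#include "picohttpparser.c.h"   /* vendored copy: everything is static */

/*
 * Simplified sketch of the new chunked filter_body flow.  unicorn does
 * this inside HttpParser_filter_body with Ruby strings; dst must hold at
 * least srclen bytes.
 */
static ssize_t filter_chunked_sketch(struct phr_chunked_decoder *pcd,
                                     char *dst, const char *src, size_t srclen,
                                     size_t *decoded_len, size_t *trailer_len)
{
    size_t bufsz = srclen;
    ssize_t pret;

    memcpy(dst, src, srclen);        /* decoding happens in the dst copy */
    pret = phr_decode_chunked(pcd, dst, &bufsz);

    *decoded_len = bufsz;            /* body bytes decoded so far */
    if (pret >= 0) {                 /* terminal chunk reached */
        *trailer_len = (size_t)pret; /* raw trailer bytes now at dst + bufsz */
        return pret;                 /* caller parses trailers next */
    }
    *trailer_len = 0;
    return pret;     /* -2: feed more data next call; -1: bad chunk framing */
}

With consume_trailer left at zero (the decoder struct is zero-filled in http_parser_init), the decoder stops right after the terminal chunk-size line, which is why a non-negative return value doubles as the length of the raw trailer bytes handed back for the Trailers machine.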
diff --git a/ext/unicorn_http/unicorn_http_common.rl b/ext/unicorn_http/unicorn_http_common.rl index 0988b54..7570433 100644 --- a/ext/unicorn_http/unicorn_http_common.rl +++ b/ext/unicorn_http/unicorn_http_common.rl @@ -56,16 +56,6 @@ value_cont = lws+ content* >start_value %write_cont_value; message_header = ((field_name ":" lws* field_value)|value_cont) :> CRLF; - chunk_ext_val = token*; - chunk_ext_name = token*; - chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*; - last_chunk = "0"+ chunk_extension CRLF; - chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size; - chunk_end = CRLF; - chunk_body = any >skip_chunk_data; - chunk_begin = chunk_size chunk_extension CRLF; - chunk = chunk_begin chunk_body chunk_end; - ChunkedBody := chunk* last_chunk @end_chunked_body; Trailers := (message_header)* CRLF @end_trailers; FullRequest = Request_Line (message_header)* CRLF @header_done; diff --git a/test/unit/test_http_parser.rb b/test/unit/test_http_parser.rb index 697af44..68d48b8 100644 --- a/test/unit/test_http_parser.rb +++ b/test/unit/test_http_parser.rb @@ -859,7 +859,7 @@ class HttpParserTest < Test::Unit::TestCase # need to update this when 128-bit machines come out # n.b. actual struct size on 64-bit is 56 bytes + 40 bytes for RVALUE # Ruby <= 2.2 objspace did not count the 40-byte RVALUE, 2.3 does. - assert_operator n, :<=, 96 + assert_operator n, :<=, 104 # TODO: drop to <= 96 assert_operator n, :>, 0 end rescue LoadError diff --git a/test/unit/test_http_parser_ng.rb b/test/unit/test_http_parser_ng.rb index 425d5ad..40fe2e3 100644 --- a/test/unit/test_http_parser_ng.rb +++ b/test/unit/test_http_parser_ng.rb @@ -230,8 +230,10 @@ class HttpParserNgTest < Test::Unit::TestCase tmp = "" assert_nil @parser.filter_body(tmp, str << "..") assert_equal "..", tmp - assert_nil @parser.filter_body(tmp, str << "abcd\r\n0\r\n") + assert_nil @parser.filter_body(tmp, str << "abcd") assert_equal "abcd", tmp + @parser.filter_body(tmp, str << "\r\n0\r\n") + assert_equal "", tmp assert_equal str.object_id, @parser.filter_body(tmp, str << "PUT").object_id assert_equal "PUT", str assert ! @parser.keepalive? 
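The test adjustment above, where filter_body now needs a second call before the body is considered complete, follows directly from phr_decode_chunked's incremental contract: it returns -2 until the terminating 0-size chunk has been seen, and the same zero-filled phr_chunked_decoder must be carried across calls. Below is a standalone sketch of that call pattern, loosely mirroring the data the test feeds; it is illustrative only, not part of the test suite, and assumes a complete vendored picohttpparser copy on the include path:

#include <assert.h>
#include <string.h>
#include <sys/types.h>
#include "picohttpparser.c.h"

/* Illustrative only: feed a chunked body to phr_decode_chunked in two
 * pieces and observe -2 ("incomplete, call again") followed by a
 * non-negative result once the 0-size chunk shows up.  The decoder state
 * must be zero-filled and carried across calls. */
int main(void)
{
    struct phr_chunked_decoder dec = { 0 };
    char buf[64];
    size_t sz;
    ssize_t ret;

    /* first piece: chunk size line plus data, but no chunk terminator yet */
    sz = sizeof("4\r\nabcd") - 1;
    memcpy(buf, "4\r\nabcd", sz);
    ret = phr_decode_chunked(&dec, buf, &sz);
    assert(ret == -2);                          /* need more input */
    assert(sz == 4 && !memcmp(buf, "abcd", 4)); /* decoded so far */

    /* second piece: the chunk's CRLF plus the terminating 0-size chunk */
    sz = sizeof("\r\n0\r\n\r\n") - 1;
    memcpy(buf, "\r\n0\r\n\r\n", sz);
    ret = phr_decode_chunked(&dec, buf, &sz);
    assert(ret == 2 && !memcmp(buf, "\r\n", 2)); /* leftover trailer bytes */
    assert(sz == 0);                             /* no body bytes here */
    return 0;
}

The final return value of 2 is the undecoded CRLF terminating the (empty) trailer section, which unicorn's Trailers machine consumes afterwards.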
@@ -318,7 +320,7 @@ class HttpParserNgTest < Test::Unit::TestCase "1\r\na\r\n2\r\n..\r\n0\r\n" assert_equal req, @parser.parse tmp = '' - assert_nil @parser.filter_body(tmp, str) + @parser.filter_body(tmp, str) assert_equal 'a..', tmp rv = @parser.filter_body(tmp, str) assert_equal rv.object_id, str.object_id @@ -357,7 +359,8 @@ class HttpParserNgTest < Test::Unit::TestCase assert_equal 'Content-MD5', req['HTTP_TRAILER'] assert_nil req['HTTP_CONTENT_MD5'] tmp = '' - assert_nil @parser.filter_body(tmp, str) + # assert_nil @parser.filter_body(tmp, str) + @parser.filter_body(tmp, str) assert_equal 'a..', tmp md5_b64 = [ Digest::MD5.digest(tmp) ].pack('m').strip.freeze rv = @parser.filter_body(tmp, str) @@ -387,7 +390,7 @@ class HttpParserNgTest < Test::Unit::TestCase assert_equal 'Content-MD5', req['HTTP_TRAILER'] assert_nil req['HTTP_CONTENT_MD5'] tmp = '' - assert_nil @parser.filter_body(tmp, str) + @parser.filter_body(tmp, str) assert_equal 'a..', tmp md5_b64 = [ Digest::MD5.digest(tmp) ].pack('m').strip.freeze rv = @parser.filter_body(tmp, str) @@ -471,7 +474,7 @@ class HttpParserNgTest < Test::Unit::TestCase assert_equal req, @parser.parse assert_equal 'Transfer-Encoding', req['HTTP_TRAILER'] tmp = '' - assert_nil @parser.filter_body(tmp, str) + @parser.filter_body(tmp, str) assert_equal 'a..', tmp assert_equal '', str str << "Transfer-Encoding: identity\r\n\r\n" -- cgit v1.2.3-24-ge0c7
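The new parse_trailers helper in the unicorn_http.rl hunk still re-enters the Ragel Trailers machine and carries a "TODO: switch to pico, here" note. If that follow-up lands, the raw trailer bytes phr_decode_chunked hands back could presumably be fed to phr_parse_headers from the same vendored copy. A hedged sketch of what that might look like; nothing below is in this patch, and parse_trailer_section is a made-up helper:

#include <stddef.h>
#include <stdio.h>
#include "picohttpparser.c.h"

/* Hypothetical follow-up to the "switch to pico" TODO in parse_trailers:
 * hand the raw trailer bytes left behind by phr_decode_chunked to
 * phr_parse_headers.  Illustration only, not part of the patch. */
static int parse_trailer_section(const char *buf, size_t len)
{
    struct phr_header trailers[8];
    size_t num_trailers = sizeof(trailers) / sizeof(trailers[0]);
    int consumed = phr_parse_headers(buf, len, trailers, &num_trailers, 0);

    if (consumed < 0)
        return consumed;        /* -1: parse error, -2: need more data */

    for (size_t i = 0; i < num_trailers; i++)
        printf("trailer: %.*s: %.*s\n",
               (int)trailers[i].name_len, trailers[i].name,
               (int)trailers[i].value_len, trailers[i].value);
    return consumed;            /* bytes consumed, including the final CRLF */
}

For now the Ragel machine keeps doing this work, which also takes care of validating allowed trailers and storing them into the Rack env.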