1 files changed, 651 insertions, 0 deletions
diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c
new file mode 100644
index 0000000..d0bfac6
--- /dev/null
+++ b/plugins/picohttpparser/picohttpparser.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
+ *                         Shigeo Mitsunari
+ *
+ * The software is licensed under either the MIT License (below) or the Perl
+ * license.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef __SSE4_2__
+#ifdef _MSC_VER
+#include <nmmintrin.h>
+#else
+#include <x86intrin.h>
+#endif
+#endif
+#include "picohttpparser.h"
+/* Branch hints: expand to __builtin_expect when __GNUC__ >= 3, otherwise to the bare expression. */
+#if __GNUC__ >= 3
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+/* Alignment specifier used on the static range tables: MSVC spelling under _MSC_VER, attribute syntax otherwise. */
+#ifdef _MSC_VER
+#define ALIGNED(n) _declspec(align(n))
+#else
+#define ALIGNED(n) __attribute__((aligned(n)))
+#endif
+/* True when c is in 0x20..0x7e (SP through '~'); the unsigned subtraction folds both bounds into one compare. */
+#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
+/*
+ * The macros below expand inside the parser functions and rely on locals
+ * named buf, buf_end and ret being in scope. Each of them may return NULL
+ * from the enclosing function.
+ */
+/* Out of input: when buf has reached buf_end, store -2 in *ret and return NULL. */
+#define CHECK_EOF()                                                                                                                \
+    if (buf == buf_end) {                                                                                                          \
+        *ret = -2;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+/* Consume one byte WITHOUT a bounds check; unless it equals ch, store -1 in *ret and return NULL. */
+#define EXPECT_CHAR_NO_CHECK(ch)                                                                                                   \
+    if (*buf++ != ch) {                                                                                                            \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }
+/* Bounds-checked variant: CHECK_EOF() followed by EXPECT_CHAR_NO_CHECK(ch). */
+#define EXPECT_CHAR(ch)                                                                                                            \
+    CHECK_EOF();                                                                                                                   \
+    EXPECT_CHAR_NO_CHECK(ch);
+/*
+ * Scan a space-terminated token starting at buf.
+ *
+ * findchar_fast() first skips ahead to the first byte in 0x00..0x20 or equal
+ * to 0x7f (when it can); the byte loop then advances until it sees ' '.
+ * A byte below 0x20 or equal to 0x7f stores -1 in *ret and returns NULL;
+ * running out of input stores -2 (via CHECK_EOF). Bytes with the high bit set
+ * are accepted. On normal completion tok/toklen describe the token and buf
+ * is left pointing at the terminating space.
+ */
+#define ADVANCE_TOKEN(tok, toklen)                                                                                                 \
+    do {                                                                                                                           \
+        const char *tok_start = buf;                                                                                               \
+        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177";                                                            \
+        int found2;                                                                                                                \
+        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2);                                                                    \
+        if (!found2) {                                                                                                             \
+            CHECK_EOF();                                                                                                           \
+        }                                                                                                                          \
+        while (1) {                                                                                                                \
+            if (*buf == ' ') {                                                                                                     \
+                break;                                                                                                             \
+            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {                                                                      \
+                if ((unsigned char)*buf < '\040' || *buf == '\177') {                                                              \
+                    *ret = -1;                                                                                                     \
+                    return NULL;                                                                                                   \
+                }                                                                                                                  \
+            }                                                                                                                      \
+            ++buf;                                                                                                                 \
+            CHECK_EOF();                                                                                                           \
+        }                                                                                                                          \
+        tok = tok_start;                                                                                                           \
+        toklen = buf - tok_start;                                                                                                  \
+    } while (0)
+/*
+ * 256-entry lookup table indexed by (unsigned char) byte value, laid out as
+ * 8 rows of 32 entries. A non-zero entry marks a byte that parse_headers()
+ * accepts inside a header name; every entry from 0x80 upwards is zero.
+ */
+static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
+                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
+                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+/*
+ * Fast pre-scan for the first byte of [buf, buf_end) that falls inside any of
+ * the given byte ranges.
+ *
+ * ranges      : pairs of (low, high) bytes; the SSE path loads 16 bytes from it
+ * ranges_size : number of meaningful bytes in ranges
+ * found       : set to 1 when a matching byte was located, 0 otherwise
+ *
+ * With SSE4.2 the input is examined in whole 16-byte blocks only; when no
+ * match is found the returned pointer is where block scanning stopped, so up
+ * to 15 trailing bytes are left for the caller to inspect. Without SSE4.2 the
+ * function returns buf unchanged with *found == 0.
+ */
+static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
+{
+    *found = 0;
+#if __SSE4_2__
+    if (likely(buf_end - buf >= 16)) {
+        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);
+        /* byte count covered by whole 16-byte blocks */
+        size_t left = (buf_end - buf) & ~15;
+        do {
+            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
+            /* r is the index of the first byte inside a range, or 16 when the block has none */
+            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
+            if (unlikely(r != 16)) {
+                buf += r;
+                *found = 1;
+                break;
+            }
+            buf += 16;
+            left -= 16;
+        } while (likely(left != 0));
+    }
+#else
+    /* suppress unused parameter warning */
+    (void)buf_end;
+    (void)ranges;
+    (void)ranges_size;
+#endif
+    return buf;
+}
+/*
+ * Scans from buf to the end of the current line.
+ *
+ * On success stores the line start in *token and its length (excluding the
+ * CRLF or bare LF terminator) in *token_len, and returns a pointer just past
+ * the terminator. HT and bytes with the high bit set are accepted as line
+ * content. Returns NULL with *ret == -1 when another control byte (or DEL)
+ * appears that is not a valid line terminator, and with *ret == -2 when the
+ * input runs out first.
+ */
+static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
+{
+    const char *token_start = buf;
+#ifdef __SSE4_2__
+    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
+                                                "\012\037"  /* allow SP and up to but not including DEL */
+                                                "\177\177"; /* allow chars w. MSB set */
+    int found;
+    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
+    if (found)
+        goto FOUND_CTL;
+#else
+    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
+    while (likely(buf_end - buf >= 8)) {
+#define DOIT()                                                                                                                     \
+    do {                                                                                                                           \
+        if (unlikely(!IS_PRINTABLE_ASCII(*buf)))                                                                                   \
+            goto NonPrintable;                                                                                                     \
+        ++buf;                                                                                                                     \
+    } while (0)
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+        DOIT();
+#undef DOIT
+        continue;
+    NonPrintable:
+        /* control byte other than HT, or DEL: go classify it; anything else (HT, high-bit bytes) is skipped */
+        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+            goto FOUND_CTL;
+        }
+        ++buf;
+    }
+#endif
+    /* byte-at-a-time scan of whatever the fast path above left unexamined */
+    for (;; ++buf) {
+        CHECK_EOF();
+        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
+            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
+                goto FOUND_CTL;
+            }
+        }
+    }
+FOUND_CTL:
+    if (likely(*buf == '\015')) {
+        /* CR must be followed by LF; the token excludes both bytes */
+        ++buf;
+        EXPECT_CHAR('\012');
+        *token_len = buf - 2 - token_start;
+    } else if (*buf == '\012') {
+        /* bare LF terminator */
+        *token_len = buf - token_start;
+        ++buf;
+    } else {
+        *ret = -1;
+        return NULL;
+    }
+    *token = token_start;
+    return buf;
+}
+/*
+ * Looks for two consecutive line terminators (each CRLF or a bare LF) in
+ * [buf, buf_end), i.e. the blank line that ends a header block.
+ *
+ * When last_len >= 3 the scan starts 3 bytes before offset last_len instead of
+ * at buf. NOTE(review): this presumably expects last_len to be no larger than
+ * the buffer length; nothing here checks that - confirm against callers.
+ *
+ * Returns a pointer just past the second terminator. Returns NULL with
+ * *ret == -2 when the input ends first, or *ret == -1 when a CR is not
+ * followed by LF.
+ */
+static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
+{
+    /* number of line terminators seen back to back */
+    int ret_cnt = 0;
+    buf = last_len < 3 ? buf : buf + last_len - 3;
+    while (1) {
+        CHECK_EOF();
+        if (*buf == '\015') {
+            ++buf;
+            CHECK_EOF();
+            EXPECT_CHAR('\012');
+            ++ret_cnt;
+        } else if (*buf == '\012') {
+            ++buf;
+            ++ret_cnt;
+        } else {
+            ++buf;
+            ret_cnt = 0;
+        }
+        if (ret_cnt == 2) {
+            return buf;
+        }
+    }
+    /* not reached: the loop above only exits through return statements */
+    *ret = -2;
+    return NULL;
+}
+/*
+ * Parse one decimal digit at buf: store mul_ * digit in *valp_ and advance
+ * buf. A non-digit stores -1 in *ret and returns NULL from the enclosing
+ * function. There is no bounds check here; the caller must already know that
+ * the byte at buf is readable.
+ */
+#define PARSE_INT(valp_, mul_)                                                                                                     \
+    if (*buf < '0' || '9' < *buf) {                                                                                                \
+        buf++;                                                                                                                     \
+        *ret = -1;                                                                                                                 \
+        return NULL;                                                                                                               \
+    }                                                                                                                              \
+    *(valp_) = (mul_) * (*buf++ - '0');
+/* Parse exactly three decimal digits at buf into *valp_ (hundreds, tens, ones); same failure behaviour as PARSE_INT. */
+#define PARSE_INT_3(valp_)                                                                                                         \
+    do {                                                                                                                           \
+        int res_ = 0;                                                                                                              \
+        PARSE_INT(&res_, 100)                                                                                                      \
+        *valp_ = res_;                                                                                                             \
+        PARSE_INT(&res_, 10)                                                                                                       \
+        *valp_ += res_;                                                                                                            \
+        PARSE_INT(&res_, 1)                                                                                                        \
+        *valp_ += res_;                                                                                                            \
+    } while (0)
+/*
+ * Parses "HTTP/<digit>" and, when that digit is 1, a following ".<digit>".
+ * For any other major version *minor_version is set to 0 and nothing more is
+ * consumed.
+ *
+ * Returns NULL with *ret == -2 when fewer than 9 bytes are available, or with
+ * *ret == -1 on a mismatch.
+ */
+/* returned pointer is always within [buf, buf_end), or null */
+static const char *parse_http_version(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *ret)
+{
+    /* we want at least [HTTP/1.<two chars>] to try to parse */
+    if (buf_end - buf < 9) {
+        *ret = -2;
+        return NULL;
+    }
+    /* at most 8 bytes are consumed below, so the unchecked reads stay inside the 9 bytes verified above */
+    EXPECT_CHAR_NO_CHECK('H');
+    EXPECT_CHAR_NO_CHECK('T');
+    EXPECT_CHAR_NO_CHECK('T');
+    EXPECT_CHAR_NO_CHECK('P');
+    EXPECT_CHAR_NO_CHECK('/');
+    PARSE_INT(major_version, 1);
+    if (*major_version == 1) {
+        EXPECT_CHAR_NO_CHECK('.');
+        PARSE_INT(minor_version, 1);
+    } else {
+        *minor_version = 0;
+    }
+    return buf;
+}
+/*
+ * Parses header lines starting at buf until an empty line (CRLF or bare LF).
+ *
+ * headers     : output array; entry i receives name/name_len/value/value_len
+ *               pointing into the input buffer
+ * num_headers : in/out running count; incremented once per parsed line
+ * max_headers : capacity of headers; one more line than this is an error
+ *
+ * A line that starts with SP or HT (and is not the first line) is stored as a
+ * continuation: name == NULL, name_len == 0, value set to the line content.
+ * Trailing SP/HT is trimmed from every value.
+ *
+ * Returns a pointer just past the terminating empty line, or NULL with
+ * *ret == -1 (malformed or too many headers) or *ret == -2 (input ended early).
+ */
+static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
+                                 size_t max_headers, int *ret)
+{
+    for (;; ++*num_headers) {
+        CHECK_EOF();
+        /* an empty line ends the header block */
+        if (*buf == '\015') {
+            ++buf;
+            EXPECT_CHAR('\012');
+            break;
+        } else if (*buf == '\012') {
+            ++buf;
+            break;
+        }
+        if (*num_headers == max_headers) {
+            *ret = -1;
+            return NULL;
+        }
+        if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
+            /* parsing name, but do not discard SP before colon, see
+             * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
+            headers[*num_headers].name = buf;
+            static const char ALIGNED(16) ranges1[] = "\x00 "  /* control chars and up to SP */
+                                                      "\"\""   /* 0x22 */
+                                                      "()"     /* 0x28,0x29 */
+                                                      ",,"     /* 0x2c */
+                                                      "//"     /* 0x2f */
+                                                      ":@"     /* 0x3a-0x40 */
+                                                      "[]"     /* 0x5b-0x5d */
+                                                      "{\377"; /* 0x7b-0xff */
+            int found;
+            buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
+            if (!found) {
+                CHECK_EOF();
+            }
+            /* walk to the colon; any byte not marked in token_char_map is rejected */
+            while (1) {
+                if (*buf == ':') {
+                    break;
+                } else if (!token_char_map[(unsigned char)*buf]) {
+                    *ret = -1;
+                    return NULL;
+                }
+                ++buf;
+                CHECK_EOF();
+            }
+            /* an empty header name is an error */
+            if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
+                *ret = -1;
+                return NULL;
+            }
+            /* step over the colon, then over any SP/HT before the value */
+            ++buf;
+            for (;; ++buf) {
+                CHECK_EOF();
+                if (!(*buf == ' ' || *buf == '\t')) {
+                    break;
+                }
+            }
+        } else {
+            /* continuation line: no name, the whole line becomes the value */
+            headers[*num_headers].name = NULL;
+            headers[*num_headers].name_len = 0;
+        }
+        const char *value;
+        size_t value_len;
+        if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
+            return NULL;
+        }
+        /* remove trailing SPs and HTABs */
+        const char *value_end = value + value_len;
+        for (; value_end != value; --value_end) {
+            const char c = *(value_end - 1);
+            if (!(c == ' ' || c == '\t')) {
+                break;
+            }
+        }
+        headers[*num_headers].value = value;
+        headers[*num_headers].value_len = value_end - value;
+    }
+    return buf;
+}
+/*
+ * Parses a request head: an optional leading empty line, then
+ * "<method> SP+ <path> SP+ HTTP/<version>" terminated by CRLF or bare LF,
+ * then the header block via parse_headers().
+ *
+ * method/path and their lengths point into the input buffer. On success the
+ * return value is whatever parse_headers() returns. On failure NULL is
+ * returned with *ret == -1 (malformed input) or *ret == -2 (input ended
+ * before the request head was complete).
+ */
+static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
+                                 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers,
+                                 size_t max_headers, int *ret)
+{
+    /* skip first empty line (some clients add CRLF after POST content) */
+    CHECK_EOF();
+    if (*buf == '\015') {
+        ++buf;
+        EXPECT_CHAR('\012');
+    } else if (*buf == '\012') {
+        ++buf;
+    }
+    /* parse request line */
+    ADVANCE_TOKEN(*method, *method_len);
+    /* ADVANCE_TOKEN only guarantees that the separator itself is in bounds, so
+     * check for the end of input before looking at each following byte */
+    do {
+        ++buf;
+        CHECK_EOF();
+    } while (*buf == ' ');
+    ADVANCE_TOKEN(*path, *path_len);
+    do {
+        ++buf;
+        CHECK_EOF();
+    } while (*buf == ' ');
+    if (*method_len == 0 || *path_len == 0) {
+        *ret = -1;
+        return NULL;
+    }
+    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
+        return NULL;
+    }
+    /* parse_http_version() leaves buf inside the buffer, so this read is safe */
+    if (*buf == '\015') {
+        ++buf;
+        EXPECT_CHAR('\012');
+    } else if (*buf == '\012') {
+        ++buf;
+    } else {
+        *ret = -1;
+        return NULL;
+    }
+    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+/*
+ * Public entry point: parse the HTTP request head held in buf_start[0..len).
+ *
+ * *num_headers is read as the capacity of headers on entry and holds the
+ * number of parsed header lines on return. All other outputs are reset before
+ * parsing starts. When last_len is non-zero, is_complete() is consulted first
+ * so that an unfinished request is rejected without a full parse (a fast
+ * countermeasure against slowloris).
+ *
+ * Returns the number of bytes consumed on success, otherwise the negative
+ * code produced by the parser (-1 or -2).
+ */
+int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
+                      size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *const input_end = buf_start + len;
+    const size_t header_capacity = *num_headers;
+    const char *cursor;
+    int status;
+    /* reset the outputs */
+    *num_headers = 0;
+    *minor_version = -1;
+    *major_version = -1;
+    *path_len = 0;
+    *path = NULL;
+    *method_len = 0;
+    *method = NULL;
+    if (last_len != 0) {
+        if (is_complete(buf_start, input_end, last_len, &status) == NULL) {
+            return status;
+        }
+    }
+    cursor = parse_request(buf_start, input_end, method, method_len, path, path_len, major_version, minor_version, headers, num_headers,
+                           header_capacity, &status);
+    if (cursor == NULL) {
+        return status;
+    }
+    return (int)(cursor - buf_start);
+}
+/*
+ * Parses a response head: "HTTP/<version> SP+ <3-digit status>[ SP+ <message>]"
+ * terminated by CRLF or bare LF, then the header block via parse_headers().
+ *
+ * *msg / *msg_len describe the reason phrase inside the input buffer with the
+ * leading spaces removed. On success the return value is whatever
+ * parse_headers() returns. On failure NULL is returned with *ret == -1
+ * (malformed input) or *ret == -2 (input ended before the head was complete).
+ */
+static const char *parse_response(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *status, const char **msg,
+                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
+{
+    /* parse "HTTP/1.x" */
+    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
+        return NULL;
+    }
+    /* skip space; parse_http_version() leaves buf inside the buffer, so this first read is safe */
+    if (*buf != ' ') {
+        *ret = -1;
+        return NULL;
+    }
+    /* check for the end of input before looking at each following byte */
+    do {
+        ++buf;
+        CHECK_EOF();
+    } while (*buf == ' ');
+    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
+    if (buf_end - buf < 4) {
+        *ret = -2;
+        return NULL;
+    }
+    PARSE_INT_3(status);
+    /* get message including preceding space */
+    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
+        return NULL;
+    }
+    if (*msg_len == 0) {
+        /* ok */
+    } else if (**msg == ' ') {
+        /* remove preceding space; the scan stops at the line terminator at the latest */
+        do {
+            ++*msg;
+            --*msg_len;
+        } while (**msg == ' ');
+    } else {
+        /* garbage found after status code */
+        *ret = -1;
+        return NULL;
+    }
+    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
+}
+/*
+ * Public entry point: parse the HTTP response head held in buf_start[0..len).
+ *
+ * *num_headers is read as the capacity of headers on entry and holds the
+ * number of parsed header lines on return. All other outputs are reset before
+ * parsing starts. When last_len is non-zero, is_complete() is consulted first
+ * so that an unfinished response is rejected without a full parse (a fast
+ * countermeasure against slowloris).
+ *
+ * Returns the number of bytes consumed on success, otherwise the negative
+ * code produced by the parser (-1 or -2).
+ */
+int phr_parse_response(const char *buf_start, size_t len, int *major_version, int *minor_version, int *status, const char **msg, size_t *msg_len,
+                       struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *const input_end = buf_start + len;
+    const size_t header_capacity = *num_headers;
+    const char *cursor;
+    int rc;
+    /* reset the outputs */
+    *num_headers = 0;
+    *msg_len = 0;
+    *msg = NULL;
+    *status = 0;
+    *minor_version = -1;
+    *major_version = -1;
+    if (last_len != 0) {
+        if (is_complete(buf_start, input_end, last_len, &rc) == NULL) {
+            return rc;
+        }
+    }
+    cursor = parse_response(buf_start, input_end, major_version, minor_version, status, msg, msg_len, headers, num_headers, header_capacity,
+                            &rc);
+    if (cursor == NULL) {
+        return rc;
+    }
+    return (int)(cursor - buf_start);
+}
+/*
+ * Public entry point: parse a bare header block held in buf_start[0..len).
+ *
+ * *num_headers is read as the capacity of headers on entry and holds the
+ * number of parsed header lines on return. When last_len is non-zero,
+ * is_complete() is consulted first so that an unfinished block is rejected
+ * without a full parse (a fast countermeasure against slowloris).
+ *
+ * Returns the number of bytes consumed on success, otherwise the negative
+ * code produced by the parser (-1 or -2).
+ */
+int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
+{
+    const char *const input_end = buf_start + len;
+    const size_t header_capacity = *num_headers;
+    const char *cursor;
+    int rc;
+    *num_headers = 0;
+    if (last_len != 0) {
+        if (is_complete(buf_start, input_end, last_len, &rc) == NULL) {
+            return rc;
+        }
+    }
+    cursor = parse_headers(buf_start, input_end, headers, num_headers, header_capacity, &rc);
+    if (cursor == NULL) {
+        return rc;
+    }
+    return (int)(cursor - buf_start);
+}
+/* States of the chunked decoder, stored in phr_chunked_decoder._state between calls to phr_decode_chunked(). */
+enum {
+    CHUNKED_IN_CHUNK_SIZE,          /* reading the hexadecimal chunk size */
+    CHUNKED_IN_CHUNK_EXT,           /* skipping the rest of the chunk-size line up to LF */
+    CHUNKED_IN_CHUNK_DATA,          /* copying chunk payload bytes */
+    CHUNKED_IN_CHUNK_CRLF,          /* expecting the line terminator that follows chunk payload */
+    CHUNKED_IN_TRAILERS_LINE_HEAD,  /* at the start of a trailer line */
+    CHUNKED_IN_TRAILERS_LINE_MIDDLE /* inside a trailer line, skipping to LF */
+};
+/* Returns the value (0..15) of the hexadecimal digit ch, accepting either letter case, or -1 when ch is not a hex digit. */
+static int decode_hex(int ch)
+{
+    if (ch >= '0' && ch <= '9')
+        return ch - '0';
+    if (ch >= 'a' && ch <= 'f')
+        return 0xa + (ch - 'a');
+    if (ch >= 'A' && ch <= 'F')
+        return 0xa + (ch - 'A');
+    return -1;
+}
+/*
+ * Decodes chunked transfer-encoded data in place.
+ *
+ * decoder : state carried between calls (_state, _hex_count,
+ *           bytes_left_in_chunk, consume_trailer)
+ * buf     : input bytes; decoded payload is compacted to the front of buf
+ * _bufsz  : in: number of bytes in buf; out: number of decoded payload bytes
+ *           now at the start of buf
+ *
+ * Returns -2 when all input was consumed without reaching the end of the
+ * chunked body, -1 on malformed input, and otherwise the number of bytes that
+ * followed the end of the chunked body (those bytes are moved to
+ * buf + *_bufsz).
+ */
+ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
+{
+    /* dst: write offset for decoded payload; src: read offset in the encoded input */
+    size_t dst = 0, src = 0, bufsz = *_bufsz;
+    ssize_t ret = -2; /* incomplete */
+    while (1) {
+        switch (decoder->_state) {
+        case CHUNKED_IN_CHUNK_SIZE:
+            for (;; ++src) {
+                int v;
+                if (src == bufsz)
+                    goto Exit;
+                if ((v = decode_hex(buf[src])) == -1) {
+                    /* a size line with no hex digit at all is malformed */
+                    if (decoder->_hex_count == 0) {
+                        ret = -1;
+                        goto Exit;
+                    }
+                    break;
+                }
+                /* reject sizes with more hex digits than sizeof(size_t) * 2 */
+                if (decoder->_hex_count == sizeof(size_t) * 2) {
+                    ret = -1;
+                    goto Exit;
+                }
+                decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
+                ++decoder->_hex_count;
+            }
+            decoder->_hex_count = 0;
+            decoder->_state = CHUNKED_IN_CHUNK_EXT;
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_EXT:
+            /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] == '\012')
+                    break;
+            }
+            ++src;
+            /* a zero-sized chunk ends the body; trailers are consumed only when requested */
+            if (decoder->bytes_left_in_chunk == 0) {
+                if (decoder->consume_trailer) {
+                    decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+                    break;
+                } else {
+                    goto Complete;
+                }
+            }
+            decoder->_state = CHUNKED_IN_CHUNK_DATA;
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_DATA: {
+            size_t avail = bufsz - src;
+            if (avail < decoder->bytes_left_in_chunk) {
+                /* the chunk continues beyond this buffer: take what is here and remember the rest */
+                if (dst != src)
+                    memmove(buf + dst, buf + src, avail);
+                src += avail;
+                dst += avail;
+                decoder->bytes_left_in_chunk -= avail;
+                goto Exit;
+            }
+            if (dst != src)
+                memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
+            src += decoder->bytes_left_in_chunk;
+            dst += decoder->bytes_left_in_chunk;
+            decoder->bytes_left_in_chunk = 0;
+            decoder->_state = CHUNKED_IN_CHUNK_CRLF;
+        }
+        /* fallthru */
+        case CHUNKED_IN_CHUNK_CRLF:
+            /* skip any number of CR bytes, then require LF */
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] != '\015')
+                    break;
+            }
+            if (buf[src] != '\012') {
+                ret = -1;
+                goto Exit;
+            }
+            ++src;
+            decoder->_state = CHUNKED_IN_CHUNK_SIZE;
+            break;
+        case CHUNKED_IN_TRAILERS_LINE_HEAD:
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] != '\015')
+                    break;
+            }
+            /* an LF at the head of a line means an empty line: the trailers are finished */
+            if (buf[src++] == '\012')
+                goto Complete;
+            decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
+        /* fallthru */
+        case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
+            for (;; ++src) {
+                if (src == bufsz)
+                    goto Exit;
+                if (buf[src] == '\012')
+                    break;
+            }
+            ++src;
+            decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
+            break;
+        default:
+            assert(!"decoder is corrupt");
+        }
+    }
+Complete:
+    /* number of input bytes left after the end of the chunked body */
+    ret = bufsz - src;
+Exit:
+    /* move whatever input was not consumed to just behind the decoded payload */
+    if (dst != src)
+        memmove(buf + dst, buf + src, bufsz - src);
+    *_bufsz = dst;
+    return ret;
+}
+/* Returns 1 when the decoder's current state is CHUNKED_IN_CHUNK_DATA, 0 otherwise. */
+int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
+{
+    if (decoder->_state == CHUNKED_IN_CHUNK_DATA)
+        return 1;
+    return 0;
+}
+#undef CHECK_EOF
+#undef EXPECT_CHAR
+#undef ADVANCE_TOKEN

diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c new file mode 100644 index 0000000..d0bfac6 --- /dev/null +++ b/plugins/picohttpparser/picohttpparser.c
@@ -0,0 +1,651 @@
	1	/*
	2	* Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
	3	* Shigeo Mitsunari
	4	*
	5	* The software is licensed under either the MIT License (below) or the Perl
	6	* license.
	7	*
	8	* Permission is hereby granted, free of charge, to any person obtaining a copy
	9	* of this software and associated documentation files (the "Software"), to
	10	* deal in the Software without restriction, including without limitation the
	11	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	12	* sell copies of the Software, and to permit persons to whom the Software is
	13	* furnished to do so, subject to the following conditions:
	14	*
	15	* The above copyright notice and this permission notice shall be included in
	16	* all copies or substantial portions of the Software.
	17	*
	18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	19	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	20	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	21	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	22	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	23	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	24	* IN THE SOFTWARE.
	25	*/
	26
	27	#include <assert.h>
	28	#include <stddef.h>
	29	#include <string.h>
	30	#ifdef __SSE4_2__
	31	#ifdef _MSC_VER
	32	#include <nmmintrin.h>
	33	#else
	34	#include <x86intrin.h>
	35	#endif
	36	#endif
	37	#include "picohttpparser.h"
	38
	/* Branch-prediction hints: expand to __builtin_expect on GCC-compatible
	 * compilers (major version 3 and later) and to a plain pass-through
	 * everywhere else. */
	#if defined(__GNUC__) && __GNUC__ >= 3
	#define likely(x) __builtin_expect(!!(x), 1)
	#define unlikely(x) __builtin_expect(!!(x), 0)
	#else
	#define likely(x) (x)
	#define unlikely(x) (x)
	#endif

	/* ALIGNED(n): ask the compiler for n-byte alignment of the annotated object. */
	#ifdef _MSC_VER
	#define ALIGNED(n) _declspec(align(n))
	#else
	#define ALIGNED(n) __attribute__((aligned(n)))
	#endif
	52
	/* True when byte c is printable ASCII (0x20..0x7e).  Casting to unsigned char
	 * and subtracting as unsigned folds both bounds into a single comparison. */
	#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)

	/* The macros below expand inside the parser functions and rely on the locals
	 * `buf`, `buf_end` and `ret` being in scope.  On failure they store a status
	 * in *ret and return NULL from the enclosing function. */

	/* Give up with -2 (input incomplete) once the cursor has reached buf_end. */
	#define CHECK_EOF() \
	    if (buf == buf_end) { \
	        *ret = -2; \
	        return NULL; \
	    }

	/* Consume one byte and fail with -1 unless it equals ch.  Performs no bounds
	 * check: the caller must already know that buf < buf_end. */
	#define EXPECT_CHAR_NO_CHECK(ch) \
	    if (*buf++ != ch) { \
	        *ret = -1; \
	        return NULL; \
	    }

	/* Bounds-checked variant of EXPECT_CHAR_NO_CHECK. */
	#define EXPECT_CHAR(ch) \
	    CHECK_EOF(); \
	    EXPECT_CHAR_NO_CHECK(ch);

	/* Scan a space-terminated token starting at buf.  On success `tok` receives
	 * the token start, `toklen` its length, and buf is left on the terminating
	 * space.  A control character (< 0x20) or DEL inside the token fails with -1;
	 * bytes with the high bit set are accepted.  findchar_fast() is used first to
	 * skip ahead quickly, then the byte-wise loop finishes the job. */
	#define ADVANCE_TOKEN(tok, toklen) \
	    do { \
	        const char *tok_start = buf; \
	        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
	        int found2; \
	        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
	        if (!found2) { \
	            CHECK_EOF(); \
	        } \
	        while (1) { \
	            if (*buf == ' ') { \
	                break; \
	            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
	                if ((unsigned char)*buf < '\040' || *buf == '\177') { \
	                    *ret = -1; \
	                    return NULL; \
	                } \
	            } \
	            ++buf; \
	            CHECK_EOF(); \
	        } \
	        tok = tok_start; \
	        toklen = buf - tok_start; \
	    } while (0)
	95
	/* 256-entry table indexed by byte value: 1 marks a byte that parse_headers()
	 * accepts inside a header name, 0 marks a byte that makes it fail with -1.
	 * Each row below covers 32 consecutive byte values. */
	static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
	                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
	                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
	                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
	                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
	104
	/*
	 * Fast pre-scan for the first byte in [buf, buf_end) that falls inside one of
	 * the byte ranges given in `ranges` (consecutive low/high byte pairs,
	 * `ranges_size` bytes in total).
	 *
	 * With SSE4.2 the input is examined in whole 16-byte blocks only: on a hit
	 * *found is set to 1 and the address of the matching byte is returned;
	 * otherwise *found is 0 and the returned pointer is the first byte that was
	 * NOT examined (fewer than 16 bytes remain), which the caller must scan
	 * itself.  Without SSE4.2 the function is a no-op: it returns buf unchanged
	 * with *found == 0.
	 */
	static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
	{
	    *found = 0;
	#ifdef __SSE4_2__
	    if (likely(buf_end - buf >= 16)) {
	        __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges);

	        /* number of bytes covered by complete 16-byte blocks */
	        size_t left = (buf_end - buf) & ~15;
	        do {
	            __m128i b16 = _mm_loadu_si128((const __m128i *)buf);
	            int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
	            if (unlikely(r != 16)) {
	                /* r is the index of the first matching byte in this block */
	                buf += r;
	                *found = 1;
	                break;
	            }
	            buf += 16;
	            left -= 16;
	        } while (likely(left != 0));
	    }
	#else
	    /* suppress unused parameter warning */
	    (void)buf_end;
	    (void)ranges;
	    (void)ranges_size;
	#endif
	    return buf;
	}
	133
	/*
	 * Scan from buf to the end of the current line.
	 *
	 * Printable ASCII, HT and bytes with the high bit set are accepted as line
	 * content.  The first other control character (or DEL) ends the scan and must
	 * be a line terminator: either CR LF or a bare LF.
	 *
	 * On success *token / *token_len describe the line without its terminator and
	 * the returned pointer is the first byte after the terminator.  On failure
	 * NULL is returned with *ret set to -2 (input ended first) or -1 (illegal
	 * control character, or CR not followed by LF).
	 */
	static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
	{
	    const char *token_start = buf;

	#ifdef __SSE4_2__
	    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
	                                                "\012\037"  /* allow SP and up to but not including DEL */
	                                                "\177\177"; /* allow chars w. MSB set */
	    int found;
	    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
	    if (found)
	        goto FOUND_CTL;
	#else
	    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
	    while (likely(buf_end - buf >= 8)) {
	#define DOIT() \
	    do { \
	        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
	            goto NonPrintable; \
	        ++buf; \
	    } while (0)
	        DOIT();
	        DOIT();
	        DOIT();
	        DOIT();
	        DOIT();
	        DOIT();
	        DOIT();
	        DOIT();
	#undef DOIT
	        continue;
	    NonPrintable:
	        /* control char other than HT, or DEL: the line content ends here */
	        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
	            goto FOUND_CTL;
	        }
	        /* HT or high-bit byte: part of the content, keep going */
	        ++buf;
	    }
	#endif
	    /* byte-wise scan of whatever the fast paths above left over */
	    for (;; ++buf) {
	        CHECK_EOF();
	        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
	            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
	                goto FOUND_CTL;
	            }
	        }
	    }
	FOUND_CTL:
	    if (likely(*buf == '\015')) {
	        ++buf;
	        EXPECT_CHAR('\012');
	        /* buf is now past CR LF; exclude both from the token */
	        *token_len = buf - 2 - token_start;
	    } else if (*buf == '\012') {
	        *token_len = buf - token_start;
	        ++buf;
	    } else {
	        *ret = -1;
	        return NULL;
	    }
	    *token = token_start;

	    return buf;
	}
	196
	/*
	 * Cheap check for whether [buf, buf_end) already contains the blank line that
	 * ends a header block (two consecutive line terminators, each CR LF or LF).
	 *
	 * When last_len >= 3 the scan starts last_len - 3 bytes into the buffer
	 * instead of at its beginning.  The caller is expected to pass a last_len no
	 * larger than the buffer length; that is not verified here.
	 *
	 * Returns a pointer just past the blank line, or NULL with *ret set to -2
	 * (input ended before a blank line was seen) or -1 (CR not followed by LF).
	 */
	static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
	{
	    int ret_cnt = 0;
	    buf = last_len < 3 ? buf : buf + last_len - 3;

	    while (1) {
	        CHECK_EOF();
	        if (*buf == '\015') {
	            ++buf;
	            /* EXPECT_CHAR performs its own end-of-input check */
	            EXPECT_CHAR('\012');
	            ++ret_cnt;
	        } else if (*buf == '\012') {
	            ++buf;
	            ++ret_cnt;
	        } else {
	            ++buf;
	            ret_cnt = 0;
	        }
	        if (ret_cnt == 2) {
	            return buf;
	        }
	    }
	}
	224
	/* Parse the single decimal digit at *buf, store digit * mul_ through valp_
	 * and advance buf.  A non-digit fails the enclosing function with -1.  There
	 * is no bounds check: the caller must guarantee buf < buf_end.  Relies on the
	 * locals `buf` and `ret` of the enclosing function. */
	#define PARSE_INT(valp_, mul_) \
	    if (*buf < '0' || '9' < *buf) { \
	        buf++; \
	        *ret = -1; \
	        return NULL; \
	    } \
	    *(valp_) = (mul_) * (*buf++ - '0');

	/* Parse exactly three decimal digits into *valp_ (hundreds, tens, units).
	 * The caller must guarantee that at least three bytes are available. */
	#define PARSE_INT_3(valp_) \
	    do { \
	        int res_ = 0; \
	        PARSE_INT(&res_, 100) \
	        *valp_ = res_; \
	        PARSE_INT(&res_, 10) \
	        *valp_ += res_; \
	        PARSE_INT(&res_, 1) \
	        *valp_ += res_; \
	    } while (0)
	243
	/*
	 * Parse "HTTP/<major>" and, when major is 1, the following ".<minor>"; for
	 * any other major digit *minor_version is set to 0.
	 *
	 * At least 9 bytes must be available, otherwise NULL is returned with
	 * *ret = -2.  Malformed input yields NULL with *ret = -1.
	 *
	 * returned pointer is always within [buf, buf_end), or null
	 */
	static const char *parse_http_version(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *ret)
	{
	    /* we want at least [HTTP/1.<two chars>] to try to parse */
	    if (buf_end - buf < 9) {
	        *ret = -2;
	        return NULL;
	    }
	    EXPECT_CHAR_NO_CHECK('H');
	    EXPECT_CHAR_NO_CHECK('T');
	    EXPECT_CHAR_NO_CHECK('T');
	    EXPECT_CHAR_NO_CHECK('P');
	    EXPECT_CHAR_NO_CHECK('/');
	    PARSE_INT(major_version, 1);
	    if (*major_version == 1) {
	        EXPECT_CHAR_NO_CHECK('.');
	        PARSE_INT(minor_version, 1);
	    } else {
	        *minor_version = 0;
	    }
	    return buf;
	}
	266
	267	static const char parse_headers(const char buf, const char buf_end, struct phr_header headers, size_t *num_headers,
	268	size_t max_headers, int *ret)
	269	{
	270	for (;; ++*num_headers) {
	271	CHECK_EOF();
	272	if (*buf == '\015') {
	273	++buf;
	274	EXPECT_CHAR('\012');
	275	break;
	276	} else if (*buf == '\012') {
	277	++buf;
	278	break;
	279	}
	280	if (*num_headers == max_headers) {
	281	*ret = -1;
	282	return NULL;
	283	}
	284	if (!(num_headers != 0 && (buf == ' ' \|\| *buf == '\t'))) {
	285	/* parsing name, but do not discard SP before colon, see
	286	* http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
	287	headers[*num_headers].name = buf;
	288	static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
	289	"\"\"" /* 0x22 */
	290	"()" /* 0x28,0x29 */
	291	",," /* 0x2c */
	292	"//" /* 0x2f */
	293	":@" /* 0x3a-0x40 */
	294	"[]" /* 0x5b-0x5d */
	295	"{\377"; /* 0x7b-0xff */
	296	int found;
	297	buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
	298	if (!found) {
	299	CHECK_EOF();
	300	}
	301	while (1) {
	302	if (*buf == ':') {
	303	break;
	304	} else if (!token_char_map[(unsigned char)*buf]) {
	305	*ret = -1;
	306	return NULL;
	307	}
	308	++buf;
	309	CHECK_EOF();
	310	}
	311	if ((headers[num_headers].name_len = buf - headers[num_headers].name) == 0) {
	312	*ret = -1;
	313	return NULL;
	314	}
	315	++buf;
	316	for (;; ++buf) {
	317	CHECK_EOF();
	318	if (!(buf == ' ' \|\| buf == '\t')) {
	319	break;
	320	}
	321	}
	322	} else {
	323	headers[*num_headers].name = NULL;
	324	headers[*num_headers].name_len = 0;
	325	}
	326	const char *value;
	327	size_t value_len;
	328	if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
	329	return NULL;
	330	}
	331	/* remove trailing SPs and HTABs */
	332	const char *value_end = value + value_len;
	333	for (; value_end != value; --value_end) {
	334	const char c = *(value_end - 1);
	335	if (!(c == ' ' \|\| c == '\t')) {
	336	break;
	337	}
	338	}
	339	headers[*num_headers].value = value;
	340	headers[*num_headers].value_len = value_end - value;
	341	}
	342	return buf;
	343	}
	344
	/*
	 * Parse a request line ("METHOD SP path SP HTTP/x.y" + line terminator)
	 * followed by the header block.
	 *
	 * One empty line in front of the request line is tolerated.  Method and path
	 * are returned as pointer/length pairs into the input buffer.
	 *
	 * Returns the first byte after the header block, or NULL with *ret set to
	 * -2 (input incomplete) or -1 (malformed request).
	 */
	static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
	                                 size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers,
	                                 size_t max_headers, int *ret)
	{
	    /* skip first empty line (some clients add CRLF after POST content) */
	    CHECK_EOF();
	    if (*buf == '\015') {
	        ++buf;
	        EXPECT_CHAR('\012');
	    } else if (*buf == '\012') {
	        ++buf;
	    }

	    /* parse request line */
	    ADVANCE_TOKEN(*method, *method_len);
	    /* Skip the run of spaces after the method.  The end-of-input check must
	     * come before each dereference: the buffer may end right after a space. */
	    do {
	        ++buf;
	        CHECK_EOF();
	    } while (*buf == ' ');
	    ADVANCE_TOKEN(*path, *path_len);
	    do {
	        ++buf;
	        CHECK_EOF();
	    } while (*buf == ' ');
	    if (*method_len == 0 || *path_len == 0) {
	        *ret = -1;
	        return NULL;
	    }
	    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
	        return NULL;
	    }
	    /* parse_http_version() returns a pointer strictly below buf_end */
	    if (*buf == '\015') {
	        ++buf;
	        EXPECT_CHAR('\012');
	    } else if (*buf == '\012') {
	        ++buf;
	    } else {
	        *ret = -1;
	        return NULL;
	    }

	    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
	}
	386
	/*
	 * Parse an HTTP request held in buf_start[0 .. len).
	 *
	 * On entry *num_headers is the capacity of `headers`; on return it is the
	 * number of headers parsed.  All output parameters are reset before parsing
	 * starts (NULL / 0 / -1).
	 *
	 * last_len: when non-zero, is_complete() is consulted first so that a buffer
	 * that still lacks the terminating blank line is rejected without a full
	 * re-parse (slowloris countermeasure).
	 *
	 * Returns the number of bytes consumed, -1 on a parse error, or -2 when the
	 * request is incomplete.
	 */
	int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
	                      size_t *path_len, int *major_version, int *minor_version, struct phr_header *headers, size_t *num_headers,
	                      size_t last_len)
	{
	    const char *buf = buf_start, *buf_end = buf_start + len;
	    size_t max_headers = *num_headers;
	    int r;

	    *method = NULL;
	    *method_len = 0;
	    *path = NULL;
	    *path_len = 0;
	    *major_version = -1;
	    *minor_version = -1;
	    *num_headers = 0;

	    /* if last_len != 0, check if the request is complete (a fast countermeasure
	       against slowloris) */
	    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	        return r;
	    }

	    if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, major_version, minor_version, headers, num_headers,
	                             max_headers, &r)) == NULL) {
	        return r;
	    }

	    return (int)(buf - buf_start);
	}
	415
	/*
	 * Parse a status line ("HTTP/x.y SP 3-digit-status [SP message]" + line
	 * terminator) followed by the header block.
	 *
	 * *msg / *msg_len receive the reason phrase with leading spaces removed.
	 *
	 * Returns the first byte after the header block, or NULL with *ret set to
	 * -2 (input incomplete) or -1 (malformed response).
	 */
	static const char *parse_response(const char *buf, const char *buf_end, int *major_version, int *minor_version, int *status,
	                                  const char **msg, size_t *msg_len, struct phr_header *headers, size_t *num_headers,
	                                  size_t max_headers, int *ret)
	{
	    /* parse "HTTP/1.x" */
	    if ((buf = parse_http_version(buf, buf_end, major_version, minor_version, ret)) == NULL) {
	        return NULL;
	    }
	    /* skip space */
	    if (*buf != ' ') {
	        *ret = -1;
	        return NULL;
	    }
	    /* check for end of input before each dereference: the buffer may end
	     * right after a space */
	    do {
	        ++buf;
	        CHECK_EOF();
	    } while (*buf == ' ');
	    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
	    if (buf_end - buf < 4) {
	        *ret = -2;
	        return NULL;
	    }
	    PARSE_INT_3(status);

	    /* get message including preceding space */
	    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
	        return NULL;
	    }
	    if (*msg_len == 0) {
	        /* ok */
	    } else if (**msg == ' ') {
	        /* remove preceding space */
	        do {
	            ++*msg;
	            --*msg_len;
	        } while (**msg == ' ');
	    } else {
	        /* garbage found after status code */
	        *ret = -1;
	        return NULL;
	    }

	    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
	}
	458
	/*
	 * Parse an HTTP response held in buf_start[0 .. len).
	 *
	 * On entry *num_headers is the capacity of `headers`; on return it is the
	 * number of headers parsed.  All output parameters are reset before parsing
	 * starts.
	 *
	 * last_len: when non-zero, is_complete() is consulted first so that a buffer
	 * that still lacks the terminating blank line is rejected without a full
	 * re-parse (slowloris countermeasure).
	 *
	 * Returns the number of bytes consumed, -1 on a parse error, or -2 when the
	 * response is incomplete.
	 */
	int phr_parse_response(const char *buf_start, size_t len, int *major_version, int *minor_version, int *status, const char **msg,
	                       size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t last_len)
	{
	    const char *buf = buf_start, *buf_end = buf + len;
	    size_t max_headers = *num_headers;
	    int r;

	    *major_version = -1;
	    *minor_version = -1;
	    *status = 0;
	    *msg = NULL;
	    *msg_len = 0;
	    *num_headers = 0;

	    /* if last_len != 0, check if the response is complete (a fast countermeasure
	       against slowloris) */
	    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	        return r;
	    }

	    if ((buf = parse_response(buf, buf_end, major_version, minor_version, status, msg, msg_len, headers, num_headers, max_headers,
	                              &r)) == NULL) {
	        return r;
	    }

	    return (int)(buf - buf_start);
	}
	485
	/*
	 * Parse a bare header block (no request or status line) held in
	 * buf_start[0 .. len).
	 *
	 * On entry *num_headers is the capacity of `headers`; on return it is the
	 * number of headers parsed.  last_len has the same meaning as in
	 * phr_parse_request(): when non-zero, is_complete() is consulted first.
	 *
	 * Returns the number of bytes consumed, -1 on a parse error, or -2 when the
	 * block is incomplete.
	 */
	int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
	{
	    const char *buf = buf_start, *buf_end = buf + len;
	    size_t max_headers = *num_headers;
	    int r;

	    *num_headers = 0;

	    /* if last_len != 0, check if the response is complete (a fast countermeasure
	       against slowloris) */
	    if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) {
	        return r;
	    }

	    if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) {
	        return r;
	    }

	    return (int)(buf - buf_start);
	}
	506
	/* States of the chunked-transfer decoder, stored in
	 * phr_chunked_decoder::_state and dispatched on by phr_decode_chunked(). */
	enum {
	    CHUNKED_IN_CHUNK_SIZE,          /* reading the hexadecimal chunk size */
	    CHUNKED_IN_CHUNK_EXT,           /* skipping the rest of the chunk-size line */
	    CHUNKED_IN_CHUNK_DATA,          /* copying chunk payload bytes */
	    CHUNKED_IN_CHUNK_CRLF,          /* expecting the line terminator after the payload */
	    CHUNKED_IN_TRAILERS_LINE_HEAD,  /* at the start of a trailer line */
	    CHUNKED_IN_TRAILERS_LINE_MIDDLE /* inside a trailer line, looking for its LF */
	};
	515
	/* Convert one hexadecimal digit character (0-9, A-F, a-f) to its value
	 * 0..15.  Returns -1 for any other input. */
	static int decode_hex(int ch)
	{
	    if ('0' <= ch && ch <= '9') {
	        return ch - '0';
	    } else if ('A' <= ch && ch <= 'F') {
	        return ch - 'A' + 0xa;
	    } else if ('a' <= ch && ch <= 'f') {
	        return ch - 'a' + 0xa;
	    } else {
	        return -1;
	    }
	}
	528
	529	ssize_t phr_decode_chunked(struct phr_chunked_decoder decoder, char buf, size_t *_bufsz)
	530	{
	531	size_t dst = 0, src = 0, bufsz = *_bufsz;
	532	ssize_t ret = -2; /* incomplete */
	533
	534	while (1) {
	535	switch (decoder->_state) {
	536	case CHUNKED_IN_CHUNK_SIZE:
	537	for (;; ++src) {
	538	int v;
	539	if (src == bufsz)
	540	goto Exit;
	541	if ((v = decode_hex(buf[src])) == -1) {
	542	if (decoder->_hex_count == 0) {
	543	ret = -1;
	544	goto Exit;
	545	}
	546	break;
	547	}
	548	if (decoder->_hex_count == sizeof(size_t) * 2) {
	549	ret = -1;
	550	goto Exit;
	551	}
	552	decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
	553	++decoder->_hex_count;
	554	}
	555	decoder->_hex_count = 0;
	556	decoder->_state = CHUNKED_IN_CHUNK_EXT;
	557	/* fallthru */
	558	case CHUNKED_IN_CHUNK_EXT:
	559	/* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
	560	for (;; ++src) {
	561	if (src == bufsz)
	562	goto Exit;
	563	if (buf[src] == '\012')
	564	break;
	565	}
	566	++src;
	567	if (decoder->bytes_left_in_chunk == 0) {
	568	if (decoder->consume_trailer) {
	569	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
	570	break;
	571	} else {
	572	goto Complete;
	573	}
	574	}
	575	decoder->_state = CHUNKED_IN_CHUNK_DATA;
	576	/* fallthru */
	577	case CHUNKED_IN_CHUNK_DATA: {
	578	size_t avail = bufsz - src;
	579	if (avail < decoder->bytes_left_in_chunk) {
	580	if (dst != src)
	581	memmove(buf + dst, buf + src, avail);
	582	src += avail;
	583	dst += avail;
	584	decoder->bytes_left_in_chunk -= avail;
	585	goto Exit;
	586	}
	587	if (dst != src)
	588	memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
	589	src += decoder->bytes_left_in_chunk;
	590	dst += decoder->bytes_left_in_chunk;
	591	decoder->bytes_left_in_chunk = 0;
	592	decoder->_state = CHUNKED_IN_CHUNK_CRLF;
	593	}
	594	/* fallthru */
	595	case CHUNKED_IN_CHUNK_CRLF:
	596	for (;; ++src) {
	597	if (src == bufsz)
	598	goto Exit;
	599	if (buf[src] != '\015')
	600	break;
	601	}
	602	if (buf[src] != '\012') {
	603	ret = -1;
	604	goto Exit;
	605	}
	606	++src;
	607	decoder->_state = CHUNKED_IN_CHUNK_SIZE;
	608	break;
	609	case CHUNKED_IN_TRAILERS_LINE_HEAD:
	610	for (;; ++src) {
	611	if (src == bufsz)
	612	goto Exit;
	613	if (buf[src] != '\015')
	614	break;
	615	}
	616	if (buf[src++] == '\012')
	617	goto Complete;
	618	decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
	619	/* fallthru */
	620	case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
	621	for (;; ++src) {
	622	if (src == bufsz)
	623	goto Exit;
	624	if (buf[src] == '\012')
	625	break;
	626	}
	627	++src;
	628	decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
	629	break;
	630	default:
	631	assert(!"decoder is corrupt");
	632	}
	633	}
	634
	635	Complete:
	636	ret = bufsz - src;
	637	Exit:
	638	if (dst != src)
	639	memmove(buf + dst, buf + src, bufsz - src);
	640	*_bufsz = dst;
	641	return ret;
	642	}
	643
	644	int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
	645	{
	646	return decoder->_state == CHUNKED_IN_CHUNK_DATA;
	647	}
	648
	649	#undef CHECK_EOF
	650	#undef EXPECT_CHAR
	651	#undef ADVANCE_TOKEN