summaryrefslogtreecommitdiffstats
path: root/plugins/picohttpparser/picohttpparser.c
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/picohttpparser/picohttpparser.c')
-rw-r--r--plugins/picohttpparser/picohttpparser.c645
1 files changed, 645 insertions, 0 deletions
diff --git a/plugins/picohttpparser/picohttpparser.c b/plugins/picohttpparser/picohttpparser.c
new file mode 100644
index 0000000..74ccc3e
--- /dev/null
+++ b/plugins/picohttpparser/picohttpparser.c
@@ -0,0 +1,645 @@
1/*
2 * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase,
3 * Shigeo Mitsunari
4 *
5 * The software is licensed under either the MIT License (below) or the Perl
6 * license.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to
10 * deal in the Software without restriction, including without limitation the
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 * sell copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 */
26
27#include <assert.h>
28#include <stddef.h>
29#include <string.h>
30#ifdef __SSE4_2__
31#ifdef _MSC_VER
32#include <nmmintrin.h>
33#else
34#include <x86intrin.h>
35#endif
36#endif
37#include "picohttpparser.h"
38
/* Branch-prediction hints: expand to __builtin_expect on GCC 3 and later, and
 * to the bare expression on every other compiler. */
#if __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

/* ALIGNED(n): alignment specifier placed inside a declaration, spelled the
 * MSVC way when _MSC_VER is defined and the GCC-attribute way otherwise. */
#ifdef _MSC_VER
#define ALIGNED(n) _declspec(align(n))
#else
#define ALIGNED(n) __attribute__((aligned(n)))
#endif

/* Non-zero for bytes 0x20 (SP) through 0x7e ('~'). Subtracting 040 in unsigned
 * arithmetic wraps smaller values around, so one comparison covers both the
 * lower and the upper bound. */
#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u)
54
/* The parsing macros below expect `buf`, `buf_end` and `ret` to be in scope.
 * They report failure by storing a code in *ret and executing `return NULL`
 * in the enclosing function: -2 when the input ran out, -1 on unexpected
 * input. */

/* Fail with -2 when every byte up to buf_end has been consumed. */
#define CHECK_EOF() \
    if (buf == buf_end) { \
        *ret = -2; \
        return NULL; \
    }

/* Consume one byte and fail with -1 unless it equals ch. No bounds check is
 * made, so buf must already be known to be readable. */
#define EXPECT_CHAR_NO_CHECK(ch) \
    if (*buf++ != ch) { \
        *ret = -1; \
        return NULL; \
    }

/* EXPECT_CHAR_NO_CHECK preceded by an end-of-input check. */
#define EXPECT_CHAR(ch) \
    CHECK_EOF(); \
    EXPECT_CHAR_NO_CHECK(ch);

/* Scan from buf up to (not including) the next SP and store the start and the
 * length of the scanned bytes in tok / toklen; buf is left pointing at the SP.
 * A byte below 0x20 or equal to DEL (0x7f) fails with -1, running out of input
 * fails with -2. Bytes with the high bit set are not rejected.
 * findchar_fast() is called first to skip ahead to the first SP/control/DEL
 * candidate; the byte-wise loop then handles whatever it left unscanned. */
#define ADVANCE_TOKEN(tok, toklen) \
    do { \
        const char *tok_start = buf; \
        static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \
        int found2; \
        buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \
        if (!found2) { \
            CHECK_EOF(); \
        } \
        while (1) { \
            if (*buf == ' ') { \
                break; \
            } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \
                if ((unsigned char)*buf < '\040' || *buf == '\177') { \
                    *ret = -1; \
                    return NULL; \
                } \
            } \
            ++buf; \
            CHECK_EOF(); \
        } \
        tok = tok_start; \
        toklen = buf - tok_start; \
    } while (0)
95
/* 256-entry lookup table indexed by an unsigned byte value, eight rows of 32
 * entries each. An entry is 1 when the byte may appear in a header name and 0
 * otherwise: digits, letters and the punctuation ! # $ % & ' * + - . ^ _ ` | ~
 * are marked, everything else (controls, SP, separators, DEL and all bytes
 * >= 0x80) is not. */
static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0"
                                    "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1"
                                    "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
                                    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
104
/*
 * Skips ahead in [buf, buf_end) looking for the first byte that falls into one
 * of the inclusive byte ranges given as lo/hi pairs in `ranges`
 * (`ranges_size` is the number of bytes in use; 16 bytes are always loaded).
 *
 * Only whole 16-byte blocks are examined, and only when built with SSE4.2.
 * When a match is found, *found is set to 1 and the match position is
 * returned. Otherwise *found is 0 and the returned pointer is the first byte
 * that was NOT examined, so the caller has to continue byte by byte from
 * there. Without SSE4.2 the function returns buf unchanged with *found == 0.
 */
static const char *findchar_fast(const char *buf, const char *buf_end, const char *ranges, size_t ranges_size, int *found)
{
    *found = 0;
#if __SSE4_2__
    if (likely(buf_end - buf >= 16)) {
        const __m128i range_set = _mm_loadu_si128((const __m128i *)ranges);
        size_t remaining;

        /* remaining starts as a non-zero multiple of 16, so at least one block is scanned */
        for (remaining = (buf_end - buf) & ~15; likely(remaining != 0); remaining -= 16, buf += 16) {
            const __m128i block = _mm_loadu_si128((const __m128i *)buf);
            const int idx =
                _mm_cmpestri(range_set, ranges_size, block, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS);
            if (unlikely(idx != 16)) {
                /* idx is the offset of the first matching byte within this block */
                buf += idx;
                *found = 1;
                break;
            }
        }
    }
#else
    /* suppress unused parameter warning */
    (void)buf_end;
    (void)ranges;
    (void)ranges_size;
#endif
    return buf;
}
133
/*
 * Scans one line starting at buf.
 *
 * On success the start of the line is stored in *token, its length without the
 * line terminator in *token_len, and the position just past the terminator
 * (CRLF or a bare LF) is returned. HT and bytes with the high bit set are
 * accepted inside the line. Any other control character or DEL that is not the
 * terminator stores -1 in *ret; running out of input stores -2. In both
 * failure cases NULL is returned and *token / *token_len are left untouched.
 */
static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret)
{
    const char *token_start = buf;

#ifdef __SSE4_2__
    /* ranges that stop the scan; the gaps between them are the bytes allowed in a line */
    static const char ALIGNED(16) ranges1[16] = "\0\010"    /* allow HT */
                                                "\012\037"  /* allow SP and up to but not including DEL */
                                                "\177\177"; /* allow chars w. MSB set */
    int found;
    buf = findchar_fast(buf, buf_end, ranges1, 6, &found);
    if (found)
        goto FOUND_CTL;
#else
    /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */
    while (likely(buf_end - buf >= 8)) {
#define DOIT() \
    do { \
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \
            goto NonPrintable; \
        ++buf; \
    } while (0)
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
        DOIT();
#undef DOIT
        continue;
    NonPrintable:
        /* HT and high-bit bytes are not printable ASCII but are still allowed; step over them */
        if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
            goto FOUND_CTL;
        }
        ++buf;
    }
#endif
    /* byte-wise scan of whatever the fast path above left unexamined */
    for (;; ++buf) {
        CHECK_EOF();
        if (unlikely(!IS_PRINTABLE_ASCII(*buf))) {
            if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) {
                goto FOUND_CTL;
            }
        }
    }
FOUND_CTL:
    /* buf points at the control character that stopped the scan */
    if (likely(*buf == '\015')) {
        ++buf;
        EXPECT_CHAR('\012');
        /* buf is now past CR LF, hence the -2 */
        *token_len = buf - 2 - token_start;
    } else if (*buf == '\012') {
        *token_len = buf - token_start;
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }
    *token = token_start;

    return buf;
}
196
/*
 * Checks whether [buf, buf_end) already contains the empty line that ends a
 * header block, i.e. two consecutive line terminators (each CRLF or bare LF).
 *
 * last_len is the number of bytes that were already present on the previous
 * attempt; scanning restarts three bytes before that point so a terminator
 * split across two reads is still seen. The start pointer is advanced by
 * last_len - 3 without any bounds check, so last_len must not exceed the
 * buffer length.
 *
 * Returns the position just past the empty line. Returns NULL with *ret == -2
 * when the input ends first, or with *ret == -1 when a CR is not followed by
 * LF.
 */
static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret)
{
    int ret_cnt = 0; /* number of consecutive line terminators seen so far */
    buf = last_len < 3 ? buf : buf + last_len - 3;

    /* the loop is only left through a return, either below or inside the macros */
    while (1) {
        CHECK_EOF();
        if (*buf == '\015') {
            ++buf;
            /* EXPECT_CHAR performs its own end-of-input check */
            EXPECT_CHAR('\012');
            ++ret_cnt;
        } else if (*buf == '\012') {
            ++buf;
            ++ret_cnt;
        } else {
            ++buf;
            ret_cnt = 0;
        }
        if (ret_cnt == 2) {
            return buf;
        }
    }
}
224
/* Parse one decimal digit at buf, store digit * mul_ through valp_ and advance
 * buf past it. A non-digit stores -1 in *ret and returns NULL from the
 * enclosing function. No end-of-input check is made, so the caller must
 * already know that buf is readable. The expansion ends in its own semicolon
 * and is not wrapped in do { } while (0). */
#define PARSE_INT(valp_, mul_) \
    if (*buf < '0' || '9' < *buf) { \
        buf++; \
        *ret = -1; \
        return NULL; \
    } \
    *(valp_) = (mul_) * (*buf++ - '0');

/* Parse exactly three decimal digits at buf into *valp_ (hundreds, tens,
 * ones). Fails like PARSE_INT on the first non-digit; the caller must
 * guarantee that three bytes are readable. */
#define PARSE_INT_3(valp_) \
    do { \
        int res_ = 0; \
        PARSE_INT(&res_, 100) \
        *valp_ = res_; \
        PARSE_INT(&res_, 10) \
        *valp_ += res_; \
        PARSE_INT(&res_, 1) \
        *valp_ += res_; \
    } while (0)
243
/*
 * Parses "HTTP/1.<digit>" at buf and stores the digit in *minor_version.
 *
 * At least nine bytes must be available (the eight parsed here plus one that
 * follows); otherwise *ret is set to -2. Any mismatch sets *ret to -1. NULL is
 * returned in both failure cases.
 *
 * returned pointer is always within [buf, buf_end), or null
 */
static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret)
{
    static const char version_prefix[] = "HTTP/1.";
    const size_t prefix_len = sizeof(version_prefix) - 1;

    /* we want at least [HTTP/1.<two chars>] to try to parse */
    if (buf_end - buf < 9) {
        *ret = -2;
        return NULL;
    }
    if (memcmp(buf, version_prefix, prefix_len) != 0) {
        *ret = -1;
        return NULL;
    }
    buf += prefix_len;
    PARSE_INT(minor_version, 1);
    return buf;
}
262
263static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers,
264 size_t max_headers, int *ret)
265{
266 for (;; ++*num_headers) {
267 CHECK_EOF();
268 if (*buf == '\015') {
269 ++buf;
270 EXPECT_CHAR('\012');
271 break;
272 } else if (*buf == '\012') {
273 ++buf;
274 break;
275 }
276 if (*num_headers == max_headers) {
277 *ret = -1;
278 return NULL;
279 }
280 if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) {
281 /* parsing name, but do not discard SP before colon, see
282 * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */
283 headers[*num_headers].name = buf;
284 static const char ALIGNED(16) ranges1[] = "\x00 " /* control chars and up to SP */
285 "\"\"" /* 0x22 */
286 "()" /* 0x28,0x29 */
287 ",," /* 0x2c */
288 "//" /* 0x2f */
289 ":@" /* 0x3a-0x40 */
290 "[]" /* 0x5b-0x5d */
291 "{\377"; /* 0x7b-0xff */
292 int found;
293 buf = findchar_fast(buf, buf_end, ranges1, sizeof(ranges1) - 1, &found);
294 if (!found) {
295 CHECK_EOF();
296 }
297 while (1) {
298 if (*buf == ':') {
299 break;
300 } else if (!token_char_map[(unsigned char)*buf]) {
301 *ret = -1;
302 return NULL;
303 }
304 ++buf;
305 CHECK_EOF();
306 }
307 if ((headers[*num_headers].name_len = buf - headers[*num_headers].name) == 0) {
308 *ret = -1;
309 return NULL;
310 }
311 ++buf;
312 for (;; ++buf) {
313 CHECK_EOF();
314 if (!(*buf == ' ' || *buf == '\t')) {
315 break;
316 }
317 }
318 } else {
319 headers[*num_headers].name = NULL;
320 headers[*num_headers].name_len = 0;
321 }
322 const char *value;
323 size_t value_len;
324 if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) {
325 return NULL;
326 }
327 /* remove trailing SPs and HTABs */
328 const char *value_end = value + value_len;
329 for (; value_end != value; --value_end) {
330 const char c = *(value_end - 1);
331 if (!(c == ' ' || c == '\t')) {
332 break;
333 }
334 }
335 headers[*num_headers].value = value;
336 headers[*num_headers].value_len = value_end - value;
337 }
338 return buf;
339}
340
/*
 * Parses a request line ("METHOD SP path SP HTTP/1.x" followed by CRLF or LF)
 * and the header block after it.
 *
 * One leading empty line is skipped. The method and path are returned as
 * pointer/length pairs into the input, the minor HTTP version through
 * *minor_version, and headers through headers[] / *num_headers.
 *
 * Returns the position just past the header block. Returns NULL with
 * *ret == -2 when the input ends early, or with *ret == -1 on malformed input.
 */
static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path,
                                 size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers,
                                 size_t max_headers, int *ret)
{
    /* skip first empty line (some clients add CRLF after POST content) */
    CHECK_EOF();
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    }

    /* parse request line */
    ADVANCE_TOKEN(*method, *method_len);
    /* skip the separating spaces; check for end of input before every read so
     * a buffer that ends right after a space is never read past its end */
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    ADVANCE_TOKEN(*path, *path_len);
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    if (*method_len == 0 || *path_len == 0) {
        *ret = -1;
        return NULL;
    }
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* parse_http_version leaves at least one readable byte at buf */
    if (*buf == '\015') {
        ++buf;
        EXPECT_CHAR('\012');
    } else if (*buf == '\012') {
        ++buf;
    } else {
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
382
/*
 * Parses an HTTP request held in buf_start[0..len).
 *
 * On entry *num_headers is the capacity of headers[]; on return it is the
 * number of entries filled in. All other outputs are reset before parsing.
 * last_len is the buffer length seen on a previous incomplete attempt, or 0.
 *
 * Returns the number of bytes consumed on success, otherwise the negative code
 * reported by the parsing helpers (-1 for malformed input, -2 for incomplete
 * input).
 */
int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path,
                      size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *parsed_end;
    int r;

    *method = NULL;
    *method_len = 0;
    *path = NULL;
    *path_len = 0;
    *minor_version = -1;
    *num_headers = 0;

    /* if last_len != 0, check if the request is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    parsed_end = parse_request(buf_start, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers,
                               max_headers, &r);
    if (parsed_end == NULL) {
        return r;
    }

    return (int)(parsed_end - buf_start);
}
410
/*
 * Parses a status line ("HTTP/1.x SP 3-digit-status [SP message]" followed by
 * CRLF or LF) and the header block after it.
 *
 * The minor HTTP version, status code and message (pointer/length into the
 * input, leading spaces removed) are returned through the output parameters,
 * headers through headers[] / *num_headers.
 *
 * Returns the position just past the header block. Returns NULL with
 * *ret == -2 when the input ends early, or with *ret == -1 on malformed input.
 */
static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg,
                                  size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret)
{
    /* parse "HTTP/1.x" */
    if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) {
        return NULL;
    }
    /* skip space */
    if (*buf != ' ') {
        *ret = -1;
        return NULL;
    }
    /* check for end of input before every read so a buffer that ends right
     * after a space is never read past its end */
    do {
        ++buf;
        CHECK_EOF();
    } while (*buf == ' ');
    /* parse status code, we want at least [:digit:][:digit:][:digit:]<other char> to try to parse */
    if (buf_end - buf < 4) {
        *ret = -2;
        return NULL;
    }
    PARSE_INT_3(status);

    /* get message including preceding space */
    if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) {
        return NULL;
    }
    if (*msg_len == 0) {
        /* ok */
    } else if (**msg == ' ') {
        /* remove preceding space; the line terminator that follows the message
         * stops this loop even when the message consists of spaces only */
        do {
            ++*msg;
            --*msg_len;
        } while (**msg == ' ');
    } else {
        /* garbage found after status code */
        *ret = -1;
        return NULL;
    }

    return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret);
}
453
/*
 * Parses an HTTP response held in buf_start[0..len).
 *
 * On entry *num_headers is the capacity of headers[]; on return it is the
 * number of entries filled in. All other outputs are reset before parsing.
 * last_len is the buffer length seen on a previous incomplete attempt, or 0.
 *
 * Returns the number of bytes consumed on success, otherwise the negative code
 * reported by the parsing helpers (-1 for malformed input, -2 for incomplete
 * input).
 */
int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len,
                       struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *parsed_end;
    int r;

    *minor_version = -1;
    *status = 0;
    *msg = NULL;
    *msg_len = 0;
    *num_headers = 0;

    /* if last_len != 0, check if the response is complete (a fast countermeasure
       against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    parsed_end = parse_response(buf_start, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r);
    if (parsed_end == NULL) {
        return r;
    }

    return (int)(parsed_end - buf_start);
}
479
/*
 * Parses a bare header block held in buf_start[0..len).
 *
 * On entry *num_headers is the capacity of headers[]; on return it is the
 * number of entries filled in. last_len is the buffer length seen on a
 * previous incomplete attempt, or 0.
 *
 * Returns the number of bytes consumed on success, otherwise the negative code
 * reported by the parsing helpers (-1 for malformed input, -2 for incomplete
 * input).
 */
int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len)
{
    const char *const buf_end = buf_start + len;
    const size_t max_headers = *num_headers;
    const char *parsed_end;
    int r;

    *num_headers = 0;

    /* if last_len != 0, check if the header block is complete (a fast
       countermeasure against slowloris) */
    if (last_len != 0) {
        if (is_complete(buf_start, buf_end, last_len, &r) == NULL) {
            return r;
        }
    }

    parsed_end = parse_headers(buf_start, buf_end, headers, num_headers, max_headers, &r);
    if (parsed_end == NULL) {
        return r;
    }

    return (int)(parsed_end - buf_start);
}
500
/* States of the chunked decoder, stored in phr_chunked_decoder::_state and
 * driven by phr_decode_chunked(). */
enum {
    CHUNKED_IN_CHUNK_SIZE,          /* reading the hex digits of a chunk size */
    CHUNKED_IN_CHUNK_EXT,           /* skipping the rest of the chunk-size line up to LF */
    CHUNKED_IN_CHUNK_DATA,          /* copying chunk payload bytes */
    CHUNKED_IN_CHUNK_CRLF,          /* expecting the line terminator that follows chunk data */
    CHUNKED_IN_TRAILERS_LINE_HEAD,  /* at the start of a trailer line; an empty line ends decoding */
    CHUNKED_IN_TRAILERS_LINE_MIDDLE /* inside a trailer line, skipping up to LF */
};
509
/* Returns the value (0-15) of the hexadecimal digit ch, accepting both upper
 * and lower case, or -1 when ch is not a hexadecimal digit. */
static int decode_hex(int ch)
{
    if (ch >= '0' && ch <= '9') {
        return ch - '0';
    }
    if (ch >= 'a' && ch <= 'f') {
        return 0xa + (ch - 'a');
    }
    if (ch >= 'A' && ch <= 'F') {
        return 0xa + (ch - 'A');
    }
    return -1;
}
522
523ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz)
524{
525 size_t dst = 0, src = 0, bufsz = *_bufsz;
526 ssize_t ret = -2; /* incomplete */
527
528 while (1) {
529 switch (decoder->_state) {
530 case CHUNKED_IN_CHUNK_SIZE:
531 for (;; ++src) {
532 int v;
533 if (src == bufsz)
534 goto Exit;
535 if ((v = decode_hex(buf[src])) == -1) {
536 if (decoder->_hex_count == 0) {
537 ret = -1;
538 goto Exit;
539 }
540 break;
541 }
542 if (decoder->_hex_count == sizeof(size_t) * 2) {
543 ret = -1;
544 goto Exit;
545 }
546 decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v;
547 ++decoder->_hex_count;
548 }
549 decoder->_hex_count = 0;
550 decoder->_state = CHUNKED_IN_CHUNK_EXT;
551 /* fallthru */
552 case CHUNKED_IN_CHUNK_EXT:
553 /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */
554 for (;; ++src) {
555 if (src == bufsz)
556 goto Exit;
557 if (buf[src] == '\012')
558 break;
559 }
560 ++src;
561 if (decoder->bytes_left_in_chunk == 0) {
562 if (decoder->consume_trailer) {
563 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
564 break;
565 } else {
566 goto Complete;
567 }
568 }
569 decoder->_state = CHUNKED_IN_CHUNK_DATA;
570 /* fallthru */
571 case CHUNKED_IN_CHUNK_DATA: {
572 size_t avail = bufsz - src;
573 if (avail < decoder->bytes_left_in_chunk) {
574 if (dst != src)
575 memmove(buf + dst, buf + src, avail);
576 src += avail;
577 dst += avail;
578 decoder->bytes_left_in_chunk -= avail;
579 goto Exit;
580 }
581 if (dst != src)
582 memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk);
583 src += decoder->bytes_left_in_chunk;
584 dst += decoder->bytes_left_in_chunk;
585 decoder->bytes_left_in_chunk = 0;
586 decoder->_state = CHUNKED_IN_CHUNK_CRLF;
587 }
588 /* fallthru */
589 case CHUNKED_IN_CHUNK_CRLF:
590 for (;; ++src) {
591 if (src == bufsz)
592 goto Exit;
593 if (buf[src] != '\015')
594 break;
595 }
596 if (buf[src] != '\012') {
597 ret = -1;
598 goto Exit;
599 }
600 ++src;
601 decoder->_state = CHUNKED_IN_CHUNK_SIZE;
602 break;
603 case CHUNKED_IN_TRAILERS_LINE_HEAD:
604 for (;; ++src) {
605 if (src == bufsz)
606 goto Exit;
607 if (buf[src] != '\015')
608 break;
609 }
610 if (buf[src++] == '\012')
611 goto Complete;
612 decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE;
613 /* fallthru */
614 case CHUNKED_IN_TRAILERS_LINE_MIDDLE:
615 for (;; ++src) {
616 if (src == bufsz)
617 goto Exit;
618 if (buf[src] == '\012')
619 break;
620 }
621 ++src;
622 decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD;
623 break;
624 default:
625 assert(!"decoder is corrupt");
626 }
627 }
628
629Complete:
630 ret = bufsz - src;
631Exit:
632 if (dst != src)
633 memmove(buf + dst, buf + src, bufsz - src);
634 *_bufsz = dst;
635 return ret;
636}
637
638int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder)
639{
640 return decoder->_state == CHUNKED_IN_CHUNK_DATA;
641}
642
643#undef CHECK_EOF
644#undef EXPECT_CHAR
645#undef ADVANCE_TOKEN