From afe92468a54ec44cdda35e46a1eabd0d0de78840 Mon Sep 17 00:00:00 2001 From: RincewindsHat <12514511+RincewindsHat@users.noreply.github.com> Date: Sun, 13 Nov 2022 23:07:14 +0100 Subject: Implement chunked encoding decoding diff --git a/plugins/check_http.c b/plugins/check_http.c index a2c7571..5710cfe 100644 --- a/plugins/check_http.c +++ b/plugins/check_http.c @@ -146,6 +146,7 @@ char *perfd_time_transfer(double microsec); char *perfd_size(int page_len); void print_help(void); void print_usage(void); +char *unchunk_content(char *content); int main(int argc, char **argv) { int result = STATE_UNKNOWN; @@ -1252,7 +1253,26 @@ int check_http(void) { } } - if (strlen(string_expect)) { + // At this point we should test if the content is chunked and unchunk it, so + // it can be searched (and possibly printed) + const char *chunked_header_regex_string = "Transfer-Encoding:\\s*chunked\\s*"CRLF; + regex_t chunked_header_regex; + + if (regcomp(&chunked_header_regex, chunked_header_regex_string, 0)) { + die(STATE_UNKNOWN, "HTTP %s: %s\n", state_text(STATE_UNKNOWN), "Failed to compile chunked_header_regex regex"); + } + + regmatch_t chre_pmatch[1]; // We actually do not care about this, since we only want to know IF it was found + + if (regexec(&chunked_header_regex, header, 1, chre_pmatch, 0) == 0) { + // We actually found the chunked header + char *tmp = unchunk_content(page); + if (tmp == NULL) { + die(STATE_UNKNOWN, "HTTP %s: %s\n", state_text(STATE_UNKNOWN), "Failed to unchunk message body"); + } + } + + if (strlen(string_expect) > 0) { if (!strstr(page, string_expect)) { // We found the string the body, the rest is for building the output char output_string_search[30] = ""; @@ -1342,6 +1362,87 @@ int check_http(void) { return STATE_UNKNOWN; } +/* Receivces a pointer to the beginning of the body of a HTTP message + * which is chunked and returns a pointer to a freshly allocated memory + * region containing the unchunked body or NULL if something failed. + * The result must be freed by the caller. + */ +char *unchunk_content(const char *content) { + // https://en.wikipedia.org/wiki/Chunked_transfer_encoding + // https://www.rfc-editor.org/rfc/rfc7230#section-4.1 + char *result = NULL; + size_t content_length = strlen(content); + char *start_of_chunk, end_of_chunk; + long size_of_chunk; + char *pointer = content; + char *endptr; + long length_of_chunk = 0; + size_t overall_size = 0; + char *result_ptr; + + while (true) { + size_of_chunk = strtol(pointer, &endptr, 16); + if (size_of_chunk == LONG_MIN || size_of_chunk == LONG_MAX) { + // Apparently underflow or overflow, should not happen + if (verbose) { + printf("Got an underflow or overflow from strtol at: %u\n", __LINE__); + } + return NULL; + } + if (endptr == pointer) { + // Apparently this was not a number + if (verbose) { + printf("Chunked content did not start with a number at all (Line: %u)\n", __LINE__); + } + return NULL + } + + // So, we got the length of the chunk + if (*endptr == ';') { + // Chunk extension starts here + // TODO + while (*endptr != '\r') { + endptr++; + } + } + + start_of_chunk = endptr + 2; + end_of_chunk = start_of_chunk + size_of_chunk; + length_of_chunk = end_of_chunk - start_of_chunk; + + if (length_of_chunk == 0) { + // Chunk length is 0, so this is the last one + break; + } + + overall_size += length_of_chunk; + + if (result == NULL) { + result = (char *)calloc(length_of_chunk, sizeof(char)); + if (result == NULL) { + if (verbose) { + printf("Failed to allocate memory for unchunked body\n"); + } + return NULL; + } + result_ptr = result; + } else { + void *tmp = realloc(result, overall_size); + if (tmp == NULL) { + if (verbose) { + printf("Failed to allocate memory for unchunked body\n"); + } + return NULL; + } + } + + memcpy(result_ptr, start_of_chunk, size_of_chunk); + result_ptr = result_ptr + size_of_chunk; + } + + return result +} + /* per RFC 2396 */ #define URI_HTTP "%5[HTPShtps]" #define URI_HOST \ -- cgit v0.10-9-g596f