From c55c3f65c4aabf6153cf2ea7a75167af70214f0f Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Thu, 9 Nov 2023 20:11:46 +0100 Subject: [PATCH 01/11] include config, enrich differential output --- src/pcre2_fuzzsupport.c | 204 ++++++++++++++++++++++++++++++---------- 1 file changed, 152 insertions(+), 52 deletions(-) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index a2585180f..a4bf3a18e 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -14,6 +14,7 @@ Written by Philip Hazel, October 2016 #include #include +#include "config.h" #define PCRE2_CODE_UNIT_WIDTH 8 #include "pcre2.h" @@ -36,6 +37,148 @@ Written by Philip Hazel, October 2016 PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \ PCRE2_PARTIAL_SOFT) +static void print_compile_options(FILE *stream, uint32_t compile_options) +{ +fprintf(stream, "Compile options %.8x never_backslash_c", compile_options); +fprintf(stream, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "", + ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "", + ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "", + ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "", + ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "", + ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "", + ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "", + ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "", + ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "", + ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", + ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "", + ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "", + ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "", + ((compile_options & PCRE2_MULTILINE) != 0)? 
",multiline" : "", + ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "", + ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "", + ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "", + ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "", + ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "", + ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "", + ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "", + ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "", + ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "", + ((compile_options & PCRE2_UTF) != 0)? ",utf" : ""); +} + +static void print_match_options(FILE *stream, uint32_t match_options) +{ +fprintf(stream, "Match options %.8x", match_options); +fprintf(stream, "%s%s%s%s%s%s%s%s%s\n", + ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", + ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", + ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", + ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", + ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", + ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "", + ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "", + ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "", + ((match_options & PCRE2_PARTIAL_SOFT) != 0)? 
",partial_soft" : ""); +} + +static void dump_matches(FILE *stream, pcre2_match_data *match_data, pcre2_match_context *match_context) +{ +PCRE2_UCHAR error_buf[256]; +int errorcode; +uint32_t ovector_count = pcre2_get_ovector_count(match_data); + +for (uint32_t ovector = ovector_count; ovector < ovector_count; ovector++) + { + PCRE2_UCHAR *bufferptr = NULL; + PCRE2_SIZE bufflen = 0; + + errorcode = pcre2_substring_get_bynumber(match_data, ovector, &bufferptr, &bufflen); + + if (errorcode >= 0) + { + fprintf(stream, "Match %d (hex encoded): ", ovector); + for (PCRE2_SIZE i = 0; i < bufflen; i++) + { + fprintf(stderr, "%02x", bufferptr[i]); + } + fprintf(stderr, "\n"); + } + else + { + pcre2_get_error_message(errorcode, error_buf, 256); + fprintf(stream, "Match %d failed: %s\n", ovector, error_buf); + } + } +} + +/* This function describes the current test case being evaluated, then aborts */ + +#ifdef SUPPORT_JIT +static void describe_failure( + const char *task, + const unsigned char *data, + size_t size, + uint32_t compile_options, + uint32_t match_options, + int errorcode, + pcre2_match_data *match_data, + int errorcode_jit, + pcre2_match_data *match_data_jit, + pcre2_match_context *match_context +) { +PCRE2_UCHAR buffer[256]; + +fprintf(stderr, "Encountered failure while performing %s; context:\n", task); + +fprintf(stderr, "Pattern/sample string (hex encoded): "); +for (size_t i = 0; i < size; i++) + { + fprintf(stderr, "%02x", data[i]); + } +fprintf(stderr, "\n"); + +print_compile_options(stderr, compile_options); +print_match_options(stderr, match_options); + +if (errorcode < 0) + { + pcre2_get_error_message(errorcode, buffer, 256); + fprintf(stderr, "Non-JIT'd operation emitted an error: %s\n", buffer); + } +else + { + fprintf(stderr, "Non-JIT'd operation did not emit an error.\n"); + if (match_data != NULL) + { + fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", pcre2_get_ovector_count(match_data)); + dump_matches(stderr, match_data, 
match_context); + fprintf(stderr, "\n"); + } + } + +if (errorcode_jit < 0) + { + pcre2_get_error_message(errorcode_jit, buffer, 256); + fprintf(stderr, "JIT'd operation emitted an error: %s\n", buffer); + } +else + { + fprintf(stderr, "JIT'd operation did not emit an error.\n"); + if (match_data_jit != NULL) + { + fprintf(stderr, "%d matches discovered by JIT'd regex:\n", pcre2_get_ovector_count(match_data_jit)); + dump_matches(stderr, match_data_jit, match_context); + fprintf(stderr, "\n"); + } + } + +abort(); +} +#endif + /* This is the callout function. Its only purpose is to halt matching if there are more than 100 callouts, as one way of stopping too much time being spent on fruitless matches. The callout data is a pointer to the counter. */ @@ -110,34 +253,7 @@ for (i = 0; i < 2; i++) pcre2_code *code; #ifdef STANDALONE - printf("Compile options %.8x never_backslash_c", compile_options); - printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "", - ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "", - ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "", - ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "", - ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", - ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "", - ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "", - ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "", - ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "", - ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "", - ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", - ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "", - ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "", - ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "", - ((compile_options & PCRE2_MULTILINE) != 0)? 
",multiline" : "", - ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "", - ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "", - ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "", - ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "", - ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "", - ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", - ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "", - ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "", - ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "", - ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "", - ((compile_options & PCRE2_UTF) != 0)? ",utf" : ""); + print_compile_options(stdout, compile_options); #endif code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options, @@ -169,7 +285,7 @@ for (i = 0; i < 2; i++) #endif { #ifdef STANDALONE - printf("** Failed to create match data block\n"); + fprintf(stderr, "** Failed to create match data block\n"); #endif abort(); } @@ -181,7 +297,7 @@ for (i = 0; i < 2; i++) if (match_context == NULL) { #ifdef STANDALONE - printf("** Failed to create match context block\n"); + fprintf(stderr, "** Failed to create match context block\n"); #endif abort(); } @@ -195,18 +311,7 @@ for (i = 0; i < 2; i++) for (j = 0; j < 2; j++) { #ifdef STANDALONE - printf("Match options %.8x", match_options); - printf("%s%s%s%s%s%s%s%s%s%s\n", - ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "", - ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "", - ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "", - ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "", - ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "", - ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "", - ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? 
",notempty_atstart" : "", - ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "", - ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "", - ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : ""); + print_match_options(stdout, match_options); #endif callout_count = 0; @@ -231,16 +336,14 @@ for (i = 0; i < 2; i++) if (errorcode_jit != errorcode) { - printf("JIT errorcode %d did not match original errorcode %d\n", errorcode_jit, errorcode); - abort(); + describe_failure("match errorcode comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); } ovector_count = pcre2_get_ovector_count(match_data); if (ovector_count != pcre2_get_ovector_count(match_data_jit)) { - puts("JIT ovector count did not match original"); - abort(); + describe_failure("ovector count comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); } for (uint32_t ovector = 0; ovector < ovector_count; ovector++) @@ -256,22 +359,19 @@ for (i = 0; i < 2; i++) if (errorcode != errorcode_jit) { - printf("when extracting substring, JIT errorcode %d did not match original %d\n", errorcode_jit, errorcode); - abort(); + describe_failure("ovector entry errorcode comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); } if (errorcode >= 0) { if (bufflen != bufflen_jit) { - printf("when extracting substring, JIT buffer length %zu did not match original %zu\n", bufflen_jit, bufflen); - abort(); + describe_failure("ovector entry length comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); } if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0) { - puts("when extracting substring, JIT buffer contents did not match original"); - abort(); + describe_failure("ovector entry content comparison", data, size, 
compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); } } From 044375f0e14bbe913a0ee7f01b8c2f2330234aa2 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Thu, 9 Nov 2023 20:25:03 +0100 Subject: [PATCH 02/11] fix additional misunderstanding --- src/pcre2_fuzzsupport.c | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index a4bf3a18e..e3fb5ecf7 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -84,32 +84,31 @@ fprintf(stream, "%s%s%s%s%s%s%s%s%s\n", ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : ""); } -static void dump_matches(FILE *stream, pcre2_match_data *match_data, pcre2_match_context *match_context) +static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data, pcre2_match_context *match_context) { PCRE2_UCHAR error_buf[256]; int errorcode; -uint32_t ovector_count = pcre2_get_ovector_count(match_data); -for (uint32_t ovector = ovector_count; ovector < ovector_count; ovector++) +for (uint32_t index = 0; index < count; index++) { PCRE2_UCHAR *bufferptr = NULL; PCRE2_SIZE bufflen = 0; - errorcode = pcre2_substring_get_bynumber(match_data, ovector, &bufferptr, &bufflen); + errorcode = pcre2_substring_get_bynumber(match_data, index, &bufferptr, &bufflen); if (errorcode >= 0) { - fprintf(stream, "Match %d (hex encoded): ", ovector); + fprintf(stream, "Match %d (hex encoded): ", index); for (PCRE2_SIZE i = 0; i < bufflen; i++) { - fprintf(stderr, "%02x", bufferptr[i]); + fprintf(stream, "%02x", bufferptr[i]); } - fprintf(stderr, "\n"); + fprintf(stream, "\n"); } else { pcre2_get_error_message(errorcode, error_buf, 256); - fprintf(stream, "Match %d failed: %s\n", ovector, error_buf); + fprintf(stream, "Match %d failed: %s\n", index, error_buf); } } } @@ -153,8 +152,8 @@ else fprintf(stderr, "Non-JIT'd operation did not emit an error.\n"); if (match_data != NULL) 
{ - fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", pcre2_get_ovector_count(match_data)); - dump_matches(stderr, match_data, match_context); + fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", errorcode); + dump_matches(stderr, errorcode, match_data, match_context); fprintf(stderr, "\n"); } } @@ -169,8 +168,8 @@ else fprintf(stderr, "JIT'd operation did not emit an error.\n"); if (match_data_jit != NULL) { - fprintf(stderr, "%d matches discovered by JIT'd regex:\n", pcre2_get_ovector_count(match_data_jit)); - dump_matches(stderr, match_data_jit, match_context); + fprintf(stderr, "%d matches discovered by JIT'd regex:\n", errorcode_jit); + dump_matches(stderr, errorcode_jit, match_data_jit, match_context); fprintf(stderr, "\n"); } } @@ -247,7 +246,6 @@ for (i = 0; i < 2; i++) int errorcode; #ifdef SUPPORT_JIT int errorcode_jit; - uint32_t ovector_count; #endif PCRE2_SIZE erroroffset; pcre2_code *code; @@ -339,14 +337,7 @@ for (i = 0; i < 2; i++) describe_failure("match errorcode comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); } - ovector_count = pcre2_get_ovector_count(match_data); - - if (ovector_count != pcre2_get_ovector_count(match_data_jit)) - { - describe_failure("ovector count comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); - } - - for (uint32_t ovector = 0; ovector < ovector_count; ovector++) + for (int index = 0; index < errorcode; index++) { PCRE2_UCHAR *bufferptr, *bufferptr_jit; PCRE2_SIZE bufflen, bufflen_jit; @@ -354,8 +345,8 @@ for (i = 0; i < 2; i++) bufferptr = bufferptr_jit = NULL; bufflen = bufflen_jit = 0; - errorcode = pcre2_substring_get_bynumber(match_data, ovector, &bufferptr, &bufflen); - errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, ovector, &bufferptr_jit, &bufflen_jit); + errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, 
&bufferptr, &bufflen); + errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit); if (errorcode != errorcode_jit) { From 3c95411f3c830aa046f869be11a5aecf27f6710d Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Fri, 10 Nov 2023 15:56:32 +0100 Subject: [PATCH 03/11] improve output for clarity --- src/pcre2_fuzzsupport.c | 78 ++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index e3fb5ecf7..3e2e844d6 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -123,8 +123,10 @@ static void describe_failure( uint32_t compile_options, uint32_t match_options, int errorcode, - pcre2_match_data *match_data, int errorcode_jit, + int matches, + int matches_jit, + pcre2_match_data *match_data, pcre2_match_data *match_data_jit, pcre2_match_context *match_context ) { @@ -145,15 +147,15 @@ print_match_options(stderr, match_options); if (errorcode < 0) { pcre2_get_error_message(errorcode, buffer, 256); - fprintf(stderr, "Non-JIT'd operation emitted an error: %s\n", buffer); + fprintf(stderr, "Non-JIT'd operation emitted an error: %s (%d)\n", buffer, errorcode); } -else +if (matches >= 0) { fprintf(stderr, "Non-JIT'd operation did not emit an error.\n"); if (match_data != NULL) { - fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", errorcode); - dump_matches(stderr, errorcode, match_data, match_context); + fprintf(stderr, "%d matches discovered by non-JIT'd regex:\n", matches); + dump_matches(stderr, matches, match_data, match_context); fprintf(stderr, "\n"); } } @@ -161,15 +163,15 @@ else if (errorcode_jit < 0) { pcre2_get_error_message(errorcode_jit, buffer, 256); - fprintf(stderr, "JIT'd operation emitted an error: %s\n", buffer); + fprintf(stderr, "JIT'd operation emitted an error: %s (%d)\n", buffer, errorcode_jit); } -else +if (matches_jit >= 0) { fprintf(stderr, "JIT'd operation did not emit an 
error.\n"); if (match_data_jit != NULL) { - fprintf(stderr, "%d matches discovered by JIT'd regex:\n", errorcode_jit); - dump_matches(stderr, errorcode_jit, match_data_jit, match_context); + fprintf(stderr, "%d matches discovered by JIT'd regex:\n", matches_jit); + dump_matches(stderr, matches_jit, match_data_jit, match_context); fprintf(stderr, "\n"); } } @@ -246,6 +248,8 @@ for (i = 0; i < 2; i++) int errorcode; #ifdef SUPPORT_JIT int errorcode_jit; + int matches = 0; + int matches_jit = 0; #endif PCRE2_SIZE erroroffset; pcre2_code *code; @@ -332,42 +336,52 @@ for (i = 0; i < 2; i++) errorcode_jit = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0, match_options & ~PCRE2_NO_JIT, match_data_jit, match_context); + matches = errorcode; + matches_jit = errorcode_jit; + if (errorcode_jit != errorcode) { - describe_failure("match errorcode comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); + if (!(errorcode < 0 && errorcode_jit < 0) && + errorcode != PCRE2_ERROR_MATCHLIMIT && + errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT) + { + describe_failure("match errorcode comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context); + } } - - for (int index = 0; index < errorcode; index++) + else { - PCRE2_UCHAR *bufferptr, *bufferptr_jit; - PCRE2_SIZE bufflen, bufflen_jit; - - bufferptr = bufferptr_jit = NULL; - bufflen = bufflen_jit = 0; + for (int index = 0; index < errorcode; index++) + { + PCRE2_UCHAR *bufferptr, *bufferptr_jit; + PCRE2_SIZE bufflen, bufflen_jit; - errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen); - errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit); + bufferptr = bufferptr_jit = NULL; + bufflen = bufflen_jit = 0; - if (errorcode != 
errorcode_jit) - { - describe_failure("ovector entry errorcode comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); - } + errorcode = pcre2_substring_get_bynumber(match_data, (uint32_t) index, &bufferptr, &bufflen); + errorcode_jit = pcre2_substring_get_bynumber(match_data_jit, (uint32_t) index, &bufferptr_jit, &bufflen_jit); - if (errorcode >= 0) - { - if (bufflen != bufflen_jit) + if (errorcode != errorcode_jit) { - describe_failure("ovector entry length comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); + describe_failure("match entry errorcode comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context); } - if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0) + if (errorcode >= 0) { - describe_failure("ovector entry content comparison", data, size, compile_options, match_options, errorcode, match_data, errorcode_jit, match_data_jit, match_context); + if (bufflen != bufflen_jit) + { + describe_failure("match entry length comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context); + } + + if (memcmp(bufferptr, bufferptr_jit, bufflen) != 0) + { + describe_failure("match entry content comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context); + } } - } - pcre2_substring_free(bufferptr); - pcre2_substring_free(bufferptr_jit); + pcre2_substring_free(bufferptr); + pcre2_substring_free(bufferptr_jit); + } } } #endif From ced89ed754648b31c26baa6d431b8b4f835dbf02 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Sat, 11 Nov 2023 18:17:24 +0100 Subject: [PATCH 04/11] ignore callout errors --- src/pcre2_fuzzsupport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index 3e2e844d6..f226b765b 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -342,8 +342,8 @@ for (i = 0; i < 2; i++) if (errorcode_jit != errorcode) { if (!(errorcode < 0 && errorcode_jit < 0) && - errorcode != PCRE2_ERROR_MATCHLIMIT && - errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT) + errorcode != PCRE2_ERROR_MATCHLIMIT && errorcode != PCRE2_ERROR_CALLOUT && + errorcode_jit != PCRE2_ERROR_MATCHLIMIT && errorcode_jit != PCRE2_ERROR_JIT_STACKLIMIT && errorcode_jit != PCRE2_ERROR_CALLOUT) { describe_failure("match errorcode comparison", data, size, compile_options, match_options, errorcode, errorcode_jit, matches, matches_jit, match_data, match_data_jit, match_context); } From 64dfd5674fb034636e87983e4d298be6e36b3ae2 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Tue, 14 Nov 2023 14:39:53 +0100 Subject: [PATCH 05/11] support 8-, 16-, and 32-bit modes --- .gitignore | 3 ++- Makefile.am | 52 ++++++++++++++++++++++++++++++++++------- src/pcre2_fuzzsupport.c | 39 ++++++++++++++++++++++++++++--- 3 files changed, 82 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index e55689a2d..b69a1d82b 100644 --- a/.gitignore +++ b/.gitignore @@ -50,7 +50,8 @@ pcre2posix_test.exe pcre2posix_test.log pcre2posix_test.trs pcre2demo -pcre2fuzzcheck +pcre2fuzzcheck-* +pcre2fuzzer-* pcre2grep pcre2grep.exe pcre2test diff --git a/Makefile.am b/Makefile.am index 13d1bc8c2..3e3eeb4f8 100644 --- a/Makefile.am +++ b/Makefile.am @@ -540,28 +540,64 @@ if WITH_GCOV pcre2grep_CFLAGS += $(GCOV_CFLAGS) pcre2grep_LDADD += $(GCOV_LIBS) endif # WITH_GCOV +endif # WITH_PCRE2_8 ## If fuzzer support is enabled, build a non-distributed library containing the ## fuzzing function. Also build the standalone checking binary from the same ## source but using -DSTANDALONE. 
if WITH_FUZZ_SUPPORT -noinst_LIBRARIES = .libs/libpcre2-fuzzsupport.a +noinst_LIBRARIES = +if WITH_PCRE2_8 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport.a _libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c _libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS) _libs_libpcre2_fuzzsupport_a_LIBADD = -noinst_PROGRAMS += pcre2fuzzcheck -pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c -pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -pcre2fuzzcheck_LDADD = libpcre2-8.la +noinst_PROGRAMS += pcre2fuzzcheck-8 +pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_8_CFLAGS = -DSTANDALONE $(AM_CFLAGS) +pcre2fuzzcheck_8_LDADD = libpcre2-8.la if WITH_GCOV -pcre2fuzzcheck_CFLAGS += $(GCOV_CFLAGS) -pcre2fuzzcheck_LDADD += $(GCOV_LIBS) +pcre2fuzzcheck_8_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_8_LDADD += $(GCOV_LIBS) endif # WITH_GCOV -endif # WITH FUZZ_SUPPORT endif # WITH_PCRE2_8 +if WITH_PCRE2_16 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-16.a +_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +_libs_libpcre2_fuzzsupport_16_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-16 +pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_16_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16 +pcre2fuzzcheck_16_LDADD = libpcre2-16.la +if WITH_GCOV +pcre2fuzzcheck_16_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_16_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_16 + +if WITH_PCRE2_32 +noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-32.a +_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c +_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +_libs_libpcre2_fuzzsupport_32_a_LIBADD = + +noinst_PROGRAMS += pcre2fuzzcheck-32 +pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c +pcre2fuzzcheck_32_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32 +pcre2fuzzcheck_32_LDADD = 
libpcre2-32.la +if WITH_GCOV +pcre2fuzzcheck_32_CFLAGS += $(GCOV_CFLAGS) +pcre2fuzzcheck_32_LDADD += $(GCOV_LIBS) +endif # WITH_GCOV +endif # WITH_PCRE2_32 + +endif # WITH_FUZZ_SUPPORT + ## -------- Testing ---------- ## If the 8-bit library is enabled, build the POSIX wrapper test program and diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index f226b765b..e635eb3c2 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -14,8 +14,11 @@ Written by Philip Hazel, October 2016 #include #include -#include "config.h" +#ifndef PCRE2_CODE_UNIT_WIDTH #define PCRE2_CODE_UNIT_WIDTH 8 +#endif + +#include "config.h" #include "pcre2.h" #define MAX_MATCH_SIZE 1000 @@ -86,7 +89,9 @@ fprintf(stream, "%s%s%s%s%s%s%s%s%s\n", static void dump_matches(FILE *stream, int count, pcre2_match_data *match_data, pcre2_match_context *match_context) { +#if PCRE2_CODE_UNIT_WIDTH == 8 PCRE2_UCHAR error_buf[256]; +#endif int errorcode; for (uint32_t index = 0; index < count; index++) @@ -107,8 +112,12 @@ for (uint32_t index = 0; index < count; index++) } else { +#if PCRE2_CODE_UNIT_WIDTH == 8 pcre2_get_error_message(errorcode, error_buf, 256); fprintf(stream, "Match %d failed: %s\n", index, error_buf); +#else + fprintf(stream, "Match %d failed: %d\n", index, errorcode); +#endif } } } @@ -130,7 +139,9 @@ static void describe_failure( pcre2_match_data *match_data_jit, pcre2_match_context *match_context ) { +#if PCRE2_CODE_UNIT_WIDTH == 8 PCRE2_UCHAR buffer[256]; +#endif fprintf(stderr, "Encountered failure while performing %s; context:\n", task); @@ -146,8 +157,12 @@ print_match_options(stderr, match_options); if (errorcode < 0) { +#if PCRE2_CODE_UNIT_WIDTH == 8 pcre2_get_error_message(errorcode, buffer, 256); fprintf(stderr, "Non-JIT'd operation emitted an error: %s (%d)\n", buffer, errorcode); +#else + fprintf(stderr, "Non-JIT'd operation emitted an error: %d\n", errorcode); +#endif } if (matches >= 0) { @@ -162,8 +177,12 @@ if (matches >= 0) if (errorcode_jit < 0) { 
+#if PCRE2_CODE_UNIT_WIDTH == 8 pcre2_get_error_message(errorcode_jit, buffer, 256); fprintf(stderr, "JIT'd operation emitted an error: %s (%d)\n", buffer, errorcode_jit); +#else + fprintf(stderr, "JIT'd operation emitted an error: %d\n", errorcode); +#endif } if (matches_jit >= 0) { @@ -220,6 +239,7 @@ in large trees taking too much time. */ random_options = *(uint64_t *)(data); data += sizeof(random_options); size -= sizeof(random_options); +size /= PCRE2_CODE_UNIT_WIDTH / 8; match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size; @@ -323,9 +343,13 @@ for (i = 0; i < 2; i++) #ifdef STANDALONE if (errorcode >= 0) printf("Match returned %d\n", errorcode); else { +#if PCRE2_CODE_UNIT_WIDTH == 8 unsigned char buffer[256]; pcre2_get_error_message(errorcode, buffer, 256); printf("Match failed: error %d: %s\n", errorcode, buffer); +#else + printf("Match failed: error %d\n", errorcode); +#endif } #endif @@ -417,9 +441,13 @@ for (i = 0; i < 2; i++) #ifdef STANDALONE if (errorcode >= 0) printf("Match returned %d\n", errorcode); else { +#if PCRE2_CODE_UNIT_WIDTH == 8 unsigned char buffer[256]; pcre2_get_error_message(errorcode, buffer, 256); printf("Match failed: error %d: %s\n", errorcode, buffer); +#else + printf("Match failed: error %d\n", errorcode); +#endif } #endif @@ -434,12 +462,17 @@ for (i = 0; i < 2; i++) else { +#ifdef STANDALONE +#if PCRE2_CODE_UNIT_WIDTH == 8 unsigned char buffer[256]; pcre2_get_error_message(errorcode, buffer, 256); -#ifdef STANDALONE printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer); #else - if (strstr((const char *)buffer, "internal error") != NULL) abort(); + printf("Error %d at offset %lu\n", errorcode, erroroffset); +#endif + +#else + if (errorcode == PCRE2_ERROR_INTERNAL) abort(); #endif } From 764a438ba14dabbd9c22741d39d93f9792326109 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 20 Nov 2023 18:06:57 +0100 Subject: [PATCH 06/11] try to enable MSAN support with JIT --- src/pcre2_jit_compile.c | 8 
+++++++- src/pcre2_jit_match.c | 13 +++++++++++++ src/sljit/sljitConfigInternal.h | 13 +++++++++++++ src/sljit/sljitNativeX86_common.c | 13 +++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index fbb30a4a2..050063ec6 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -43,6 +43,12 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + #include "pcre2_internal.h" #ifdef SUPPORT_JIT @@ -9830,7 +9836,7 @@ BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; diff --git a/src/pcre2_jit_match.c b/src/pcre2_jit_match.c index 1663a1e69..ae5903e20 100644 --- a/src/pcre2_jit_match.c +++ b/src/pcre2_jit_match.c @@ -42,6 +42,12 @@ POSSIBILITY OF SUCH DAMAGE. #error This file must be included from pcre2_jit_compile.c. 
#endif +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + #ifdef SUPPORT_JIT static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, jit_function executable_func) @@ -179,6 +185,13 @@ match_data->rightchar = 0; match_data->mark = arguments.mark_ptr; match_data->matchedby = PCRE2_MATCHEDBY_JIT; +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +if (rc > 0) + __msan_unpoison(match_data->ovector, 2 * rc * sizeof(match_data->ovector[0])); +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + return match_data->rc; #endif /* SUPPORT_JIT */ diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index d224248c8..ce4e7b04e 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -522,6 +522,19 @@ typedef double sljit_f64; #define SLJIT_FUNC #endif /* !SLJIT_FUNC */ +/* Disable instrumentation for these functions as they may not be sound */ +#ifndef SLJIT_FUNC_ATTRIBUTE +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define SLJIT_FUNC_ATTRIBUTE __attribute__((no_sanitize("memory"))) +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ +#endif + +#ifndef SLJIT_FUNC_ATTRIBUTE +#define SLJIT_FUNC_ATTRIBUTE +#endif + #ifndef SLJIT_INDIRECT_CALL #if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \ || ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX) diff --git a/src/sljit/sljitNativeX86_common.c b/src/sljit/sljitNativeX86_common.c index 369d8285d..c2c042134 100644 --- a/src/sljit/sljitNativeX86_common.c +++ b/src/sljit/sljitNativeX86_common.c @@ -24,6 +24,12 @@ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) { return "x86" SLJIT_CPUINFO; @@ -484,6 +490,13 @@ static void execute_cpu_id(sljit_u32 info[4]) } #endif /* _MSC_VER && _MSC_VER >= 1400 */ + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +__msan_unpoison(info, 4 * sizeof(sljit_u32)); +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + } static void get_cpu_features(void) From e7fa75a5f592a67666117b81e184d28977048e9c Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 20 Nov 2023 18:47:55 +0100 Subject: [PATCH 07/11] expand stack size implicitly --- src/pcre2_fuzzsupport.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index e635eb3c2..a66770d25 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -13,6 +13,13 @@ Written by Philip Hazel, October 2016 #include #include #include +#include + +/* stack size adjustment */ +#include +#include + +#define STACK_SIZE_MB 32 #ifndef PCRE2_CODE_UNIT_WIDTH #define PCRE2_CODE_UNIT_WIDTH 8 @@ -213,8 +220,31 @@ return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0; /* Putting in this apparently unnecessary prototype prevents gcc from giving a "no previous prototype" warning when compiling at high warning level. 
*/ +int LLVMFuzzerInitialize(int *, char ***); + int LLVMFuzzerTestOneInput(const unsigned char *, size_t); +/* Raise the soft stack limit to STACK_SIZE_MB MiB before fuzzing starts. +Exits if the hard limit is unreadable or too small, or the change fails. */ +int LLVMFuzzerInitialize(int *argc, char ***argv) +{ +struct rlimit rlim; +int rc = getrlimit(RLIMIT_STACK, &rlim); +rlim.rlim_cur = (rlim_t)STACK_SIZE_MB * 1024 * 1024; +if (rc != 0 || rlim.rlim_cur > rlim.rlim_max) + { + fprintf(stderr, "hard stack size limit is unavailable or too small (needed %dMiB)!\n", STACK_SIZE_MB); + _exit(1); + } +rc = setrlimit(RLIMIT_STACK, &rlim); +if (rc != 0) + { + fprintf(stderr, "failed to expand stack size\n"); + _exit(1); + } +return 0; +} + /* Here's the driving function. */ int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) @@ -496,6 +526,8 @@ int main(int argc, char **argv) { int i; +LLVMFuzzerInitialize(&argc, &argv); + if (argc < 2) { printf("** No arguments given\n"); From 2ec85cf145fd980abf9b22fac3282cde77e7c185 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Tue, 21 Nov 2023 21:28:23 +0100 Subject: [PATCH 08/11] use 256MB stack (!) to avoid overflow with link-size 4 --- src/pcre2_fuzzsupport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index a66770d25..c61e18f70 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -19,7 +19,7 @@ Written by Philip Hazel, October 2016 #include #include -#define STACK_SIZE_MB 32 +#define STACK_SIZE_MB 256 #ifndef PCRE2_CODE_UNIT_WIDTH #define PCRE2_CODE_UNIT_WIDTH 8 From a932a7d3be525b982eae6b9ff09a013ad64b9575 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Fri, 8 Dec 2023 15:06:52 +0100 Subject: [PATCH 09/11] try to make some dictionaries for 16-, 32-bit modes --- pcre2_fuzzer_16.dict | 50 ++++++++++++++++++++++++++++++++++++++++++++ pcre2_fuzzer_32.dict | 50 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 pcre2_fuzzer_16.dict create mode 100644 pcre2_fuzzer_32.dict diff --git a/pcre2_fuzzer_16.dict b/pcre2_fuzzer_16.dict new file mode 100644 index 000000000..100a5b71f
--- /dev/null +++ b/pcre2_fuzzer_16.dict @@ -0,0 +1,50 @@ +# This is an attempt at a fuzzer dictionary for PCRE2. + +"\\\x00A\x00" +"\\\x00b\x00" +"\\\x00B\x00" +"\\\x00d\x00" +"\\\x00D\x00" +"\\\x00h\x00" +"\\\x00H\x00" +"\\\x00n\x00" +"\\\x00N\x00" +"\\\x00s\x00" +"\\\x00S\x00" +"\\\x00w\x00" +"\\\x00W\x00" +"\\\x00z\x00" +"\\\x00Z\x00" + +"(\x00?\x00" +"(\x00?\x00:\x00" +"(\x00?\x00>\x00" +"(\x00?\x00=\x00" +"(\x00?\x00!\x00" +"(\x00?\x00<\x00=\x00" +"(\x00?\x00<\x00!\x00" +"(\x00?\x00|\x00" + +"[\x00:\x00a\x00l\x00n\x00u\x00m\x00:\x00]\x00" +"[\x00:\x00a\x00l\x00p\x00h\x00a\x00:\x00]\x00" +"[\x00:\x00a\x00s\x00c\x00i\x00i\x00:\x00]\x00" +"[\x00:\x00b\x00l\x00a\x00n\x00k\x00:\x00]\x00" +"[\x00:\x00c\x00n\x00t\x00r\x00l\x00:\x00]\x00" +"[\x00:\x00d\x00i\x00g\x00i\x00t\x00:\x00]\x00" +"[\x00:\x00g\x00r\x00a\x00p\x00h\x00:\x00]\x00" +"[\x00:\x00l\x00o\x00w\x00e\x00r\x00:\x00]\x00" +"[\x00:\x00p\x00r\x00i\x00n\x00t\x00:\x00]\x00" +"[\x00:\x00p\x00u\x00n\x00c\x00t\x00:\x00]\x00" +"[\x00:\x00s\x00p\x00a\x00c\x00e\x00:\x00]\x00" +"[\x00:\x00u\x00p\x00p\x00e\x00r\x00:\x00]\x00" +"[\x00:\x00w\x00o\x00r\x00d\x00:\x00]\x00" +"[\x00:\x00x\x00d\x00i\x00g\x00i\x00t\x00:\x00]\x00" + +"(\x00*\x00A\x00C\x00C\x00E\x00P\x00T\x00)\x00" +"(\x00*\x00F\x00A\x00I\x00L\x00)\x00" +"(\x00*\x00C\x00O\x00M\x00M\x00I\x00T\x00)\x00" +"(\x00*\x00P\x00R\x00U\x00N\x00E\x00)\x00" +"(\x00*\x00S\x00K\x00I\x00P\x00)\x00" +"(\x00*\x00T\x00H\x00E\x00N\x00)\x00" + +# End diff --git a/pcre2_fuzzer_32.dict b/pcre2_fuzzer_32.dict new file mode 100644 index 000000000..8b962d5d8 --- /dev/null +++ b/pcre2_fuzzer_32.dict @@ -0,0 +1,50 @@ +# This is an attempt at a fuzzer dictionary for PCRE2.
+ +"\\\x00\x00\x00A\x00\x00\x00" +"\\\x00\x00\x00b\x00\x00\x00" +"\\\x00\x00\x00B\x00\x00\x00" +"\\\x00\x00\x00d\x00\x00\x00" +"\\\x00\x00\x00D\x00\x00\x00" +"\\\x00\x00\x00h\x00\x00\x00" +"\\\x00\x00\x00H\x00\x00\x00" +"\\\x00\x00\x00n\x00\x00\x00" +"\\\x00\x00\x00N\x00\x00\x00" +"\\\x00\x00\x00s\x00\x00\x00" +"\\\x00\x00\x00S\x00\x00\x00" +"\\\x00\x00\x00w\x00\x00\x00" +"\\\x00\x00\x00W\x00\x00\x00" +"\\\x00\x00\x00z\x00\x00\x00" +"\\\x00\x00\x00Z\x00\x00\x00" + +"(\x00\x00\x00?\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00:\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00>\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00=\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00!\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00<\x00\x00\x00=\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00<\x00\x00\x00!\x00\x00\x00" +"(\x00\x00\x00?\x00\x00\x00|\x00\x00\x00" + +"[\x00\x00\x00:\x00\x00\x00a\x00\x00\x00l\x00\x00\x00n\x00\x00\x00u\x00\x00\x00m\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00a\x00\x00\x00l\x00\x00\x00p\x00\x00\x00h\x00\x00\x00a\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00a\x00\x00\x00s\x00\x00\x00c\x00\x00\x00i\x00\x00\x00i\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00b\x00\x00\x00l\x00\x00\x00a\x00\x00\x00n\x00\x00\x00k\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00c\x00\x00\x00n\x00\x00\x00t\x00\x00\x00r\x00\x00\x00l\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00d\x00\x00\x00i\x00\x00\x00g\x00\x00\x00i\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00g\x00\x00\x00r\x00\x00\x00a\x00\x00\x00p\x00\x00\x00h\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00l\x00\x00\x00o\x00\x00\x00w\x00\x00\x00e\x00\x00\x00r\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00p\x00\x00\x00r\x00\x00\x00i\x00\x00\x00n\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" 
+"[\x00\x00\x00:\x00\x00\x00p\x00\x00\x00u\x00\x00\x00n\x00\x00\x00c\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00c\x00\x00\x00e\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00u\x00\x00\x00p\x00\x00\x00p\x00\x00\x00e\x00\x00\x00r\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00w\x00\x00\x00o\x00\x00\x00r\x00\x00\x00d\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" +"[\x00\x00\x00:\x00\x00\x00x\x00\x00\x00d\x00\x00\x00i\x00\x00\x00g\x00\x00\x00i\x00\x00\x00t\x00\x00\x00:\x00\x00\x00]\x00\x00\x00" + +"(\x00\x00\x00*\x00\x00\x00A\x00\x00\x00C\x00\x00\x00C\x00\x00\x00E\x00\x00\x00P\x00\x00\x00T\x00\x00\x00)\x00\x00\x00" +"(\x00\x00\x00*\x00\x00\x00F\x00\x00\x00A\x00\x00\x00I\x00\x00\x00L\x00\x00\x00)\x00\x00\x00" +"(\x00\x00\x00*\x00\x00\x00C\x00\x00\x00O\x00\x00\x00M\x00\x00\x00M\x00\x00\x00I\x00\x00\x00T\x00\x00\x00)\x00\x00\x00" +"(\x00\x00\x00*\x00\x00\x00P\x00\x00\x00R\x00\x00\x00U\x00\x00\x00N\x00\x00\x00E\x00\x00\x00)\x00\x00\x00" +"(\x00\x00\x00*\x00\x00\x00S\x00\x00\x00K\x00\x00\x00I\x00\x00\x00P\x00\x00\x00)\x00\x00\x00" +"(\x00\x00\x00*\x00\x00\x00T\x00\x00\x00H\x00\x00\x00E\x00\x00\x00N\x00\x00\x00)\x00\x00\x00" + +# End From 789be0325d41ae4f336131a388647535b5ce6ce9 Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Fri, 8 Dec 2023 15:09:44 +0100 Subject: [PATCH 10/11] add to options --- pcre2_fuzzer_16.options | 2 ++ pcre2_fuzzer_32.options | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 pcre2_fuzzer_16.options create mode 100644 pcre2_fuzzer_32.options diff --git a/pcre2_fuzzer_16.options b/pcre2_fuzzer_16.options new file mode 100644 index 000000000..1b6d2e670 --- /dev/null +++ b/pcre2_fuzzer_16.options @@ -0,0 +1,2 @@ +[libfuzzer] +dict = pcre2_fuzzer_16.dict diff --git a/pcre2_fuzzer_32.options b/pcre2_fuzzer_32.options new file mode 100644 index 000000000..14a007a42 --- /dev/null +++ b/pcre2_fuzzer_32.options @@ -0,0 +1,2 @@ +[libfuzzer] 
+dict = pcre2_fuzzer_32.dict From dccf1afe578449006fe3570bd14e3c0fd4ba5bcf Mon Sep 17 00:00:00 2001 From: Addison Crump Date: Mon, 5 Feb 2024 20:42:49 +0100 Subject: [PATCH 11/11] disable recurseloop check in fuzzer --- src/pcre2_fuzzsupport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pcre2_fuzzsupport.c b/src/pcre2_fuzzsupport.c index c61e18f70..7decec550 100644 --- a/src/pcre2_fuzzsupport.c +++ b/src/pcre2_fuzzsupport.c @@ -281,7 +281,9 @@ reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because compile_options = ((random_options >> 32) & ALLOWED_COMPILE_OPTIONS) | PCRE2_NEVER_BACKSLASH_C; -match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) | PCRE2_NO_JIT; +match_options = (((uint32_t)random_options) & ALLOWED_MATCH_OPTIONS) | + PCRE2_NO_JIT | + PCRE2_DISABLE_RECURSELOOP_CHECK; /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not allowed together and just give an immediate error return. */