From 4becafaff05d998667843a63cf4dc193c768a330 Mon Sep 17 00:00:00 2001 From: Nicholas Wilson Date: Mon, 2 Dec 2024 12:57:26 +0000 Subject: [PATCH 1/2] Improve coverage of escapes in character classes --- src/pcre2_compile.c | 23 ++++++++++++++++++--- testdata/testinput2 | 31 ++++++++++++++++++++++++++++ testdata/testinput21 | 2 ++ testdata/testinput23 | 2 ++ testdata/testoutput2 | 48 +++++++++++++++++++++++++++++++++++++++++++ testdata/testoutput21 | 3 +++ testdata/testoutput23 | 3 +++ 7 files changed, 109 insertions(+), 3 deletions(-) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 70c117b2c..a116f2e67 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1628,6 +1628,7 @@ else hptr >= ptrend || /* Hit end of input */ *hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */ { + if (isclass) break; /* In a class, just treat as '\u' literal */ escape = ESC_ub; /* Special return */ ptr++; /* Skip { */ break; /* Hex escape not recognized */ @@ -4284,6 +4285,11 @@ while (ptr < ptrend) char_is_literal = FALSE; goto CLASS_LITERAL; + case ESC_k: + c = CHAR_k; /* \k is not special in a class, just like \g */ + char_is_literal = FALSE; + goto CLASS_LITERAL; + case ESC_Q: inescq = TRUE; /* Enter literal mode */ goto CLASS_CONTINUE; @@ -4295,7 +4301,7 @@ while (ptr < ptrend) case ESC_R: case ESC_X: errorcode = ERR7; - ptr--; + ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; case ESC_N: /* Not permitted by Perl either */ @@ -4342,9 +4348,20 @@ while (ptr < ptrend) #endif break; /* End \P and \p */ - default: /* All others are not allowed in a class */ + /* All others are not allowed in a class */ + + default: + PCRE2_DEBUG_UNREACHABLE(); + /* Fall through */ + + case ESC_A: + case ESC_Z: + case ESC_z: + case ESC_G: + case ESC_K: + case ESC_C: errorcode = ERR7; - ptr--; + ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549 goto FAILED; } diff --git a/testdata/testinput2 b/testdata/testinput2 index d9e1dda7f..e73ab8eb7 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -7255,4 +7255,35 @@ a)"xI /[A-\\]/B +/[\A]/ + +/[\Z]/ + +/[\z]/ + +/[\G]/ + +/[\K]/ + +/[\g<1>]/ + < + g +\= Expect no match + \\ + +/[\k<1>]/ + < + k +\= Expect no match + \\ + +/[\u{ 1z}]/alt_bsux,extra_alt_bsux + u + { + } + \x20 + 1 +\= Expect no match + \\ + # End of testinput2 diff --git a/testdata/testinput21 b/testdata/testinput21 index 1d1fbedf0..5904af3de 100644 --- a/testdata/testinput21 +++ b/testdata/testinput21 @@ -13,4 +13,6 @@ /(?<=ab\Cde)X/ abZdeX +/[\C]/ + # End of testinput21 diff --git a/testdata/testinput23 b/testdata/testinput23 index d0a9bc4f5..8a1f4cc49 100644 --- a/testdata/testinput23 +++ b/testdata/testinput23 @@ -4,4 +4,6 @@ /a\Cb/ +/a[\C]b/ + # End of testinput23 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 0f3b8fc55..fd26e4441 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -20933,6 +20933,54 @@ Failed: error 106 at offset 3: missing terminating ] for character class End ------------------------------------------------------------------ +/[\A]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\Z]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\z]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\G]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\K]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + +/[\g<1>]/ + < + 0: < + g + 0: g +\= Expect no match + \\ +No match + +/[\k<1>]/ + < + 0: < + k + 0: k +\= Expect no match + \\ +No match + +/[\u{ 1z}]/alt_bsux,extra_alt_bsux + u + 0: u + { + 0: { + } + 0: } + \x20 + 0: + 1 + 0: 1 +\= Expect no match + \\ +No match + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data diff --git a/testdata/testoutput21 b/testdata/testoutput21 index fbd74004e..3ded7ed28 100644 --- a/testdata/testoutput21 +++ b/testdata/testoutput21 @@ -91,4 +91,7 @@ Subject length lower bound = 5 abZdeX 0: X +/[\C]/ +Failed: error 107 at offset 2: escape sequence is invalid in character class + # End of testinput21 diff --git a/testdata/testoutput23 b/testdata/testoutput23 index c6f0aa21f..2914767cb 100644 --- a/testdata/testoutput23 +++ b/testdata/testoutput23 @@ -5,4 +5,7 @@ /a\Cb/ Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library +/a[\C]b/ +Failed: error 107 at offset 3: escape sequence is invalid in character class + # End of testinput23 From 6297dd3daf288156fbb964b8ae5829d42355a7e8 Mon Sep 17 00:00:00 2001 From: Nicholas Wilson Date: Mon, 2 Dec 2024 15:43:09 +0000 Subject: [PATCH 2/2] Small coverage improvement in pcre2_match.c --- src/pcre2_match.c | 60 +++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 7e6a5a90f..dc3edb8a6 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -541,38 +541,46 @@ For hard partial matching, we immediately return a partial match. Otherwise, carrying on means that a complete match on the current subject will be sought. A partial match is returned only if no complete match can be found. */ -#define CHECK_PARTIAL()\ - if (Feptr >= mb->end_subject) \ - { \ - SCHECK_PARTIAL(); \ - } - -#define SCHECK_PARTIAL()\ - if (mb->partial != 0 && \ - (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ - { \ - mb->hitend = TRUE; \ - if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ - } +#define CHECK_PARTIAL() \ + do { \ + if (Feptr >= mb->end_subject) \ + { \ + SCHECK_PARTIAL(); \ + } \ + } \ + while (0) + +#define SCHECK_PARTIAL() \ + do { \ + if (mb->partial != 0 && \ + (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \ + { \ + mb->hitend = TRUE; \ + if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \ + } \ + } \ + while (0) /* These macros are used to implement backtracking. They simulate a recursive call to the match() function by means of a local vector of frames which remember the backtracking points. */ -#define RMATCH(ra,rb)\ - {\ - start_ecode = ra;\ - Freturn_id = rb;\ - goto MATCH_RECURSE;\ - L_##rb:;\ - } - -#define RRETURN(ra)\ - {\ - rrc = ra;\ - goto RETURN_SWITCH;\ - } +#define RMATCH(ra,rb) \ + do { \ + start_ecode = ra; \ + Freturn_id = rb; \ + goto MATCH_RECURSE; \ + L_##rb:; \ + } \ + while (0) + +#define RRETURN(ra) \ + do { \ + rrc = ra; \ + goto RETURN_SWITCH; \ + } \ + while (0)