Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,7 @@ else
hptr >= ptrend || /* Hit end of input */
*hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */
{
if (isclass) break; /* In a class, just treat as '\u' literal */
escape = ESC_ub; /* Special return */
ptr++; /* Skip { */
break; /* Hex escape not recognized */
Expand Down Expand Up @@ -4284,6 +4285,11 @@ while (ptr < ptrend)
char_is_literal = FALSE;
goto CLASS_LITERAL;

case ESC_k:
c = CHAR_k; /* \k is not special in a class, just like \g */
char_is_literal = FALSE;
goto CLASS_LITERAL;

case ESC_Q:
inescq = TRUE; /* Enter literal mode */
goto CLASS_CONTINUE;
Expand All @@ -4295,7 +4301,7 @@ while (ptr < ptrend)
case ESC_R:
case ESC_X:
errorcode = ERR7;
ptr--;
ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549
goto FAILED;

case ESC_N: /* Not permitted by Perl either */
Expand Down Expand Up @@ -4342,9 +4348,20 @@ while (ptr < ptrend)
#endif
break; /* End \P and \p */

default: /* All others are not allowed in a class */
/* All others are not allowed in a class */

default:
PCRE2_DEBUG_UNREACHABLE();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not allowed or not possible?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not possible (as far as I can tell). I have gone through check_escape() and made a list of every ESC_ code it can return when called with isclass=TRUE, and added a case label and a test for each of them. No other escapes should be able to reach this switch, so I added a catch-all default that only triggers in Debug builds.

/* Fall through */

case ESC_A:
case ESC_Z:
case ESC_z:
case ESC_G:
case ESC_K:
case ESC_C:
errorcode = ERR7;
ptr--;
ptr--; // TODO https://github.com/PCRE2Project/pcre2/issues/549
goto FAILED;
}

Expand Down
60 changes: 34 additions & 26 deletions src/pcre2_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,38 +541,46 @@ For hard partial matching, we immediately return a partial match. Otherwise,
carrying on means that a complete match on the current subject will be sought.
A partial match is returned only if no complete match can be found. */

#define CHECK_PARTIAL()\
if (Feptr >= mb->end_subject) \
{ \
SCHECK_PARTIAL(); \
}

#define SCHECK_PARTIAL()\
if (mb->partial != 0 && \
(Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
{ \
mb->hitend = TRUE; \
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
}
/* CHECK_PARTIAL() runs the SCHECK_PARTIAL() test only when Feptr is at or
beyond mb->end_subject; otherwise it does nothing. Feptr and mb must be in
scope at the point of use. Because it expands SCHECK_PARTIAL(), an invocation
may return from the enclosing function. The do { ... } while (0) wrapper makes
an invocation followed by a semicolon a single statement. */

#define CHECK_PARTIAL() \
do { \
if (Feptr >= mb->end_subject) \
{ \
SCHECK_PARTIAL(); \
} \
} \
while (0)

/* SCHECK_PARTIAL() performs the partial-match test without first comparing
Feptr with the end of the subject. When mb->partial is non-zero and either
Feptr is beyond mb->start_used_ptr or mb->allowemptypartial is set, it sets
mb->hitend to TRUE. If, in addition, mb->partial is greater than 1 (presumably
the "hard" partial setting - confirm against where mb->partial is assigned),
it returns PCRE2_ERROR_PARTIAL from the enclosing function. Note the hidden
return: this macro can only be used inside a function whose return type can
carry that error code. */

#define SCHECK_PARTIAL() \
do { \
if (mb->partial != 0 && \
(Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
{ \
mb->hitend = TRUE; \
if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
} \
} \
while (0)


/* These macros are used to implement backtracking. They simulate a recursive
call to the match() function by means of a local vector of frames which
remember the backtracking points. */

#define RMATCH(ra,rb)\
{\
start_ecode = ra;\
Freturn_id = rb;\
goto MATCH_RECURSE;\
L_##rb:;\
}

#define RRETURN(ra)\
{\
rrc = ra;\
goto RETURN_SWITCH;\
}
/* RMATCH(ra,rb) stores ra in start_ecode and rb in Freturn_id, then jumps to
the MATCH_RECURSE label. It also defines the label L_<rb> immediately after
the goto; presumably this is where control resumes once the simulated call
returns, selected via Freturn_id (the dispatch code is not visible here).
Because rb is pasted into a label name, each rb value can be used in only one
RMATCH() invocation within the enclosing function. The label sits inside the
do { ... } while (0) body, so after resuming there the loop condition is
evaluated once and execution continues with the statement following the
invocation. */

#define RMATCH(ra,rb) \
do { \
start_ecode = ra; \
Freturn_id = rb; \
goto MATCH_RECURSE; \
L_##rb:; \
} \
while (0)
Comment on lines +569 to +576
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the calling code for the macros like RMATCH(...); uses a trailing semicolon.

The existing code expanded to a bare block (a compound statement) rather than a single statement, so the trailing semicolon became a separate, harmless empty statement. However, those empty statements were polluting the coverage statistics: they were never reached, so they were reported as unreached lines of code.

Using a do {...} while (0) is a good way to avoid surprises with macros.


/* RRETURN(ra) stores ra in rrc and jumps to the RETURN_SWITCH label, which is
defined elsewhere in the enclosing function (presumably the code that
dispatches back to the matching L_<id> label - not visible here). Nothing
after the goto is executed; the do { ... } while (0) wrapper exists only so
that an invocation followed by a semicolon is a single statement. */

#define RRETURN(ra) \
do { \
rrc = ra; \
goto RETURN_SWITCH; \
} \
while (0)



Expand Down
31 changes: 31 additions & 0 deletions testdata/testinput2
Original file line number Diff line number Diff line change
Expand Up @@ -7255,4 +7255,35 @@ a)"xI

/[A-\\]/B

/[\A]/

/[\Z]/

/[\z]/

/[\G]/

/[\K]/

/[\g<1>]/
<
g
\= Expect no match
\\

/[\k<1>]/
<
k
\= Expect no match
\\

/[\u{ 1z}]/alt_bsux,extra_alt_bsux
u
{
}
\x20
1
\= Expect no match
\\

# End of testinput2
2 changes: 2 additions & 0 deletions testdata/testinput21
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@
/(?<=ab\Cde)X/
abZdeX

/[\C]/

# End of testinput21
2 changes: 2 additions & 0 deletions testdata/testinput23
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,6 @@

/a\Cb/

/a[\C]b/

# End of testinput23
48 changes: 48 additions & 0 deletions testdata/testoutput2
Original file line number Diff line number Diff line change
Expand Up @@ -20933,6 +20933,54 @@ Failed: error 106 at offset 3: missing terminating ] for character class
End
------------------------------------------------------------------

/[\A]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\Z]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\z]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\G]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\K]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

/[\g<1>]/
<
0: <
g
0: g
\= Expect no match
\\
No match

/[\k<1>]/
<
0: <
k
0: k
\= Expect no match
\\
No match

/[\u{ 1z}]/alt_bsux,extra_alt_bsux
u
0: u
{
0: {
}
0: }
\x20
0:
1
0: 1
\= Expect no match
\\
No match

# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Expand Down
3 changes: 3 additions & 0 deletions testdata/testoutput21
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,7 @@ Subject length lower bound = 5
abZdeX
0: X

/[\C]/
Failed: error 107 at offset 2: escape sequence is invalid in character class

# End of testinput21
3 changes: 3 additions & 0 deletions testdata/testoutput23
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@
/a\Cb/
Failed: error 185 at offset 3: using \C is disabled in this PCRE2 library

/a[\C]b/
Failed: error 107 at offset 3: escape sequence is invalid in character class

# End of testinput23
Loading