Skip to content

Commit c130612

Browse files
committed
Fix undefined behaviour in the 32-bit library when a quantifier follows a literal character larger than the maximum UTF character.
1 parent 9783ca9 commit c130612

File tree

5 files changed

+35
-3
lines changed

5 files changed

+35
-3
lines changed

ChangeLog

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,13 @@ consistency with OP_VREVERSE.
155155
40. In some legacy environments with a pre-C99 snprintf, pcre2_regerror could
156156
return an incorrect value when the provided buffer was too small.
157157

158+
41. Applied pull request #342 which adds sanity checks for ctype functions and
159+
locks out any accidental sign-extension.
160+
161+
42. In the 32-bit library, in non-UTF mode, a quantifier that followed a
162+
literal character with a value greater than or equal to 0x80000000u caused
163+
undefined behaviour.
164+
158165

159166
Version 10.42 11-December-2022
160167
------------------------------

src/pcre2_compile.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL;
27812781
uint32_t *previous_callout = NULL;
27822782
uint32_t *parsed_pattern = cb->parsed_pattern;
27832783
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
2784+
uint32_t *this_parsed_item = NULL;
27842785
uint32_t meta_quantifier = 0;
27852786
uint32_t add_after_mark = 0;
27862787
uint32_t xoptions = cb->cx->extra_options;
@@ -2866,10 +2867,11 @@ while (ptr < ptrend)
28662867
uint32_t xset, xunset, *xoptset;
28672868
uint32_t terminator;
28682869
uint32_t prev_meta_quantifier;
2870+
uint32_t *prev_parsed_item = this_parsed_item;
28692871
BOOL prev_okquantifier;
28702872
PCRE2_SPTR tempptr;
28712873
PCRE2_SIZE offset;
2872-
2874+
28732875
if (parsed_pattern >= parsed_pattern_end)
28742876
{
28752877
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
@@ -2881,6 +2883,10 @@ while (ptr < ptrend)
28812883
errorcode = ERR19;
28822884
goto FAILED; /* Parentheses too deeply nested */
28832885
}
2886+
2887+
/* Remember where this item started */
2888+
2889+
this_parsed_item = parsed_pattern;
28842890

28852891
/* Get next input character, save its position for callout handling. */
28862892

@@ -3173,7 +3179,6 @@ while (ptr < ptrend)
31733179
continue; /* Next character in pattern */
31743180
}
31753181

3176-
31773182
/* Process the next item in the main part of a pattern. */
31783183

31793184
switch(c)
@@ -3450,7 +3455,7 @@ while (ptr < ptrend)
34503455
wrapping it in non-capturing brackets, but we have to allow for a preceding
34513456
(*MARK) for when (*ACCEPT) has an argument. */
34523457

3453-
if (parsed_pattern[-1] == META_ACCEPT)
3458+
if (*prev_parsed_item == META_ACCEPT)
34543459
{
34553460
uint32_t *p;
34563461
for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0];

testdata/testinput12

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,4 +560,10 @@
560560

561561
# ----------------------------------------------------
562562

563+
# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
564+
# fails in 16-bit mode, but is OK for 32-bit.
565+
566+
/\x{802a0000}*/
567+
\x{802a0000}\x{802a0000}
568+
563569
# End of testinput12

testdata/testoutput12-16

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,4 +1803,11 @@ No match
18031803

18041804
# ----------------------------------------------------
18051805

1806+
# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
1807+
# fails in 16-bit mode, but is OK for 32-bit.
1808+
1809+
/\x{802a0000}*/
1810+
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
1811+
\x{802a0000}\x{802a0000}
1812+
18061813
# End of testinput12

testdata/testoutput12-32

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,4 +1801,11 @@ No match
18011801

18021802
# ----------------------------------------------------
18031803

1804+
# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
1805+
# fails in 16-bit mode, but is OK for 32-bit.
1806+
1807+
/\x{802a0000}*/
1808+
\x{802a0000}\x{802a0000}
1809+
0: \x{802a0000}\x{802a0000}
1810+
18041811
# End of testinput12

0 commit comments

Comments
 (0)