Skip to content

Commit 05206d6

Browse files
committed
Fix \z behaviour when matching within invalid UTF
1 parent c130612 commit 05206d6

File tree

7 files changed

+24
-2
lines changed

7 files changed

+24
-2
lines changed

ChangeLog

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ locks out any accidental sign-extension.
162 162
literal character with a value greater than or equal to 0x80000000u caused
163 163
undefined behaviour.
164 164

165+
43. \z was misbehaving when matching fragments inside invalid UTF strings.
166+
165 167

166 168
Version 10.42 11-December-2022
167 169
------------------------------

src/pcre2_match.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
6076 6076
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
6077 6077

6078 6078
/* Fall through */
6079-
/* Unconditional end of subject assertion (\z) */
6079+
/* Unconditional end of subject assertion (\z). We must check NOTEOL
6080+
because it gets set for invalid UTF fragments. */
6080 6081

6081 6082
case OP_EOD:
6082-
if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
6083+
if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
6084+
RRETURN(MATCH_NOMATCH);
6083 6085
if (mb->partial != 0)
6084 6086
{
6085 6087
mb->hitend = TRUE;

testdata/testinput10

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,4 +642,7 @@
642 642
line1\nbreak
643 643
line0\nline1\nbreak
644 644

645+
/A\z/utf,match_invalid_utf
646+
A\x80\x42\n
647+
645 648
# End of testinput10

testdata/testinput12

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,9 @@
464 464

465 465
/aa/utf,ucp,match_invalid_utf,global
466 466
\x{d800}aa
467+
468+
/A\z/utf,match_invalid_utf
469+
A\x{df00}\n
467 470

468 471
# ----------------------------------------------------
469 472

testdata/testoutput10

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1921,4 +1921,8 @@ Partial match:
1921 1921
line0\nline1\nbreak
1922 1922
No match
1923 1923

1924+
/A\z/utf,match_invalid_utf
1925+
A\x80\x42\n
1926+
No match
1927+
1924 1928
# End of testinput10

testdata/testoutput12-16

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,6 +1607,10 @@ No match
1607 1607
/aa/utf,ucp,match_invalid_utf,global
1608 1608
\x{d800}aa
1609 1609
0: aa
1610+
1611+
/A\z/utf,match_invalid_utf
1612+
A\x{df00}\n
1613+
No match
1610 1614

1611 1615
# ----------------------------------------------------
1612 1616

testdata/testoutput12-32

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,10 @@ No match
1605 1605
/aa/utf,ucp,match_invalid_utf,global
1606 1606
\x{d800}aa
1607 1607
0: aa
1608+
1609+
/A\z/utf,match_invalid_utf
1610+
A\x{df00}\n
1611+
No match
1608 1612

1609 1613
# ----------------------------------------------------
1610 1614

0 commit comments

Comments
 (0)