Skip to content

Commit 45dcb3d

Browse files
author
Zoltan Herczeg
committed
Fix \X matching in 32 bit mode without UTF in JIT
1 parent a0c5393 commit 45dcb3d

File tree

5 files changed

+23
-3
lines changed

5 files changed

+23
-3
lines changed

ChangeLog

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ undefined behaviour.
166166

167167
44. Implement --group-separator and --no-group-separator for pcre2grep.
168168

169+
45. Fix \X matching in 32-bit mode without UTF in JIT.
170+
169171

170172
Version 10.42 11-December-2022
171173
------------------------------

src/pcre2_jit_compile.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8718,7 +8718,7 @@ c = *cc++;
87188718

87198719
#if PCRE2_CODE_UNIT_WIDTH == 32
87208720
if (c >= 0x110000)
8721-
return NULL;
8721+
return cc;
87228722
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
87238723
lgb = UCD_GRAPHBREAK(c);
87248724

@@ -8958,7 +8958,7 @@ switch(type)
89588958
#else
89598959
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
89608960
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
8961-
if (!common->utf || common->invalid_utf)
8961+
if (common->invalid_utf)
89628962
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
89638963
#endif
89648964

@@ -12044,7 +12044,7 @@ switch(opcode)
1204412044
}
1204512045

1204612046
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
12047-
if (common->utf)
12047+
if (type == OP_EXTUNI || common->utf)
1204812048
{
1204912049
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
1205012050
detect_partial_match(common, &no_match);

testdata/testinput12

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,4 +569,8 @@
569569
/\x{802a0000}*/
570570
\x{802a0000}\x{802a0000}
571571

572+
# UTF matching without UTF, check invalid UTF characters
573+
/\X++/
574+
a\x{110000}\x{ffffffff}
575+
572576
# End of testinput12

testdata/testoutput12-16

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1814,4 +1814,13 @@ No match
18141814
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
18151815
\x{802a0000}\x{802a0000}
18161816

1817+
# UTF matching without UTF, check invalid UTF characters
1818+
/\X++/
1819+
a\x{110000}\x{ffffffff}
1820+
** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
1821+
** Truncation will probably give the wrong result.
1822+
** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
1823+
** Truncation will probably give the wrong result.
1824+
0: a\x00\x{ffff}
1825+
18171826
# End of testinput12

testdata/testoutput12-32

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1812,4 +1812,9 @@ No match
18121812
\x{802a0000}\x{802a0000}
18131813
0: \x{802a0000}\x{802a0000}
18141814

1815+
# UTF matching without UTF, check invalid UTF characters
1816+
/\X++/
1817+
a\x{110000}\x{ffffffff}
1818+
0: a\x{110000}\x{ffffffff}
1819+
18151820
# End of testinput12

0 commit comments

Comments (0)