Skip to content

Commit 91cf496

Browse files
committed
pcre2_match: improve match_ref() logic for robustness
A recently fixed bug in Scan Substring triggered problems in the current logic that would result in crashes and the skipping of safeguards. Add an assert to validate that this function is never called with an `eptr` past the end of the subject, and use the correct return value for non-partial matches of back references.
1 parent a3c3dbd commit 91cf496

File tree

4 files changed

+19
-8
lines changed

4 files changed

+19
-8
lines changed

src/pcre2_match.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
387387
eptr = eptr_start = Feptr;
388388
p = mb->start_subject + Fovector[offset];
389389
length = Fovector[offset+1] - Fovector[offset];
390+
PCRE2_ASSERT(eptr <= mb->end_subject);
390391

391392
if (caseless)
392393
{
@@ -485,8 +486,8 @@ else
485486

486487
else
487488
{
488-
if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
489-
if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */
489+
if ((PCRE2_SIZE)(mb->end_subject - eptr) < length ||
490+
memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */
490491
eptr += length;
491492
}
492493
}

src/pcre2test.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7956,12 +7956,12 @@ Returns: PR_OK continue processing next line
79567956
static int
79577957
process_data(void)
79587958
{
7959-
PCRE2_SIZE len, ulen, arg_ulen;
7959+
PCRE2_SIZE ulen, arg_ulen;
79607960
uint32_t gmatched;
79617961
uint32_t c, k;
79627962
uint32_t g_notempty = 0;
79637963
uint8_t *p, *pp, *start_rep;
7964-
size_t needlen;
7964+
size_t len, needlen;
79657965
void *use_dat_context;
79667966
BOOL utf;
79677967
BOOL subject_literal;
@@ -8478,11 +8478,11 @@ the unused start of the buffer unaddressable. If we are using the POSIX
84788478
interface, or testing zero-termination, we must include the terminating zero in
84798479
the usable data. */
84808480

8481-
c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
8482-
(dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
8483-
pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
8481+
c = code_unit_size * ((((pat_patctl.control & CTL_POSIX) != 0) +
8482+
((dat_datctl.control & CTL_ZERO_TERMINATE) != 0))? 1 : 0);
8483+
pp = memmove(dbuffer + dbuffer_size - (len + c), dbuffer, len + c);
84848484
#ifdef SUPPORT_VALGRIND
8485-
VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
8485+
VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
84868486
#endif
84878487

84888488
#if defined(EBCDIC) && !EBCDIC_IO

testdata/testinput2

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6753,6 +6753,9 @@ a)"xI
67536753
abxyz
67546754
efgxyz
67556755

6756+
/(a)(b+)(*scs:(1)a(*ACCEPT))(\2)/
6757+
abbb
6758+
67566759
# Duplicated capture references
67576760

67586761
/(a)(b)(c)(d)(*scs:(4,3,1,2,2,1,3,3,4,4)x)/B

testdata/testoutput2

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20258,6 +20258,13 @@ No match
2025820258
2:
2025920259
3:
2026020260

20261+
/(a)(b+)(*scs:(1)a(*ACCEPT))(\2)/
20262+
abbb
20263+
0: abb
20264+
1: a
20265+
2: b
20266+
3: b
20267+
2026120268
# Duplicated capture references
2026220269

2026320270
/(a)(b)(c)(d)(*scs:(4,3,1,2,2,1,3,3,4,4)x)/B

0 commit comments

Comments
 (0)