Skip to content

Commit d4a3b9a

Browse files
authored
pcre2_match: improve match_ref() logic for robustness (#781)
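Add an assert to validate that match_ref() is never called with `eptr` pointing past the end of the subject, and return the correct value for non-partial matches of back references: when the remaining subject is shorter than the referenced string, the result is now -1 ("no match") rather than 1 ("partial").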
1 parent 774b305 commit d4a3b9a

File tree

2 files changed

+10
-9
lines changed

2 files changed

+10
-9
lines changed

src/pcre2_match.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ if (offset >= Foffset_top || Fovector[offset] == PCRE2_UNSET)
387387
eptr = eptr_start = Feptr;
388388
p = mb->start_subject + Fovector[offset];
389389
length = Fovector[offset+1] - Fovector[offset];
390+
PCRE2_ASSERT(eptr <= mb->end_subject);
390391

391392
if (caseless)
392393
{
@@ -406,7 +407,7 @@ if (caseless)
406407
bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
407408
sequence of two of the latter. It is important, therefore, to check the
408409
length along the reference, not along the subject (earlier code did this
409-
wrong). UCP without uses Unicode properties but without UTF encoding. */
410+
wrong). UCP uses Unicode properties but without UTF encoding. */
410411

411412
while (p < endptr)
412413
{
@@ -485,8 +486,8 @@ else
485486

486487
else
487488
{
488-
if ((PCRE2_SIZE)(mb->end_subject - eptr) < length) return 1; /* Partial */
489-
if (memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */
489+
if ((PCRE2_SIZE)(mb->end_subject - eptr) < length ||
490+
memcmp(p, eptr, CU2BYTES(length)) != 0) return -1; /* No match */
490491
eptr += length;
491492
}
492493
}

src/pcre2test.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7990,12 +7990,12 @@ Returns: PR_OK continue processing next line
79907990
static int
79917991
process_data(void)
79927992
{
7993-
PCRE2_SIZE len, ulen, arg_ulen;
7993+
PCRE2_SIZE ulen, arg_ulen;
79947994
uint32_t gmatched;
79957995
uint32_t c, k;
79967996
uint32_t g_notempty = 0;
79977997
uint8_t *p, *pp, *start_rep;
7998-
size_t needlen;
7998+
size_t len, needlen;
79997999
void *use_dat_context;
80008000
BOOL utf;
80018001
BOOL subject_literal;
@@ -8512,11 +8512,11 @@ the unused start of the buffer unaddressable. If we are using the POSIX
85128512
interface, or testing zero-termination, we must include the terminating zero in
85138513
the usable data. */
85148514

8515-
c = code_unit_size * (((pat_patctl.control & CTL_POSIX) +
8516-
(dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 1:0);
8517-
pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c);
8515+
c = code_unit_size * ((((pat_patctl.control & CTL_POSIX) != 0) +
8516+
((dat_datctl.control & CTL_ZERO_TERMINATE) != 0))? 1 : 0);
8517+
pp = memmove(dbuffer + dbuffer_size - (len + c), dbuffer, len + c);
85188518
#ifdef SUPPORT_VALGRIND
8519-
VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
8519+
VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c));
85208520
#endif
85218521

85228522
#if defined(EBCDIC) && !EBCDIC_IO

0 commit comments

Comments
 (0)