Skip to content

Commit 9f37e19

Browse files
committed
Remove duplicated scan substring captures
1 parent 9d1a620 commit 9f37e19

File tree

5 files changed

+383
-118
lines changed

5 files changed

+383
-118
lines changed

src/pcre2_compile.c

Lines changed: 56 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6504,6 +6504,60 @@ for (;; pptr++)
65046504
cb->assert_depth += 1;
65056505
goto GROUP_PROCESS;
65066506

6507+
/* Handle the capture list of a scan substring. */
6508+
case META_CAPTURE_NAME: /* Capture name */
6509+
case META_CAPTURE_NUMBER: /* Capture number */
6510+
if (lengthptr != NULL)
6511+
{
6512+
if (!PRIV(compile_parse_scan_substr_args)(&pptr, offset, errorcodeptr, cb, lengthptr))
6513+
return 0;
6514+
break;
6515+
}
6516+
6517+
while (TRUE)
6518+
{
6519+
int count, index;
6520+
named_group *ng;
6521+
6522+
switch (META_CODE(*pptr))
6523+
{
6524+
case META_OFFSET:
6525+
pptr++;
6526+
SKIPOFFSET(pptr);
6527+
continue;
6528+
6529+
case META_CAPTURE_NAME:
6530+
ng = cb->named_groups + pptr[1];
6531+
pptr += 2;
6532+
count = 0;
6533+
index = 0;
6534+
6535+
if (!PRIV(compile_find_dupname_details)(ng->name, ng->length, &index,
6536+
&count, errorcodeptr, cb)) return 0;
6537+
6538+
code[0] = OP_DNCREF;
6539+
PUT2(code, 1, index);
6540+
PUT2(code, 1 + IMM2_SIZE, count);
6541+
code += 1 + 2 * IMM2_SIZE;
6542+
continue;
6543+
6544+
case META_CAPTURE_NUMBER:
6545+
pptr += 2;
6546+
if (pptr[-1] == 0) continue;
6547+
6548+
code[0] = OP_CREF;
6549+
PUT2(code, 1, pptr[-1]);
6550+
code += 1 + IMM2_SIZE;
6551+
continue;
6552+
6553+
default:
6554+
break;
6555+
}
6556+
6557+
break;
6558+
}
6559+
--pptr;
6560+
break;
65076561

65086562
/* ===================================================================*/
65096563
/* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous
@@ -6515,7 +6569,6 @@ for (;; pptr++)
65156569
case META_COND_RNUMBER: /* (?(Rdigits) */
65166570
case META_COND_NAME: /* (?(name) or (?'name') or ?(<name>) */
65176571
case META_COND_RNAME: /* (?(R&name) - test for recursion */
6518-
case META_CAPTURE_NAME: /* Generic capture name */
65196572
bravalue = OP_COND;
65206573

65216574
if (lengthptr != NULL)
@@ -6526,10 +6579,7 @@ for (;; pptr++)
65266579
uint32_t *start_pptr = pptr;
65276580
uint32_t length = *(++pptr);
65286581

6529-
if (meta == META_CAPTURE_NAME)
6530-
offset += meta_arg;
6531-
else
6532-
GETPLUSOFFSET(offset, pptr);
6582+
GETPLUSOFFSET(offset, pptr);
65336583
name = cb->start_pattern + offset;
65346584

65356585
/* In the first pass, the names generated in the pre-pass are available,
@@ -6592,12 +6642,6 @@ for (;; pptr++)
65926642
start_pptr[0] = meta;
65936643
start_pptr[1] = ng->number;
65946644

6595-
if (meta == META_CAPTURE_NAME)
6596-
{
6597-
code += 1 + IMM2_SIZE;
6598-
break;
6599-
}
6600-
66016645
skipunits = 1 + IMM2_SIZE;
66026646
goto GROUP_PROCESS_NOTE_EMPTY;
66036647
}
@@ -6608,12 +6652,6 @@ for (;; pptr++)
66086652
start_pptr[0] = meta | 1;
66096653
start_pptr[1] = (uint32_t)(ng - cb->named_groups);
66106654

6611-
if (meta == META_CAPTURE_NAME)
6612-
{
6613-
code += 1 + 2 * IMM2_SIZE;
6614-
break;
6615-
}
6616-
66176655
/* A duplicated name was found. Note that if an R<digits> name is found
66186656
(META_COND_RNUMBER), it is a reference test, not a recursion test. */
66196657
skipunits = 1 + 2 * IMM2_SIZE;
@@ -6639,15 +6677,6 @@ for (;; pptr++)
66396677

66406678
if (meta_arg == 0)
66416679
{
6642-
if (meta == META_CAPTURE_NAME)
6643-
{
6644-
code[0] = OP_CREF;
6645-
PUT2(code, 1, pptr[1]);
6646-
code += 1 + IMM2_SIZE;
6647-
pptr++;
6648-
break;
6649-
}
6650-
66516680
code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF;
66526681
PUT2(code, 2 + LINK_SIZE, pptr[1]);
66536682
skipunits = 1 + IMM2_SIZE;
@@ -6663,16 +6692,6 @@ for (;; pptr++)
66636692
if (!PRIV(compile_find_dupname_details)(ng->name, ng->length, &index,
66646693
&count, errorcodeptr, cb)) return 0;
66656694

6666-
if (meta == META_CAPTURE_NAME)
6667-
{
6668-
code[0] = OP_DNCREF;
6669-
PUT2(code, 1, index);
6670-
PUT2(code, 1 + IMM2_SIZE, count);
6671-
code += 1 + 2 * IMM2_SIZE;
6672-
pptr++;
6673-
break;
6674-
}
6675-
66766695
/* A duplicated name was found. Note that if an R<digits> name is found
66776696
(META_COND_RNUMBER), it is a reference test, not a recursion test. */
66786697

@@ -6702,12 +6721,8 @@ for (;; pptr++)
67026721
/* Conditional test of a group's being set. */
67036722

67046723
case META_COND_NUMBER:
6705-
case META_CAPTURE_NUMBER:
67066724
bravalue = OP_COND;
6707-
if (meta == META_CAPTURE_NUMBER)
6708-
offset += meta_arg;
6709-
else
6710-
GETPLUSOFFSET(offset, pptr);
6725+
GETPLUSOFFSET(offset, pptr);
67116726

67126727
groupnumber = *(++pptr);
67136728
if (groupnumber > cb->bracount)
@@ -6718,14 +6733,6 @@ for (;; pptr++)
67186733
}
67196734
if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;
67206735

6721-
if (meta == META_CAPTURE_NUMBER)
6722-
{
6723-
code[0] = OP_CREF;
6724-
PUT2(code, 1, groupnumber);
6725-
code += 1+IMM2_SIZE;
6726-
break;
6727-
}
6728-
67296736
/* Point at initial ( for too many branches error */
67306737
offset -= 2;
67316738
code[1+LINK_SIZE] = OP_CREF;

src/pcre2_compile.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ typedef struct {
274274
#define _pcre2_compile_find_named_group PCRE2_SUFFIX(_pcre2_compile_find_named_group)
275275
#define _pcre2_compile_find_dupname_details PCRE2_SUFFIX(_pcre2_compile_find_dupname_details)
276276
#define _pcre2_compile_add_name_to_table PCRE2_SUFFIX(_pcre2_compile_add_name_to_table)
277+
#define _pcre2_compile_parse_scan_substr_args PCRE2_SUFFIX(_pcre2_compile_parse_scan_substr_args)
277278
#define _pcre2_compile_parse_recurse_args PCRE2_SUFFIX(_pcre2_compile_parse_recurse_args)
278279

279280

@@ -342,6 +343,11 @@ BOOL PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,
342343

343344
/* Parse the arguments of scan substring operations. */
344345

346+
BOOL PRIV(compile_parse_scan_substr_args)(uint32_t **pptr_start,
347+
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
348+
349+
/* Parse the arguments of recurse operations. */
350+
345351
BOOL PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
346352
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb);
347353

0 commit comments

Comments
 (0)