Skip to content

Commit dc1a5e7

Browse files
committed
Remove duplicated scan substring captures
1 parent 9d1a620 commit dc1a5e7

File tree

5 files changed

+384
-118
lines changed

5 files changed

+384
-118
lines changed

src/pcre2_compile.c

Lines changed: 57 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -6504,6 +6504,61 @@ for (;; pptr++)
65046504
cb->assert_depth += 1;
65056505
goto GROUP_PROCESS;
65066506

6507+
/* Handle the capture list of scan substring. */
6508+
case META_CAPTURE_NAME: /* Capture name */
6509+
case META_CAPTURE_NUMBER: /* Capture number */
6510+
if (lengthptr != NULL)
6511+
{
6512+
pptr = PRIV(compile_parse_scan_substr_args)(pptr, offset, errorcodeptr, cb, lengthptr);
6513+
if (pptr == NULL)
6514+
return 0;
6515+
break;
6516+
}
6517+
6518+
while (TRUE)
6519+
{
6520+
int count, index;
6521+
named_group *ng;
6522+
6523+
switch (META_CODE(*pptr))
6524+
{
6525+
case META_OFFSET:
6526+
pptr++;
6527+
SKIPOFFSET(pptr);
6528+
continue;
6529+
6530+
case META_CAPTURE_NAME:
6531+
ng = cb->named_groups + pptr[1];
6532+
pptr += 2;
6533+
count = 0;
6534+
index = 0;
6535+
6536+
if (!PRIV(compile_find_dupname_details)(ng->name, ng->length, &index,
6537+
&count, errorcodeptr, cb)) return 0;
6538+
6539+
code[0] = OP_DNCREF;
6540+
PUT2(code, 1, index);
6541+
PUT2(code, 1 + IMM2_SIZE, count);
6542+
code += 1 + 2 * IMM2_SIZE;
6543+
continue;
6544+
6545+
case META_CAPTURE_NUMBER:
6546+
pptr += 2;
6547+
if (pptr[-1] == 0) continue;
6548+
6549+
code[0] = OP_CREF;
6550+
PUT2(code, 1, pptr[-1]);
6551+
code += 1 + IMM2_SIZE;
6552+
continue;
6553+
6554+
default:
6555+
break;
6556+
}
6557+
6558+
break;
6559+
}
6560+
--pptr;
6561+
break;
65076562

65086563
/* ===================================================================*/
65096564
/* Handle conditional subpatterns. The case of (?(Rdigits) is ambiguous
@@ -6515,7 +6570,6 @@ for (;; pptr++)
65156570
case META_COND_RNUMBER: /* (?(Rdigits) */
65166571
case META_COND_NAME: /* (?(name) or (?'name') or ?(<name>) */
65176572
case META_COND_RNAME: /* (?(R&name) - test for recursion */
6518-
case META_CAPTURE_NAME: /* Generic capture name */
65196573
bravalue = OP_COND;
65206574

65216575
if (lengthptr != NULL)
@@ -6526,10 +6580,7 @@ for (;; pptr++)
65266580
uint32_t *start_pptr = pptr;
65276581
uint32_t length = *(++pptr);
65286582

6529-
if (meta == META_CAPTURE_NAME)
6530-
offset += meta_arg;
6531-
else
6532-
GETPLUSOFFSET(offset, pptr);
6583+
GETPLUSOFFSET(offset, pptr);
65336584
name = cb->start_pattern + offset;
65346585

65356586
/* In the first pass, the names generated in the pre-pass are available,
@@ -6592,12 +6643,6 @@ for (;; pptr++)
65926643
start_pptr[0] = meta;
65936644
start_pptr[1] = ng->number;
65946645

6595-
if (meta == META_CAPTURE_NAME)
6596-
{
6597-
code += 1 + IMM2_SIZE;
6598-
break;
6599-
}
6600-
66016646
skipunits = 1 + IMM2_SIZE;
66026647
goto GROUP_PROCESS_NOTE_EMPTY;
66036648
}
@@ -6608,12 +6653,6 @@ for (;; pptr++)
66086653
start_pptr[0] = meta | 1;
66096654
start_pptr[1] = (uint32_t)(ng - cb->named_groups);
66106655

6611-
if (meta == META_CAPTURE_NAME)
6612-
{
6613-
code += 1 + 2 * IMM2_SIZE;
6614-
break;
6615-
}
6616-
66176656
/* A duplicated name was found. Note that if an R<digits> name is found
66186657
(META_COND_RNUMBER), it is a reference test, not a recursion test. */
66196658
skipunits = 1 + 2 * IMM2_SIZE;
@@ -6639,15 +6678,6 @@ for (;; pptr++)
66396678

66406679
if (meta_arg == 0)
66416680
{
6642-
if (meta == META_CAPTURE_NAME)
6643-
{
6644-
code[0] = OP_CREF;
6645-
PUT2(code, 1, pptr[1]);
6646-
code += 1 + IMM2_SIZE;
6647-
pptr++;
6648-
break;
6649-
}
6650-
66516681
code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF;
66526682
PUT2(code, 2 + LINK_SIZE, pptr[1]);
66536683
skipunits = 1 + IMM2_SIZE;
@@ -6663,16 +6693,6 @@ for (;; pptr++)
66636693
if (!PRIV(compile_find_dupname_details)(ng->name, ng->length, &index,
66646694
&count, errorcodeptr, cb)) return 0;
66656695

6666-
if (meta == META_CAPTURE_NAME)
6667-
{
6668-
code[0] = OP_DNCREF;
6669-
PUT2(code, 1, index);
6670-
PUT2(code, 1 + IMM2_SIZE, count);
6671-
code += 1 + 2 * IMM2_SIZE;
6672-
pptr++;
6673-
break;
6674-
}
6675-
66766696
/* A duplicated name was found. Note that if an R<digits> name is found
66776697
(META_COND_RNUMBER), it is a reference test, not a recursion test. */
66786698

@@ -6702,12 +6722,8 @@ for (;; pptr++)
67026722
/* Conditional test of a group's being set. */
67036723

67046724
case META_COND_NUMBER:
6705-
case META_CAPTURE_NUMBER:
67066725
bravalue = OP_COND;
6707-
if (meta == META_CAPTURE_NUMBER)
6708-
offset += meta_arg;
6709-
else
6710-
GETPLUSOFFSET(offset, pptr);
6726+
GETPLUSOFFSET(offset, pptr);
67116727

67126728
groupnumber = *(++pptr);
67136729
if (groupnumber > cb->bracount)
@@ -6718,14 +6734,6 @@ for (;; pptr++)
67186734
}
67196735
if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;
67206736

6721-
if (meta == META_CAPTURE_NUMBER)
6722-
{
6723-
code[0] = OP_CREF;
6724-
PUT2(code, 1, groupnumber);
6725-
code += 1+IMM2_SIZE;
6726-
break;
6727-
}
6728-
67296737
/* Point at initial ( for too many branches error */
67306738
offset -= 2;
67316739
code[1+LINK_SIZE] = OP_CREF;

src/pcre2_compile.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ typedef struct {
274274
#define _pcre2_compile_find_named_group PCRE2_SUFFIX(_pcre2_compile_find_named_group)
275275
#define _pcre2_compile_find_dupname_details PCRE2_SUFFIX(_pcre2_compile_find_dupname_details)
276276
#define _pcre2_compile_add_name_to_table PCRE2_SUFFIX(_pcre2_compile_add_name_to_table)
277+
#define _pcre2_compile_parse_scan_substr_args PCRE2_SUFFIX(_pcre2_compile_parse_scan_substr_args)
277278
#define _pcre2_compile_parse_recurse_args PCRE2_SUFFIX(_pcre2_compile_parse_recurse_args)
278279

279280

@@ -342,6 +343,11 @@ BOOL PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,
342343

343344
/* Parse the arguments of scan substring operations. */
344345

346+
uint32_t * PRIV(compile_parse_scan_substr_args)(uint32_t *pptr, PCRE2_SIZE offset,
347+
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
348+
349+
/* Parse the arguments of recurse operations. */
350+
345351
BOOL PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
346352
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb);
347353

0 commit comments

Comments
 (0)