Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 58 additions & 50 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -6495,8 +6495,63 @@ for (;; pptr++)
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
break;

/* ===================================================================*/
/* Handle scan substring. Scan substring assertion starts with META_SCS,
which recursively calls compile_branch. The first opcode processed by
this recursive call is always META_OFFSET. */

case META_OFFSET:
GETPLUSOFFSET(offset, pptr);
if (lengthptr != NULL)
{
pptr = PRIV(compile_parse_scan_substr_args)(pptr, errorcodeptr, cb, lengthptr);
if (pptr == NULL)
return 0;
break;
}

while (TRUE)
{
int count, index;
named_group *ng;

switch (META_CODE(*pptr))
{
case META_OFFSET:
pptr++;
SKIPOFFSET(pptr);
continue;

case META_CAPTURE_NAME:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Err. I don't know how I feel about this, how you've moved the META_CAPTURE_NAME/NUMBER code so it's now hanging under the META_OFFSET handling. I'd named & implemented META_OFFSET in an intentionally generic way, so it could be used outside of scan-substring capture lists.

Do we have to make this change? Maybe we should just merge META_OFFSET and META_SCS now, if you do want to go in this direction.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think they can be merged. META_SCS starts a recursive call to process its block, and another opcode is needed to show that we are processing scan substring. We could change the code generator to support optional arguments (it currently expects a fixed size byte code), but that is complex enough.

Option: the 16-bit argument of META_OFFSET is currently unused; it could represent the different types of blocks if needed.

ng = cb->named_groups + pptr[1];
pptr += 2;
count = 0;
index = 0;

if (!PRIV(compile_find_dupname_details)(ng->name, ng->length, &index,
&count, errorcodeptr, cb)) return 0;

code[0] = OP_DNCREF;
PUT2(code, 1, index);
PUT2(code, 1 + IMM2_SIZE, count);
code += 1 + 2 * IMM2_SIZE;
continue;

case META_CAPTURE_NUMBER:
pptr += 2;
if (pptr[-1] == 0) continue;

code[0] = OP_CREF;
PUT2(code, 1, pptr[-1]);
code += 1 + IMM2_SIZE;
continue;

default:
break;
}

break;
}
--pptr;
break;

case META_SCS:
Expand All @@ -6515,7 +6570,6 @@ for (;; pptr++)
case META_COND_RNUMBER: /* (?(Rdigits) */
case META_COND_NAME: /* (?(name) or (?'name') or ?(<name>) */
case META_COND_RNAME: /* (?(R&name) - test for recursion */
case META_CAPTURE_NAME: /* Generic capture name */
bravalue = OP_COND;

if (lengthptr != NULL)
Expand All @@ -6526,10 +6580,7 @@ for (;; pptr++)
uint32_t *start_pptr = pptr;
uint32_t length = *(++pptr);

if (meta == META_CAPTURE_NAME)
offset += meta_arg;
else
GETPLUSOFFSET(offset, pptr);
GETPLUSOFFSET(offset, pptr);
name = cb->start_pattern + offset;

/* In the first pass, the names generated in the pre-pass are available,
Expand Down Expand Up @@ -6592,12 +6643,6 @@ for (;; pptr++)
start_pptr[0] = meta;
start_pptr[1] = ng->number;

if (meta == META_CAPTURE_NAME)
{
code += 1 + IMM2_SIZE;
break;
}

Comment on lines -6595 to -6600
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand I do agree that all these META_CAPTURE_NAME/NUMBER special cases were really ugly down here, and it's definitely better to be able to move them somewhere else where they can be handled together.

skipunits = 1 + IMM2_SIZE;
goto GROUP_PROCESS_NOTE_EMPTY;
}
Expand All @@ -6608,12 +6653,6 @@ for (;; pptr++)
start_pptr[0] = meta | 1;
start_pptr[1] = (uint32_t)(ng - cb->named_groups);

if (meta == META_CAPTURE_NAME)
{
code += 1 + 2 * IMM2_SIZE;
break;
}

/* A duplicated name was found. Note that if an R<digits> name is found
(META_COND_RNUMBER), it is a reference test, not a recursion test. */
skipunits = 1 + 2 * IMM2_SIZE;
Expand All @@ -6639,15 +6678,6 @@ for (;; pptr++)

if (meta_arg == 0)
{
if (meta == META_CAPTURE_NAME)
{
code[0] = OP_CREF;
PUT2(code, 1, pptr[1]);
code += 1 + IMM2_SIZE;
pptr++;
break;
}

code[1+LINK_SIZE] = (meta == META_COND_RNAME)? OP_RREF : OP_CREF;
PUT2(code, 2 + LINK_SIZE, pptr[1]);
skipunits = 1 + IMM2_SIZE;
Expand All @@ -6663,16 +6693,6 @@ for (;; pptr++)
if (!PRIV(compile_find_dupname_details)(ng->name, ng->length, &index,
&count, errorcodeptr, cb)) return 0;

if (meta == META_CAPTURE_NAME)
{
code[0] = OP_DNCREF;
PUT2(code, 1, index);
PUT2(code, 1 + IMM2_SIZE, count);
code += 1 + 2 * IMM2_SIZE;
pptr++;
break;
}

/* A duplicated name was found. Note that if an R<digits> name is found
(META_COND_RNUMBER), it is a reference test, not a recursion test. */

Expand Down Expand Up @@ -6702,12 +6722,8 @@ for (;; pptr++)
/* Conditional test of a group's being set. */

case META_COND_NUMBER:
case META_CAPTURE_NUMBER:
bravalue = OP_COND;
if (meta == META_CAPTURE_NUMBER)
offset += meta_arg;
else
GETPLUSOFFSET(offset, pptr);
GETPLUSOFFSET(offset, pptr);

groupnumber = *(++pptr);
if (groupnumber > cb->bracount)
Expand All @@ -6718,14 +6734,6 @@ for (;; pptr++)
}
if (groupnumber > cb->top_backref) cb->top_backref = groupnumber;

if (meta == META_CAPTURE_NUMBER)
{
code[0] = OP_CREF;
PUT2(code, 1, groupnumber);
code += 1+IMM2_SIZE;
break;
}

/* Point at initial ( for too many branches error */
offset -= 2;
code[1+LINK_SIZE] = OP_CREF;
Expand Down
6 changes: 6 additions & 0 deletions src/pcre2_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ typedef struct {
#define _pcre2_compile_find_named_group PCRE2_SUFFIX(_pcre2_compile_find_named_group)
#define _pcre2_compile_find_dupname_details PCRE2_SUFFIX(_pcre2_compile_find_dupname_details)
#define _pcre2_compile_add_name_to_table PCRE2_SUFFIX(_pcre2_compile_add_name_to_table)
#define _pcre2_compile_parse_scan_substr_args PCRE2_SUFFIX(_pcre2_compile_parse_scan_substr_args)
#define _pcre2_compile_parse_recurse_args PCRE2_SUFFIX(_pcre2_compile_parse_recurse_args)


Expand Down Expand Up @@ -342,6 +343,11 @@ BOOL PRIV(compile_find_dupname_details)(PCRE2_SPTR name, uint32_t length,

/* Parse the arguments of scan substring operations. The caller in
compile_branch() assigns the returned pointer back to its parsed-pattern
pointer and treats a NULL return as failure. */

uint32_t * PRIV(compile_parse_scan_substr_args)(uint32_t *pptr,
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);

/* Parse the arguments of recurse operations. */

BOOL PRIV(compile_parse_recurse_args)(uint32_t *pptr_start,
PCRE2_SIZE offset, int *errorcodeptr, compile_block *cb);

Expand Down
Loading
Loading