Skip to content

Commit 536e6a1

Browse files
committed
Improve patch in 62a6498 by skipping irrelevant assertions when looking for a starting code unit.
1 parent c9e03ce commit 536e6a1

File tree

3 files changed

+77
-4
lines changed

3 files changed

+77
-4
lines changed

src/pcre2_study.c

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,15 +1111,57 @@ do
11111111
tcode++;
11121112
break;
11131113

1114-
/* For a positive lookahead assertion, inspect what immediately follows.
1115-
If the next item is one that sets a mandatory character, skip this
1116-
assertion. Otherwise, treat it the same as other bracket groups. */
1114+
/* For a positive lookahead assertion, inspect what immediately follows,
1115+
ignoring intermediate assertions and callouts. If the next item is one
1116+
that sets a mandatory character, skip this assertion. Otherwise, treat it
1117+
the same as other bracket groups. */
11171118

11181119
case OP_ASSERT:
11191120
case OP_ASSERT_NA:
11201121
ncode = tcode + GET(tcode, 1);
11211122
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
11221123
ncode += 1 + LINK_SIZE;
1124+
1125+
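/* Skip intermediate items that are ignored when looking for the next significant item: assertions, word boundary checks, and callouts. */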
1126+
1127+
for (BOOL done = FALSE; !done;)
1128+
{
1129+
switch (*ncode)
1130+
{
1131+
case OP_ASSERT:
1132+
case OP_ASSERT_NOT:
1133+
case OP_ASSERTBACK:
1134+
case OP_ASSERTBACK_NOT:
1135+
case OP_ASSERT_NA:
1136+
case OP_ASSERTBACK_NA:
1137+
ncode += GET(ncode, 1);
1138+
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
1139+
ncode += 1 + LINK_SIZE;
1140+
break;
1141+
1142+
case OP_WORD_BOUNDARY:
1143+
case OP_NOT_WORD_BOUNDARY:
1144+
case OP_UCP_WORD_BOUNDARY:
1145+
case OP_NOT_UCP_WORD_BOUNDARY:
1146+
ncode++;
1147+
break;
1148+
1149+
case OP_CALLOUT:
1150+
ncode += PRIV(OP_lengths)[OP_CALLOUT];
1151+
break;
1152+
1153+
case OP_CALLOUT_STR:
1154+
ncode += GET(ncode, 1 + 2*LINK_SIZE);
1155+
break;
1156+
1157+
default:
1158+
done = TRUE;
1159+
break;
1160+
}
1161+
}
1162+
1163+
/* Now check the next significant item. */
1164+
11231165
switch(*ncode)
11241166
{
11251167
default:
@@ -1149,7 +1191,7 @@ do
11491191
case OP_WHITESPACE:
11501192
case OP_NOT_WHITESPACE:
11511193
tcode = ncode;
1152-
continue; /* With the following opcode */
1194+
continue; /* With the following significant opcode */
11531195
}
11541196
/* Fall through */
11551197

testdata/testinput2

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6087,4 +6087,10 @@ a)"xI
60876087
/(?=b(*COMMIT)c|)d/I,no_start_optimize
60886088
bd
60896089

6090+
/a?(?=bc|)d/I,auto_callout
6091+
bd
6092+
6093+
/a?(?=bc|)\bd/I
6094+
bd
6095+
60906096
# End of testinput2

testdata/testoutput2

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18004,6 +18004,31 @@ Options: no_start_optimize
1800418004
bd
1800518005
No match
1800618006

18007+
/a?(?=bc|)d/I,auto_callout
18008+
Capture group count = 0
18009+
Options: auto_callout
18010+
Starting code units: a d
18011+
Last code unit = 'd'
18012+
Subject length lower bound = 1
18013+
bd
18014+
--->bd
18015+
+0 ^ a?
18016+
+2 ^ (?=
18017+
+5 ^ b
18018+
+8 ^ )
18019+
+9 ^ d
18020+
+10 ^^ End of pattern
18021+
0: d
18022+
18023+
/a?(?=bc|)\bd/I
18024+
Capture group count = 0
18025+
Max lookbehind = 1
18026+
Starting code units: a d
18027+
Last code unit = 'd'
18028+
Subject length lower bound = 1
18029+
bd
18030+
No match
18031+
1800718032
# End of testinput2
1800818033
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
1800918034
Error -62: bad serialized data

0 commit comments

Comments
 (0)