Skip to content

Commit c9dc1dd

Browse files
committed
Start the look-behind threads later
1 parent 7eb9594 commit c9dc1dd

File tree

1 file changed

+49
-4
lines changed

1 file changed

+49
-4
lines changed

regex-automata/src/nfa/thompson/pikevm.rs

Lines changed: 49 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1258,6 +1258,8 @@ impl PikeVM {
12581258
Some(config) => config,
12591259
};
12601260

1261+
let maximum_look_behind_len = self.nfa.maximum_look_behind_len();
1262+
12611263
let pre =
12621264
if anchored { None } else { self.get_config().get_prefilter() };
12631265
let Cache {
@@ -1274,7 +1276,14 @@ impl PikeVM {
12741276
if let Some(active) = match_lookaround {
12751277
*curr_lookaround = active.clone();
12761278
} else if self.lookaround_count() > 0 {
1277-
// This initializes the look-behind threads from the start of the input
1279+
// If we know the maximum look-behind length, we do not need to
1280+
// start from 0.
1281+
let start_position = usize::saturating_sub(
1282+
input.start(),
1283+
maximum_look_behind_len.unwrap_or(input.start()),
1284+
);
1285+
1286+
// This initializes the look-behind threads from the `start_position`.
12781287
// Note: since capture groups are not allowed inside look-behinds,
12791288
// there won't be any Capture epsilon transitions and hence it is ok to
12801289
// use &mut [] for the slots parameter. We need to add the start states
@@ -1289,14 +1298,14 @@ impl PikeVM {
12891298
curr_lookaround,
12901299
lookaround,
12911300
input,
1292-
0,
1301+
start_position,
12931302
*look_behind_start,
12941303
);
12951304
}
12961305
// This is necessary for look-behinds to be able to match outside of the
12971306
// input span.
12981307
self.fast_forward_lookbehinds(
1299-
Span { start: 0, end: input.start() },
1308+
Span { start: start_position, end: input.start() },
13001309
input,
13011310
stack,
13021311
curr_lookaround,
@@ -1346,10 +1355,46 @@ impl PikeVM {
13461355
None => break,
13471356
Some(ref span) => {
13481357
if self.lookaround_count() > 0 {
1358+
// If we know the maximum look-behind length, we may be
1359+
// able to start catching up the look-behind threads
1360+
// from a position later than `at`.
1361+
let start_position = usize::max(
1362+
at,
1363+
usize::saturating_sub(
1364+
span.start,
1365+
maximum_look_behind_len
1366+
.unwrap_or(span.start),
1367+
),
1368+
);
1369+
// If we resume from later than `at`, we need
1370+
// to reinitialize the look-behind threads.
1371+
if start_position != at {
1372+
curr_lookaround.set.clear();
1373+
for look_behind_start in self
1374+
.nfa
1375+
.look_behind_starts()
1376+
.iter()
1377+
.rev()
1378+
{
1379+
self.epsilon_closure(
1380+
stack,
1381+
&mut [],
1382+
curr_lookaround,
1383+
lookaround,
1384+
input,
1385+
start_position,
1386+
*look_behind_start,
1387+
);
1388+
}
1389+
}
1390+
13491391
// We are jumping ahead due to the pre-filter, thus we must bring
13501392
// the look-behind threads to the new position.
13511393
self.fast_forward_lookbehinds(
1352-
Span { start: at, end: span.start },
1394+
Span {
1395+
start: start_position,
1396+
end: span.start,
1397+
},
13531398
input,
13541399
stack,
13551400
curr_lookaround,

0 commit comments

Comments
 (0)