Skip to content

Commit 8d29519

Browse files
committed
Fast forward look-around threads upon prefiltering
1 parent a5d8a9e commit 8d29519

File tree

1 file changed

+63
-32
lines changed

1 file changed

+63
-32
lines changed

regex-automata/src/nfa/thompson/pikevm.rs

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1293,24 +1293,16 @@ impl PikeVM {
12931293
*look_behind_start,
12941294
);
12951295
}
1296-
1297-
// This brings the look-behind threads into the state they must be for
1298-
// starting at input.start() instead of the beginning. This is
1299-
// necessary for lookbehinds to be able to match outside of the input
1300-
// span.
1301-
for lb_at in 0..input.start() {
1302-
self.nexts(
1303-
stack,
1304-
curr_lookaround,
1305-
next_lookaround,
1306-
lookaround,
1307-
input,
1308-
lb_at,
1309-
&mut [],
1310-
);
1311-
core::mem::swap(curr_lookaround, next_lookaround);
1312-
next_lookaround.set.clear();
1313-
}
1296+
// Fast-forward the look-behind threads from offset 0 to input.start(). This is
1297+
// necessary for look-behinds to be able to match outside of the input span.
1298+
self.fast_forward_lookbehinds(
1299+
Span { start: 0, end: input.start() },
1300+
input,
1301+
stack,
1302+
curr_lookaround,
1303+
next_lookaround,
1304+
lookaround,
1305+
);
13141306
}
13151307

13161308
let mut hm = None;
@@ -1352,7 +1344,21 @@ impl PikeVM {
13521344
let span = Span::from(at..input.end());
13531345
match pre.find(input.haystack(), span) {
13541346
None => break,
1355-
Some(ref span) => at = span.start,
1347+
Some(ref span) => {
1348+
if self.lookaround_count() > 0 {
1349+
// We are jumping ahead due to the pre-filter, thus we must bring
1350+
// the look-behind threads to the new position.
1351+
self.fast_forward_lookbehinds(
1352+
Span { start: at, end: span.start },
1353+
input,
1354+
stack,
1355+
curr_lookaround,
1356+
next_lookaround,
1357+
lookaround,
1358+
);
1359+
}
1360+
at = span.start
1361+
}
13561362
}
13571363
}
13581364
}
@@ -1459,6 +1465,36 @@ impl PikeVM {
14591465
hm
14601466
}
14611467

1468+
/// Steps the look-behind threads through every position of `forward_span`,
1469+
/// bringing them into the state they must be in for a search continuing at
1470+
/// `forward_span.end`. Assumes they are currently at `forward_span.start`.
1471+
fn fast_forward_lookbehinds(
1472+
&self,
1473+
forward_span: Span,
1474+
input: &Input<'_>,
1475+
stack: &mut Vec<FollowEpsilon>,
1476+
curr_lookaround: &mut ActiveStates,
1477+
next_lookaround: &mut ActiveStates,
1478+
lookaround: &mut Vec<Option<NonMaxUsize>>,
1479+
) {
1480+
for lb_at in forward_span.start..forward_span.end {
1481+
self.nexts(
1482+
stack,
1483+
curr_lookaround,
1484+
next_lookaround,
1485+
lookaround,
1486+
input,
1487+
lb_at,
1488+
// Since capture groups are not allowed inside look-arounds,
1489+
// there won't be any Capture epsilon transitions and hence it is ok to
1490+
// use &mut [] for the slots parameter.
1491+
&mut [],
1492+
);
1493+
core::mem::swap(curr_lookaround, next_lookaround);
1494+
next_lookaround.set.clear();
1495+
}
1496+
}
1497+
14621498
/// The implementation for the 'which_overlapping_matches' API. Basically,
14631499
/// we do a single scan through the entire haystack (unless our regex
14641500
/// or search is anchored) and record every pattern that matched. In
@@ -1527,19 +1563,14 @@ impl PikeVM {
15271563
*look_behind_start,
15281564
);
15291565
}
1530-
for lb_at in 0..input.start() {
1531-
self.nexts(
1532-
stack,
1533-
curr_lookaround,
1534-
next_lookaround,
1535-
lookaround,
1536-
input,
1537-
lb_at,
1538-
&mut [],
1539-
);
1540-
core::mem::swap(curr_lookaround, next_lookaround);
1541-
next_lookaround.set.clear();
1542-
}
1566+
self.fast_forward_lookbehinds(
1567+
Span { start: 0, end: input.start() },
1568+
input,
1569+
stack,
1570+
curr_lookaround,
1571+
next_lookaround,
1572+
lookaround,
1573+
);
15431574
for at in input.start()..=input.end() {
15441575
let any_matches = !patset.is_empty();
15451576
if curr.set.is_empty() {

0 commit comments

Comments
 (0)