Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,11 @@ impl Pre<()> {
if !info.props()[0].look_set().is_empty() {
return None;
}
// For a similar reason, we require that it has zero look-around
// expressions.
if info.props()[0].contains_lookaround_expr() {
return None;
}
// Finally, currently, our prefilters are all oriented around
// leftmost-first match semantics, so don't try to use them if the
// caller asked for anything else.
Expand Down
97 changes: 64 additions & 33 deletions regex-automata/src/nfa/thompson/pikevm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1293,24 +1293,16 @@ impl PikeVM {
*look_behind_start,
);
}

// This brings the look-behind threads into the state they must be for
// starting at input.start() instead of the beginning. This is
// necessary for lookbehinds to be able to match outside of the input
// span.
for lb_at in 0..input.start() {
self.nexts(
stack,
curr_lookaround,
next_lookaround,
lookaround,
input,
lb_at,
&mut [],
);
core::mem::swap(curr_lookaround, next_lookaround);
next_lookaround.set.clear();
}
// This is necessary for look-behinds to be able to match outside of the
// input span.
self.fast_forward_lookbehinds(
Span { start: 0, end: input.start() },
input,
stack,
curr_lookaround,
next_lookaround,
lookaround,
);
}

let mut hm = None;
Expand Down Expand Up @@ -1352,7 +1344,21 @@ impl PikeVM {
let span = Span::from(at..input.end());
match pre.find(input.haystack(), span) {
None => break,
Some(ref span) => at = span.start,
Some(ref span) => {
if self.lookaround_count() > 0 {
// We are jumping ahead because of the prefilter, so we must bring
// the look-behind threads forward to the new position.
self.fast_forward_lookbehinds(
Span { start: at, end: span.start },
input,
stack,
curr_lookaround,
next_lookaround,
lookaround,
);
}
at = span.start
}
}
}
}
Expand Down Expand Up @@ -1459,6 +1465,36 @@ impl PikeVM {
hm
}

/// This brings the look-behind threads into the state they must be for
/// starting at [forward_span.end]. The assumption is that they are currently
/// at [forward_span.start].
fn fast_forward_lookbehinds(
&self,
forward_span: Span,
input: &Input<'_>,
stack: &mut Vec<FollowEpsilon>,
curr_lookaround: &mut ActiveStates,
next_lookaround: &mut ActiveStates,
lookaround: &mut Vec<Option<NonMaxUsize>>,
) {
for lb_at in forward_span.start..forward_span.end {
self.nexts(
stack,
curr_lookaround,
next_lookaround,
lookaround,
input,
lb_at,
// Since capture groups are not allowed inside look-arounds,
// there won't be any Capture epsilon transitions and hence it is ok to
// use &mut [] for the slots parameter.
&mut [],
);
core::mem::swap(curr_lookaround, next_lookaround);
next_lookaround.set.clear();
}
}

/// The implementation for the 'which_overlapping_matches' API. Basically,
/// we do a single scan through the entire haystack (unless our regex
/// or search is anchored) and record every pattern that matched. In
Expand Down Expand Up @@ -1516,7 +1552,7 @@ impl PikeVM {
match_lookaround: _,
} = cache;

for look_behind_start in self.nfa.look_behind_starts() {
for look_behind_start in self.nfa.look_behind_starts().iter().rev() {
self.epsilon_closure(
stack,
&mut [],
Expand All @@ -1527,19 +1563,14 @@ impl PikeVM {
*look_behind_start,
);
}
for lb_at in 0..input.start() {
self.nexts(
stack,
curr_lookaround,
next_lookaround,
lookaround,
input,
lb_at,
&mut [],
);
core::mem::swap(curr_lookaround, next_lookaround);
next_lookaround.set.clear();
}
self.fast_forward_lookbehinds(
Span { start: 0, end: input.start() },
input,
stack,
curr_lookaround,
next_lookaround,
lookaround,
);
for at in input.start()..=input.end() {
let any_matches = !patset.is_empty();
if curr.set.is_empty() {
Expand Down
19 changes: 10 additions & 9 deletions regex-syntax/src/hir/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,9 @@ impl Extractor {
use crate::hir::HirKind::*;

match *hir.kind() {
Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
LookAround(_) => Seq::infinite(),
Empty | Look(_) | LookAround(_) => {
Seq::singleton(self::Literal::exact(vec![]))
}
Literal(hir::Literal(ref bytes)) => {
let mut seq =
Seq::singleton(self::Literal::exact(bytes.to_vec()));
Expand Down Expand Up @@ -2456,16 +2457,16 @@ mod tests {

#[test]
fn lookaround() {
assert_eq!(inexact([I("a")], [I("b")]), e(r"a(?<=qwa)b"));
assert_eq!(inexact([I("a")], [I("b")]), e(r"a(?<!qw1)b"));
assert_eq!(exact([E("ab")]), e(r"a(?<=qwa)b"));
assert_eq!(exact([E("ab")]), e(r"a(?<!qw1)b"));

assert_eq!((Seq::infinite(), seq([I("ab")])), e(r"(?<=qwe)ab"));
assert_eq!((Seq::infinite(), seq([I("ab")])), e(r"(?<!qwe)ab"));
assert_eq!(exact([E("ab")]), e(r"(?<=qwe)ab"));
assert_eq!(exact([E("ab")]), e(r"(?<!qwe)ab"));

assert_eq!((seq([I("ab")]), Seq::infinite()), e(r"ab(?<=qab)"));
assert_eq!((seq([I("ab")]), Seq::infinite()), e(r"ab(?<!qwe)"));
assert_eq!(exact([E("ab")]), e(r"ab(?<=qab)"));
assert_eq!(exact([E("ab")]), e(r"ab(?<!qwe)"));

let expected = (Seq::infinite(), seq([I("Zb"), I("ab")]));
let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")]));
assert_eq!(expected, e(r"(?<=foo)aZ*b"));
}

Expand Down
6 changes: 6 additions & 0 deletions testdata/lookaround.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,9 @@ matches = [
[[1, 3], [1, 2], [2, 3]],
[[5, 7], [5, 6], [6, 7]],
]

[[test]]
name = "lookbehind matching before the prefiltered start position"
regex = "b(?<=ab)"
haystack = "ab"
matches = [[1, 2]]