Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,11 @@ impl Pre<()> {
if !info.props()[0].look_set().is_empty() {
return None;
}
// For a similar reason, we require that it has zero look-around
// expressions.
if info.props()[0].contains_lookaround_expr() {
return None;
}
// Finally, currently, our prefilters are all oriented around
// leftmost-first match semantics, so don't try to use them if the
// caller asked for anything else.
Expand Down
95 changes: 63 additions & 32 deletions regex-automata/src/nfa/thompson/pikevm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1293,24 +1293,16 @@ impl PikeVM {
*look_behind_start,
);
}

// This brings the look-behind threads into the state they must be for
// starting at input.start() instead of the beginning. This is
// necessary for lookbehinds to be able to match outside of the input
// span.
for lb_at in 0..input.start() {
self.nexts(
stack,
curr_lookaround,
next_lookaround,
lookaround,
input,
lb_at,
&mut [],
);
core::mem::swap(curr_lookaround, next_lookaround);
next_lookaround.set.clear();
}
// This is necessary for look-behinds to be able to match outside of the
// input span.
self.fast_forward_lookbehinds(
Span { start: 0, end: input.start() },
input,
stack,
curr_lookaround,
next_lookaround,
lookaround,
);
}

let mut hm = None;
Expand Down Expand Up @@ -1352,7 +1344,21 @@ impl PikeVM {
let span = Span::from(at..input.end());
match pre.find(input.haystack(), span) {
None => break,
Some(ref span) => at = span.start,
Some(ref span) => {
if self.lookaround_count() > 0 {
// We are jumping ahead due to the pre-filter, thus we must bring
// the look-behind threads to the new position.
self.fast_forward_lookbehinds(
Span { start: at, end: span.start },
input,
stack,
curr_lookaround,
next_lookaround,
lookaround,
);
}
at = span.start
}
}
}
}
Expand Down Expand Up @@ -1459,6 +1465,36 @@ impl PikeVM {
hm
}

/// This brings the look-behind threads into the state they must be for
/// starting at [input.end]. The assumption is that they are currently
/// at [input.start].
fn fast_forward_lookbehinds(
&self,
forward_span: Span,
input: &Input<'_>,
stack: &mut Vec<FollowEpsilon>,
curr_lookaround: &mut ActiveStates,
next_lookaround: &mut ActiveStates,
lookaround: &mut Vec<Option<NonMaxUsize>>,
) {
for lb_at in forward_span.start..forward_span.end {
self.nexts(
stack,
curr_lookaround,
next_lookaround,
lookaround,
input,
lb_at,
// Since capture groups are not allowed inside look-arounds,
// there won't be any Capture epsilon transitions and hence it is ok to
// use &mut [] for the slots parameter.
&mut [],
);
core::mem::swap(curr_lookaround, next_lookaround);
next_lookaround.set.clear();
}
}

/// The implementation for the 'which_overlapping_matches' API. Basically,
/// we do a single scan through the entire haystack (unless our regex
/// or search is anchored) and record every pattern that matched. In
Expand Down Expand Up @@ -1527,19 +1563,14 @@ impl PikeVM {
*look_behind_start,
);
}
for lb_at in 0..input.start() {
self.nexts(
stack,
curr_lookaround,
next_lookaround,
lookaround,
input,
lb_at,
&mut [],
);
core::mem::swap(curr_lookaround, next_lookaround);
next_lookaround.set.clear();
}
self.fast_forward_lookbehinds(
Span { start: 0, end: input.start() },
input,
stack,
curr_lookaround,
next_lookaround,
lookaround,
);
for at in input.start()..=input.end() {
let any_matches = !patset.is_empty();
if curr.set.is_empty() {
Expand Down
5 changes: 3 additions & 2 deletions regex-syntax/src/hir/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,9 @@ impl Extractor {
use crate::hir::HirKind::*;

match *hir.kind() {
Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
LookAround(_) => Seq::infinite(),
Empty | Look(_) | LookAround(_) => {
Seq::singleton(self::Literal::exact(vec![]))
}
Literal(hir::Literal(ref bytes)) => {
let mut seq =
Seq::singleton(self::Literal::exact(bytes.to_vec()));
Expand Down
6 changes: 6 additions & 0 deletions testdata/lookaround.toml
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,9 @@ matches = [
[[1, 3], [1, 2], [2, 3]],
[[5, 7], [5, 6], [6, 7]],
]

[[test]]
name = "lookbehind matching before the prefiltered start position"
regex = "b(?<=ab)"
haystack = "ab"
matches = [[1, 2]]
Loading