Skip to content

Commit 0af9347

Browse files
Rephrase doc and fix look-behind start state order
1 parent d6a7d6d commit 0af9347

File tree

2 files changed

+16
-11
lines changed

2 files changed

+16
-11
lines changed

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1038,12 +1038,12 @@ impl Compiler {
10381038

10391039
let unanchored =
10401040
self.c_at_least(&Hir::dot(hir::Dot::AnyByte), false, 0)?;
1041+
self.builder.borrow_mut().start_look_behind(unanchored.start);
10411042

10421043
let sub = self.c(lookaround.sub())?;
10431044
let write = self.add_write_lookaround(idx)?;
10441045
self.patch(unanchored.end, sub.start)?;
10451046
self.patch(sub.end, write)?;
1046-
self.builder.borrow_mut().start_look_behind(unanchored.start);
10471047
Ok(ThompsonRef { start: check, end: check })
10481048
}
10491049

regex-automata/src/nfa/thompson/pikevm.rs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,9 +1278,11 @@ impl PikeVM {
12781278
// Note: since capture groups are not allowed inside look-behinds,
12791279
// there won't be any Capture epsilon transitions and hence it is ok to
12801280
// use &mut [] for the slots parameter. We need to add the start states
1281-
// in reverse because nested look-behinds have a higher index but must
1282-
// be executed first.
1283-
for look_behind_start in self.nfa.look_behind_starts() {
1281+
// in reverse because more deeply nested look-behinds have a higher index
1282+
// but must be executed first, so that the result is available for the
1283+
// outer expression.
1284+
for look_behind_start in self.nfa.look_behind_starts().iter().rev()
1285+
{
12841286
self.epsilon_closure(
12851287
stack,
12861288
&mut [],
@@ -2091,8 +2093,10 @@ pub struct Cache {
20912093
curr_lookaround: ActiveStates,
20922094
/// The next set of states to be explored for look-behind subexpressions.
20932095
next_lookaround: ActiveStates,
2094-
/// The active set of states when a match was found. This is needed
2095-
/// to resume a search without recomputing look-behind subexpressions.
2096+
/// The set of active threads, belonging to look-behind expressions,
2097+
/// when a match was found. This is needed to resume a search after a match
2098+
/// was found (to look for further matches), without having to re-scan the
2099+
/// beginning of the haystack.
20962100
match_lookaround: Option<ActiveStates>,
20972101
/// When true, use the states of `match_lookaround` to initialize a search,
20982102
/// otherwise recompute from the beginning of the haystack.
@@ -2168,12 +2172,13 @@ impl Cache {
21682172
self.keep_lookaround_state = false;
21692173
}
21702174

2171-
/// Set this cache to keep the state of look-behind assertions upon a
2172-
/// match being found.
2175+
/// Set this cache to store a copy of the active threads belonging
2176+
/// to look-behind assertions upon a match being found.
21732177
///
2174-
/// This must only be called with a value of `true` when a new search is
2175-
/// started at the end of a previously found match, otherwise the result
2176-
/// of any search after this call will most likely be wrong.
2178+
/// This is a performance optimization and must only be called with a
2179+
/// value of `true` when intending to start a new search at the end of
2180+
/// a previously found match. Otherwise, the result of look-behind
2181+
/// sub-expressions will be out of sync with the main regex.
21772182
///
21782183
/// Calling this function with a value of `false` will clear any previously
21792184
/// stored look-behind state.

0 commit comments

Comments
 (0)