Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion regex-automata/src/dfa/dense.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5084,7 +5084,7 @@ impl BuildError {
}

pub(crate) fn unsupported_lookaround() -> BuildError {
let msg = "cannot build DFAs for regexes with look-around\
let msg = "cannot build DFAs for regexes with look-around \
sub-expressions; use a different regex engine";
BuildError { kind: BuildErrorKind::Unsupported(msg) }
}
Expand Down
13 changes: 12 additions & 1 deletion regex-automata/src/dfa/onepass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,9 @@ impl<'a> InternalBuilder<'a> {
));
}
assert_eq!(DEAD, self.add_empty_state()?);
if self.nfa.lookaround_count() > 0 {
return Err(BuildError::unsupported_lookaround());
}

// This is where the explicit slots start. We care about this because
// we only need to track explicit slots. The implicit slots---two for
Expand Down Expand Up @@ -640,7 +643,7 @@ impl<'a> InternalBuilder<'a> {
match *self.nfa.state(id) {
thompson::State::WriteLookAround { .. }
| thompson::State::CheckLookAround { .. } => {
todo!("check how to handle")
return Err(BuildError::unsupported_lookaround());
}
thompson::State::ByteRange { ref trans } => {
self.compile_transition(dfa_id, trans, epsilons)?;
Expand Down Expand Up @@ -3000,6 +3003,7 @@ enum BuildErrorKind {
UnsupportedLook { look: Look },
ExceededSizeLimit { limit: usize },
NotOnePass { msg: &'static str },
UnsupportedLookAround,
}

impl BuildError {
Expand Down Expand Up @@ -3030,6 +3034,10 @@ impl BuildError {
/// Builds a `BuildError` of kind `NotOnePass` that carries the given
/// static explanation message.
fn not_one_pass(msg: &'static str) -> BuildError {
    let kind = BuildErrorKind::NotOnePass { msg };
    BuildError { kind }
}

/// Builds a `BuildError` of kind `UnsupportedLookAround`.
///
/// The one-pass builder returns this when the NFA it is given contains
/// look-around states.
fn unsupported_lookaround() -> BuildError {
    let kind = BuildErrorKind::UnsupportedLookAround;
    BuildError { kind }
}
}

#[cfg(feature = "std")]
Expand Down Expand Up @@ -3078,6 +3086,9 @@ impl core::fmt::Display for BuildError {
pattern is not one-pass: {}",
msg,
),
UnsupportedLookAround => {
write!(f, "one-pass DFA does not support look-arounds")
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion regex-automata/src/hybrid/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl BuildError {
}

pub(crate) fn unsupported_lookaround() -> BuildError {
let msg = "cannot build DFAs for regexes with look-around\
let msg = "cannot build DFAs for regexes with look-around \
sub-expressions; use a different regex engine";
BuildError { kind: BuildErrorKind::Unsupported(msg) }
}
Expand Down
85 changes: 44 additions & 41 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,49 +490,52 @@ impl Core {
// we know we aren't going to use the lazy DFA. So we do a config check
// up front, which is in practice the only way we won't try to use the
// DFA.
let (nfarev, hybrid, dfa) =
if !info.config().get_hybrid() && !info.config().get_dfa() {
(None, wrappers::Hybrid::none(), wrappers::DFA::none())
let (nfarev, hybrid, dfa) = if !info.config().get_hybrid()
&& !info.config().get_dfa()
// With look-arounds, the lazy DFA and dense DFA would fail to build
|| nfa.lookaround_count() > 0
{
(None, wrappers::Hybrid::none(), wrappers::DFA::none())
} else {
// FIXME: Technically, we don't quite yet KNOW that we need
// a reverse NFA. It's possible for the DFAs below to both
// fail to build just based on the forward NFA. In which case,
// building the reverse NFA was totally wasted work. But...
// fixing this requires breaking DFA construction apart into
// two pieces: one for the forward part and another for the
// reverse part. Quite annoying. Making it worse, when building
// both DFAs fails, it's quite likely that the NFA is large and
// that it will take quite some time to build the reverse NFA
// too. So... it's really probably worth it to do this!
let nfarev = thompson::Compiler::new()
// Currently, reverse NFAs don't support capturing groups,
// so we MUST disable them. But even if we didn't have to,
// we would, because nothing in this crate does anything
// useful with capturing groups in reverse. And of course,
// the lazy DFA ignores capturing groups in all cases.
.configure(
thompson_config
.clone()
.which_captures(WhichCaptures::None)
.reverse(true),
)
.build_many_from_hir(hirs)
.map_err(BuildError::nfa)?;
let dfa = if !info.config().get_dfa() {
wrappers::DFA::none()
} else {
// FIXME: Technically, we don't quite yet KNOW that we need
// a reverse NFA. It's possible for the DFAs below to both
// fail to build just based on the forward NFA. In which case,
// building the reverse NFA was totally wasted work. But...
// fixing this requires breaking DFA construction apart into
// two pieces: one for the forward part and another for the
// reverse part. Quite annoying. Making it worse, when building
// both DFAs fails, it's quite likely that the NFA is large and
// that it will take quite some time to build the reverse NFA
// too. So... it's really probably worth it to do this!
let nfarev = thompson::Compiler::new()
// Currently, reverse NFAs don't support capturing groups,
// so we MUST disable them. But even if we didn't have to,
// we would, because nothing in this crate does anything
// useful with capturing groups in reverse. And of course,
// the lazy DFA ignores capturing groups in all cases.
.configure(
thompson_config
.clone()
.which_captures(WhichCaptures::None)
.reverse(true),
)
.build_many_from_hir(hirs)
.map_err(BuildError::nfa)?;
let dfa = if !info.config().get_dfa() {
wrappers::DFA::none()
} else {
wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
};
let hybrid = if !info.config().get_hybrid() {
wrappers::Hybrid::none()
} else if dfa.is_some() {
debug!("skipping lazy DFA because we have a full DFA");
wrappers::Hybrid::none()
} else {
wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
};
(Some(nfarev), hybrid, dfa)
wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
};
let hybrid = if !info.config().get_hybrid() {
wrappers::Hybrid::none()
} else if dfa.is_some() {
debug!("skipping lazy DFA because we have a full DFA");
wrappers::Hybrid::none()
} else {
wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
};
(Some(nfarev), hybrid, dfa)
};
Ok(Core {
info,
pre,
Expand Down
9 changes: 7 additions & 2 deletions regex-automata/src/nfa/thompson/backtrack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,9 @@ impl Builder {
nfa: NFA,
) -> Result<BoundedBacktracker, BuildError> {
nfa.look_set_any().available().map_err(BuildError::word)?;
if nfa.lookaround_count() > 0 {
return Err(BuildError::unsupported_lookarounds());
}
Ok(BoundedBacktracker { config: self.config.clone(), nfa })
}

Expand Down Expand Up @@ -1453,7 +1456,7 @@ impl BoundedBacktracker {
/// Execute a "step" in the backtracking algorithm.
///
/// A "step" is somewhat of a misnomer, because this routine keeps going
/// until it either runs out of things to try or fins a match. In the
/// until it either runs out of things to try or finds a match. In the
/// former case, it may have pushed some things on to the backtracking
/// stack, in which case, those will be tried next as part of the
/// 'backtrack' routine above.
Expand Down Expand Up @@ -1521,7 +1524,9 @@ impl BoundedBacktracker {
}
State::WriteLookAround { .. }
| State::CheckLookAround { .. } => {
todo!("check how to handle")
unimplemented!(
"backtracking engine does not support look-arounds"
);
}
State::Union { ref alternates } => {
sid = match alternates.get(0) {
Expand Down
13 changes: 6 additions & 7 deletions regex-automata/src/nfa/thompson/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ enum State {
next: StateID,
},
/// An empty state that behaves analogously to a `Match` state but for
/// the look-around sub-expression with the given index.
/// the look-around sub-expression with the given look-around index.
WriteLookAround { lookaround_index: SmallIndex },
/// A conditional epsilon transition that will only be taken if the
/// look-around sub-expression with the given index evaluates to `positive`
Expand Down Expand Up @@ -484,17 +484,16 @@ impl Builder {
remap[sid] = nfa.add(nfa::State::Look { look, next });
}
State::WriteLookAround { lookaround_index } => {
remap[sid] = nfa.add(nfa::State::WriteLookAround {
lookaround_idx: lookaround_index,
});
remap[sid] = nfa
.add(nfa::State::WriteLookAround { lookaround_index });
}
State::CheckLookAround {
lookaround_index,
positive,
next,
} => {
remap[sid] = nfa.add(nfa::State::CheckLookAround {
lookaround_idx: lookaround_index,
lookaround_index,
positive,
next,
});
Expand Down Expand Up @@ -722,7 +721,7 @@ impl Builder {
self.add(State::Empty { next: StateID::ZERO })
}

/// Add a state which will record that the lookaround with the given index
/// Add a state which will record that the look-around with the given index
/// is satisfied at the current position.
pub fn add_write_lookaround(
&mut self,
Expand All @@ -731,7 +730,7 @@ impl Builder {
self.add(State::WriteLookAround { lookaround_index: index })
}

/// Add a state which will check whether the lookaround with the given
/// Add a state which will check whether the look-around with the given
/// index is satisfied at the current position.
pub fn add_check_lookaround(
&mut self,
Expand Down
34 changes: 32 additions & 2 deletions regex-automata/src/nfa/thompson/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,13 @@ impl Compiler {
{
return Err(BuildError::unsupported_captures());
}
if self.config.get_reverse()
&& exprs.iter().any(|e| {
(e.borrow() as &Hir).properties().contains_lookaround_expr()
})
{
return Err(BuildError::unsupported_lookarounds());
}

self.builder.borrow_mut().clear();
self.builder.borrow_mut().set_utf8(self.config.get_utf8());
Expand Down Expand Up @@ -2036,14 +2043,14 @@ mod tests {

fn s_write_lookaround(id: usize) -> State {
State::WriteLookAround {
lookaround_idx: SmallIndex::new(id)
lookaround_index: SmallIndex::new(id)
.expect("look-around index too large"),
}
}

fn s_check_lookaround(id: usize, positive: bool, next: usize) -> State {
State::CheckLookAround {
lookaround_idx: SmallIndex::new(id)
lookaround_index: SmallIndex::new(id)
.expect("look-around index too large"),
positive,
next: sid(next),
Expand Down Expand Up @@ -2151,6 +2158,29 @@ mod tests {
);
}

/// Verifies the exact state sequence compiled for `(?<=^)a` when capture
/// states are disabled.
///
/// Per the test name, the point being pinned is that the unanchored prefix
/// is still emitted even though a start anchor appears inside the
/// look-behind.
#[test]
fn compile_yes_unanchored_prefix_with_start_anchor_in_lookaround() {
    // Captures are turned off so that only the matching states show up.
    let config = NFA::config().which_captures(WhichCaptures::None);
    let compiled =
        NFA::compiler().configure(config).build(r"(?<=^)a").unwrap();

    // The full expected state list, in state-ID order.
    let expected = [
        s_bin_union(2, 1),
        s_range(0, 255, 0),
        s_bin_union(3, 6),
        s_bin_union(5, 4),
        s_range(0, 255, 3),
        s_look(Look::Start, 7),
        s_check_lookaround(0, true, 8),
        s_write_lookaround(0),
        s_byte(b'a', 9),
        s_match(0),
    ];
    assert_eq!(compiled.states(), &expected);
}

#[test]
fn compile_empty() {
assert_eq!(build("").states(), &[s_match(0),]);
Expand Down
16 changes: 16 additions & 0 deletions regex-automata/src/nfa/thompson/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ enum BuildErrorKind {
/// should support it at some point.
#[cfg(feature = "syntax")]
UnsupportedCaptures,
/// An error that occurs when one tries to build a reverse NFA with
/// look-around sub-expressions. Currently, this isn't supported, but we
/// probably should support it at some point.
///
/// This is also emitted by the backtracking engine which does not
/// support look-around sub-expressions.
UnsupportedLookArounds,
}

impl BuildError {
Expand Down Expand Up @@ -142,6 +149,10 @@ impl BuildError {
/// Builds a `BuildError` of kind `UnsupportedCaptures`.
pub(crate) fn unsupported_captures() -> BuildError {
    let kind = BuildErrorKind::UnsupportedCaptures;
    BuildError { kind }
}

/// Builds a `BuildError` of kind `UnsupportedLookArounds`.
///
/// Returned when a reverse NFA is requested for a pattern containing
/// look-around sub-expressions, and by the bounded backtracker's builder
/// when the NFA it is given contains look-arounds.
pub(crate) fn unsupported_lookarounds() -> BuildError {
    let kind = BuildErrorKind::UnsupportedLookArounds;
    BuildError { kind }
}
}

#[cfg(feature = "std")]
Expand Down Expand Up @@ -201,6 +212,11 @@ impl core::fmt::Display for BuildError {
"currently captures must be disabled when compiling \
a reverse NFA",
),
BuildErrorKind::UnsupportedLookArounds => write!(
f,
"currently look-around sub-expressions cannot be in the pattern \
when compiling a reverse NFA or using the backtracking engine",
),
}
}
}
Loading