Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion regex-automata/src/dfa/dense.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5084,7 +5084,7 @@ impl BuildError {
}

pub(crate) fn unsupported_lookaround() -> BuildError {
let msg = "cannot build DFAs for regexes with look-around\
let msg = "cannot build DFAs for regexes with look-around \
sub-expressions; use a different regex engine";
BuildError { kind: BuildErrorKind::Unsupported(msg) }
}
Expand Down
13 changes: 12 additions & 1 deletion regex-automata/src/dfa/onepass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,9 @@ impl<'a> InternalBuilder<'a> {
));
}
assert_eq!(DEAD, self.add_empty_state()?);
if self.nfa.lookaround_count() > 0 {
return Err(BuildError::unsupported_lookaround());
}

// This is where the explicit slots start. We care about this because
// we only need to track explicit slots. The implicit slots---two for
Expand Down Expand Up @@ -640,7 +643,7 @@ impl<'a> InternalBuilder<'a> {
match *self.nfa.state(id) {
thompson::State::WriteLookAround { .. }
| thompson::State::CheckLookAround { .. } => {
todo!("check how to handle")
return Err(BuildError::unsupported_lookaround());
}
thompson::State::ByteRange { ref trans } => {
self.compile_transition(dfa_id, trans, epsilons)?;
Expand Down Expand Up @@ -3000,6 +3003,7 @@ enum BuildErrorKind {
UnsupportedLook { look: Look },
ExceededSizeLimit { limit: usize },
NotOnePass { msg: &'static str },
UnsupportedLookAround,
}

impl BuildError {
Expand Down Expand Up @@ -3030,6 +3034,10 @@ impl BuildError {
/// Builds a `BuildError` whose kind is `NotOnePass`, carrying the given
/// static message describing why the pattern is not one-pass.
fn not_one_pass(msg: &'static str) -> BuildError {
BuildError { kind: BuildErrorKind::NotOnePass { msg } }
}

/// Builds a `BuildError` whose kind is `UnsupportedLookAround`. The one-pass
/// builder returns this when the NFA contains look-around states.
fn unsupported_lookaround() -> BuildError {
BuildError { kind: BuildErrorKind::UnsupportedLookAround }
}
}

#[cfg(feature = "std")]
Expand Down Expand Up @@ -3078,6 +3086,9 @@ impl core::fmt::Display for BuildError {
pattern is not one-pass: {}",
msg,
),
UnsupportedLookAround => {
write!(f, "one-pass DFA does not support look-arounds")
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion regex-automata/src/hybrid/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl BuildError {
}

pub(crate) fn unsupported_lookaround() -> BuildError {
let msg = "cannot build DFAs for regexes with look-around\
let msg = "cannot build DFAs for regexes with look-around \
sub-expressions; use a different regex engine";
BuildError { kind: BuildErrorKind::Unsupported(msg) }
}
Expand Down
85 changes: 44 additions & 41 deletions regex-automata/src/meta/strategy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -490,49 +490,52 @@ impl Core {
// we know we aren't going to use the lazy DFA. So we do a config check
// up front, which is in practice the only way we won't try to use the
// DFA.
let (nfarev, hybrid, dfa) =
if !info.config().get_hybrid() && !info.config().get_dfa() {
(None, wrappers::Hybrid::none(), wrappers::DFA::none())
let (nfarev, hybrid, dfa) = if !info.config().get_hybrid()
&& !info.config().get_dfa()
// With look-arounds, the lazy DFA and dense DFA would fail to build
|| nfa.lookaround_count() > 0
{
(None, wrappers::Hybrid::none(), wrappers::DFA::none())
} else {
// FIXME: Technically, we don't quite yet KNOW that we need
// a reverse NFA. It's possible for the DFAs below to both
// fail to build just based on the forward NFA. In which case,
// building the reverse NFA was totally wasted work. But...
// fixing this requires breaking DFA construction apart into
// two pieces: one for the forward part and another for the
// reverse part. Quite annoying. Making it worse, when building
// both DFAs fails, it's quite likely that the NFA is large and
// that it will take quite some time to build the reverse NFA
// too. So... it's really probably worth it to do this!
let nfarev = thompson::Compiler::new()
// Currently, reverse NFAs don't support capturing groups,
// so we MUST disable them. But even if we didn't have to,
// we would, because nothing in this crate does anything
// useful with capturing groups in reverse. And of course,
// the lazy DFA ignores capturing groups in all cases.
.configure(
thompson_config
.clone()
.which_captures(WhichCaptures::None)
.reverse(true),
)
.build_many_from_hir(hirs)
.map_err(BuildError::nfa)?;
let dfa = if !info.config().get_dfa() {
wrappers::DFA::none()
} else {
// FIXME: Technically, we don't quite yet KNOW that we need
// a reverse NFA. It's possible for the DFAs below to both
// fail to build just based on the forward NFA. In which case,
// building the reverse NFA was totally wasted work. But...
// fixing this requires breaking DFA construction apart into
// two pieces: one for the forward part and another for the
// reverse part. Quite annoying. Making it worse, when building
// both DFAs fails, it's quite likely that the NFA is large and
// that it will take quite some time to build the reverse NFA
// too. So... it's really probably worth it to do this!
let nfarev = thompson::Compiler::new()
// Currently, reverse NFAs don't support capturing groups,
// so we MUST disable them. But even if we didn't have to,
// we would, because nothing in this crate does anything
// useful with capturing groups in reverse. And of course,
// the lazy DFA ignores capturing groups in all cases.
.configure(
thompson_config
.clone()
.which_captures(WhichCaptures::None)
.reverse(true),
)
.build_many_from_hir(hirs)
.map_err(BuildError::nfa)?;
let dfa = if !info.config().get_dfa() {
wrappers::DFA::none()
} else {
wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
};
let hybrid = if !info.config().get_hybrid() {
wrappers::Hybrid::none()
} else if dfa.is_some() {
debug!("skipping lazy DFA because we have a full DFA");
wrappers::Hybrid::none()
} else {
wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
};
(Some(nfarev), hybrid, dfa)
wrappers::DFA::new(&info, pre.clone(), &nfa, &nfarev)
};
let hybrid = if !info.config().get_hybrid() {
wrappers::Hybrid::none()
} else if dfa.is_some() {
debug!("skipping lazy DFA because we have a full DFA");
wrappers::Hybrid::none()
} else {
wrappers::Hybrid::new(&info, pre.clone(), &nfa, &nfarev)
};
(Some(nfarev), hybrid, dfa)
};
Ok(Core {
info,
pre,
Expand Down
9 changes: 7 additions & 2 deletions regex-automata/src/nfa/thompson/backtrack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,9 @@ impl Builder {
nfa: NFA,
) -> Result<BoundedBacktracker, BuildError> {
nfa.look_set_any().available().map_err(BuildError::word)?;
if nfa.lookaround_count() > 0 {
return Err(BuildError::unsupported_lookarounds());
}
Ok(BoundedBacktracker { config: self.config.clone(), nfa })
}

Expand Down Expand Up @@ -1453,7 +1456,7 @@ impl BoundedBacktracker {
/// Execute a "step" in the backtracking algorithm.
///
/// A "step" is somewhat of a misnomer, because this routine keeps going
/// until it either runs out of things to try or fins a match. In the
/// until it either runs out of things to try or finds a match. In the
/// former case, it may have pushed some things on to the backtracking
/// stack, in which case, those will be tried next as part of the
/// 'backtrack' routine above.
Expand Down Expand Up @@ -1521,7 +1524,9 @@ impl BoundedBacktracker {
}
State::WriteLookAround { .. }
| State::CheckLookAround { .. } => {
todo!("check how to handle")
unimplemented!(
"backtracking engine does not support look-arounds"
);
}
State::Union { ref alternates } => {
sid = match alternates.get(0) {
Expand Down
13 changes: 6 additions & 7 deletions regex-automata/src/nfa/thompson/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ enum State {
next: StateID,
},
/// An empty state that behaves analogously to a `Match` state but for
/// the look-around sub-expression with the given index.
/// the look-around sub-expression with the given look-around index.
WriteLookAround { lookaround_index: SmallIndex },
/// A conditional epsilon transition that will only be taken if the
/// look-around sub-expression with the given index evaluates to `positive`
Expand Down Expand Up @@ -484,17 +484,16 @@ impl Builder {
remap[sid] = nfa.add(nfa::State::Look { look, next });
}
State::WriteLookAround { lookaround_index } => {
remap[sid] = nfa.add(nfa::State::WriteLookAround {
lookaround_idx: lookaround_index,
});
remap[sid] = nfa
.add(nfa::State::WriteLookAround { lookaround_index });
}
State::CheckLookAround {
lookaround_index,
positive,
next,
} => {
remap[sid] = nfa.add(nfa::State::CheckLookAround {
lookaround_idx: lookaround_index,
lookaround_index,
positive,
next,
});
Expand Down Expand Up @@ -722,7 +721,7 @@ impl Builder {
self.add(State::Empty { next: StateID::ZERO })
}

/// Add a state which will record that the lookaround with the given index
/// Add a state which will record that the look-around with the given index
/// is satisfied at the current position.
pub fn add_write_lookaround(
&mut self,
Expand All @@ -731,7 +730,7 @@ impl Builder {
self.add(State::WriteLookAround { lookaround_index: index })
}

/// Add a state which will check whether the lookaround with the given
/// Add a state which will check whether the look-around with the given
/// index is satisfied at the current position.
pub fn add_check_lookaround(
&mut self,
Expand Down
34 changes: 32 additions & 2 deletions regex-automata/src/nfa/thompson/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,13 @@ impl Compiler {
{
return Err(BuildError::unsupported_captures());
}
if self.config.get_reverse()
&& exprs.iter().any(|e| {
(e.borrow() as &Hir).properties().contains_lookaround_expr()
})
{
return Err(BuildError::unsupported_lookarounds());
}

self.builder.borrow_mut().clear();
self.builder.borrow_mut().set_utf8(self.config.get_utf8());
Expand Down Expand Up @@ -2036,14 +2043,14 @@ mod tests {

fn s_write_lookaround(id: usize) -> State {
State::WriteLookAround {
lookaround_idx: SmallIndex::new(id)
lookaround_index: SmallIndex::new(id)
.expect("look-around index too large"),
}
}

fn s_check_lookaround(id: usize, positive: bool, next: usize) -> State {
State::CheckLookAround {
lookaround_idx: SmallIndex::new(id)
lookaround_index: SmallIndex::new(id)
.expect("look-around index too large"),
positive,
next: sid(next),
Expand Down Expand Up @@ -2151,6 +2158,29 @@ mod tests {
);
}

// Pins the exact NFA state sequence compiled for `(?<=^)a` with captures
// disabled, including the `CheckLookAround`/`WriteLookAround` pair for
// look-around index 0.
#[test]
fn compile_yes_unanchored_prefix_with_start_anchor_in_lookaround() {
let nfa = NFA::compiler()
.configure(NFA::config().which_captures(WhichCaptures::None))
.build(r"(?<=^)a")
.unwrap();
assert_eq!(
nfa.states(),
&[
s_bin_union(2, 1),
s_range(0, 255, 0),
s_bin_union(3, 6),
s_bin_union(5, 4),
s_range(0, 255, 3),
s_look(Look::Start, 7),
s_check_lookaround(0, true, 8),
s_write_lookaround(0),
s_byte(b'a', 9),
s_match(0)
]
);
}

#[test]
fn compile_empty() {
assert_eq!(build("").states(), &[s_match(0),]);
Expand Down
16 changes: 16 additions & 0 deletions regex-automata/src/nfa/thompson/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ enum BuildErrorKind {
/// should support it at some point.
#[cfg(feature = "syntax")]
UnsupportedCaptures,
/// An error that occurs when one tries to build a reverse NFA with
/// look-around sub-expressions. Currently, this isn't supported, but we
/// probably should support it at some point.
///
/// This is also emitted by the backtracking engine, which does not
/// support look-around sub-expressions.
UnsupportedLookArounds,
}

impl BuildError {
Expand Down Expand Up @@ -142,6 +149,10 @@ impl BuildError {
/// Builds a `BuildError` whose kind is `UnsupportedCaptures`.
pub(crate) fn unsupported_captures() -> BuildError {
BuildError { kind: BuildErrorKind::UnsupportedCaptures }
}

/// Builds a `BuildError` whose kind is `UnsupportedLookArounds`. It is
/// returned when compiling a reverse NFA from a pattern with look-around
/// sub-expressions, and by the bounded backtracker's builder when the NFA
/// contains look-arounds.
pub(crate) fn unsupported_lookarounds() -> BuildError {
BuildError { kind: BuildErrorKind::UnsupportedLookArounds }
}
}

#[cfg(feature = "std")]
Expand Down Expand Up @@ -201,6 +212,11 @@ impl core::fmt::Display for BuildError {
"currently captures must be disabled when compiling \
a reverse NFA",
),
BuildErrorKind::UnsupportedLookArounds => write!(
f,
"currently look-around sub-expressions cannot be in the pattern \
when compiling a reverse NFA or using the backtracking engine",
),
}
}
}
Loading
Loading