diff --git a/CHANGELOG.md b/CHANGELOG.md index 58f9d79f3..5e8525909 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,13 @@ +1.11.4 (TBD) +============ +TODO + +Bug fixes: + +* [BUG #1083](https://github.com/rust-lang/regex/issues/1083): +Fixes a panic in the lazy DFA (can only occur for especially large regexes). + + 1.11.3 (2025-09-25) =================== This is a small patch release with an improvement in memory usage in some diff --git a/regex-automata/src/hybrid/dfa.rs b/regex-automata/src/hybrid/dfa.rs index b4c9cf444..22893d7a3 100644 --- a/regex-automata/src/hybrid/dfa.rs +++ b/regex-automata/src/hybrid/dfa.rs @@ -2132,7 +2132,19 @@ impl<'i, 'c> Lazy<'i, 'c> { unit, empty_builder, ); - let save_state = !self.as_ref().state_builder_fits_in_cache(&builder); + // This is subtle, but if we *might* clear the cache, then we should + // try to save the current state so that we can re-map its ID after + // cache clearing. We *might* clear the cache when either the new + // state can't fit in the cache or when the number of transitions has + // reached the maximum. Even if either of these conditions is true, + // the cache might not be cleared if we can reuse an existing state. + // But we don't know that at this point. Moreover, we don't save the + // current state every time because it is costly. + // + // TODO: We should try to find a way to make this less subtle and error + // prone. ---AG + let save_state = !self.as_ref().state_builder_fits_in_cache(&builder) + || self.cache.trans.len() >= LazyStateID::MAX; if save_state { self.save_state(current); } @@ -2761,7 +2773,7 @@ impl<'i, 'c> LazyRef<'i, 'c> { let needed = self.cache.memory_usage() + self.memory_usage_for_one_more_state(state.memory_usage()); trace!( - "lazy DFA cache capacity check: {:?} ?<=? {:?}", + "lazy DFA cache capacity state check: {:?} ?<=? 
{:?}", needed, self.dfa.cache_capacity ); @@ -2773,6 +2785,11 @@ impl<'i, 'c> LazyRef<'i, 'c> { fn state_builder_fits_in_cache(&self, state: &StateBuilderNFA) -> bool { let needed = self.cache.memory_usage() + self.memory_usage_for_one_more_state(state.as_bytes().len()); + trace!( + "lazy DFA cache capacity state builder check: {:?} ?<=? {:?}", + needed, + self.dfa.cache_capacity + ); needed <= self.dfa.cache_capacity } diff --git a/regex-automata/src/hybrid/id.rs b/regex-automata/src/hybrid/id.rs index 43d5b5ba0..65d8528e7 100644 --- a/regex-automata/src/hybrid/id.rs +++ b/regex-automata/src/hybrid/id.rs @@ -180,7 +180,7 @@ impl LazyStateID { const MASK_QUIT: usize = 1 << (LazyStateID::MAX_BIT - 2); const MASK_START: usize = 1 << (LazyStateID::MAX_BIT - 3); const MASK_MATCH: usize = 1 << (LazyStateID::MAX_BIT - 4); - const MAX: usize = LazyStateID::MASK_MATCH - 1; + pub(crate) const MAX: usize = LazyStateID::MASK_MATCH - 1; /// Create a new lazy state ID. ///