@@ -9,15 +9,16 @@ import (
99// Cache provides thread-safe storage for DFA states with bounded memory.
1010//
1111// The cache maps StateKey (NFA state set hash) → DFA State.
12- // When the cache reaches maxStates, it stops accepting new entries and
13- // the DFA must fall back to NFA for uncached transitions .
12+ // When the cache reaches maxStates, it can be cleared and rebuilt
13+ // (up to a configured limit) before falling back to NFA.
1414//
1515// Thread safety: All methods are safe for concurrent access via RWMutex.
1616//
1717// Memory management:
18- // - States are never evicted (LRU would require additional overhead)
19- // - When cache is full, new states trigger NFA fallback
20- // - This is simple and efficient for most patterns
18+ // - States are never evicted individually (no LRU overhead)
19+ // - When cache is full, it is cleared entirely and search continues
20+ // - After too many clears, falls back to NFA
21+ // - Clearing keeps allocated memory to avoid re-allocation
2122type Cache struct {
2223 // mu protects all fields below
2324 // RWMutex allows concurrent reads (common case during search)
@@ -33,6 +34,12 @@ type Cache struct {
3334 // Start at 1 (0 is reserved for StartState)
3435 nextID StateID
3536
37+ // clearCount tracks how many times the cache has been cleared during
38+ // the current search. This is used to detect pathological cache thrashing
39+ // and trigger NFA fallback when clears exceed the configured limit.
40+ // Inspired by Rust regex-automata's hybrid DFA cache clearing strategy.
41+ clearCount int
42+
3643 // Statistics for cache performance tuning
3744 hits uint64 // Number of cache hits
3845 misses uint64 // Number of cache misses
@@ -175,14 +182,58 @@ func (c *Cache) ResetStats() {
175182}
176183
177184// Clear removes all states from the cache and resets statistics.
178- // This is primarily for testing.
185+ // This also resets the clear counter. Primarily for testing.
179186func (c * Cache ) Clear () {
180187 c .mu .Lock ()
181188 defer c .mu .Unlock ()
182189
183190 // Clear map (GC will reclaim memory)
184191 c .states = make (map [StateKey ]* State , c .maxStates )
185192 c .nextID = StartState + 1
193+ c .clearCount = 0
186194 c .hits = 0
187195 c .misses = 0
188196}
197+
198+ // ClearKeepMemory clears all states from the cache but keeps the allocated
199+ // map memory for reuse and increments the clear counter. This is used during
200+ // search when the cache is full: instead of falling back to NFA permanently,
201+ // we clear the cache and continue DFA search, rebuilding states on demand.
202+ //
203+ // Unlike Clear(), this method:
204+ // - Increments clearCount (tracks clears during a search)
205+ // - Does NOT reset hit/miss statistics (they accumulate across clears)
206+ // - Reuses map memory via Go's map clearing optimization
207+ //
208+ // After calling this, all previously returned *State pointers are stale
209+ // and must not be used. The caller must re-obtain the start state.
210+ //
211+ // Inspired by Rust regex-automata's cache clearing strategy (hybrid/dfa.rs).
212+ func (c * Cache ) ClearKeepMemory () {
213+ c .mu .Lock ()
214+ defer c .mu .Unlock ()
215+
216+ // Clear the map using Go's optimized clear-by-range idiom.
217+ // This reuses the map's internal memory (buckets) instead of reallocating.
218+ for k := range c .states {
219+ delete (c .states , k )
220+ }
221+ c .nextID = StartState + 1
222+ c .clearCount ++
223+ }
224+
225+ // ClearCount returns how many times the cache has been cleared.
226+ // Used to check against the MaxCacheClears limit.
227+ func (c * Cache ) ClearCount () int {
228+ c .mu .RLock ()
229+ defer c .mu .RUnlock ()
230+ return c .clearCount
231+ }
232+
233+ // ResetClearCount resets the clear counter to zero.
234+ // Called at the start of each new search to give the DFA a fresh budget.
235+ func (c * Cache ) ResetClearCount () {
236+ c .mu .Lock ()
237+ defer c .mu .Unlock ()
238+ c .clearCount = 0
239+ }
0 commit comments