Skip to content

Commit a30fd70

Browse files
authored
Merge pull request #112 from coregx/feature/rust-inspired-optimizations
perf: Rust-inspired optimizations (anti-quadratic, DFA unrolling, 1-bit visited)
2 parents 8b528fa + 61fe34e commit a30fd70

21 files changed

+1831
-164
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1212
- ARM NEON SIMD support (waiting for Go 1.26 native SIMD)
1313
- SIMD prefilter for CompositeSequenceDFA (#83)
1414

15+
## [0.12.0] - 2026-02-06
16+
17+
### Performance
18+
- **Anti-quadratic guard for reverse searches** — Track `minStart` position in
19+
ReverseSuffix, ReverseInner, and ReverseSuffixSet searchers to prevent O(n²)
20+
degradation on high false-positive suffix workloads. Falls back to PikeVM
21+
when quadratic behavior is detected. Inspired by Rust regex `meta/limited.rs`.
22+
- **Lazy DFA 4x loop unrolling** — Process 4 state transitions per inner loop
23+
iteration in forward and reverse DFA search. Check special states only between
24+
batches. Direct field access for minimal per-transition overhead.
25+
Expected 15-40% throughput improvement on DFA-heavy patterns (alpha_digit, word_digit).
26+
- **Prefilter `IsFast()` gate** — Skip reverse search optimizations when a fast
27+
SIMD-backed prefix prefilter already exists. Heuristic: Memchr/Memmem always
28+
fast, Teddy fast when `minLen >= 3`. Inspired by Rust regex `is_fast()`.
29+
- **DFA cache clear & continue** — On cache overflow, clear and fall back to
30+
PikeVM for current search instead of permanently disabling DFA. Configurable
31+
`MaxCacheClears` limit (default 5). Inspired by Rust regex `try_clear_cache`.
32+
33+
### Fixed
34+
- **OnePass DFA capture limit** — Tighten from 17 to 16 capture groups.
35+
`uint32` slot mask (32 bits) can only track 16 groups (slots 0..31).
36+
Group 17 silently dropped end position due to `slotIdx < 32` guard.
37+
1538
---
1639

1740
## [0.11.9] - 2026-02-02

dfa/lazy/cache.go

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@ import (
99
// Cache provides thread-safe storage for DFA states with bounded memory.
1010
//
1111
// The cache maps StateKey (NFA state set hash) → DFA State.
12-
// When the cache reaches maxStates, it stops accepting new entries and
13-
// the DFA must fall back to NFA for uncached transitions.
12+
// When the cache reaches maxStates, it can be cleared and rebuilt
13+
// (up to a configured limit) before falling back to NFA.
1414
//
1515
// Thread safety: All methods are safe for concurrent access via RWMutex.
1616
//
1717
// Memory management:
18-
// - States are never evicted (LRU would require additional overhead)
19-
// - When cache is full, new states trigger NFA fallback
20-
// - This is simple and efficient for most patterns
18+
// - States are never evicted individually (no LRU overhead)
19+
// - When cache is full, it is cleared entirely and search continues
20+
// - After too many clears, falls back to NFA
21+
// - Clearing keeps allocated memory to avoid re-allocation
2122
type Cache struct {
2223
// mu protects all fields below
2324
// RWMutex allows concurrent reads (common case during search)
@@ -33,6 +34,12 @@ type Cache struct {
3334
// Start at 1 (0 is reserved for StartState)
3435
nextID StateID
3536

37+
// clearCount tracks how many times the cache has been cleared during
38+
// the current search. This is used to detect pathological cache thrashing
39+
// and trigger NFA fallback when clears exceed the configured limit.
40+
// Inspired by Rust regex-automata's hybrid DFA cache clearing strategy.
41+
clearCount int
42+
3643
// Statistics for cache performance tuning
3744
hits uint64 // Number of cache hits
3845
misses uint64 // Number of cache misses
@@ -175,14 +182,58 @@ func (c *Cache) ResetStats() {
175182
}
176183

177184
// Clear removes all states from the cache and resets statistics.
178-
// This is primarily for testing.
185+
// This also resets the clear counter. Primarily for testing.
179186
func (c *Cache) Clear() {
180187
c.mu.Lock()
181188
defer c.mu.Unlock()
182189

183190
// Clear map (GC will reclaim memory)
184191
c.states = make(map[StateKey]*State, c.maxStates)
185192
c.nextID = StartState + 1
193+
c.clearCount = 0
186194
c.hits = 0
187195
c.misses = 0
188196
}
197+
198+
// ClearKeepMemory clears all states from the cache but keeps the allocated
199+
// map memory for reuse and increments the clear counter. This is used during
200+
// search when the cache is full: instead of falling back to NFA permanently,
201+
// we clear the cache and continue DFA search, rebuilding states on demand.
202+
//
203+
// Unlike Clear(), this method:
204+
// - Increments clearCount (tracks clears during a search)
205+
// - Does NOT reset hit/miss statistics (they accumulate across clears)
206+
// - Reuses map memory via Go's map clearing optimization
207+
//
208+
// After calling this, all previously returned *State pointers are stale
209+
// and must not be used. The caller must re-obtain the start state.
210+
//
211+
// Inspired by Rust regex-automata's cache clearing strategy (hybrid/dfa.rs).
212+
func (c *Cache) ClearKeepMemory() {
213+
c.mu.Lock()
214+
defer c.mu.Unlock()
215+
216+
// Clear the map using Go's optimized clear-by-range idiom.
217+
// This reuses the map's internal memory (buckets) instead of reallocating.
218+
for k := range c.states {
219+
delete(c.states, k)
220+
}
221+
c.nextID = StartState + 1
222+
c.clearCount++
223+
}
224+
225+
// ClearCount returns how many times ClearKeepMemory has run since the counter was last reset.
226+
// Used to check against the MaxCacheClears limit.
227+
func (c *Cache) ClearCount() int {
228+
c.mu.RLock()
229+
defer c.mu.RUnlock()
230+
return c.clearCount
231+
}
232+
233+
// ResetClearCount resets the clear counter to zero.
234+
// Called at the start of each new search to give the DFA a fresh budget.
235+
func (c *Cache) ResetClearCount() {
236+
c.mu.Lock()
237+
defer c.mu.Unlock()
238+
c.clearCount = 0
239+
}

dfa/lazy/config.go

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ package lazy
66
// performance. Larger caches provide better hit rates but consume more memory.
77
type Config struct {
88
// MaxStates is the maximum number of DFA states to cache.
9-
// When this limit is reached, the DFA falls back to NFA execution.
9+
// When this limit is reached, the DFA clears the cache and continues
10+
// DFA search (up to MaxCacheClears times per search), then falls back
11+
// to NFA execution if the clear limit is exceeded.
1012
//
1113
// Default: 10,000 states (~1MB with 256-byte transition tables)
1214
// Memory usage: ~100-200 bytes per state (depending on transitions)
@@ -17,6 +19,25 @@ type Config struct {
1719
// - Memory-constrained: 1,000 states (~100KB)
1820
MaxStates uint32
1921

22+
// MaxCacheClears is the maximum number of times the DFA cache can be
23+
// cleared and rebuilt during a single search before falling back to NFA.
24+
//
25+
// When the cache fills up during determinization, instead of immediately
26+
// falling back to the NFA (PikeVM), the DFA clears the cache, re-creates
27+
// the start state, and continues searching from the current position.
28+
// This is much faster than NFA fallback for large inputs with complex
29+
// patterns that generate many DFA states.
30+
//
31+
// After MaxCacheClears clears, the DFA gives up and falls back to NFA
32+
// for the remainder of that search. This prevents pathological cases where the cache thrashes
33+
// endlessly (clearing and refilling every few bytes).
34+
//
35+
// Default: 5
36+
// Set to 0 to disable cache clearing (always fall back to NFA on full cache).
37+
//
38+
// Inspired by Rust regex-automata's hybrid DFA cache clearing strategy.
39+
MaxCacheClears int
40+
2041
// CacheHitThreshold is the minimum cache hit rate (0.0-1.0) to continue
2142
// using DFA. If hit rate falls below this, fall back to NFA.
2243
//
@@ -64,6 +85,7 @@ type Config struct {
6485
func DefaultConfig() Config {
6586
return Config{
6687
MaxStates: 10_000,
88+
MaxCacheClears: 5, // Allow 5 cache clears before NFA fallback
6789
CacheHitThreshold: 0.0, // Disabled by default
6890
UsePrefilter: true,
6991
MinPrefilterLen: 3,
@@ -81,6 +103,13 @@ func (c *Config) Validate() error {
81103
}
82104
}
83105

106+
if c.MaxCacheClears < 0 {
107+
return &DFAError{
108+
Kind: InvalidConfig,
109+
Message: "MaxCacheClears must be >= 0",
110+
}
111+
}
112+
84113
if c.CacheHitThreshold < 0.0 || c.CacheHitThreshold > 1.0 {
85114
return &DFAError{
86115
Kind: InvalidConfig,
@@ -111,6 +140,13 @@ func (c Config) WithMaxStates(maxStates uint32) Config {
111140
return c
112141
}
113142

143+
// WithMaxCacheClears returns a new config with the specified max cache clears.
144+
// Set to 0 to disable cache clearing (always fall back to NFA on full cache).
145+
func (c Config) WithMaxCacheClears(maxClears int) Config {
146+
c.MaxCacheClears = maxClears
147+
return c
148+
}
149+
114150
// WithCacheHitThreshold returns a new config with the specified cache hit threshold
115151
func (c Config) WithCacheHitThreshold(threshold float64) Config {
116152
c.CacheHitThreshold = threshold

dfa/lazy/error.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ import "fmt"
44

55
// Error types for Lazy DFA operations
66

7-
// ErrCacheFull indicates that the DFA state cache has exceeded its maximum size.
7+
// ErrCacheFull indicates that the DFA state cache has exceeded its maximum size
8+
// AND the maximum number of cache clears has been reached.
89
// When this occurs, the DFA falls back to NFA execution (PikeVM) for the
910
// remainder of the search.
1011
//
@@ -15,6 +16,17 @@ var ErrCacheFull = &DFAError{
1516
Message: "DFA state cache is full",
1617
}
1718

19+
// errCacheCleared is an internal sentinel error returned by determinize()
20+
// when the cache was cleared and rebuilt. The search loop must re-obtain
21+
// the current state from the start state at the current position and continue.
22+
//
23+
// This is NOT a real error - it signals that the search should restart
24+
// DFA processing from the current position with a fresh cache.
25+
var errCacheCleared = &DFAError{
26+
Kind: CacheCleared,
27+
Message: "DFA cache was cleared and rebuilt",
28+
}
29+
1830
// ErrStateLimitExceeded indicates that the DFA has reached the maximum number
1931
// of allowed states during determinization.
2032
//
@@ -36,8 +48,14 @@ type ErrorKind uint8
3648

3749
const (
3850
// CacheFull indicates the state cache reached its size limit
51+
// and cannot be cleared further (max clears exceeded)
3952
CacheFull ErrorKind = iota
4053

54+
// CacheCleared indicates the cache was cleared and rebuilt.
55+
// This is an internal signal, not a real error. The search loop
56+
// should re-obtain the current state and continue from the current position.
57+
CacheCleared
58+
4159
// StateLimitExceeded indicates too many states were created
4260
StateLimitExceeded
4361

@@ -54,6 +72,8 @@ func (k ErrorKind) String() string {
5472
switch k {
5573
case CacheFull:
5674
return "CacheFull"
75+
case CacheCleared:
76+
return "CacheCleared"
5777
case StateLimitExceeded:
5878
return "StateLimitExceeded"
5979
case InvalidConfig:

0 commit comments

Comments
 (0)