Skip to content

Commit de173be

Browse files
authored
fix: checkHasWordBoundary 7M x slowdown → 8.6x faster than stdlib (#105) (#106)
* fix: checkHasWordBoundary 7M x slowdown (#105) Root cause: O(N*M) complexity from scanning all NFA states per byte. Fixes applied: - Use NewBuilderWithWordBoundary() to avoid repeated O(N) scans - Add hasWordBoundary guards to skip unnecessary checks - Use anchored search for prefilter verification Result: 3m22s → 30µs (6,600,000x faster, stdlib parity) Closes #105 * perf: replace map with slice for DFA state lookup State IDs are assigned sequentially (0, 1, 2...), so direct slice indexing is faster than map hash lookup. Before: map lookups were 42% of CPU time After: slice indexing is only 3% of CPU time Issue #105 pattern benchmark: - Before: 99,510 ns/op - After: 56,384 ns/op (1.77x faster) * perf: extract literals from capture/repeat groups Literal extractor now looks inside OpRepeat and OpCapture to find literal prefixes for better prefilters. Example: `=(\$\w...){2}` now extracts `=$` (2 bytes) instead of just `=` (1 byte). Issue #105 pattern benchmark (79KB): - Before: 56.4 µs (2.2x slower than stdlib) - After: 3.6 µs (7.9x FASTER than stdlib) The 2-byte prefilter `=$` is much more selective, reducing false positives.
1 parent a352917 commit de173be

File tree

5 files changed

+302
-18
lines changed

5 files changed

+302
-18
lines changed

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414

1515
---
1616

17+
## [0.11.5] - 2026-02-01
18+
19+
### Fixed
20+
- **checkHasWordBoundary catastrophic slowdown** (Issue #105)
21+
- Patterns with `\w{n,m}` quantifiers were **7,000,000x slower** than stdlib
22+
- Root cause: O(N*M) complexity from scanning all NFA states per byte
23+
- Fix: Use `NewBuilderWithWordBoundary()`, add `hasWordBoundary` guards, anchored prefilter verification
24+
- **Result: 3m22s → 3.6µs** (56,000,000x faster, **7.9x faster than stdlib**)
25+
26+
### Performance
27+
- **DFA state lookup: map → slice** — state lookup cost cut from 42% to 3% of CPU time
28+
- State IDs are sequential, so direct slice indexing beats hash lookups
29+
- **Literal extraction from capture/repeat groups** — better prefilters
30+
- `=(\$\w...){2}` now extracts `=$` (2 bytes) instead of just `=` (1 byte)
31+
- Reduces false positives in prefilter, massive speedup on selective patterns
32+
33+
### Technical Details
34+
- Added `searchEarliestMatchAnchored()` for anchored prefilter verification (stops at the first dead state instead of rescanning to the end of input)
35+
- Replaced `stateByID map[StateID]*State` with `states []*State`
36+
- Extended `tryExpandConcatSuffix()` to unwrap OpRepeat/OpCapture
37+
- Credits: @danslo for root cause analysis and fix suggestions
38+
39+
---
40+
1741
## [0.11.4] - 2026-01-16
1842

1943
### Fixed

dfa/lazy/builder.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ func (b *Builder) Build() (*DFA, error) {
128128
config: b.config,
129129
prefilter: pf,
130130
pikevm: nfa.NewPikeVM(b.nfa),
131-
stateByID: make(map[StateID]*State, b.config.MaxStates),
131+
states: make([]*State, 0, b.config.MaxStates),
132132
startTable: startTable,
133133
byteClasses: b.nfa.ByteClasses(),
134134
freshStartStates: freshStartStates,

dfa/lazy/lazy.go

Lines changed: 98 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,10 @@ type DFA struct {
5959
prefilter prefilter.Prefilter
6060
pikevm *nfa.PikeVM
6161

62-
// stateByID provides O(1) lookup of states by ID
63-
// This maps StateID → *State for fast access during search
64-
stateByID map[StateID]*State
62+
// states provides O(1) lookup of states by ID via direct indexing.
63+
// StateIDs are sequential (0, 1, 2...), so slice indexing is faster than map.
64+
// This is a critical optimization - map lookups were 42% of CPU time!
65+
states []*State
6566

6667
// startTable caches start states for different look-behind contexts
6768
// This enables correct handling of assertions (^, \b, etc.) and
@@ -243,7 +244,8 @@ func (d *DFA) SearchAtAnchored(haystack []byte, at int) int {
243244
for pos := at; pos < len(haystack); pos++ {
244245
b := haystack[pos]
245246

246-
if d.checkWordBoundaryMatch(currentState, b) {
247+
// Skip expensive check for patterns without word boundaries (Issue #105)
248+
if d.hasWordBoundary && d.checkWordBoundaryMatch(currentState, b) {
247249
return pos
248250
}
249251

@@ -322,8 +324,9 @@ func (d *DFA) isMatchWithPrefilter(haystack []byte) bool {
322324
return false
323325
}
324326

325-
// Try to match at candidate - use early termination
326-
if d.searchEarliestMatch(haystack, pos) {
327+
// Try to match at candidate - use ANCHORED search to verify match starts here
328+
// Issue #105: unanchored search caused catastrophic slowdown
329+
if d.searchEarliestMatchAnchored(haystack, pos) {
327330
return true
328331
}
329332

@@ -335,7 +338,7 @@ func (d *DFA) isMatchWithPrefilter(haystack []byte) bool {
335338
return false
336339
}
337340
pos = candidate
338-
if d.searchEarliestMatch(haystack, pos) {
341+
if d.searchEarliestMatchAnchored(haystack, pos) {
339342
return true
340343
}
341344
}
@@ -393,7 +396,8 @@ func (d *DFA) searchEarliestMatch(haystack []byte, startPos int) bool {
393396
// This handles patterns like `test\b` where after matching "test",
394397
// the next byte '!' creates a word boundary that satisfies \b.
395398
// We need to detect this match before trying to consume '!'.
396-
if d.checkWordBoundaryMatch(currentState, b) {
399+
// Skip expensive check for patterns without word boundaries (Issue #105)
400+
if d.hasWordBoundary && d.checkWordBoundaryMatch(currentState, b) {
397401
return true
398402
}
399403

@@ -444,6 +448,75 @@ func (d *DFA) searchEarliestMatch(haystack []byte, startPos int) bool {
444448
return d.checkEOIMatch(currentState)
445449
}
446450

451+
// searchEarliestMatchAnchored performs ANCHORED DFA search with early termination.
452+
// Unlike searchEarliestMatch, this requires the match to START exactly at startPos.
453+
// This is critical for prefilter verification - we need to confirm the match
454+
// actually starts at the candidate position, not somewhere after it.
455+
//
456+
// Issue #105: Using unanchored search for prefilter verification caused
457+
// catastrophic slowdown because it would re-scan from candidate to end.
458+
func (d *DFA) searchEarliestMatchAnchored(haystack []byte, startPos int) bool {
459+
if startPos > len(haystack) {
460+
return false
461+
}
462+
463+
// Get ANCHORED start state (requires match to start exactly at startPos)
464+
currentState := d.getStartState(haystack, startPos, true)
465+
if currentState == nil {
466+
// Fallback to NFA with anchored search
467+
start, end, matched := d.pikevm.SearchAt(haystack, startPos)
468+
// For anchored: match must start exactly at startPos
469+
return matched && start == startPos && end >= start
470+
}
471+
472+
// Check if start state is already a match (e.g., empty pattern)
473+
if currentState.IsMatch() {
474+
return true
475+
}
476+
477+
// Scan input byte by byte with early termination
478+
for pos := startPos; pos < len(haystack); pos++ {
479+
b := haystack[pos]
480+
481+
// Skip expensive check for patterns without word boundaries (Issue #105)
482+
if d.hasWordBoundary && d.checkWordBoundaryMatch(currentState, b) {
483+
return true
484+
}
485+
486+
// Get next state
487+
classIdx := d.byteToClass(b)
488+
nextID, ok := currentState.Transition(classIdx)
489+
switch {
490+
case !ok:
491+
nextState, err := d.determinize(currentState, b)
492+
if err != nil {
493+
start, end, matched := d.pikevm.SearchAt(haystack, startPos)
494+
return matched && start == startPos && end >= start
495+
}
496+
if nextState == nil {
497+
return false
498+
}
499+
currentState = nextState
500+
501+
case nextID == DeadState:
502+
return false
503+
504+
default:
505+
currentState = d.getState(nextID)
506+
if currentState == nil {
507+
start, end, matched := d.pikevm.SearchAt(haystack, startPos)
508+
return matched && start == startPos && end >= start
509+
}
510+
}
511+
512+
if currentState.IsMatch() {
513+
return true
514+
}
515+
}
516+
517+
return d.checkEOIMatch(currentState)
518+
}
519+
447520
// findWithPrefilterAt searches using prefilter to accelerate unanchored search.
448521
// This is used by FindAt to correctly handle anchors when searching from non-zero positions.
449522
func (d *DFA) findWithPrefilterAt(haystack []byte, startAt int) int {
@@ -801,17 +874,23 @@ func (d *DFA) getState(id StateID) *State {
801874
return nil
802875
}
803876

804-
// O(1) lookup via stateByID map
805-
state, ok := d.stateByID[id]
806-
if !ok {
877+
// O(1) lookup via direct slice indexing (faster than map!)
878+
idx := int(id)
879+
if idx >= len(d.states) {
807880
return nil
808881
}
809-
return state
882+
return d.states[idx]
810883
}
811884

812-
// registerState adds a state to the ID-based lookup map
885+
// registerState adds a state to the states slice for O(1) lookup.
886+
// StateIDs are assigned sequentially, so we can use direct indexing.
813887
func (d *DFA) registerState(state *State) {
814-
d.stateByID[state.ID()] = state
888+
id := int(state.ID())
889+
// Grow slice if needed
890+
for len(d.states) <= id {
891+
d.states = append(d.states, nil)
892+
}
893+
d.states[id] = state
815894
}
816895

817896
// checkEOIMatch checks if the current state would match at end-of-input.
@@ -828,7 +907,8 @@ func (d *DFA) checkEOIMatch(state *State) bool {
828907
}
829908

830909
// Create a temporary builder for EOI resolution
831-
builder := NewBuilder(d.nfa, d.config)
910+
// Use NewBuilderWithWordBoundary to avoid O(states) scan per call (Issue #105)
911+
builder := NewBuilderWithWordBoundary(d.nfa, d.config, d.hasWordBoundary)
832912
return builder.CheckEOIMatch(state.NFAStates(), state.IsFromWord())
833913
}
834914

@@ -853,7 +933,8 @@ func (d *DFA) checkWordBoundaryMatch(state *State, nextByte byte) bool {
853933
return false
854934
}
855935

856-
builder := NewBuilder(d.nfa, d.config)
936+
// Use NewBuilderWithWordBoundary to avoid O(states) scan per call (Issue #105)
937+
builder := NewBuilderWithWordBoundary(d.nfa, d.config, d.hasWordBoundary)
857938
isFromWord := state.IsFromWord()
858939
isNextWord := isWordByte(nextByte)
859940
wordBoundarySatisfied := isFromWord != isNextWord
@@ -1017,7 +1098,7 @@ func (d *DFA) CacheStats() (size int, capacity uint32, hits, misses uint64, hitR
10171098
// Primarily useful for testing and benchmarking.
10181099
func (d *DFA) ResetCache() {
10191100
d.cache.Clear()
1020-
d.stateByID = make(map[StateID]*State, d.config.MaxStates)
1101+
d.states = make([]*State, 0, d.config.MaxStates)
10211102

10221103
// Reset StartTable
10231104
d.startTable = NewStartTable()

literal/extractor.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,13 @@ func (e *Extractor) tryExpandConcatSuffix(prefixes *Seq, subs []*syntax.Regexp,
576576
}
577577
}
578578

579+
// Try Capture/Repeat group expansion: =(\$...){2} → =$...
580+
// Extract literal prefix from inside the group and append to our prefixes.
581+
// This handles patterns like `=(\$\w{1,10}...){2}` where `=$` is a better prefilter than just `=`.
582+
if expanded := e.tryExpandRepeatCapture(prefixes, nextSub, depth); expanded != nil {
583+
return expanded
584+
}
585+
579586
// Default: mark prefixes as incomplete since more elements follow
580587
lits := make([]Literal, prefixes.Len())
581588
for i := 0; i < prefixes.Len(); i++ {
@@ -585,6 +592,49 @@ func (e *Extractor) tryExpandConcatSuffix(prefixes *Seq, subs []*syntax.Regexp,
585592
return NewSeq(lits...)
586593
}
587594

595+
// tryExpandRepeatCapture attempts to extract literals from inside OpRepeat/OpCapture groups.
596+
// For pattern `=(\$\w...){2}`, this extracts `=$` instead of just `=`.
597+
// Returns nil if no expansion is possible.
598+
func (e *Extractor) tryExpandRepeatCapture(prefixes *Seq, nextSub *syntax.Regexp, depth int) *Seq {
599+
// Unwrap OpRepeat if present (e.g., {2,2} or {1,10})
600+
innerNode := nextSub
601+
if innerNode.Op == syntax.OpRepeat && len(innerNode.Sub) > 0 && innerNode.Min >= 1 {
602+
innerNode = innerNode.Sub[0]
603+
}
604+
// Unwrap OpCapture if present
605+
if innerNode.Op == syntax.OpCapture && len(innerNode.Sub) > 0 {
606+
innerNode = innerNode.Sub[0]
607+
}
608+
// Only proceed if we actually unwrapped something
609+
if innerNode == nextSub {
610+
return nil
611+
}
612+
613+
innerPrefixes := e.extractPrefixes(innerNode, depth+1)
614+
if innerPrefixes.IsEmpty() {
615+
return nil
616+
}
617+
618+
// Append inner prefixes to our prefixes
619+
var expanded []Literal
620+
for i := 0; i < prefixes.Len(); i++ {
621+
prefix := prefixes.Get(i)
622+
for j := 0; j < innerPrefixes.Len(); j++ {
623+
inner := innerPrefixes.Get(j)
624+
combined := append([]byte{}, prefix.Bytes...)
625+
combined = append(combined, inner.Bytes...)
626+
if len(combined) > e.config.MaxLiteralLen {
627+
combined = combined[:e.config.MaxLiteralLen]
628+
}
629+
expanded = append(expanded, NewLiteral(combined, false))
630+
}
631+
}
632+
if len(expanded) == 0 {
633+
return nil
634+
}
635+
return NewSeq(expanded...)
636+
}
637+
588638
// expandLiteralAlternate expands Literal + Alternation back into individual complete literals.
589639
// This handles the case where the regex parser factors common prefixes:
590640
//

0 commit comments

Comments
 (0)