Skip to content

Commit 1480f40

Browse files
authored
Merge pull request #109 from coregx/fix/findall-optimization
fix: FindAll optimization - 1.08x faster than stdlib
2 parents fce1691 + 31e5444 commit 1480f40

File tree

3 files changed

+43
-101
lines changed

3 files changed

+43
-101
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414

1515
---
1616

17+
## [0.11.7] - 2026-02-01
18+
19+
### Fixed
20+
- **FindAll now uses optimized state-reusing path** (Issue #107)
21+
- FindAll was using a slow per-match loop instead of the optimized findAllIndicesStreaming
22+
- Results for `(\w{2,8})+` on 6MB: 2179ms → 834ms (**2.6x faster**)
23+
- Now **1.08x faster than stdlib** (was 2.4x slower in regex-bench)
24+
25+
---
26+
1727
## [0.11.6] - 2026-02-01
1828

1929
### Performance

meta/findall.go

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -143,27 +143,39 @@ func (e *Engine) findAllIndicesLoop(haystack []byte, n int, results [][2]int) []
143143
break
144144
}
145145

146-
// Handle empty matches
147-
if start == end {
148-
if start == lastMatchEnd {
149-
// Skip empty match right after previous match to avoid infinite loop
150-
if pos < len(haystack) {
151-
pos++
152-
} else {
153-
break
154-
}
155-
continue
156-
}
157-
pos = end
158-
if pos < len(haystack) {
159-
pos++
146+
// Skip empty matches that start exactly where the previous non-empty match ended.
147+
// This matches Go's stdlib behavior:
148+
// - "a*" on "ab" returns [[0 1] [2 2]], not [[0 1] [1 1] [2 2]]
149+
//nolint:gocritic // badCond: intentional - checking empty match (start==end) at lastMatchEnd
150+
if start == end && start == lastMatchEnd {
151+
pos++
152+
if pos > len(haystack) {
153+
break
160154
}
161-
} else {
162-
pos = end
163-
lastMatchEnd = end
155+
continue
164156
}
165157

166158
results = append(results, [2]int{start, end})
159+
160+
// Track non-empty match ends for the skip rule
161+
if start != end {
162+
lastMatchEnd = end
163+
}
164+
165+
// Move position past this match
166+
switch {
167+
case start == end:
168+
// Empty match: advance by 1 to avoid infinite loop
169+
pos = end + 1
170+
case end > pos:
171+
pos = end
172+
default:
173+
pos++
174+
}
175+
176+
if pos > len(haystack) {
177+
break
178+
}
167179
}
168180

169181
return results

regex.go

Lines changed: 4 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -376,92 +376,12 @@ func (r *Regex) FindAll(b []byte, n int) [][]byte {
376376
return nil
377377
}
378378

379-
// Fast path: CharClassSearcher uses streaming state machine (single-pass, no per-match overhead)
380-
// This is 2-3x faster than the loop below for patterns like \w+, \d+, [a-z]+
381-
if r.engine.Strategy() == meta.UseCharClassSearcher {
382-
return r.findAllStreaming(b, n)
383-
}
384-
385-
var matches [][]byte
386-
pos := 0
387-
lastMatchEnd := -1 // Track where the last non-empty match ended
388-
389-
for {
390-
// Use zero-allocation FindIndicesAt instead of FindAt (avoids Match object creation)
391-
start, end, found := r.engine.FindIndicesAt(b, pos)
392-
if !found {
393-
break
394-
}
395-
396-
// Lazy allocation: only allocate once we find the first match
397-
//nolint:nestif // Allocation logic requires checking multiple conditions
398-
if matches == nil {
399-
// Smart allocation: anchored patterns have max 1 match, others use capped heuristic.
400-
// This avoids huge allocations on large inputs (6MB → 62k capacity was causing 170µs overhead).
401-
var estimatedCap int
402-
if r.engine.IsStartAnchored() {
403-
estimatedCap = 1 // Start-anchored patterns match at most once (position 0 only)
404-
} else {
405-
// Estimate ~1 match per 100 bytes, but cap at reasonable size
406-
estimatedCap = len(b) / 100
407-
if estimatedCap < 4 {
408-
estimatedCap = 4
409-
}
410-
if estimatedCap > 256 {
411-
estimatedCap = 256 // Cap to limit allocation overhead; append will grow if needed
412-
}
413-
}
414-
if n > 0 && estimatedCap > n {
415-
estimatedCap = n
416-
}
417-
matches = make([][]byte, 0, estimatedCap)
418-
}
419-
420-
// Skip empty matches that start exactly where the previous non-empty match ended.
421-
// This matches Go's stdlib behavior for preventing duplicate empty matches.
422-
//nolint:gocritic // badCond: intentional - checking empty match (start==end) at lastMatchEnd
423-
if start == end && start == lastMatchEnd {
424-
pos++
425-
if pos > len(b) {
426-
break
427-
}
428-
continue
429-
}
430-
431-
matches = append(matches, b[start:end])
432-
433-
// Track non-empty match ends for the skip rule
434-
if start != end {
435-
lastMatchEnd = end
436-
}
437-
438-
// Move position past this match
439-
switch {
440-
case start == end:
441-
// Empty match: advance by 1 to avoid infinite loop
442-
pos = end + 1
443-
case end > pos:
444-
pos = end
445-
default:
446-
// Fallback (shouldn't normally happen)
447-
pos++
448-
}
449-
450-
if pos > len(b) {
451-
break
452-
}
453-
454-
// Check limit
455-
if n > 0 && len(matches) >= n {
456-
break
457-
}
458-
}
459-
460-
return matches
379+
// Use optimized streaming path for ALL strategies (state-reusing, no sync.Pool overhead)
380+
return r.findAllStreaming(b, n)
461381
}
462382

463-
// findAllStreaming uses single-pass streaming state machine for CharClassSearcher patterns.
464-
// This avoids per-match function call overhead (2-3x faster than the loop approach).
383+
// findAllStreaming uses state-reusing search loop for all strategies.
384+
// This avoids sync.Pool overhead (1.29M Get/Put → 1 for 6MB input).
465385
func (r *Regex) findAllStreaming(b []byte, n int) [][]byte {
466386
// Get streaming indices ([][2]int format)
467387
streamResults := r.engine.FindAllIndicesStreaming(b, n, nil)

0 commit comments

Comments
 (0)