@@ -483,6 +483,25 @@ func (e *Engine) findIndicesMultilineReverseSuffixAt(haystack []byte, at int) (i
483483 return e .multilineReverseSuffixSearcher .FindIndicesAt (haystack , at )
484484}
485485
486+ // findIndicesBidirectionalDFA uses forward DFA + reverse DFA for exact match bounds.
487+ // Forward DFA finds match end, reverse DFA finds match start. O(n) total.
488+ // Used as fallback when BoundedBacktracker can't handle large inputs.
489+ func (e * Engine ) findIndicesBidirectionalDFA (haystack []byte , at int ) (int , int , bool ) {
490+ atomic .AddUint64 (& e .stats .DFASearches , 1 )
491+ end := e .dfa .FindAt (haystack , at )
492+ if end == - 1 {
493+ return - 1 , - 1 , false
494+ }
495+ if end == at {
496+ return at , at , true // Empty match
497+ }
498+ start := e .reverseDFA .SearchReverse (haystack , at , end )
499+ if start < 0 {
500+ return - 1 , - 1 , false // Reverse DFA failed (cache full)
501+ }
502+ return start , end , true
503+ }
504+
486505// findIndicesBoundedBacktracker searches using bounded backtracker - zero alloc.
487506// Thread-safe: uses pooled state.
488507func (e * Engine ) findIndicesBoundedBacktracker (haystack []byte ) (int , int , bool ) {
@@ -499,7 +518,10 @@ func (e *Engine) findIndicesBoundedBacktracker(haystack []byte) (int, int, bool)
499518
500519 atomic .AddUint64 (& e .stats .NFASearches , 1 )
501520 if ! e .boundedBacktracker .CanHandle (len (haystack )) {
502- // Use optimized SlotTable-based search for large inputs
521+ // Bidirectional DFA: O(n) vs PikeVM's O(n*states) for large inputs
522+ if e .dfa != nil && e .reverseDFA != nil {
523+ return e .findIndicesBidirectionalDFA (haystack , 0 )
524+ }
503525 return e .pikevm .SearchWithSlotTable (haystack , nfa .SearchModeFind )
504526 }
505527
@@ -529,21 +551,33 @@ func (e *Engine) findIndicesBoundedBacktrackerAt(haystack []byte, at int) (int,
529551 // to search the remaining portion, not the full haystack.
530552 remaining := haystack [at :]
531553
532- // V11-002 ASCII optimization
533- if e .asciiBoundedBacktracker != nil && simd .IsASCII (remaining ) {
534- if ! e .asciiBoundedBacktracker .CanHandle (len (remaining )) {
535- // Use optimized SlotTable-based search for large inputs
536- return e .pikevm .SearchWithSlotTableAt (haystack , at , nfa .SearchModeFind )
554+ // V11-002 ASCII optimization.
555+ // For start-anchored patterns, limit the IsASCII check to a small prefix
556+ // to avoid O(n) scan of the entire input when only position 0 matters.
557+ if e .asciiBoundedBacktracker != nil {
558+ asciiCheck := remaining
559+ if e .isStartAnchored && len (asciiCheck ) > 4096 {
560+ asciiCheck = asciiCheck [:4096 ]
537561 }
538- start , end , found := e .asciiBoundedBacktracker .Search (remaining )
539- if found {
540- return at + start , at + end , true
562+ if simd .IsASCII (asciiCheck ) {
563+ if ! e .asciiBoundedBacktracker .CanHandle (len (remaining )) {
564+ if e .dfa != nil && e .reverseDFA != nil {
565+ return e .findIndicesBidirectionalDFA (haystack , at )
566+ }
567+ return e .pikevm .SearchWithSlotTableAt (haystack , at , nfa .SearchModeFind )
568+ }
569+ start , end , found := e .asciiBoundedBacktracker .Search (remaining )
570+ if found {
571+ return at + start , at + end , true
572+ }
573+ return - 1 , - 1 , false
541574 }
542- return - 1 , - 1 , false
543575 }
544576
545577 if ! e .boundedBacktracker .CanHandle (len (remaining )) {
546- // Delegate to NFA path which uses prefilter if available
578+ if e .dfa != nil && e .reverseDFA != nil {
579+ return e .findIndicesBidirectionalDFA (haystack , at )
580+ }
547581 return e .findIndicesNFAAt (haystack , at )
548582 }
549583
@@ -730,6 +764,14 @@ func (e *Engine) findIndicesDigitPrefilter(haystack []byte) (int, int, bool) {
730764 }
731765
732766 pos = digitPos + 1
767+ // When the leading digit class is greedy unbounded (\d+, \d*), all
768+ // positions in the same digit run reach the same DFA state after
769+ // consuming digits, so they all fail identically. Skip the entire run.
770+ if e .digitRunSkipSafe {
771+ for pos < len (haystack ) && haystack [pos ] >= '0' && haystack [pos ] <= '9' {
772+ pos ++
773+ }
774+ }
733775 }
734776
735777 return - 1 , - 1 , false
@@ -767,6 +809,11 @@ func (e *Engine) findIndicesDigitPrefilterAt(haystack []byte, at int) (int, int,
767809 }
768810
769811 pos = digitPos + 1
812+ if e .digitRunSkipSafe {
813+ for pos < len (haystack ) && haystack [pos ] >= '0' && haystack [pos ] <= '9' {
814+ pos ++
815+ }
816+ }
770817 }
771818
772819 return - 1 , - 1 , false
@@ -931,46 +978,52 @@ func (e *Engine) findIndicesBoundedBacktrackerAtWithState(haystack []byte, at in
931978 // to search the remaining portion, not the full haystack.
932979 remaining := haystack [at :]
933980
934- // V11-002 ASCII optimization
935- if e .asciiBoundedBacktracker != nil && simd .IsASCII (remaining ) {
936- if ! e .asciiBoundedBacktracker .CanHandle (len (remaining )) {
937- // V12 Windowed BoundedBacktracker for ASCII path
938- maxInput := e .asciiBoundedBacktracker .MaxInputSize ()
939- if maxInput > 0 && len (remaining ) > maxInput {
940- window := remaining [:maxInput ]
941- start , end , found := e .asciiBoundedBacktracker .Search (window )
942- if found {
943- return at + start , at + end , true
981+ // V11-002 ASCII optimization.
982+ // For start-anchored patterns, limit the IsASCII check to a small prefix
983+ // to avoid O(n) scan of the entire input when only position 0 matters.
984+ if e .asciiBoundedBacktracker != nil {
985+ asciiCheck := remaining
986+ if e .isStartAnchored && len (asciiCheck ) > 4096 {
987+ asciiCheck = asciiCheck [:4096 ]
988+ }
989+ if simd .IsASCII (asciiCheck ) {
990+ if ! e .asciiBoundedBacktracker .CanHandle (len (remaining )) {
991+ // Bidirectional DFA: O(n) vs PikeVM's O(n*states)
992+ if e .dfa != nil && e .reverseDFA != nil {
993+ return e .findIndicesBidirectionalDFA (haystack , at )
994+ }
995+ // V12 Windowed BoundedBacktracker for ASCII path
996+ maxInput := e .asciiBoundedBacktracker .MaxInputSize ()
997+ if maxInput > 0 && len (remaining ) > maxInput {
998+ window := remaining [:maxInput ]
999+ start , end , found := e .asciiBoundedBacktracker .Search (window )
1000+ if found {
1001+ return at + start , at + end , true
1002+ }
9441003 }
1004+ return state .pikevm .SearchWithSlotTableAt (haystack , at , nfa .SearchModeFind )
9451005 }
946- return state . pikevm . SearchWithSlotTableAt ( haystack , at , nfa . SearchModeFind )
947- }
948- start , end , found := e . asciiBoundedBacktracker . Search ( remaining )
949- if found {
950- return at + start , at + end , true
1006+ start , end , found := e . asciiBoundedBacktracker . Search ( remaining )
1007+ if found {
1008+ return at + start , at + end , true
1009+ }
1010+ return - 1 , - 1 , false
9511011 }
952- return - 1 , - 1 , false
9531012 }
9541013
9551014 if ! e .boundedBacktracker .CanHandle (len (remaining )) {
956- // V12 Windowed BoundedBacktracker: For large inputs, try searching in a
957- // window of maxInputSize bytes first. Most patterns produce short matches
958- // (e.g., word patterns like (\w{2,8})+ match 2-8 chars), so the match
959- // will be found within the first window. Only fall back to PikeVM if
960- // no match is found in the window (rare for common patterns).
1015+ // Bidirectional DFA: O(n) vs PikeVM's O(n*states) for large inputs
1016+ if e . dfa != nil && e . reverseDFA != nil {
1017+ return e . findIndicesBidirectionalDFA ( haystack , at )
1018+ }
1019+ // V12 Windowed BoundedBacktracker fallback
9611020 maxInput := e .boundedBacktracker .MaxInputSize ()
9621021 if maxInput > 0 && len (remaining ) > maxInput {
963- // Search in the first window
9641022 window := remaining [:maxInput ]
9651023 start , end , found := e .boundedBacktracker .SearchWithState (window , state .backtracker )
9661024 if found {
967- // Match found within window - this is the common case
9681025 return at + start , at + end , true
9691026 }
970- // No match in window - could be:
971- // 1. No match exists in the full input
972- // 2. Match exists beyond the window
973- // Fall back to PikeVM to handle both cases correctly
9741027 }
9751028 return state .pikevm .SearchWithSlotTableAt (haystack , at , nfa .SearchModeFind )
9761029 }
0 commit comments