1- use std:: fmt;
2- use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
3-
1+ use itertools:: Itertools ;
42use tracing:: instrument;
53
6- use super :: { Byte , Nfa , Ref , nfa} ;
7- use crate :: Map ;
4+ use super :: automaton:: { Automaton , State , Transition } ;
5+ use super :: { Byte , Nfa , Ref } ;
6+ use crate :: { Map , Set } ;
87
// Pre-change (`-`) shape of `Dfa`: a plain struct holding a per-state
// transition table plus the `start` and `accepting` states.
// The derive line directly below carries no `+`/`-` marker: it is an unchanged
// context line that the added tuple-struct form of `Dfa` also sits under.
98#[ derive( PartialEq , Clone , Debug ) ]
10- pub ( crate ) struct Dfa < R >
11- where
12- R : Ref ,
13- {
// Outgoing edges, keyed by source state.
14- pub ( crate ) transitions : Map < State , Transitions < R > > ,
15- pub ( crate ) start : State ,
16- pub ( crate ) accepting : State ,
17- }
18-
// Removed (`-`) type: the outgoing edges of a single state, kept in two maps.
// Each map sends an edge label to exactly one destination `State`, so the
// single-destination property was enforced by the value type itself.
19- #[ derive( PartialEq , Clone , Debug ) ]
20- pub ( crate ) struct Transitions < R >
21- where
22- R : Ref ,
23- {
// Edges labelled with a `Byte`.
24- byte_transitions : Map < Byte , State > ,
// Edges labelled with a reference of type `R`.
25- ref_transitions : Map < R , State > ,
26- }
27-
// Removed (`-`) impl: the default `Transitions` value has both edge maps set
// to `Map::default()`. It is written by hand rather than derived.
// NOTE(review): presumably hand-written so that `R` needs no `Default` bound - confirm.
28- impl < R > Default for Transitions < R >
29- where
30- R : Ref ,
31- {
32- fn default ( ) -> Self {
33- Self { byte_transitions : Map :: default ( ) , ref_transitions : Map :: default ( ) }
34- }
35- }
36-
// Removed (`-`) impl block with a single test-only helper.
37- impl < R > Transitions < R >
38- where
39- R : Ref ,
40- {
// `insert` records one edge: it dispatches on the label kind and stores the
// destination `state` in the matching map via the map `insert` call.
// Compiled only under `cfg(test)`.
41- #[ cfg( test) ]
42- fn insert ( & mut self , transition : Transition < R > , state : State ) {
43- match transition {
44- Transition :: Byte ( b) => {
45- self . byte_transitions . insert ( b, state) ;
46- }
47- Transition :: Ref ( r) => {
48- self . ref_transitions . insert ( r, state) ;
49- }
50- }
51- }
52- }
53-
// Removed (`-`) type: `State` is a newtype over `u32`. The removed
// `State::new` further down in this diff fills it from a global atomic counter.
// `Debug` is not derived here; a hand-written impl is among the removed lines.
// The added imports bring `State` in from `super::automaton` instead.
54- /// The states in a `Nfa` represent byte offsets.
55- #[ derive( Hash , Eq , PartialEq , PartialOrd , Ord , Copy , Clone ) ]
56- pub ( crate ) struct State ( u32 ) ;
57-
// Removed (`-`) test-only enum: an edge label, either a `Byte` or a reference
// of type `R`. The added imports bring a `Transition` type in from
// `super::automaton` instead.
58- #[ cfg( test) ]
59- #[ derive( Hash , Eq , PartialEq , Clone , Copy ) ]
60- pub ( crate ) enum Transition < R >
61- where
62- R : Ref ,
63- {
64- Byte ( Byte ) ,
65- Ref ( R ) ,
66- }
67-
// Removed (`-`) impl: formats a `State` as the prefix `S_` followed by the
// wrapped integer, for example `S_3`.
68- impl fmt:: Debug for State {
69- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
70- write ! ( f, "S_{}" , self . 0 )
71- }
72- }
73-
// Removed (`-`) test-only impl: forwards formatting to the `fmt` method of the
// wrapped byte or reference, so the output carries no variant name.
74- #[ cfg( test) ]
75- impl < R > fmt:: Debug for Transition < R >
76- where
77- R : Ref ,
78- {
79- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
80- match & self {
81- Self :: Byte ( b) => b. fmt ( f) ,
82- Self :: Ref ( r) => r. fmt ( f) ,
83- }
84- }
85- }
// Added (`+`) shape of `Dfa`: a tuple struct wrapping an `Automaton<R>`.
// In this representation each transition maps to a set of destination states,
// so the single-destination property is only a documented invariant.
// The accessors `byte_from`, `iter_bytes_from` and `iter_refs_from` rely on it:
// each calls `exactly_one().unwrap()` on a destination set and panics if the
// set does not hold exactly one state.
9+ pub ( crate ) struct Dfa < R : Ref > (
10+ // INVARIANT: `Automaton` is a DFA, which means that, for any `state`, each
11+ // transition in `self.0.transitions[state]` contains exactly one
12+ // destination state.
13+ pub ( crate ) Automaton < R > ,
14+ ) ;
8615
8716impl < R > Dfa < R >
8817where
8918 R : Ref ,
9019{
// Test-only constructor for a two-state automaton: a fresh `start` state with
// two byte edges, labelled `Byte::Init(0x00)` and `Byte::Init(0x01)`, both
// leading to one fresh accepting state.
// The `-` lines are the old form (one destination state per edge); the `+`
// lines are the new form (a one-element set per edge, wrapped in `Automaton`).
9120 #[ cfg( test) ]
9221 pub ( crate ) fn bool ( ) -> Self {
93- let mut transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
22+ let mut transitions: Map < State , Map < Transition < R > , Set < State > > > = Map :: default ( ) ;
9423 let start = State :: new ( ) ;
95- let accepting = State :: new ( ) ;
24+ let accept = State :: new ( ) ;
9625
// Edge for byte 0x00: start -> accept.
97- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x00 ) ) , accepting) ;
26+ transitions
27+ . entry ( start)
28+ . or_default ( )
// The one-element array is collected into the destination set.
29+ . insert ( Transition :: Byte ( Byte :: Init ( 0x00 ) ) , [ accept] . into_iter ( ) . collect ( ) ) ;
9830
// Edge for byte 0x01: start -> accept.
99- transitions. entry ( start) . or_default ( ) . insert ( Transition :: Byte ( Byte :: Init ( 0x01 ) ) , accepting) ;
31+ transitions
32+ . entry ( start)
33+ . or_default ( )
34+ . insert ( Transition :: Byte ( Byte :: Init ( 0x01 ) ) , [ accept] . into_iter ( ) . collect ( ) ) ;
10035
101- Self { transitions, start, accepting }
36+ Dfa ( Automaton { transitions, start, accept } )
10237 }
10338
// Builds a `Dfa` from an `Nfa`, consuming the input.
// Added fast path: if no transition has more than one destination state, the
// automaton inside the `Nfa` is wrapped and returned unchanged.
// Otherwise a work list of (NFA state, DFA state) pairs is processed, starting
// from the two start states, and DFA edges are created as NFA edges are visited.
// This view is a diff: a hunk header partway through the loop marks lines that
// are not shown, so the body of the innermost closure cannot be described here.
10439 #[ instrument( level = "debug" ) ]
10540 pub ( crate ) fn from_nfa ( nfa : Nfa < R > ) -> Self {
106- let Nfa { transitions : nfa_transitions, start : nfa_start, accepting : nfa_accepting } = nfa;
41+ // It might already be the case that `nfa` is a DFA. If that's the case,
42+ // we can avoid reconstructing the DFA.
// NOTE(review): the `Dfa` invariant says each transition has exactly one
// destination, but the check below also accepts an empty destination set.
// If empty sets can occur, the fast path returns a `Dfa` on which the
// `exactly_one().unwrap()` accessors panic. Confirm, or tighten to `== 1`.
43+ let is_dfa = nfa
44+ . 0
45+ . transitions
46+ . iter ( )
47+ . flat_map ( |( _, transitions) | transitions. iter ( ) )
48+ . all ( |( _, dsts) | dsts. len ( ) <= 1 ) ;
49+ if is_dfa {
50+ return Dfa ( nfa. 0 ) ;
51+ }
52+
53+ let Nfa ( Automaton { transitions : nfa_transitions, start : nfa_start, accept : nfa_accept } ) =
54+ nfa;
10755
108- let mut dfa_transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
109- let mut nfa_to_dfa: Map < nfa :: State , State > = Map :: default ( ) ;
56+ let mut dfa_transitions: Map < State , Map < Transition < R > , Set < State > > > = Map :: default ( ) ;
// Records which DFA state each visited NFA state has been assigned to.
57+ let mut nfa_to_dfa: Map < State , State > = Map :: default ( ) ;
11058 let dfa_start = State :: new ( ) ;
11159 nfa_to_dfa. insert ( nfa_start, dfa_start) ;
11260
// The work list is a `Vec` consumed with `pop`, so the most recently pushed
// pair is processed first.
11361 let mut queue = vec ! [ ( nfa_start, dfa_start) ] ;
11462
11563 while let Some ( ( nfa_state, dfa_state) ) = queue. pop ( ) {
// Outgoing edges of the accepting state are never explored.
116- if nfa_state == nfa_accepting {
64+ if nfa_state == nfa_accept {
11765 continue ;
11866 }
11967
// NOTE(review): indexing with `[..]` presumably panics when `nfa_state` has
// no entry in the table - confirm against the `Map` type.
12068 for ( nfa_transition, next_nfa_states) in nfa_transitions[ & nfa_state] . iter ( ) {
// NOTE(review): the added top-of-file imports already bring `Itertools` into
// scope, so this local import looks redundant.
69+ use itertools:: Itertools as _;
70+
// Shadows the outer table with the edge map of the current DFA state.
12171 let dfa_transitions =
12272 dfa_transitions. entry ( dfa_state) . or_insert_with ( Default :: default) ;
12373
// Reuse the DFA state already assigned to any of the destination NFA states,
// if there is one.
12474 let mapped_state = next_nfa_states. iter ( ) . find_map ( |x| nfa_to_dfa. get ( x) . copied ( ) ) ;
12575
// Old form: pick the byte or ref map by label kind and take or create the
// single destination state for that label.
126- let next_dfa_state = match nfa_transition {
127- & nfa:: Transition :: Byte ( b) => * dfa_transitions
128- . byte_transitions
129- . entry ( b)
130- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
131- & nfa:: Transition :: Ref ( r) => * dfa_transitions
132- . ref_transitions
133- . entry ( r)
134- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
135- } ;
// New form: one map keyed by the label itself. A newly inserted entry is a
// one-element set holding either the reused state or a fresh one.
76+ let next_dfa_state = dfa_transitions. entry ( * nfa_transition) . or_insert_with ( || {
77+ [ mapped_state. unwrap_or_else ( State :: new) ] . into_iter ( ) . collect ( )
78+ } ) ;
// Pull the single state back out of the set; panics unless the set holds
// exactly one element.
79+ let next_dfa_state = * next_dfa_state. iter ( ) . exactly_one ( ) . unwrap ( ) ;
13680
13781 for & next_nfa_state in next_nfa_states {
13882 nfa_to_dfa. entry ( next_nfa_state) . or_insert_with ( || {
// Diff hunk boundary: the lines between the closure opening above and the
// closing braces below are not part of this view.
@@ -143,40 +87,38 @@ where
14387 }
14488 }
14589
// The result accepts in the DFA state mapped from the NFA accepting state.
// NOTE(review): the `[..]` lookup presumably panics if that state was never
// mapped - confirm.
146- let dfa_accepting = nfa_to_dfa[ & nfa_accepting] ;
147-
148- Self { transitions : dfa_transitions, start : dfa_start, accepting : dfa_accepting }
149- }
150-
// Removed accessor `bytes_from`: returned a borrow of the whole byte-edge map
// of `start`, or `None` when `start` had no entry.
151- pub ( crate ) fn bytes_from ( & self , start : State ) -> Option < & Map < Byte , State > > {
152- Some ( & self . transitions . get ( & start) ?. byte_transitions )
90+ let dfa_accept = nfa_to_dfa[ & nfa_accept] ;
91+ Dfa ( Automaton { transitions : dfa_transitions, start : dfa_start, accept : dfa_accept } )
15392 }
15493
// Looks up the state reached from `start` on `byte`.
// Returns `None` when `start` has no entry in the table or has no edge for
// that byte. In the added (`+`) form, panics if the destination set for the
// edge does not hold exactly one state.
15594 pub ( crate ) fn byte_from ( & self , start : State , byte : Byte ) -> Option < State > {
// Old body: two chained map lookups.
156- self . transitions . get ( & start) ?. byte_transitions . get ( & byte) . copied ( )
157- }
158-
// Removed accessor `refs_from`: returned a borrow of the whole ref-edge map of
// `start`, or `None` when `start` had no entry.
159- pub ( crate ) fn refs_from ( & self , start : State ) -> Option < & Map < R , State > > {
160- Some ( & self . transitions . get ( & start) ?. ref_transitions )
// New body: look up the state, then the `Transition::Byte` label, then
// extract the single destination from the set.
95+ Some (
96+ self . 0
97+ . transitions
98+ . get ( & start) ?
99+ . get ( & Transition :: Byte ( byte) ) ?
100+ . iter ( )
101+ . copied ( )
102+ . exactly_one ( )
103+ . unwrap ( ) ,
104+ )
161105 }
162- }
163106
// Removed (`-`) `State::new`: returned a `State` wrapping the next value of a
// function-local static `AtomicU32` counter, advanced with `fetch_add(1, SeqCst)`.
164- impl State {
165- pub ( crate ) fn new ( ) -> Self {
166- static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
167- Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
// Added (`+`) `iter_bytes_from`: yields a (byte, destination) pair for every
// byte-labelled edge leaving `start`. Yields nothing when `start` has no
// entry in the table.
107+ pub ( crate ) fn iter_bytes_from ( & self , start : State ) -> impl Iterator < Item = ( Byte , State ) > {
// `get(..).into_iter()` turns the optional edge map into zero or one items.
108+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
109+ transitions. iter ( ) . filter_map ( |( t, s) | {
// The single destination is extracted before the label kind is checked, so a
// ref-labelled edge whose set breaks the invariant also panics here.
110+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
111+ if let Transition :: Byte ( b) = t { Some ( ( * b, s) ) } else { None }
112+ } )
113+ } )
168114 }
169- }
170115
// Removed (`-`) test-only conversion: mapped each `nfa::Transition` variant to
// the same-named variant of the local `Transition` enum.
171- #[ cfg( test) ]
172- impl < R > From < nfa:: Transition < R > > for Transition < R >
173- where
174- R : Ref ,
175- {
176- fn from ( nfa_transition : nfa:: Transition < R > ) -> Self {
177- match nfa_transition {
178- nfa:: Transition :: Byte ( byte) => Transition :: Byte ( byte) ,
179- nfa:: Transition :: Ref ( r) => Transition :: Ref ( r) ,
180- }
// Added (`+`) `iter_refs_from`: yields a (reference, destination) pair for
// every ref-labelled edge leaving `start`. Yields nothing when `start` has no
// entry in the table.
116+ pub ( crate ) fn iter_refs_from ( & self , start : State ) -> impl Iterator < Item = ( R , State ) > {
117+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
118+ transitions. iter ( ) . filter_map ( |( t, s) | {
// The single destination is extracted before the label kind is checked, so a
// byte-labelled edge whose set breaks the invariant also panics here.
119+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
120+ if let Transition :: Ref ( r) = t { Some ( ( * r, s) ) } else { None }
121+ } )
122+ } )
181123 }
182124}
0 commit comments