1- use std:: fmt;
2- use std:: sync:: atomic:: { AtomicU32 , Ordering } ;
3-
1+ use itertools:: Itertools ;
42use tracing:: instrument;
53
6- use super :: { Byte , Nfa , Ref , nfa} ;
7- use crate :: Map ;
4+ use super :: {
5+ Byte , Nfa , Ref ,
6+ automaton:: { Automaton , State , Transition } ,
7+ } ;
8+ use crate :: { Map , Set } ;
89
910#[ derive( PartialEq , Clone , Debug ) ]
10- pub ( crate ) struct Dfa < R >
11- where
12- R : Ref ,
13- {
14- pub ( crate ) transitions : Map < State , Transitions < R > > ,
15- pub ( crate ) start : State ,
16- pub ( crate ) accepting : State ,
17- }
18-
19- #[ derive( PartialEq , Clone , Debug ) ]
20- pub ( crate ) struct Transitions < R >
21- where
22- R : Ref ,
23- {
24- byte_transitions : Map < Byte , State > ,
25- ref_transitions : Map < R , State > ,
26- }
27-
28- impl < R > Default for Transitions < R >
29- where
30- R : Ref ,
31- {
32- fn default ( ) -> Self {
33- Self { byte_transitions : Map :: default ( ) , ref_transitions : Map :: default ( ) }
34- }
35- }
36-
37- impl < R > Transitions < R >
38- where
39- R : Ref ,
40- {
41- #[ cfg( test) ]
42- fn insert ( & mut self , transition : Transition < R > , state : State ) {
43- match transition {
44- Transition :: Byte ( b) => {
45- self . byte_transitions . insert ( b, state) ;
46- }
47- Transition :: Ref ( r) => {
48- self . ref_transitions . insert ( r, state) ;
49- }
50- }
51- }
52- }
53-
54- /// The states in a `Nfa` represent byte offsets.
55- #[ derive( Hash , Eq , PartialEq , PartialOrd , Ord , Copy , Clone ) ]
56- pub ( crate ) struct State ( u32 ) ;
57-
58- #[ cfg( test) ]
59- #[ derive( Hash , Eq , PartialEq , Clone , Copy ) ]
60- pub ( crate ) enum Transition < R >
61- where
62- R : Ref ,
63- {
64- Byte ( Byte ) ,
65- Ref ( R ) ,
66- }
67-
68- impl fmt:: Debug for State {
69- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
70- write ! ( f, "S_{}" , self . 0 )
71- }
72- }
73-
74- #[ cfg( test) ]
75- impl < R > fmt:: Debug for Transition < R >
76- where
77- R : Ref ,
78- {
79- fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
80- match & self {
81- Self :: Byte ( b) => b. fmt ( f) ,
82- Self :: Ref ( r) => r. fmt ( f) ,
83- }
84- }
85- }
11+ pub ( crate ) struct Dfa < R : Ref > (
12+ // INVARIANT: `Automaton` is a DFA, which means that, for any `state`, each
13+ // transition in `self.0.transitions[state]` contains exactly one
14+ // destination state.
15+ pub ( crate ) Automaton < R > ,
16+ ) ;
8617
8718impl < R > Dfa < R >
8819where
8920 R : Ref ,
9021{
/// Test-only constructor: a DFA with a fresh start state and a fresh accept
/// state, connected by one byte transition for `0x00` and one for `0x01`.
#[cfg(test)]
pub(crate) fn bool() -> Self {
    // Allocate the start state before the accept state, as before.
    let start = State::new();
    let accept = State::new();

    // Both permitted byte values lead straight to the accept state.
    let mut from_start: Map<Transition<R>, Set<State>> = Map::default();
    for value in [0x00, 0x01] {
        from_start.insert(Transition::Byte(Byte::Init(value)), [accept].into_iter().collect());
    }

    let mut transitions: Map<State, Map<Transition<R>, Set<State>>> = Map::default();
    transitions.insert(start, from_start);

    Self(Automaton { transitions, start, accept })
}
10340
10441 #[ instrument( level = "debug" ) ]
10542 pub ( crate ) fn from_nfa ( nfa : Nfa < R > ) -> Self {
106- let Nfa { transitions : nfa_transitions, start : nfa_start, accepting : nfa_accepting } = nfa;
43+ // It might already be the case that `nfa` is a DFA. If that's the case,
44+ // we can avoid reconstructing the DFA.
45+ let is_dfa = nfa
46+ . 0
47+ . transitions
48+ . iter ( )
49+ . flat_map ( |( _, transitions) | transitions. iter ( ) )
50+ . all ( |( _, dsts) | dsts. len ( ) <= 1 ) ;
51+ if is_dfa {
52+ return Dfa ( nfa. 0 ) ;
53+ }
54+
55+ let Nfa ( Automaton { transitions : nfa_transitions, start : nfa_start, accept : nfa_accept } ) =
56+ nfa;
10757
108- let mut dfa_transitions: Map < State , Transitions < R > > = Map :: default ( ) ;
109- let mut nfa_to_dfa: Map < nfa :: State , State > = Map :: default ( ) ;
58+ let mut dfa_transitions: Map < State , Map < Transition < R > , Set < State > > > = Map :: default ( ) ;
59+ let mut nfa_to_dfa: Map < State , State > = Map :: default ( ) ;
11060 let dfa_start = State :: new ( ) ;
11161 nfa_to_dfa. insert ( nfa_start, dfa_start) ;
11262
11363 let mut queue = vec ! [ ( nfa_start, dfa_start) ] ;
11464
11565 while let Some ( ( nfa_state, dfa_state) ) = queue. pop ( ) {
116- if nfa_state == nfa_accepting {
66+ if nfa_state == nfa_accept {
11767 continue ;
11868 }
11969
12070 for ( nfa_transition, next_nfa_states) in nfa_transitions[ & nfa_state] . iter ( ) {
71+ use itertools:: Itertools as _;
72+
12173 let dfa_transitions =
12274 dfa_transitions. entry ( dfa_state) . or_insert_with ( Default :: default) ;
12375
12476 let mapped_state = next_nfa_states. iter ( ) . find_map ( |x| nfa_to_dfa. get ( x) . copied ( ) ) ;
12577
126- let next_dfa_state = match nfa_transition {
127- & nfa:: Transition :: Byte ( b) => * dfa_transitions
128- . byte_transitions
129- . entry ( b)
130- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
131- & nfa:: Transition :: Ref ( r) => * dfa_transitions
132- . ref_transitions
133- . entry ( r)
134- . or_insert_with ( || mapped_state. unwrap_or_else ( State :: new) ) ,
135- } ;
78+ let next_dfa_state = dfa_transitions. entry ( * nfa_transition) . or_insert_with ( || {
79+ [ mapped_state. unwrap_or_else ( State :: new) ] . into_iter ( ) . collect ( )
80+ } ) ;
81+ let next_dfa_state = * next_dfa_state. iter ( ) . exactly_one ( ) . unwrap ( ) ;
13682
13783 for & next_nfa_state in next_nfa_states {
13884 nfa_to_dfa. entry ( next_nfa_state) . or_insert_with ( || {
@@ -143,40 +89,38 @@ where
14389 }
14490 }
14591
146- let dfa_accepting = nfa_to_dfa[ & nfa_accepting] ;
147-
148- Self { transitions : dfa_transitions, start : dfa_start, accepting : dfa_accepting }
149- }
150-
151- pub ( crate ) fn bytes_from ( & self , start : State ) -> Option < & Map < Byte , State > > {
152- Some ( & self . transitions . get ( & start) ?. byte_transitions )
92+ let dfa_accept = nfa_to_dfa[ & nfa_accept] ;
93+ Dfa ( Automaton { transitions : dfa_transitions, start : dfa_start, accept : dfa_accept } )
15394 }
15495
15596 pub ( crate ) fn byte_from ( & self , start : State , byte : Byte ) -> Option < State > {
156- self . transitions . get ( & start) ?. byte_transitions . get ( & byte) . copied ( )
157- }
158-
159- pub ( crate ) fn refs_from ( & self , start : State ) -> Option < & Map < R , State > > {
160- Some ( & self . transitions . get ( & start) ?. ref_transitions )
97+ Some (
98+ self . 0
99+ . transitions
100+ . get ( & start) ?
101+ . get ( & Transition :: Byte ( byte) ) ?
102+ . iter ( )
103+ . copied ( )
104+ . exactly_one ( )
105+ . unwrap ( ) ,
106+ )
161107 }
162- }
163108
164- impl State {
165- pub ( crate ) fn new ( ) -> Self {
166- static COUNTER : AtomicU32 = AtomicU32 :: new ( 0 ) ;
167- Self ( COUNTER . fetch_add ( 1 , Ordering :: SeqCst ) )
109+ pub ( crate ) fn iter_bytes_from ( & self , start : State ) -> impl Iterator < Item = ( Byte , State ) > {
110+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
111+ transitions. iter ( ) . filter_map ( |( t, s) | {
112+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
113+ if let Transition :: Byte ( b) = t { Some ( ( * b, s) ) } else { None }
114+ } )
115+ } )
168116 }
169- }
170117
171- #[ cfg( test) ]
172- impl < R > From < nfa:: Transition < R > > for Transition < R >
173- where
174- R : Ref ,
175- {
176- fn from ( nfa_transition : nfa:: Transition < R > ) -> Self {
177- match nfa_transition {
178- nfa:: Transition :: Byte ( byte) => Transition :: Byte ( byte) ,
179- nfa:: Transition :: Ref ( r) => Transition :: Ref ( r) ,
180- }
118+ pub ( crate ) fn iter_refs_from ( & self , start : State ) -> impl Iterator < Item = ( R , State ) > {
119+ self . 0 . transitions . get ( & start) . into_iter ( ) . flat_map ( |transitions| {
120+ transitions. iter ( ) . filter_map ( |( t, s) | {
121+ let s = s. iter ( ) . copied ( ) . exactly_one ( ) . unwrap ( ) ;
122+ if let Transition :: Ref ( r) = t { Some ( ( * r, s) ) } else { None }
123+ } )
124+ } )
181125 }
182126}
0 commit comments