1
1
use std:: {
2
- collections:: { HashMap , HashSet , VecDeque } ,
2
+ collections:: { HashSet , VecDeque } ,
3
3
fs,
4
4
io:: { BufReader , Write } ,
5
5
path:: { Path , PathBuf } ,
@@ -49,51 +49,52 @@ fn read_grammar_from_file<P: AsRef<Path>>(path: P) -> Value {
49
49
}
50
50
51
51
/// A worklist entry: a PDA state paired with the stack of grammar
/// symbols still to be expanded from that state.
#[derive(Debug)]
struct Element<'src> {
    // Index of the PDA state this element expands from.
    pub state: usize,
    // Shared symbol stack; `Rc` lets the same deque sit in both the
    // worklist and `Stacks::q` without cloning. Symbols borrow from the
    // loaded grammar (`'src`), avoiding a `String` allocation per symbol.
    pub items: Rc<VecDeque<&'src str>>,
}
56
56
57
57
/// One PDA edge: consuming `terminal` moves the automaton from state
/// `source` to state `dest`.
///
/// Only the resulting stack *depth* is stored (`stack_len`); the full
/// stack contents live in `Stacks` while the PDA is being built.
#[derive(Default, Debug, Clone, PartialEq, Eq, Hash)]
struct Transition<'src> {
    pub source: usize,
    pub dest: usize,
    /// Terminal consumed on this edge, borrowed from the grammar data.
    pub terminal: &'src str,
    /// Depth of the symbol stack after taking this edge; compared
    /// against `stack_limit` to abandon states that grow too deep.
    pub stack_len: usize,
}
66
66
67
67
/// Per-state stack bookkeeping used while constructing the PDA.
///
/// Both vectors are indexed by `state - 1` (the initial state has no
/// stack); see the `wrapping_sub(1)` lookup and the push sites in
/// `prepare_transitions`, which append one entry per newly created state.
#[derive(Default)]
struct Stacks<'src> {
    /// `q[state - 1]`: the symbol stack associated with each state.
    pub q: Vec<Rc<VecDeque<&'src str>>>,
    /// `s[state - 1]`: the same stack, sorted — used to detect a
    /// recursive (already-seen) stack configuration and reuse its state.
    pub s: Vec<Box<[&'src str]>>,
}
72
72
73
/// Splits one grammar rule of the form `'terminal' SYM1 SYM2 ...` into
/// the quoted terminal and the list of trailing stack symbols, all
/// borrowed from `rule` (no per-token allocation).
///
/// A terminal written as the two-character escape `\n` in the grammar is
/// translated into a real newline.
///
/// # Panics
/// Panics if `rule` does not match the expected `'...'` shape — the
/// grammar file is treated as trusted input.
fn tokenize(rule: &str) -> (&str, Vec<&str>) {
    // NOTE(review): the `([r])*` prefix group is a leftover from the
    // removed regex-terminal support (`is_regex`); capture group 1 is no
    // longer read anywhere. The pattern is kept as-is so group numbering
    // (2 = terminal, 3 = trailing symbols) stays valid.
    let re = RE.get_or_init(|| Regex::new(r"([r])*'([\s\S]+)'([\s\S]*)").unwrap());
    let cap = re.captures(rule).unwrap();
    // Group 2: the text between the single quotes.
    let terminal = cap.get(2).unwrap().as_str();
    // Group 3: whitespace-separated symbols after the closing quote;
    // absent group means no symbols follow the terminal.
    let ss = cap.get(3).map_or(vec![], |m| {
        m.as_str()
            .split_whitespace()
            .collect()
    });
    if terminal == "\\n" {
        // Escaped newline in the grammar becomes an actual newline.
        ("\n", ss)
    } else {
        (terminal, ss)
    }
}
90
91
91
- fn prepare_transitions (
92
- grammar : & Value ,
93
- pda : & mut Vec < Transition > ,
94
- state_stacks : & mut Stacks ,
92
+ fn prepare_transitions < ' pda , ' src : ' pda > (
93
+ grammar : & ' src Value ,
94
+ pda : & ' pda mut Vec < Transition < ' src > > ,
95
+ state_stacks : & mut Stacks < ' src > ,
95
96
state_count : & mut usize ,
96
- worklist : & mut VecDeque < Element > ,
97
+ worklist : & mut VecDeque < Element < ' src > > ,
97
98
element : & Element ,
98
99
stack_limit : usize ,
99
100
) {
@@ -102,46 +103,46 @@ fn prepare_transitions(
102
103
}
103
104
104
105
let state = element. state ;
105
- let nonterminal = & element. items [ 0 ] ;
106
+ let nonterminal = element. items [ 0 ] ;
106
107
let rules = grammar[ nonterminal] . as_array ( ) . unwrap ( ) ;
107
108
// let mut i = 0;
108
109
' rules_loop: for rule in rules {
109
110
let rule = rule. as_str ( ) . unwrap ( ) ;
110
- let ( terminal, ss, is_regex ) = tokenize ( rule) ;
111
+ let ( terminal, ss /*_is_regex*/ ) = tokenize ( rule) ;
111
112
let dest = * state_count;
112
113
113
114
// log::trace!("Rule \"{}\", {} over {}", &rule, i, rules.len());
114
115
115
116
// Creating a state stack for the new state
116
117
let mut state_stack = state_stacks
117
118
. q
118
- . get ( & state)
119
- . map_or ( VecDeque :: new ( ) , Clone :: clone) ;
120
- if !state_stack. is_empty ( ) {
121
- state_stack. pop_front ( ) ;
122
- }
123
- for symbol in ss. iter ( ) . rev ( ) {
124
- state_stack. push_front ( symbol. clone ( ) ) ;
119
+ . get ( state. wrapping_sub ( 1 ) )
120
+ . map_or ( VecDeque :: new ( ) , |state_stack| ( * * state_stack) . clone ( ) ) ;
121
+
122
+ state_stack. pop_front ( ) ;
123
+ for symbol in ss. into_iter ( ) . rev ( ) {
124
+ state_stack. push_front ( symbol) ;
125
125
}
126
- let mut state_stack_sorted: Vec < _ > = state_stack. iter ( ) . cloned ( ) . collect ( ) ;
127
- state_stack_sorted. sort ( ) ;
126
+ let mut state_stack_sorted: Box < _ > = state_stack. iter ( ) . copied ( ) . collect ( ) ;
127
+ state_stack_sorted. sort_unstable ( ) ;
128
128
129
129
let mut transition = Transition {
130
130
source : state,
131
131
dest,
132
- ss,
132
+ // ss,
133
133
terminal,
134
- is_regex,
135
- stack : Rc :: new ( state_stack. clone ( ) ) ,
134
+ // is_regex,
135
+ // stack: Rc::new(state_stack.clone()),
136
+ stack_len : state_stack. len ( ) ,
136
137
} ;
137
138
138
139
// Check if a recursive transition state being created, if so make a backward
139
140
// edge and don't add anything to the worklist
140
- for ( key , val ) in & state_stacks. s {
141
- if state_stack_sorted == * val {
142
- transition. dest = * key ;
141
+ for ( dest , stack ) in state_stacks. s . iter ( ) . enumerate ( ) {
142
+ if state_stack_sorted == * stack {
143
+ transition. dest = dest + 1 ;
143
144
// i += 1;
144
- pda. push ( transition. clone ( ) ) ;
145
+ pda. push ( transition) ;
145
146
146
147
// If a recursive transition exercised don't add the same transition as a new
147
148
// edge, continue onto the next transitions
@@ -151,18 +152,23 @@ fn prepare_transitions(
151
152
152
153
// If the generated state has a stack size > stack_limit then that state is abandoned
153
154
// and not added to the FSA or the worklist for further expansion
154
- if stack_limit > 0 && transition. stack . len ( ) > stack_limit {
155
+ if stack_limit > 0 && transition. stack_len > stack_limit {
155
156
// TODO add to unexpanded_rules
156
157
continue ;
157
158
}
158
159
160
+ let state_stack = Rc :: new ( state_stack) ;
161
+
159
162
// Create transitions for the non-recursive relations and add to the worklist
160
163
worklist. push_back ( Element {
161
164
state : dest,
162
- items : transition . stack . clone ( ) ,
165
+ items : Rc :: clone ( & state_stack ) ,
163
166
} ) ;
164
- state_stacks. q . insert ( dest, state_stack) ;
165
- state_stacks. s . insert ( dest, state_stack_sorted) ;
167
+
168
+ // since each index corresponds to `state_count - 1`
169
+ // index with `dest - 1`
170
+ state_stacks. q . push ( state_stack) ;
171
+ state_stacks. s . push ( state_stack_sorted) ;
166
172
pda. push ( transition) ;
167
173
168
174
println ! ( "worklist size: {}" , worklist. len( ) ) ;
@@ -205,11 +211,11 @@ fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
205
211
if stack_limit > 0 {
206
212
let mut culled_pda = Vec :: with_capacity ( pda. len ( ) ) ;
207
213
let mut blocklist = HashSet :: new ( ) ;
208
- //let mut culled_pda_unique = HashSet::new();
214
+ // let mut culled_pda_unique = HashSet::new();
209
215
210
216
for final_state in & finals {
211
217
for transition in pda {
212
- if transition. dest == * final_state && transition. stack . len ( ) > 0 {
218
+ if transition. dest == * final_state && transition. stack_len > 0 {
213
219
blocklist. insert ( transition. dest ) ;
214
220
} else {
215
221
culled_pda. push ( transition) ;
@@ -223,7 +229,9 @@ fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
223
229
let culled_finals: HashSet < usize > = finals. difference ( & blocklist) . copied ( ) . collect ( ) ;
224
230
assert ! ( culled_finals. len( ) == 1 ) ;
225
231
226
- for transition in & culled_pda {
232
+ let culled_pda_len = culled_pda. len ( ) ;
233
+
234
+ for transition in culled_pda {
227
235
if blocklist. contains ( & transition. dest ) {
228
236
continue ;
229
237
}
@@ -234,15 +242,11 @@ fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
234
242
}
235
243
memoized[ state] . push ( Trigger {
236
244
dest : transition. dest ,
237
- term : transition. terminal . clone ( ) ,
245
+ term : transition. terminal . to_string ( ) ,
238
246
} ) ;
239
247
240
248
if num_transition % 4096 == 0 {
241
- println ! (
242
- "processed {} transitions over {}" ,
243
- num_transition,
244
- culled_pda. len( )
245
- ) ;
249
+ println ! ( "processed {num_transition} transitions over {culled_pda_len}" , ) ;
246
250
}
247
251
}
248
252
@@ -261,8 +265,8 @@ fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
261
265
*/
262
266
263
267
Automaton {
264
- init_state : initial. iter ( ) . next ( ) . copied ( ) . unwrap ( ) ,
265
- final_state : culled_finals. iter ( ) . next ( ) . copied ( ) . unwrap ( ) ,
268
+ init_state : initial. into_iter ( ) . next ( ) . unwrap ( ) ,
269
+ final_state : culled_finals. into_iter ( ) . next ( ) . unwrap ( ) ,
266
270
pda : memoized,
267
271
}
268
272
} else {
@@ -275,7 +279,7 @@ fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
275
279
}
276
280
memoized[ state] . push ( Trigger {
277
281
dest : transition. dest ,
278
- term : transition. terminal . clone ( ) ,
282
+ term : transition. terminal . to_string ( ) ,
279
283
} ) ;
280
284
281
285
if num_transition % 4096 == 0 {
@@ -288,8 +292,8 @@ fn postprocess(pda: &[Transition], stack_limit: usize) -> Automaton {
288
292
}
289
293
290
294
Automaton {
291
- init_state : initial. iter ( ) . next ( ) . copied ( ) . unwrap ( ) ,
292
- final_state : finals. iter ( ) . next ( ) . copied ( ) . unwrap ( ) ,
295
+ init_state : initial. into_iter ( ) . next ( ) . unwrap ( ) ,
296
+ final_state : finals. into_iter ( ) . next ( ) . unwrap ( ) ,
293
297
pda : memoized,
294
298
}
295
299
}
@@ -308,7 +312,7 @@ fn main() {
308
312
let mut pda = vec ! [ ] ;
309
313
310
314
let grammar = read_grammar_from_file ( grammar_file) ;
311
- let start_symbol = grammar[ "Start" ] [ 0 ] . as_str ( ) . unwrap ( ) . to_owned ( ) ;
315
+ let start_symbol = grammar[ "Start" ] [ 0 ] . as_str ( ) . unwrap ( ) ;
312
316
let mut start_vec = VecDeque :: new ( ) ;
313
317
start_vec. push_back ( start_symbol) ;
314
318
worklist. push_back ( Element {
@@ -328,8 +332,7 @@ fn main() {
328
332
) ;
329
333
}
330
334
331
- state_stacks. q . clear ( ) ;
332
- state_stacks. s . clear ( ) ;
335
+ drop ( state_stacks) ;
333
336
334
337
let transformed = postprocess ( & pda, stack_limit) ;
335
338
let serialized = postcard:: to_allocvec ( & transformed) . unwrap ( ) ;
0 commit comments