@@ -4,7 +4,7 @@ mod traversal;
44use crate :: parser:: grammar:: ParseGrammar ;
55use crate :: { GrammarParser , ParseTree , ParseTreeNode , Term , tracing} ;
66use input_range:: InputRange ;
7- use std:: collections:: { BTreeSet , HashSet , VecDeque } ;
7+ use std:: collections:: { HashSet , VecDeque } ;
88use std:: rc:: Rc ;
99use traversal:: { TermMatch , Traversal , TraversalId , TraversalTree } ;
1010
@@ -13,6 +13,7 @@ pub fn parse<'gram>(
1313 input : & ' gram str ,
1414 starting_term : Option < & ' gram Term > ,
1515) -> impl Iterator < Item = ParseTree < ' gram > > {
16+ let _span = tracing:: span!( DEBUG , "earley::parse" ) . entered ( ) ;
1617 ParseTreeIter :: new ( ParserHold :: Borrowed ( grammar) , input, starting_term)
1718}
1819
@@ -23,6 +24,7 @@ pub fn parse_with_parser_rc<'gram>(
2324 input : & ' gram str ,
2425 starting_term : Option < & ' gram Term > ,
2526) -> impl Iterator < Item = ParseTree < ' gram > > {
27+ let _span = tracing:: span!( DEBUG , "earley::parse_with_parser_rc" ) . entered ( ) ;
2628 ParseTreeIter :: new ( ParserHold :: Owned ( parser) , input, starting_term)
2729}
2830
@@ -86,15 +88,15 @@ fn parse_tree<'gram>(
8688 let traversal = traversal_tree. get ( traversal_id) ;
8789 grammar. get_production_by_id ( traversal. production_id )
8890 } ;
89- let rhs = traversal_tree
90- . get_matched ( traversal_id)
91- . map ( |term_match| match term_match {
91+ let mut rhs = Vec :: with_capacity ( production . rhs . terms . len ( ) ) ;
92+ for term_match in traversal_tree . get_matched ( traversal_id) {
93+ rhs . push ( match term_match {
9294 TermMatch :: Terminal ( term) => ParseTreeNode :: Terminal ( term) ,
9395 TermMatch :: Nonterminal ( traversal_id) => {
9496 ParseTreeNode :: Nonterminal ( parse_tree ( traversal_tree, grammar, * traversal_id) )
9597 }
96- } )
97- . collect :: < Vec < ParseTreeNode > > ( ) ;
98+ } ) ;
99+ }
98100
99101 ParseTree :: new ( production. lhs , rhs)
100102}
@@ -107,52 +109,49 @@ fn earley<'gram>(
107109 completions : & mut CompletionMap < ' gram > ,
108110 grammar : & Rc < ParseGrammar < ' gram > > ,
109111) -> Option < TraversalId > {
110- let _span = tracing:: span!( tracing :: Level :: DEBUG , "earley" ) . entered ( ) ;
112+ let _span = tracing:: span!( DEBUG , "earley" ) . entered ( ) ;
111113 while let Some ( traversal_id) = queue. pop_front ( ) {
112114 tracing:: event!(
113- tracing :: Level :: TRACE ,
115+ TRACE ,
114116 "earley queue pop: {:#?}" ,
115117 traversal_tree. get( traversal_id)
116118 ) ;
117119 let traversal = traversal_tree. get ( traversal_id) ;
118120
119121 match traversal_tree. get_matching ( traversal_id) {
120122 Some ( nonterminal @ Term :: Nonterminal ( _) ) => {
121- let _span = tracing:: span!( tracing :: Level :: DEBUG , "Predict" ) . entered ( ) ;
123+ let _span = tracing:: span!( DEBUG , "Predict" ) . entered ( ) ;
122124
123125 let lhs = grammar. get_production_by_id ( traversal. production_id ) . lhs ;
124126
125127 completions. insert ( traversal, lhs) ;
126128
127- let input_range = traversal. input_range . clone ( ) ;
129+ let input_range = traversal. input_range ;
128130
129131 for production in grammar. get_productions_by_lhs ( nonterminal) {
130132 let predicted = traversal_tree. predict ( production, & input_range) ;
131- tracing:: event!( tracing :: Level :: TRACE , "predicted: {predicted:#?}" ) ;
133+ tracing:: event!( TRACE , "predicted: {predicted:#?}" ) ;
132134 queue. push_back ( predicted) ;
133135 }
134136
135137 for completed in completions. get_complete ( nonterminal, & input_range) {
136138 let term_match = TermMatch :: Nonterminal ( completed) ;
137139 let prior_completed = traversal_tree. match_term ( traversal_id, term_match) ;
138- tracing:: event!(
139- tracing:: Level :: TRACE ,
140- "prior_completed: {prior_completed:#?}"
141- ) ;
140+ tracing:: event!( TRACE , "prior_completed: {prior_completed:#?}" ) ;
142141 queue. push_back ( prior_completed) ;
143142 }
144143 }
145144 Some ( Term :: Terminal ( term) ) => {
146- let _span = tracing:: span!( tracing :: Level :: DEBUG , "Scan" ) . entered ( ) ;
145+ let _span = tracing:: span!( DEBUG , "Scan" ) . entered ( ) ;
147146 if traversal. input_range . next ( ) . starts_with ( term) {
148147 let term_match = TermMatch :: Terminal ( term) ;
149148 let scanned = traversal_tree. match_term ( traversal_id, term_match) ;
150- tracing:: event!( tracing :: Level :: TRACE , "scanned: {scanned:#?}" ) ;
149+ tracing:: event!( TRACE , "scanned: {scanned:#?}" ) ;
151150 queue. push_back ( scanned) ;
152151 }
153152 }
154153 None => {
155- let _span = tracing:: span!( tracing :: Level :: DEBUG , "Complete" ) . entered ( ) ;
154+ let _span = tracing:: span!( DEBUG , "Complete" ) . entered ( ) ;
156155
157156 let is_full_traversal =
158157 traversal. is_starting && traversal. input_range . is_complete ( ) ;
@@ -164,7 +163,7 @@ fn earley<'gram>(
164163 let term_match = TermMatch :: Nonterminal ( traversal_id) ;
165164 let completed = traversal_tree. match_term ( incomplete_traversal_id, term_match) ;
166165
167- tracing:: event!( tracing :: Level :: TRACE , "completed: {completed:#?}" ) ;
166+ tracing:: event!( TRACE , "completed: {completed:#?}" ) ;
168167 queue. push_back ( completed) ;
169168 }
170169
@@ -192,10 +191,11 @@ impl<'gram> ParseTreeIter<'gram> {
192191 input : & ' gram str ,
193192 starting_term : Option < & ' gram Term > ,
194193 ) -> Self {
194+ let _span = tracing:: span!( DEBUG , "ParseTreeIter::new" ) . entered ( ) ;
195195 let input_range = InputRange :: new ( input) ;
196196 let mut traversal_tree = TraversalTree :: default ( ) ;
197197 let mut queue = TraversalQueue :: default ( ) ;
198- let completions = CompletionMap :: default ( ) ;
198+ let completions = CompletionMap :: with_capacity ( 32 , 32 ) ;
199199 let parser_ref = parser. as_ref ( ) ;
200200 let starting_term = starting_term. unwrap_or ( parser_ref. starting_term ) ;
201201
@@ -227,9 +227,9 @@ impl<'gram> Iterator for ParseTreeIter<'gram> {
227227 let parse_grammar = & parser. as_ref ( ) . parse_grammar ;
228228
229229 earley ( queue, traversal_tree, completions, parse_grammar) . map ( |traversal_id| {
230- let _span = tracing:: span!( tracing :: Level :: DEBUG , "next_parse_tree" ) . entered ( ) ;
230+ let _span = tracing:: span!( DEBUG , "next_parse_tree" ) . entered ( ) ;
231231 let parse_tree = parse_tree ( traversal_tree, parse_grammar, traversal_id) ;
232- tracing:: event!( tracing :: Level :: TRACE , "\n {parse_tree}" ) ;
232+ tracing:: event!( TRACE , "\n {parse_tree}" ) ;
233233 parse_tree
234234 } )
235235 }
@@ -256,19 +256,40 @@ impl<'gram> CompletionKey<'gram> {
256256 }
257257}
258258
259- #[ derive( Debug , Default ) ]
/// Inserts `id` into `vec` at the position that keeps `vec` sorted in ascending order.
///
/// `vec` must already be sorted ascending; the slot is located with a binary search.
/// When an element equal to `id` is already present the call is a no-op, so the vector
/// holds each value at most once and iterates in ascending order, like a `BTreeSet`.
fn sorted_vec_insert<T: Ord>(vec: &mut Vec<T>, id: T) {
    // `binary_search` yields `Err(slot)` only when `id` is absent; `slot` is where it
    // has to go for the vector to stay sorted.
    if let Err(slot) = vec.binary_search(&id) {
        vec.insert(slot, id);
    }
}
266+
267+ #[ derive( Debug ) ]
260268pub ( crate ) struct CompletionMap < ' gram > {
261- incomplete : crate :: HashMap < CompletionKey < ' gram > , BTreeSet < TraversalId > > ,
262- complete : crate :: HashMap < CompletionKey < ' gram > , BTreeSet < TraversalId > > ,
269+ incomplete : crate :: HashMap < CompletionKey < ' gram > , Vec < TraversalId > > ,
270+ complete : crate :: HashMap < CompletionKey < ' gram > , Vec < TraversalId > > ,
271+ }
272+
273+ impl < ' gram > Default for CompletionMap < ' gram > {
274+ fn default ( ) -> Self {
275+ Self :: with_capacity ( 0 , 0 )
276+ }
263277}
264278
265279impl < ' gram > CompletionMap < ' gram > {
280+ /// Create with reserved capacity to reduce rehashing during parsing.
281+ pub fn with_capacity ( incomplete : usize , complete : usize ) -> Self {
282+ Self {
283+ incomplete : crate :: HashMap :: with_capacity ( incomplete) ,
284+ complete : crate :: HashMap :: with_capacity ( complete) ,
285+ }
286+ }
266287 pub fn get_incomplete < ' map > (
267288 & ' map self ,
268289 term : & ' gram Term ,
269290 complete_traversal : & Traversal < ' gram > ,
270291 ) -> impl Iterator < Item = TraversalId > + use < ' map > {
271- let _span = tracing:: span!( tracing :: Level :: DEBUG , "get_incomplete" ) . entered ( ) ;
292+ let _span = tracing:: span!( DEBUG , "get_incomplete" ) . entered ( ) ;
272293 let key = CompletionKey :: new_start ( term, & complete_traversal. input_range ) ;
273294 self . incomplete . get ( & key) . into_iter ( ) . flatten ( ) . cloned ( )
274295 }
@@ -277,23 +298,23 @@ impl<'gram> CompletionMap<'gram> {
277298 term : & ' gram Term ,
278299 input_range : & InputRange < ' gram > ,
279300 ) -> impl Iterator < Item = TraversalId > + use < ' map > {
280- let _span = tracing:: span!( tracing :: Level :: DEBUG , "get_complete" ) . entered ( ) ;
301+ let _span = tracing:: span!( DEBUG , "get_complete" ) . entered ( ) ;
281302 let key = CompletionKey :: new_total ( term, input_range) ;
282303 self . complete . get ( & key) . into_iter ( ) . flatten ( ) . cloned ( )
283304 }
284305 pub fn insert ( & mut self , traversal : & Traversal < ' gram > , lhs : & ' gram Term ) {
285- let _span = tracing:: span!( tracing :: Level :: DEBUG , "insert" ) . entered ( ) ;
306+ let _span = tracing:: span!( DEBUG , "insert" ) . entered ( ) ;
286307 match traversal. next_unmatched ( ) {
287308 Some ( Term :: Terminal ( _) ) => {
288309 // do nothing, because terminals are irrelevant to completion
289310 }
290311 Some ( unmatched @ Term :: Nonterminal ( _) ) => {
291312 let key = CompletionKey :: new_total ( unmatched, & traversal. input_range ) ;
292- self . incomplete . entry ( key) . or_default ( ) . insert ( traversal. id ) ;
313+ sorted_vec_insert ( self . incomplete . entry ( key) . or_default ( ) , traversal. id ) ;
293314 }
294315 None => {
295316 let key = CompletionKey :: new_start ( lhs, & traversal. input_range ) ;
296- self . complete . entry ( key) . or_default ( ) . insert ( traversal. id ) ;
317+ sorted_vec_insert ( self . complete . entry ( key) . or_default ( ) , traversal. id ) ;
297318 }
298319 }
299320 }
0 commit comments