diff --git a/benches/divan.rs b/benches/divan.rs index b06a848..1d60bed 100644 --- a/benches/divan.rs +++ b/benches/divan.rs @@ -22,6 +22,8 @@ mod examples { )); bencher.bench(|| { + #[cfg(feature = "tracing")] + let _span = tracing::span!(tracing::Level::DEBUG, "bench_parse_postal").entered(); input.parse::().unwrap(); }); } @@ -46,6 +48,8 @@ mod examples { let mut index = (0..random_walk_count).cycle(); bencher.bench_local(|| { + #[cfg(feature = "tracing")] + let _span = tracing::span!(tracing::Level::DEBUG, "bench_parse_postal_input").entered(); let index = index.next().unwrap(); let input = random_postal_strings.get(index).unwrap(); postal_grammar @@ -64,6 +68,8 @@ mod examples { grammar }) .bench_refs(|grammar| { + #[cfg(feature = "tracing")] + let _span = tracing::span!(tracing::Level::DEBUG, "bench_generate_dna").entered(); grammar.generate().unwrap(); }); } @@ -90,6 +96,9 @@ mod examples { let mut index = (0..random_walk_count).cycle(); bencher.bench_local(|| { + #[cfg(feature = "tracing")] + let _span = + tracing::span!(tracing::Level::DEBUG, "bench_parse_polish_calculator").entered(); let index = index.next().unwrap(); let input = random_walks.get(index).unwrap(); polish_calc_grammar @@ -114,6 +123,12 @@ mod examples { .with_inputs(|| rng.random_range(1..100)) .count_inputs_as::() .bench_local_values(|parse_count| { + #[cfg(feature = "tracing")] + let _span = tracing::span!( + tracing::Level::DEBUG, + "bench_parse_infinite_nullable_grammar" + ) + .entered(); infinite_grammar .parse_input("") .take(parse_count) @@ -132,6 +147,9 @@ mod parser_api { ); bencher.bench(|| { + #[cfg(feature = "tracing")] + let _span = + tracing::span!(tracing::Level::DEBUG, "bench_build_postal_parser").entered(); grammar.build_parser().unwrap(); }); } @@ -148,6 +166,9 @@ mod parser_api { ); bencher.bench(|| { + #[cfg(feature = "tracing")] + let _span = + tracing::span!(tracing::Level::DEBUG, "bench_build_polish_parser").entered(); grammar.build_parser().unwrap(); }); } @@ -173,6 +194,9 @@ mod parser_api { let mut index = (0..random_walk_count).cycle(); bencher.bench_local(|| { + #[cfg(feature = "tracing")] + let _span = + tracing::span!(tracing::Level::DEBUG, "bench_parse_postal_with_parser").entered(); let index = index.next().unwrap(); let input = random_postal_strings.get(index).unwrap(); parser.parse_input(input).for_each(divan::black_box_drop); @@ -202,6 +226,9 @@ mod parser_api { let mut index = (0..random_walk_count).cycle(); bencher.bench_local(|| { + #[cfg(feature = "tracing")] + let _span = + tracing::span!(tracing::Level::DEBUG, "bench_parse_polish_with_parser").entered(); let index = index.next().unwrap(); let input = random_walks.get(index).unwrap(); parser.parse_input(input).for_each(divan::black_box_drop); @@ -225,6 +252,12 @@ mod parser_api { .with_inputs(|| rng.random_range(1..100)) .count_inputs_as::() .bench_local_values(|parse_count| { + #[cfg(feature = "tracing")] + let _span = tracing::span!( + tracing::Level::DEBUG, + "bench_parse_infinite_nullable_with_parser" + ) + .entered(); parser .parse_input("") .take(parse_count) @@ -250,6 +283,8 @@ mod parser_api { .collect::>() }) .bench_local_refs(|inputs| { + #[cfg(feature = "tracing")] + let _span = tracing::span!(tracing::Level::DEBUG, "bench_per_input_100").entered(); for input in inputs { polish_calc_grammar .parse_input(input) @@ -277,6 +312,9 @@ mod parser_api { .collect::>() }) .bench_local_refs(|inputs| { + #[cfg(feature = "tracing")] + let _span = + tracing::span!(tracing::Level::DEBUG, "bench_reuse_parser_100").entered(); for input in inputs { parser.parse_input(input).for_each(divan::black_box_drop); } diff --git a/src/earley/grammar.rs b/src/earley/grammar.rs index 31e5d27..b5ffe3e 100644 --- a/src/earley/grammar.rs +++ b/src/earley/grammar.rs @@ -24,7 +24,7 @@ pub(crate) struct ParseGrammar<'gram> { impl<'gram, 'a> ParseGrammar<'gram> { pub fn new(grammar: &'gram crate::Grammar) -> Self { - let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new").entered(); + let _span = tracing::span!(DEBUG, "ParseGrammar_new").entered(); let mut productions = AppendOnlyVec::::new(); let mut prods_by_lhs = ProdTermMap::new(); diff --git a/src/earley/input_range.rs b/src/earley/input_range.rs index 762c40d..30b0c32 100644 --- a/src/earley/input_range.rs +++ b/src/earley/input_range.rs @@ -1,4 +1,4 @@ -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) struct InputRangeOffset { pub start: usize, pub len: usize, @@ -11,7 +11,7 @@ impl InputRangeOffset { } /// A sliding window over the input strings being parsed. -#[derive(Clone)] +#[derive(Clone, Copy)] pub(crate) struct InputRange<'gram> { input: &'gram str, pub offset: InputRangeOffset, @@ -24,6 +24,8 @@ impl<'gram> InputRange<'gram> { offset: InputRangeOffset { start: 0, len: 0 }, } } + /// Remaining input from the current position (slice; does not allocate). + #[inline(always)] pub fn next(&self) -> &'gram str { let next_idx = self.offset.start + self.offset.len; self.input.get(next_idx..).unwrap_or("") diff --git a/src/earley/mod.rs b/src/earley/mod.rs index d550a9c..ad3383f 100644 --- a/src/earley/mod.rs +++ b/src/earley/mod.rs @@ -4,7 +4,7 @@ mod traversal; use crate::parser::grammar::ParseGrammar; use crate::{GrammarParser, ParseTree, ParseTreeNode, Term, tracing}; use input_range::InputRange; -use std::collections::{BTreeSet, HashSet, VecDeque}; +use std::collections::{HashSet, VecDeque}; use std::rc::Rc; use traversal::{TermMatch, Traversal, TraversalId, TraversalTree}; @@ -13,6 +13,7 @@ pub fn parse<'gram>( input: &'gram str, starting_term: Option<&'gram Term>, ) -> impl Iterator> { + let _span = tracing::span!(DEBUG, "earley::parse").entered(); ParseTreeIter::new(ParserHold::Borrowed(grammar), input, starting_term) } @@ -23,6 +24,7 @@ pub fn parse_with_parser_rc<'gram>( input: &'gram str, starting_term: Option<&'gram Term>, ) -> impl Iterator> { + let _span = tracing::span!(DEBUG, "earley::parse_with_parser_rc").entered(); ParseTreeIter::new(ParserHold::Owned(parser), input, starting_term) } @@ -86,15 +88,15 @@ fn parse_tree<'gram>( let traversal = traversal_tree.get(traversal_id); grammar.get_production_by_id(traversal.production_id) }; - let rhs = traversal_tree - .get_matched(traversal_id) - .map(|term_match| match term_match { + let mut rhs = Vec::with_capacity(production.rhs.terms.len()); + for term_match in traversal_tree.get_matched(traversal_id) { + rhs.push(match term_match { TermMatch::Terminal(term) => ParseTreeNode::Terminal(term), TermMatch::Nonterminal(traversal_id) => { ParseTreeNode::Nonterminal(parse_tree(traversal_tree, grammar, *traversal_id)) } - }) - .collect::>(); + }); + } ParseTree::new(production.lhs, rhs) } @@ -107,10 +109,10 @@ fn earley<'gram>( completions: &mut CompletionMap<'gram>, grammar: &Rc>, ) -> Option { - let _span = tracing::span!(tracing::Level::DEBUG, "earley").entered(); + let _span = tracing::span!(DEBUG, "earley").entered(); while let Some(traversal_id) = queue.pop_front() { tracing::event!( - tracing::Level::TRACE, + TRACE, "earley queue pop: {:#?}", traversal_tree.get(traversal_id) ); @@ -118,41 +120,38 @@ fn earley<'gram>( match traversal_tree.get_matching(traversal_id) { Some(nonterminal @ Term::Nonterminal(_)) => { - let _span = tracing::span!(tracing::Level::DEBUG, "Predict").entered(); + let _span = tracing::span!(DEBUG, "Predict").entered(); let lhs = grammar.get_production_by_id(traversal.production_id).lhs; completions.insert(traversal, lhs); - let input_range = traversal.input_range.clone(); + let input_range = traversal.input_range; for production in grammar.get_productions_by_lhs(nonterminal) { let predicted = traversal_tree.predict(production, &input_range); - tracing::event!(tracing::Level::TRACE, "predicted: {predicted:#?}"); + tracing::event!(TRACE, "predicted: {predicted:#?}"); queue.push_back(predicted); } for completed in completions.get_complete(nonterminal, &input_range) { let term_match = TermMatch::Nonterminal(completed); let prior_completed = traversal_tree.match_term(traversal_id, term_match); - tracing::event!( - tracing::Level::TRACE, - "prior_completed: {prior_completed:#?}" - ); + tracing::event!(TRACE, "prior_completed: {prior_completed:#?}"); queue.push_back(prior_completed); } } Some(Term::Terminal(term)) => { - let _span = tracing::span!(tracing::Level::DEBUG, "Scan").entered(); + let _span = tracing::span!(DEBUG, "Scan").entered(); if traversal.input_range.next().starts_with(term) { let term_match = TermMatch::Terminal(term); let scanned = traversal_tree.match_term(traversal_id, term_match); - tracing::event!(tracing::Level::TRACE, "scanned: {scanned:#?}"); + tracing::event!(TRACE, "scanned: {scanned:#?}"); queue.push_back(scanned); } } None => { - let _span = tracing::span!(tracing::Level::DEBUG, "Complete").entered(); + let _span = tracing::span!(DEBUG, "Complete").entered(); let is_full_traversal = traversal.is_starting && traversal.input_range.is_complete(); @@ -164,7 +163,7 @@ fn earley<'gram>( let term_match = TermMatch::Nonterminal(traversal_id); let completed = traversal_tree.match_term(incomplete_traversal_id, term_match); - tracing::event!(tracing::Level::TRACE, "completed: {completed:#?}"); + tracing::event!(TRACE, "completed: {completed:#?}"); queue.push_back(completed); } @@ -192,10 +191,11 @@ impl<'gram> ParseTreeIter<'gram> { input: &'gram str, starting_term: Option<&'gram Term>, ) -> Self { + let _span = tracing::span!(DEBUG, "ParseTreeIter::new").entered(); let input_range = InputRange::new(input); let mut traversal_tree = TraversalTree::default(); let mut queue = TraversalQueue::default(); - let completions = CompletionMap::default(); + let completions = CompletionMap::with_capacity(32, 32); let parser_ref = parser.as_ref(); let starting_term = starting_term.unwrap_or(parser_ref.starting_term); @@ -227,9 +227,9 @@ impl<'gram> Iterator for ParseTreeIter<'gram> { let parse_grammar = &parser.as_ref().parse_grammar; earley(queue, traversal_tree, completions, parse_grammar).map(|traversal_id| { - let _span = tracing::span!(tracing::Level::DEBUG, "next_parse_tree").entered(); + let _span = tracing::span!(DEBUG, "next_parse_tree").entered(); let parse_tree = parse_tree(traversal_tree, parse_grammar, traversal_id); - tracing::event!(tracing::Level::TRACE, "\n{parse_tree}"); + tracing::event!(TRACE, "\n{parse_tree}"); parse_tree }) } @@ -256,19 +256,40 @@ impl<'gram> CompletionKey<'gram> { } } -#[derive(Debug, Default)] +/// Insert into a sorted Vec; no-op if already present. Keeps iteration order stable (same as `BTreeSet`). +fn sorted_vec_insert(vec: &mut Vec, id: TraversalId) { + match vec.binary_search(&id) { + Ok(_) => {} + Err(i) => vec.insert(i, id), + } +} + +#[derive(Debug)] pub(crate) struct CompletionMap<'gram> { - incomplete: crate::HashMap, BTreeSet>, - complete: crate::HashMap, BTreeSet>, + incomplete: crate::HashMap, Vec>, + complete: crate::HashMap, Vec>, +} + +impl<'gram> Default for CompletionMap<'gram> { + fn default() -> Self { + Self::with_capacity(0, 0) + } } impl<'gram> CompletionMap<'gram> { + /// Create with reserved capacity to reduce rehashing during parsing. + pub fn with_capacity(incomplete: usize, complete: usize) -> Self { + Self { + incomplete: crate::HashMap::with_capacity(incomplete), + complete: crate::HashMap::with_capacity(complete), + } + } pub fn get_incomplete<'map>( &'map self, term: &'gram Term, complete_traversal: &Traversal<'gram>, ) -> impl Iterator + use<'map> { - let _span = tracing::span!(tracing::Level::DEBUG, "get_incomplete").entered(); + let _span = tracing::span!(DEBUG, "get_incomplete").entered(); let key = CompletionKey::new_start(term, &complete_traversal.input_range); self.incomplete.get(&key).into_iter().flatten().cloned() } @@ -277,23 +298,23 @@ impl<'gram> CompletionMap<'gram> { term: &'gram Term, input_range: &InputRange<'gram>, ) -> impl Iterator + use<'map> { - let _span = tracing::span!(tracing::Level::DEBUG, "get_complete").entered(); + let _span = tracing::span!(DEBUG, "get_complete").entered(); let key = CompletionKey::new_total(term, input_range); self.complete.get(&key).into_iter().flatten().cloned() } pub fn insert(&mut self, traversal: &Traversal<'gram>, lhs: &'gram Term) { - let _span = tracing::span!(tracing::Level::DEBUG, "insert").entered(); + let _span = tracing::span!(DEBUG, "insert").entered(); match traversal.next_unmatched() { Some(Term::Terminal(_)) => { // do nothing, because terminals are irrelevant to completion } Some(unmatched @ Term::Nonterminal(_)) => { let key = CompletionKey::new_total(unmatched, &traversal.input_range); - self.incomplete.entry(key).or_default().insert(traversal.id); + sorted_vec_insert(self.incomplete.entry(key).or_default(), traversal.id); } None => { let key = CompletionKey::new_start(lhs, &traversal.input_range); - self.complete.entry(key).or_default().insert(traversal.id); + sorted_vec_insert(self.complete.entry(key).or_default(), traversal.id); } } } diff --git a/src/earley/traversal.rs b/src/earley/traversal.rs index eb1e4aa..8296fbf 100644 --- a/src/earley/traversal.rs +++ b/src/earley/traversal.rs @@ -75,7 +75,7 @@ pub(crate) struct TraversalMatchIter<'gram, 'tree> { impl<'gram, 'tree> TraversalMatchIter<'gram, 'tree> { pub fn new(last: TraversalId, tree: &'tree TraversalTree<'gram>) -> Self { - let _span = tracing::span!(tracing::Level::DEBUG, "match_iter_new").entered(); + let _span = tracing::span!(DEBUG, "match_iter_new").entered(); // walk up the tree until the root is found let mut current = last; while let Some(edge) = &tree.get(current).from { @@ -93,7 +93,7 @@ impl<'gram, 'tree> TraversalMatchIter<'gram, 'tree> { impl<'gram, 'tree> Iterator for TraversalMatchIter<'gram, 'tree> { type Item = &'tree TermMatch<'gram>; fn next(&mut self) -> Option { - let _span = tracing::span!(tracing::Level::DEBUG, "match_iter_next").entered(); + let _span = tracing::span!(DEBUG, "match_iter_next").entered(); if self.current == self.last { return None; } @@ -143,8 +143,7 @@ impl<'gram> TraversalTree<'gram> { production: &Production<'gram>, is_starting: bool, ) -> TraversalId { - let _span = - tracing::span!(tracing::Level::DEBUG, "traversal_tree_predict_is_starting").entered(); + let _span = tracing::span!(DEBUG, "traversal_tree_predict_is_starting").entered(); let production_id = production.id; let traversal_root_key = TraversalRoot { production_id, @@ -182,7 +181,7 @@ impl<'gram> TraversalTree<'gram> { self._predict(input_range, production, false) } pub fn match_term(&mut self, parent: TraversalId, term: TermMatch<'gram>) -> TraversalId { - let _span = tracing::span!(tracing::Level::DEBUG, "match_term").entered(); + let _span = tracing::span!(DEBUG, "match_term").entered(); let parent = self.arena.get(parent).expect("valid parent traversal ID"); let input_range = match term { TermMatch::Terminal(term) => parent.input_range.advance_by(term.len()), @@ -212,7 +211,7 @@ impl<'gram> TraversalTree<'gram> { id, production_id, unmatched, - input_range: input_range.clone(), + input_range, is_starting, from: Some(from.clone()), }); diff --git a/src/grammar.rs b/src/grammar.rs index b691e92..6da68a9 100644 --- a/src/grammar.rs +++ b/src/grammar.rs @@ -35,11 +35,10 @@ #[cfg(feature = "ABNF")] use crate::ABNF; use crate::error::Error; -use crate::expression::Expression; use crate::parsers::{self, BNF, Format}; use crate::production::Production; use crate::term::Term; -use rand::{Rng, SeedableRng, rng, rngs::StdRng, seq::IndexedRandom}; +use rand::{Rng, SeedableRng, rng, rngs::StdRng}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -356,6 +355,11 @@ impl Grammar { self.productions.iter() } + /// Number of productions (for capacity reservation in validation). + pub(crate) const fn production_count(&self) -> usize { + self.productions.len() + } + /// Get mutable iterator of the `Grammar`'s `Production`s pub fn productions_iter_mut(&mut self) -> impl Iterator { self.productions.iter_mut() @@ -491,7 +495,7 @@ impl Grammar { f: &impl Fn(&str, &str) -> bool, ) -> Result { match term { - Term::Nonterminal(nt) => self.traverse(nt, rng, f), + Term::Nonterminal(nt) => self.traverse(nt.as_str(), rng, f), Term::Terminal(t) => Ok(t.clone()), } } @@ -503,19 +507,25 @@ impl Grammar { f: &impl Fn(&str, &str) -> bool, ) -> Result { loop { - let nonterm = Term::Nonterminal(ident.to_string()); - let find_lhs = self.productions_iter().find(|&x| x.lhs == nonterm); - - let Some(production) = find_lhs else { - return Ok(nonterm.to_string()); + let production = match self + .productions_iter() + .find(|p| matches!(&p.lhs, Term::Nonterminal(s) if s.as_str() == ident)) + { + Some(p) => p, + None => return Ok(ident.to_string()), }; - let expressions = production.rhs_iter().collect::>(); - - let Some(expression) = expressions.choose(rng) else { - return Err(Error::GenerateError(String::from( - "Couldn't select random Expression!", - ))); + let len = production.len(); + let expression = match len { + 0 => { + return Err(Error::GenerateError(String::from( + "Couldn't select random Expression!", + ))); + } + n => { + let idx = rng.random_range(0..n); + production.rhs_iter().nth(idx).expect("n > 0") + } }; let mut result = String::new(); diff --git a/src/parser/grammar.rs b/src/parser/grammar.rs index 9e079de..087d4dd 100644 --- a/src/parser/grammar.rs +++ b/src/parser/grammar.rs @@ -32,11 +32,13 @@ impl<'gram, 'a> ParseGrammar<'gram> { /// Returns `Error::ValidationError` if any nonterminal used in the RHS of /// productions lacks a definition in the grammar. pub fn new(grammar: &'gram crate::Grammar) -> Result { - let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new").entered(); + let _span = tracing::span!(DEBUG, "ParseGrammar_new").entered(); let mut productions = AppendOnlyVec::::new(); let mut prods_by_lhs = ProdTermMap::new(); let mut sets = crate::validation::NonterminalSets::new(); + let n = grammar.production_count(); + sets.reserve(n, n.saturating_mul(2)); let flat_prod_iter = grammar .productions_iter() @@ -72,7 +74,7 @@ impl<'gram, 'a> ParseGrammar<'gram> { /// `Grammar::parse_input` / `parse_input_starting_with` to preserve /// pre-validation behavior. pub(crate) fn new_unchecked(grammar: &'gram crate::Grammar) -> Self { - let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new_unchecked").entered(); + let _span = tracing::span!(DEBUG, "ParseGrammar_new_unchecked").entered(); let mut productions = AppendOnlyVec::::new(); let mut prods_by_lhs = ProdTermMap::new(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5db602c..5b551e2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4,6 +4,7 @@ use crate::ParseTree; use crate::error::Error; use crate::grammar::Grammar; use crate::term::Term; +use crate::tracing; use grammar::ParseGrammar; use std::rc::Rc; @@ -39,6 +40,7 @@ impl<'gram> GrammarParser<'gram> { /// Returns `Error::ValidationError` if any nonterminal used in the RHS of /// productions lacks a definition in the grammar. pub fn new(grammar: &'gram Grammar) -> Result { + let _span = tracing::span!(DEBUG, "GrammarParser::new").entered(); let starting_term = grammar.starting_term().ok_or_else(|| { Error::ValidationError("Grammar must have at least one production".to_string()) })?; @@ -52,6 +54,7 @@ impl<'gram> GrammarParser<'gram> { /// Construct a parser without validating that all nonterminals are defined. /// Used only by deprecated `Grammar::parse_input` / `parse_input_starting_with`. pub(crate) fn new_unchecked(grammar: &'gram Grammar) -> Self { + let _span = tracing::span!(DEBUG, "GrammarParser::new_unchecked").entered(); let starting_term = grammar .starting_term() .expect("Grammar must have at least one production"); @@ -69,6 +72,7 @@ impl<'gram> GrammarParser<'gram> { &'p self, input: &'gram str, ) -> impl Iterator> + use<'p, 'gram> { + let _span = tracing::span!(DEBUG, "GrammarParser::parse_input").entered(); self.parse_input_starting_with(input, self.starting_term) } @@ -80,6 +84,7 @@ impl<'gram> GrammarParser<'gram> { input: &'gram str, start: &'gram Term, ) -> impl Iterator> + use<'p, 'gram> { + let _span = tracing::span!(DEBUG, "GrammarParser::parse_input_starting_with").entered(); crate::earley::parse(self, input, Some(start)) } } diff --git a/src/parsers/bnf.rs b/src/parsers/bnf.rs index 90f02c6..745db88 100644 --- a/src/parsers/bnf.rs +++ b/src/parsers/bnf.rs @@ -13,6 +13,9 @@ impl Format for BNF { fn alternative_separator() -> char { '|' } + fn production_start_char() -> Option { + Some('<') + } } #[cfg(test)] diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index c677ac3..46be27b 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -16,7 +16,7 @@ use nom::{ IResult, Parser, branch::alt, bytes::complete::{tag, take_till, take_until}, - character::complete::{self, multispace0, satisfy}, + character::complete::{self, satisfy}, combinator::{all_consuming, eof, not, opt, peek, recognize}, multi::many1, sequence::{delimited, preceded, terminated}, @@ -70,9 +70,16 @@ pub trait Format { fn nonterminal_delimiter() -> Option<(char, char)>; fn production_separator() -> &'static str; fn alternative_separator() -> char; + /// If `Some(c)`, production boundaries can be detected by this character after whitespace + /// (e.g. BNF uses `'<'`), avoiding a full `prod_lhs` parse as lookahead. + #[must_use] + fn production_start_char() -> Option { + None + } } fn nonterminal(input: &str) -> IResult<&str, Term> { + let _span = crate::tracing::span!(DEBUG, "nonterminal").entered(); let (input, nt) = match F::nonterminal_delimiter() { Some((start, end)) => delimited( complete::char(start), @@ -92,6 +99,7 @@ fn nonterminal(input: &str) -> IResult<&str, Term> { } fn prod_lhs(input: &str) -> IResult<&str, Term> { + let _span = crate::tracing::span!(DEBUG, "prod_lhs").entered(); let (input, nt) = nonterminal::(input)?; let (input, _) = tag(F::production_separator()).parse(input)?; @@ -103,10 +111,12 @@ fn prod_lhs(input: &str) -> IResult<&str, Term> { } fn prod_rhs(input: &str) -> IResult<&str, Vec> { + let _span = crate::tracing::span!(DEBUG, "prod_rhs").entered(); xt_list_with_separator(expression::, expression_next::).parse(input) } pub fn terminal(input: &str) -> IResult<&str, Term> { + let _span = crate::tracing::span!(DEBUG, "terminal").entered(); let (input, t) = alt(( delimited(complete::char('"'), take_until("\""), complete::char('"')), delimited(complete::char('\''), take_until("'"), complete::char('\'')), @@ -118,24 +128,26 @@ pub fn terminal(input: &str) -> IResult<&str, Term> { Ok((input, Term::Terminal(t.to_string()))) } -///this should never fail, unwrap it when calling directly please! +/// Skips whitespace and ;-comments in one pass. Never fails. #[mutants::skip] pub fn whitespace_plus_comments(mut input: &str) -> IResult<&str, char> { - let mut old_input = input; + let _span = crate::tracing::span!(DEBUG, "whitespace_plus_comments").entered(); loop { - (input, _) = multispace0::<&str, nom::error::Error<&str>>.parse(input)?; - (input, _) = opt(preceded( - complete::char(';'), - take_till(|c: char| c == '\r' || c == '\n'), - )) - .parse(input)?; - - if input == old_input { - break; + let rest = input.trim_start_matches(|c: char| c.is_whitespace()); + if rest.len() == input.len() { + if let Some(after_semicolon) = rest.strip_prefix(';') { + if let Some(pos) = after_semicolon.find(['\r', '\n']) { + input = &after_semicolon[pos..]; + } else { + return Ok(("", '\0')); + } + } else { + return Ok((input, '\0')); + } + } else { + input = rest; } - old_input = input } - Ok((input, '\0')) } pub fn is_format_standard_bnf(input: &str) -> bool { @@ -146,10 +158,12 @@ pub fn is_format_standard_bnf(input: &str) -> bool { } pub fn term(input: &str) -> IResult<&str, Term> { + let _span = crate::tracing::span!(DEBUG, "term").entered(); alt((terminal, nonterminal::)).parse(input) } pub fn expression_next(input: &str) -> IResult<&str, &str> { + let _span = crate::tracing::span!(DEBUG, "expression_next").entered(); let (input, _) = complete::char(F::alternative_separator()).parse(input)?; let (input, _) = whitespace_plus_comments(input).unwrap(); @@ -157,6 +171,7 @@ pub fn expression_next(input: &str) -> IResult<&str, &str> { } pub fn expression(input: &str) -> IResult<&str, Expression> { + let _span = crate::tracing::span!(DEBUG, "expression").entered(); let (input, terms) = many1(terminated(term::, not(tag(F::production_separator())))).parse(input)?; @@ -164,9 +179,20 @@ pub fn expression(input: &str) -> IResult<&str, Expression> { } pub fn production(input: &str) -> IResult<&str, Production> { + let _span = crate::tracing::span!(DEBUG, "production").entered(); let (input, lhs) = prod_lhs::(input)?; let (input, rhs) = prod_rhs::(input)?; - let (input, _) = alt((recognize(peek(eof)), recognize(peek(prod_lhs::)))).parse(input)?; + let (input, _) = match F::production_start_char() { + Some(start_char) => alt(( + recognize(peek(eof)), + recognize(peek(preceded( + whitespace_plus_comments, + complete::char(start_char), + ))), + )) + .parse(input)?, + None => alt((recognize(peek(eof)), recognize(peek(prod_lhs::)))).parse(input)?, + }; Ok((input, Production::from_parts(lhs, rhs))) } @@ -180,9 +206,57 @@ pub fn grammar(input: &str) -> IResult<&str, Grammar> { Ok((input, normalize_parsed_grammar(parsed))) } +/// Returns true if the grammar text contains `(` or `[` outside of string literals, +/// i.e. it uses extended syntax (groups or optionals). Used to choose the fast parse path. +pub(crate) fn grammar_has_extended_syntax(input: &str) -> bool { + if !input.contains('(') && !input.contains('[') { + return false; + } + let mut in_double = false; + let mut in_single = false; + for c in input.chars() { + if in_double { + if c == '"' { + in_double = false; + } + continue; + } + if in_single { + if c == '\'' { + in_single = false; + } + continue; + } + match c { + '"' => in_double = true, + '\'' => in_single = true, + '(' | '[' => return true, + _ => {} + } + } + false +} + +/// Plain BNF grammar (no groups/optionals) → Grammar directly, no normalization. +fn plain_grammar(input: &str) -> IResult<&str, Grammar> { + let _span = crate::tracing::span!(DEBUG, "plain_grammar").entered(); + let (input, _) = whitespace_plus_comments(input)?; + let (input, first) = production::(input)?; + let (input, rest) = many1(preceded(whitespace_plus_comments, production::)).parse(input)?; + let mut prods = vec![first]; + prods.extend(rest); + Ok((input, Grammar::from_parts(prods))) +} + /// Like `grammar`, but requires the entire input to be consumed. #[allow(dead_code)] // public API for nom-style parsing pub fn grammar_complete(input: &str) -> IResult<&str, Grammar> { + let _span = crate::tracing::span!(DEBUG, "grammar_complete").entered(); + if !grammar_has_extended_syntax(input) + && let Ok((input, g)) = all_consuming(plain_grammar::).parse(input) + { + return Ok((input, g)); + } let (input, parsed) = parsed_grammar_complete::(input)?; Ok((input, normalize_parsed_grammar(parsed))) } diff --git a/src/tracing.rs b/src/tracing.rs index 5b5ac2e..8c4ea96 100644 --- a/src/tracing.rs +++ b/src/tracing.rs @@ -1,6 +1,22 @@ #[cfg(feature = "tracing")] mod defs { - pub(crate) use tracing::{Level, event, span}; + #[allow(unused_imports)] + pub(crate) use ::tracing::Level; + + macro_rules! span { + (Level::$level:ident, $($rest:tt)*) => { ::tracing::span!(::tracing::Level::$level, $($rest)*) }; + ($level:ident, $($rest:tt)*) => { ::tracing::span!(::tracing::Level::$level, $($rest)*) }; + ($($all:tt)*) => { ::tracing::span!($($all)*) }; + } + + macro_rules! event { + (Level::$level:ident, $($rest:tt)*) => { ::tracing::event!(::tracing::Level::$level, $($rest)*) }; + ($level:ident, $($rest:tt)*) => { ::tracing::event!(::tracing::Level::$level, $($rest)*) }; + ($($all:tt)*) => { ::tracing::event!($($all)*) }; + } + + pub(crate) use event; + pub(crate) use span; #[allow(dead_code)] #[mutants::skip] @@ -14,6 +30,16 @@ mod defs { #[cfg(not(feature = "tracing"))] mod defs { + /// Stub level when the `tracing` feature is disabled; only used inside macros (arguments are discarded). + #[allow(dead_code)] + pub enum Level { + TRACE, + DEBUG, + INFO, + WARN, + ERROR, + } + pub struct Span {} impl Span { @@ -24,6 +50,14 @@ mod defs { } macro_rules! span { + (Level::$level:ident, $($rest:tt)*) => {{ + use crate::tracing::Span; + Span {} + }}; + ($level:ident, $($rest:tt)*) => {{ + use crate::tracing::Span; + Span {} + }}; ($($any:tt)*) => {{ use crate::tracing::Span; Span {} @@ -32,13 +66,13 @@ mod defs { pub(crate) use span; + #[allow(dead_code)] pub struct Event {} macro_rules! event { - ($($any:tt)*) => {{ - use crate::tracing::Event; - Event {} - }}; + (Level::$level:ident, $($rest:tt)*) => {{}}; + ($level:ident, $($rest:tt)*) => {{}}; + ($($any:tt)*) => {{}}; } pub(crate) use event; @@ -85,4 +119,16 @@ mod tests { let span = span!(Level::DEBUG, "test"); let _entered = span.entered(); } + + #[test] + fn test_span_macro_bare_level() { + // Test that span! accepts bare level (no Level:: prefix, no import) + let _span = span!(DEBUG, "bare_level_span").entered(); + } + + #[test] + fn test_event_macro_bare_level() { + // Test that event! accepts bare level (no Level:: prefix, no import) + event!(INFO, "bare_level_event"); + } } diff --git a/src/validation.rs b/src/validation.rs index 4be6155..58315e8 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -25,6 +25,12 @@ impl<'a> NonterminalSets<'a> { self.referenced.insert(nt); } + /// Reserve capacity to avoid reallocations during recording. + pub(crate) fn reserve(&mut self, defined: usize, referenced: usize) { + self.defined.reserve(defined); + self.referenced.reserve(referenced); + } + /// Iterator over nonterminals that are referenced but not defined. pub(crate) fn undefined(&self) -> impl Iterator + '_ { self.referenced.difference(&self.defined).copied()