Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions benches/divan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ mod examples {
));

bencher.bench(|| {
#[cfg(feature = "tracing")]
let _span = tracing::span!(tracing::Level::DEBUG, "bench_parse_postal").entered();
input.parse::<bnf::Grammar>().unwrap();
});
}
Expand All @@ -46,6 +48,8 @@ mod examples {
let mut index = (0..random_walk_count).cycle();

bencher.bench_local(|| {
#[cfg(feature = "tracing")]
let _span = tracing::span!(tracing::Level::DEBUG, "bench_parse_postal_input").entered();
let index = index.next().unwrap();
let input = random_postal_strings.get(index).unwrap();
postal_grammar
Expand All @@ -64,6 +68,8 @@ mod examples {
grammar
})
.bench_refs(|grammar| {
#[cfg(feature = "tracing")]
let _span = tracing::span!(tracing::Level::DEBUG, "bench_generate_dna").entered();
grammar.generate().unwrap();
});
}
Expand All @@ -90,6 +96,9 @@ mod examples {
let mut index = (0..random_walk_count).cycle();

bencher.bench_local(|| {
#[cfg(feature = "tracing")]
let _span =
tracing::span!(tracing::Level::DEBUG, "bench_parse_polish_calculator").entered();
let index = index.next().unwrap();
let input = random_walks.get(index).unwrap();
polish_calc_grammar
Expand All @@ -114,6 +123,12 @@ mod examples {
.with_inputs(|| rng.random_range(1..100))
.count_inputs_as::<divan::counter::ItemsCount>()
.bench_local_values(|parse_count| {
#[cfg(feature = "tracing")]
let _span = tracing::span!(
tracing::Level::DEBUG,
"bench_parse_infinite_nullable_grammar"
)
.entered();
infinite_grammar
.parse_input("")
.take(parse_count)
Expand All @@ -132,6 +147,9 @@ mod parser_api {
);

bencher.bench(|| {
#[cfg(feature = "tracing")]
let _span =
tracing::span!(tracing::Level::DEBUG, "bench_build_postal_parser").entered();
grammar.build_parser().unwrap();
});
}
Expand All @@ -148,6 +166,9 @@ mod parser_api {
);

bencher.bench(|| {
#[cfg(feature = "tracing")]
let _span =
tracing::span!(tracing::Level::DEBUG, "bench_build_polish_parser").entered();
grammar.build_parser().unwrap();
});
}
Expand All @@ -173,6 +194,9 @@ mod parser_api {
let mut index = (0..random_walk_count).cycle();

bencher.bench_local(|| {
#[cfg(feature = "tracing")]
let _span =
tracing::span!(tracing::Level::DEBUG, "bench_parse_postal_with_parser").entered();
let index = index.next().unwrap();
let input = random_postal_strings.get(index).unwrap();
parser.parse_input(input).for_each(divan::black_box_drop);
Expand Down Expand Up @@ -202,6 +226,9 @@ mod parser_api {
let mut index = (0..random_walk_count).cycle();

bencher.bench_local(|| {
#[cfg(feature = "tracing")]
let _span =
tracing::span!(tracing::Level::DEBUG, "bench_parse_polish_with_parser").entered();
let index = index.next().unwrap();
let input = random_walks.get(index).unwrap();
parser.parse_input(input).for_each(divan::black_box_drop);
Expand All @@ -225,6 +252,12 @@ mod parser_api {
.with_inputs(|| rng.random_range(1..100))
.count_inputs_as::<divan::counter::ItemsCount>()
.bench_local_values(|parse_count| {
#[cfg(feature = "tracing")]
let _span = tracing::span!(
tracing::Level::DEBUG,
"bench_parse_infinite_nullable_with_parser"
)
.entered();
parser
.parse_input("")
.take(parse_count)
Expand All @@ -250,6 +283,8 @@ mod parser_api {
.collect::<Vec<_>>()
})
.bench_local_refs(|inputs| {
#[cfg(feature = "tracing")]
let _span = tracing::span!(tracing::Level::DEBUG, "bench_per_input_100").entered();
for input in inputs {
polish_calc_grammar
.parse_input(input)
Expand Down Expand Up @@ -277,6 +312,9 @@ mod parser_api {
.collect::<Vec<_>>()
})
.bench_local_refs(|inputs| {
#[cfg(feature = "tracing")]
let _span =
tracing::span!(tracing::Level::DEBUG, "bench_reuse_parser_100").entered();
for input in inputs {
parser.parse_input(input).for_each(divan::black_box_drop);
}
Expand Down
2 changes: 1 addition & 1 deletion src/earley/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub(crate) struct ParseGrammar<'gram> {

impl<'gram, 'a> ParseGrammar<'gram> {
pub fn new(grammar: &'gram crate::Grammar) -> Self {
let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new").entered();
let _span = tracing::span!(DEBUG, "ParseGrammar_new").entered();

let mut productions = AppendOnlyVec::<Production, ProductionId>::new();
let mut prods_by_lhs = ProdTermMap::new();
Expand Down
6 changes: 4 additions & 2 deletions src/earley/input_range.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct InputRangeOffset {
pub start: usize,
pub len: usize,
Expand All @@ -11,7 +11,7 @@ impl InputRangeOffset {
}

/// A sliding window over the input strings being parsed.
#[derive(Clone)]
#[derive(Clone, Copy)]
pub(crate) struct InputRange<'gram> {
input: &'gram str,
pub offset: InputRangeOffset,
Expand All @@ -24,6 +24,8 @@ impl<'gram> InputRange<'gram> {
offset: InputRangeOffset { start: 0, len: 0 },
}
}
/// Remaining input from the current position (slice; does not allocate).
#[inline(always)]
pub fn next(&self) -> &'gram str {
let next_idx = self.offset.start + self.offset.len;
self.input.get(next_idx..).unwrap_or("")
Expand Down
81 changes: 51 additions & 30 deletions src/earley/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod traversal;
use crate::parser::grammar::ParseGrammar;
use crate::{GrammarParser, ParseTree, ParseTreeNode, Term, tracing};
use input_range::InputRange;
use std::collections::{BTreeSet, HashSet, VecDeque};
use std::collections::{HashSet, VecDeque};
use std::rc::Rc;
use traversal::{TermMatch, Traversal, TraversalId, TraversalTree};

Expand All @@ -13,6 +13,7 @@ pub fn parse<'gram>(
input: &'gram str,
starting_term: Option<&'gram Term>,
) -> impl Iterator<Item = ParseTree<'gram>> {
let _span = tracing::span!(DEBUG, "earley::parse").entered();
ParseTreeIter::new(ParserHold::Borrowed(grammar), input, starting_term)
}

Expand All @@ -23,6 +24,7 @@ pub fn parse_with_parser_rc<'gram>(
input: &'gram str,
starting_term: Option<&'gram Term>,
) -> impl Iterator<Item = ParseTree<'gram>> {
let _span = tracing::span!(DEBUG, "earley::parse_with_parser_rc").entered();
ParseTreeIter::new(ParserHold::Owned(parser), input, starting_term)
}

Expand Down Expand Up @@ -86,15 +88,15 @@ fn parse_tree<'gram>(
let traversal = traversal_tree.get(traversal_id);
grammar.get_production_by_id(traversal.production_id)
};
let rhs = traversal_tree
.get_matched(traversal_id)
.map(|term_match| match term_match {
let mut rhs = Vec::with_capacity(production.rhs.terms.len());
for term_match in traversal_tree.get_matched(traversal_id) {
rhs.push(match term_match {
TermMatch::Terminal(term) => ParseTreeNode::Terminal(term),
TermMatch::Nonterminal(traversal_id) => {
ParseTreeNode::Nonterminal(parse_tree(traversal_tree, grammar, *traversal_id))
}
})
.collect::<Vec<ParseTreeNode>>();
});
}

ParseTree::new(production.lhs, rhs)
}
Expand All @@ -107,52 +109,49 @@ fn earley<'gram>(
completions: &mut CompletionMap<'gram>,
grammar: &Rc<ParseGrammar<'gram>>,
) -> Option<TraversalId> {
let _span = tracing::span!(tracing::Level::DEBUG, "earley").entered();
let _span = tracing::span!(DEBUG, "earley").entered();
while let Some(traversal_id) = queue.pop_front() {
tracing::event!(
tracing::Level::TRACE,
TRACE,
"earley queue pop: {:#?}",
traversal_tree.get(traversal_id)
);
let traversal = traversal_tree.get(traversal_id);

match traversal_tree.get_matching(traversal_id) {
Some(nonterminal @ Term::Nonterminal(_)) => {
let _span = tracing::span!(tracing::Level::DEBUG, "Predict").entered();
let _span = tracing::span!(DEBUG, "Predict").entered();

let lhs = grammar.get_production_by_id(traversal.production_id).lhs;

completions.insert(traversal, lhs);

let input_range = traversal.input_range.clone();
let input_range = traversal.input_range;

for production in grammar.get_productions_by_lhs(nonterminal) {
let predicted = traversal_tree.predict(production, &input_range);
tracing::event!(tracing::Level::TRACE, "predicted: {predicted:#?}");
tracing::event!(TRACE, "predicted: {predicted:#?}");
queue.push_back(predicted);
}

for completed in completions.get_complete(nonterminal, &input_range) {
let term_match = TermMatch::Nonterminal(completed);
let prior_completed = traversal_tree.match_term(traversal_id, term_match);
tracing::event!(
tracing::Level::TRACE,
"prior_completed: {prior_completed:#?}"
);
tracing::event!(TRACE, "prior_completed: {prior_completed:#?}");
queue.push_back(prior_completed);
}
}
Some(Term::Terminal(term)) => {
let _span = tracing::span!(tracing::Level::DEBUG, "Scan").entered();
let _span = tracing::span!(DEBUG, "Scan").entered();
if traversal.input_range.next().starts_with(term) {
let term_match = TermMatch::Terminal(term);
let scanned = traversal_tree.match_term(traversal_id, term_match);
tracing::event!(tracing::Level::TRACE, "scanned: {scanned:#?}");
tracing::event!(TRACE, "scanned: {scanned:#?}");
queue.push_back(scanned);
}
}
None => {
let _span = tracing::span!(tracing::Level::DEBUG, "Complete").entered();
let _span = tracing::span!(DEBUG, "Complete").entered();

let is_full_traversal =
traversal.is_starting && traversal.input_range.is_complete();
Expand All @@ -164,7 +163,7 @@ fn earley<'gram>(
let term_match = TermMatch::Nonterminal(traversal_id);
let completed = traversal_tree.match_term(incomplete_traversal_id, term_match);

tracing::event!(tracing::Level::TRACE, "completed: {completed:#?}");
tracing::event!(TRACE, "completed: {completed:#?}");
queue.push_back(completed);
}

Expand Down Expand Up @@ -192,10 +191,11 @@ impl<'gram> ParseTreeIter<'gram> {
input: &'gram str,
starting_term: Option<&'gram Term>,
) -> Self {
let _span = tracing::span!(DEBUG, "ParseTreeIter::new").entered();
let input_range = InputRange::new(input);
let mut traversal_tree = TraversalTree::default();
let mut queue = TraversalQueue::default();
let completions = CompletionMap::default();
let completions = CompletionMap::with_capacity(32, 32);
let parser_ref = parser.as_ref();
let starting_term = starting_term.unwrap_or(parser_ref.starting_term);

Expand Down Expand Up @@ -227,9 +227,9 @@ impl<'gram> Iterator for ParseTreeIter<'gram> {
let parse_grammar = &parser.as_ref().parse_grammar;

earley(queue, traversal_tree, completions, parse_grammar).map(|traversal_id| {
let _span = tracing::span!(tracing::Level::DEBUG, "next_parse_tree").entered();
let _span = tracing::span!(DEBUG, "next_parse_tree").entered();
let parse_tree = parse_tree(traversal_tree, parse_grammar, traversal_id);
tracing::event!(tracing::Level::TRACE, "\n{parse_tree}");
tracing::event!(TRACE, "\n{parse_tree}");
parse_tree
})
}
Expand All @@ -256,19 +256,40 @@ impl<'gram> CompletionKey<'gram> {
}
}

#[derive(Debug, Default)]
/// Adds `id` to `vec` at the position that keeps the vector in ascending order.
///
/// `vec` is expected to be sorted already (the lookup is a binary search). When `id` is
/// already present nothing changes, so the vector acts as an ordered set and iterates in
/// the same order a `BTreeSet` would.
fn sorted_vec_insert(vec: &mut Vec<TraversalId>, id: TraversalId) {
    // `Err(slot)` means "not found; `slot` is where it belongs".
    if let Err(slot) = vec.binary_search(&id) {
        vec.insert(slot, id);
    }
}

#[derive(Debug)]
pub(crate) struct CompletionMap<'gram> {
incomplete: crate::HashMap<CompletionKey<'gram>, BTreeSet<TraversalId>>,
complete: crate::HashMap<CompletionKey<'gram>, BTreeSet<TraversalId>>,
incomplete: crate::HashMap<CompletionKey<'gram>, Vec<TraversalId>>,
complete: crate::HashMap<CompletionKey<'gram>, Vec<TraversalId>>,
}

impl<'gram> Default for CompletionMap<'gram> {
fn default() -> Self {
Self::with_capacity(0, 0)
}
}

impl<'gram> CompletionMap<'gram> {
/// Builds an empty `CompletionMap` whose two tables are pre-sized.
///
/// `incomplete` and `complete` are the capacities requested for the respective maps;
/// reserving up front reduces rehashing while a parse fills them.
pub fn with_capacity(incomplete: usize, complete: usize) -> Self {
    let incomplete = crate::HashMap::with_capacity(incomplete);
    let complete = crate::HashMap::with_capacity(complete);
    Self {
        incomplete,
        complete,
    }
}
pub fn get_incomplete<'map>(
&'map self,
term: &'gram Term,
complete_traversal: &Traversal<'gram>,
) -> impl Iterator<Item = TraversalId> + use<'map> {
let _span = tracing::span!(tracing::Level::DEBUG, "get_incomplete").entered();
let _span = tracing::span!(DEBUG, "get_incomplete").entered();
let key = CompletionKey::new_start(term, &complete_traversal.input_range);
self.incomplete.get(&key).into_iter().flatten().cloned()
}
Expand All @@ -277,23 +298,23 @@ impl<'gram> CompletionMap<'gram> {
term: &'gram Term,
input_range: &InputRange<'gram>,
) -> impl Iterator<Item = TraversalId> + use<'map> {
let _span = tracing::span!(tracing::Level::DEBUG, "get_complete").entered();
let _span = tracing::span!(DEBUG, "get_complete").entered();
let key = CompletionKey::new_total(term, input_range);
self.complete.get(&key).into_iter().flatten().cloned()
}
pub fn insert(&mut self, traversal: &Traversal<'gram>, lhs: &'gram Term) {
let _span = tracing::span!(tracing::Level::DEBUG, "insert").entered();
let _span = tracing::span!(DEBUG, "insert").entered();
match traversal.next_unmatched() {
Some(Term::Terminal(_)) => {
// do nothing, because terminals are irrelevant to completion
}
Some(unmatched @ Term::Nonterminal(_)) => {
let key = CompletionKey::new_total(unmatched, &traversal.input_range);
self.incomplete.entry(key).or_default().insert(traversal.id);
sorted_vec_insert(self.incomplete.entry(key).or_default(), traversal.id);
}
None => {
let key = CompletionKey::new_start(lhs, &traversal.input_range);
self.complete.entry(key).or_default().insert(traversal.id);
sorted_vec_insert(self.complete.entry(key).or_default(), traversal.id);
}
}
}
Expand Down
Loading