Skip to content

Commit b201e0f

Browse files
authored
Benchmarking-based performance improvements (#198)
1 parent b2adee3 commit b201e0f

File tree

12 files changed

+281
-75
lines changed

12 files changed

+281
-75
lines changed

benches/divan.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ mod examples {
2222
));
2323

2424
bencher.bench(|| {
25+
#[cfg(feature = "tracing")]
26+
let _span = tracing::span!(tracing::Level::DEBUG, "bench_parse_postal").entered();
2527
input.parse::<bnf::Grammar>().unwrap();
2628
});
2729
}
@@ -46,6 +48,8 @@ mod examples {
4648
let mut index = (0..random_walk_count).cycle();
4749

4850
bencher.bench_local(|| {
51+
#[cfg(feature = "tracing")]
52+
let _span = tracing::span!(tracing::Level::DEBUG, "bench_parse_postal_input").entered();
4953
let index = index.next().unwrap();
5054
let input = random_postal_strings.get(index).unwrap();
5155
postal_grammar
@@ -64,6 +68,8 @@ mod examples {
6468
grammar
6569
})
6670
.bench_refs(|grammar| {
71+
#[cfg(feature = "tracing")]
72+
let _span = tracing::span!(tracing::Level::DEBUG, "bench_generate_dna").entered();
6773
grammar.generate().unwrap();
6874
});
6975
}
@@ -90,6 +96,9 @@ mod examples {
9096
let mut index = (0..random_walk_count).cycle();
9197

9298
bencher.bench_local(|| {
99+
#[cfg(feature = "tracing")]
100+
let _span =
101+
tracing::span!(tracing::Level::DEBUG, "bench_parse_polish_calculator").entered();
93102
let index = index.next().unwrap();
94103
let input = random_walks.get(index).unwrap();
95104
polish_calc_grammar
@@ -114,6 +123,12 @@ mod examples {
114123
.with_inputs(|| rng.random_range(1..100))
115124
.count_inputs_as::<divan::counter::ItemsCount>()
116125
.bench_local_values(|parse_count| {
126+
#[cfg(feature = "tracing")]
127+
let _span = tracing::span!(
128+
tracing::Level::DEBUG,
129+
"bench_parse_infinite_nullable_grammar"
130+
)
131+
.entered();
117132
infinite_grammar
118133
.parse_input("")
119134
.take(parse_count)
@@ -132,6 +147,9 @@ mod parser_api {
132147
);
133148

134149
bencher.bench(|| {
150+
#[cfg(feature = "tracing")]
151+
let _span =
152+
tracing::span!(tracing::Level::DEBUG, "bench_build_postal_parser").entered();
135153
grammar.build_parser().unwrap();
136154
});
137155
}
@@ -148,6 +166,9 @@ mod parser_api {
148166
);
149167

150168
bencher.bench(|| {
169+
#[cfg(feature = "tracing")]
170+
let _span =
171+
tracing::span!(tracing::Level::DEBUG, "bench_build_polish_parser").entered();
151172
grammar.build_parser().unwrap();
152173
});
153174
}
@@ -173,6 +194,9 @@ mod parser_api {
173194
let mut index = (0..random_walk_count).cycle();
174195

175196
bencher.bench_local(|| {
197+
#[cfg(feature = "tracing")]
198+
let _span =
199+
tracing::span!(tracing::Level::DEBUG, "bench_parse_postal_with_parser").entered();
176200
let index = index.next().unwrap();
177201
let input = random_postal_strings.get(index).unwrap();
178202
parser.parse_input(input).for_each(divan::black_box_drop);
@@ -202,6 +226,9 @@ mod parser_api {
202226
let mut index = (0..random_walk_count).cycle();
203227

204228
bencher.bench_local(|| {
229+
#[cfg(feature = "tracing")]
230+
let _span =
231+
tracing::span!(tracing::Level::DEBUG, "bench_parse_polish_with_parser").entered();
205232
let index = index.next().unwrap();
206233
let input = random_walks.get(index).unwrap();
207234
parser.parse_input(input).for_each(divan::black_box_drop);
@@ -225,6 +252,12 @@ mod parser_api {
225252
.with_inputs(|| rng.random_range(1..100))
226253
.count_inputs_as::<divan::counter::ItemsCount>()
227254
.bench_local_values(|parse_count| {
255+
#[cfg(feature = "tracing")]
256+
let _span = tracing::span!(
257+
tracing::Level::DEBUG,
258+
"bench_parse_infinite_nullable_with_parser"
259+
)
260+
.entered();
228261
parser
229262
.parse_input("")
230263
.take(parse_count)
@@ -250,6 +283,8 @@ mod parser_api {
250283
.collect::<Vec<_>>()
251284
})
252285
.bench_local_refs(|inputs| {
286+
#[cfg(feature = "tracing")]
287+
let _span = tracing::span!(tracing::Level::DEBUG, "bench_per_input_100").entered();
253288
for input in inputs {
254289
polish_calc_grammar
255290
.parse_input(input)
@@ -277,6 +312,9 @@ mod parser_api {
277312
.collect::<Vec<_>>()
278313
})
279314
.bench_local_refs(|inputs| {
315+
#[cfg(feature = "tracing")]
316+
let _span =
317+
tracing::span!(tracing::Level::DEBUG, "bench_reuse_parser_100").entered();
280318
for input in inputs {
281319
parser.parse_input(input).for_each(divan::black_box_drop);
282320
}

src/earley/grammar.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pub(crate) struct ParseGrammar<'gram> {
2424

2525
impl<'gram, 'a> ParseGrammar<'gram> {
2626
pub fn new(grammar: &'gram crate::Grammar) -> Self {
27-
let _span = tracing::span!(tracing::Level::DEBUG, "ParseGrammar_new").entered();
27+
let _span = tracing::span!(DEBUG, "ParseGrammar_new").entered();
2828

2929
let mut productions = AppendOnlyVec::<Production, ProductionId>::new();
3030
let mut prods_by_lhs = ProdTermMap::new();

src/earley/input_range.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
1+
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
22
pub(crate) struct InputRangeOffset {
33
pub start: usize,
44
pub len: usize,
@@ -11,7 +11,7 @@ impl InputRangeOffset {
1111
}
1212

1313
/// A sliding window over the input strings being parsed.
14-
#[derive(Clone)]
14+
#[derive(Clone, Copy)]
1515
pub(crate) struct InputRange<'gram> {
1616
input: &'gram str,
1717
pub offset: InputRangeOffset,
@@ -24,6 +24,8 @@ impl<'gram> InputRange<'gram> {
2424
offset: InputRangeOffset { start: 0, len: 0 },
2525
}
2626
}
27+
/// Remaining input from the current position (slice; does not allocate).
28+
#[inline(always)]
2729
pub fn next(&self) -> &'gram str {
2830
let next_idx = self.offset.start + self.offset.len;
2931
self.input.get(next_idx..).unwrap_or("")

src/earley/mod.rs

Lines changed: 51 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ mod traversal;
44
use crate::parser::grammar::ParseGrammar;
55
use crate::{GrammarParser, ParseTree, ParseTreeNode, Term, tracing};
66
use input_range::InputRange;
7-
use std::collections::{BTreeSet, HashSet, VecDeque};
7+
use std::collections::{HashSet, VecDeque};
88
use std::rc::Rc;
99
use traversal::{TermMatch, Traversal, TraversalId, TraversalTree};
1010

@@ -13,6 +13,7 @@ pub fn parse<'gram>(
1313
input: &'gram str,
1414
starting_term: Option<&'gram Term>,
1515
) -> impl Iterator<Item = ParseTree<'gram>> {
16+
let _span = tracing::span!(DEBUG, "earley::parse").entered();
1617
ParseTreeIter::new(ParserHold::Borrowed(grammar), input, starting_term)
1718
}
1819

@@ -23,6 +24,7 @@ pub fn parse_with_parser_rc<'gram>(
2324
input: &'gram str,
2425
starting_term: Option<&'gram Term>,
2526
) -> impl Iterator<Item = ParseTree<'gram>> {
27+
let _span = tracing::span!(DEBUG, "earley::parse_with_parser_rc").entered();
2628
ParseTreeIter::new(ParserHold::Owned(parser), input, starting_term)
2729
}
2830

@@ -86,15 +88,15 @@ fn parse_tree<'gram>(
8688
let traversal = traversal_tree.get(traversal_id);
8789
grammar.get_production_by_id(traversal.production_id)
8890
};
89-
let rhs = traversal_tree
90-
.get_matched(traversal_id)
91-
.map(|term_match| match term_match {
91+
let mut rhs = Vec::with_capacity(production.rhs.terms.len());
92+
for term_match in traversal_tree.get_matched(traversal_id) {
93+
rhs.push(match term_match {
9294
TermMatch::Terminal(term) => ParseTreeNode::Terminal(term),
9395
TermMatch::Nonterminal(traversal_id) => {
9496
ParseTreeNode::Nonterminal(parse_tree(traversal_tree, grammar, *traversal_id))
9597
}
96-
})
97-
.collect::<Vec<ParseTreeNode>>();
98+
});
99+
}
98100

99101
ParseTree::new(production.lhs, rhs)
100102
}
@@ -107,52 +109,49 @@ fn earley<'gram>(
107109
completions: &mut CompletionMap<'gram>,
108110
grammar: &Rc<ParseGrammar<'gram>>,
109111
) -> Option<TraversalId> {
110-
let _span = tracing::span!(tracing::Level::DEBUG, "earley").entered();
112+
let _span = tracing::span!(DEBUG, "earley").entered();
111113
while let Some(traversal_id) = queue.pop_front() {
112114
tracing::event!(
113-
tracing::Level::TRACE,
115+
TRACE,
114116
"earley queue pop: {:#?}",
115117
traversal_tree.get(traversal_id)
116118
);
117119
let traversal = traversal_tree.get(traversal_id);
118120

119121
match traversal_tree.get_matching(traversal_id) {
120122
Some(nonterminal @ Term::Nonterminal(_)) => {
121-
let _span = tracing::span!(tracing::Level::DEBUG, "Predict").entered();
123+
let _span = tracing::span!(DEBUG, "Predict").entered();
122124

123125
let lhs = grammar.get_production_by_id(traversal.production_id).lhs;
124126

125127
completions.insert(traversal, lhs);
126128

127-
let input_range = traversal.input_range.clone();
129+
let input_range = traversal.input_range;
128130

129131
for production in grammar.get_productions_by_lhs(nonterminal) {
130132
let predicted = traversal_tree.predict(production, &input_range);
131-
tracing::event!(tracing::Level::TRACE, "predicted: {predicted:#?}");
133+
tracing::event!(TRACE, "predicted: {predicted:#?}");
132134
queue.push_back(predicted);
133135
}
134136

135137
for completed in completions.get_complete(nonterminal, &input_range) {
136138
let term_match = TermMatch::Nonterminal(completed);
137139
let prior_completed = traversal_tree.match_term(traversal_id, term_match);
138-
tracing::event!(
139-
tracing::Level::TRACE,
140-
"prior_completed: {prior_completed:#?}"
141-
);
140+
tracing::event!(TRACE, "prior_completed: {prior_completed:#?}");
142141
queue.push_back(prior_completed);
143142
}
144143
}
145144
Some(Term::Terminal(term)) => {
146-
let _span = tracing::span!(tracing::Level::DEBUG, "Scan").entered();
145+
let _span = tracing::span!(DEBUG, "Scan").entered();
147146
if traversal.input_range.next().starts_with(term) {
148147
let term_match = TermMatch::Terminal(term);
149148
let scanned = traversal_tree.match_term(traversal_id, term_match);
150-
tracing::event!(tracing::Level::TRACE, "scanned: {scanned:#?}");
149+
tracing::event!(TRACE, "scanned: {scanned:#?}");
151150
queue.push_back(scanned);
152151
}
153152
}
154153
None => {
155-
let _span = tracing::span!(tracing::Level::DEBUG, "Complete").entered();
154+
let _span = tracing::span!(DEBUG, "Complete").entered();
156155

157156
let is_full_traversal =
158157
traversal.is_starting && traversal.input_range.is_complete();
@@ -164,7 +163,7 @@ fn earley<'gram>(
164163
let term_match = TermMatch::Nonterminal(traversal_id);
165164
let completed = traversal_tree.match_term(incomplete_traversal_id, term_match);
166165

167-
tracing::event!(tracing::Level::TRACE, "completed: {completed:#?}");
166+
tracing::event!(TRACE, "completed: {completed:#?}");
168167
queue.push_back(completed);
169168
}
170169

@@ -192,10 +191,11 @@ impl<'gram> ParseTreeIter<'gram> {
192191
input: &'gram str,
193192
starting_term: Option<&'gram Term>,
194193
) -> Self {
194+
let _span = tracing::span!(DEBUG, "ParseTreeIter::new").entered();
195195
let input_range = InputRange::new(input);
196196
let mut traversal_tree = TraversalTree::default();
197197
let mut queue = TraversalQueue::default();
198-
let completions = CompletionMap::default();
198+
let completions = CompletionMap::with_capacity(32, 32);
199199
let parser_ref = parser.as_ref();
200200
let starting_term = starting_term.unwrap_or(parser_ref.starting_term);
201201

@@ -227,9 +227,9 @@ impl<'gram> Iterator for ParseTreeIter<'gram> {
227227
let parse_grammar = &parser.as_ref().parse_grammar;
228228

229229
earley(queue, traversal_tree, completions, parse_grammar).map(|traversal_id| {
230-
let _span = tracing::span!(tracing::Level::DEBUG, "next_parse_tree").entered();
230+
let _span = tracing::span!(DEBUG, "next_parse_tree").entered();
231231
let parse_tree = parse_tree(traversal_tree, parse_grammar, traversal_id);
232-
tracing::event!(tracing::Level::TRACE, "\n{parse_tree}");
232+
tracing::event!(TRACE, "\n{parse_tree}");
233233
parse_tree
234234
})
235235
}
@@ -256,19 +256,40 @@ impl<'gram> CompletionKey<'gram> {
256256
}
257257
}
258258

259-
#[derive(Debug, Default)]
259+
/// Insert into a sorted Vec; no-op if already present. Keeps iteration order stable (same as `BTreeSet`).
260+
fn sorted_vec_insert(vec: &mut Vec<TraversalId>, id: TraversalId) {
261+
match vec.binary_search(&id) {
262+
Ok(_) => {}
263+
Err(i) => vec.insert(i, id),
264+
}
265+
}
266+
267+
#[derive(Debug)]
260268
pub(crate) struct CompletionMap<'gram> {
261-
incomplete: crate::HashMap<CompletionKey<'gram>, BTreeSet<TraversalId>>,
262-
complete: crate::HashMap<CompletionKey<'gram>, BTreeSet<TraversalId>>,
269+
incomplete: crate::HashMap<CompletionKey<'gram>, Vec<TraversalId>>,
270+
complete: crate::HashMap<CompletionKey<'gram>, Vec<TraversalId>>,
271+
}
272+
273+
impl<'gram> Default for CompletionMap<'gram> {
274+
fn default() -> Self {
275+
Self::with_capacity(0, 0)
276+
}
263277
}
264278

265279
impl<'gram> CompletionMap<'gram> {
280+
/// Create with reserved capacity to reduce rehashing during parsing.
281+
pub fn with_capacity(incomplete: usize, complete: usize) -> Self {
282+
Self {
283+
incomplete: crate::HashMap::with_capacity(incomplete),
284+
complete: crate::HashMap::with_capacity(complete),
285+
}
286+
}
266287
pub fn get_incomplete<'map>(
267288
&'map self,
268289
term: &'gram Term,
269290
complete_traversal: &Traversal<'gram>,
270291
) -> impl Iterator<Item = TraversalId> + use<'map> {
271-
let _span = tracing::span!(tracing::Level::DEBUG, "get_incomplete").entered();
292+
let _span = tracing::span!(DEBUG, "get_incomplete").entered();
272293
let key = CompletionKey::new_start(term, &complete_traversal.input_range);
273294
self.incomplete.get(&key).into_iter().flatten().cloned()
274295
}
@@ -277,23 +298,23 @@ impl<'gram> CompletionMap<'gram> {
277298
term: &'gram Term,
278299
input_range: &InputRange<'gram>,
279300
) -> impl Iterator<Item = TraversalId> + use<'map> {
280-
let _span = tracing::span!(tracing::Level::DEBUG, "get_complete").entered();
301+
let _span = tracing::span!(DEBUG, "get_complete").entered();
281302
let key = CompletionKey::new_total(term, input_range);
282303
self.complete.get(&key).into_iter().flatten().cloned()
283304
}
284305
pub fn insert(&mut self, traversal: &Traversal<'gram>, lhs: &'gram Term) {
285-
let _span = tracing::span!(tracing::Level::DEBUG, "insert").entered();
306+
let _span = tracing::span!(DEBUG, "insert").entered();
286307
match traversal.next_unmatched() {
287308
Some(Term::Terminal(_)) => {
288309
// do nothing, because terminals are irrelevant to completion
289310
}
290311
Some(unmatched @ Term::Nonterminal(_)) => {
291312
let key = CompletionKey::new_total(unmatched, &traversal.input_range);
292-
self.incomplete.entry(key).or_default().insert(traversal.id);
313+
sorted_vec_insert(self.incomplete.entry(key).or_default(), traversal.id);
293314
}
294315
None => {
295316
let key = CompletionKey::new_start(lhs, &traversal.input_range);
296-
self.complete.entry(key).or_default().insert(traversal.id);
317+
sorted_vec_insert(self.complete.entry(key).or_default(), traversal.id);
297318
}
298319
}
299320
}

0 commit comments

Comments
 (0)