Skip to content

Commit 5d9870e

Browse files
authored
feat(cli): show parse times in testing output
1 parent 8744a4e commit 5d9870e

File tree

2 files changed

+138
-22
lines changed

2 files changed

+138
-22
lines changed

cli/src/main.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use tree_sitter_cli::{
2424
logger,
2525
parse::{self, ParseFileOptions, ParseOutput, ParseTheme},
2626
playground, query, tags,
27-
test::{self, TestOptions},
27+
test::{self, TestOptions, TestStats},
2828
test_highlight, test_tags, util, version, wasm,
2929
};
3030
use tree_sitter_config::Config;
@@ -274,6 +274,8 @@ struct Test {
274274
pub config_path: Option<PathBuf>,
275275
#[arg(long, help = "Force showing fields in test diffs")]
276276
pub show_fields: bool,
277+
#[arg(long, help = "Show parsing statistics")]
278+
pub stat: Option<TestStats>,
277279
#[arg(short, long, help = "Force rebuild the parser")]
278280
pub rebuild: bool,
279281
#[arg(long, help = "Show only the pass-fail overview tree")]
@@ -936,6 +938,7 @@ impl Test {
936938
fn run(self, mut loader: loader::Loader, current_dir: &Path) -> Result<()> {
937939
let config = Config::load(self.config_path)?;
938940
let color = env::var("NO_COLOR").map_or(true, |v| v != "1");
941+
let stat = self.stat.unwrap_or_default();
939942

940943
loader.debug_build(self.debug_build);
941944
loader.force_rebuild(self.rebuild);
@@ -959,11 +962,15 @@ impl Test {
959962
parser.set_language(language)?;
960963

961964
let test_dir = current_dir.join("test");
965+
let mut stats = parse::Stats::default();
962966

963967
// Run the corpus tests. Look for them in `test/corpus`.
964968
let test_corpus_dir = test_dir.join("corpus");
965969
if test_corpus_dir.is_dir() {
970+
let mut output = String::new();
971+
let mut rates = Vec::new();
966972
let mut opts = TestOptions {
973+
output: &mut output,
967974
path: test_corpus_dir,
968975
debug: self.debug,
969976
debug_graph: self.debug_graph,
@@ -974,11 +981,15 @@ impl Test {
974981
languages: languages.iter().map(|(l, n)| (n.as_str(), l)).collect(),
975982
color,
976983
test_num: 1,
984+
parse_rates: &mut rates,
985+
stat_display: stat,
986+
stats: &mut stats,
977987
show_fields: self.show_fields,
978988
overview_only: self.overview_only,
979989
};
980990

981991
test::run_tests_at_path(&mut parser, &mut opts)?;
992+
println!("\n{stats}");
982993
}
983994

984995
// Check that all of the queries are valid.

cli/src/test.rs

Lines changed: 126 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,29 @@
11
use std::{
22
collections::BTreeMap,
33
ffi::OsStr,
4+
fmt::Write as _,
45
fs,
56
io::{self, Write},
67
path::{Path, PathBuf},
78
str,
9+
time::Duration,
810
};
911

1012
use anstyle::{AnsiColor, Color, Style};
1113
use anyhow::{anyhow, Context, Result};
14+
use clap::ValueEnum;
1215
use indoc::indoc;
1316
use lazy_static::lazy_static;
1417
use regex::{
1518
bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder},
1619
Regex,
1720
};
1821
use similar::{ChangeTag, TextDiff};
19-
use tree_sitter::{format_sexp, Language, LogType, Parser, Query};
22+
use tree_sitter::{format_sexp, Language, LogType, Parser, Query, Tree};
2023
use walkdir::WalkDir;
2124

2225
use super::util;
26+
use crate::parse::Stats;
2327

2428
lazy_static! {
2529
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(
@@ -95,7 +99,16 @@ impl Default for TestAttributes {
9599
}
96100
}
97101

102+
#[derive(ValueEnum, Default, Copy, Clone, PartialEq, Eq)]
103+
pub enum TestStats {
104+
All,
105+
#[default]
106+
OutliersAndTotal,
107+
TotalOnly,
108+
}
109+
98110
pub struct TestOptions<'a> {
111+
pub output: &'a mut String,
99112
pub path: PathBuf,
100113
pub debug: bool,
101114
pub debug_graph: bool,
@@ -106,6 +119,11 @@ pub struct TestOptions<'a> {
106119
pub languages: BTreeMap<&'a str, &'a Language>,
107120
pub color: bool,
108121
pub test_num: usize,
122+
/// Whether a test ran for the nth line in `output`, the true parse rate, and the adjusted
123+
/// parse rate
124+
pub parse_rates: &'a mut Vec<(bool, Option<(f64, f64)>)>,
125+
pub stat_display: TestStats,
126+
pub stats: &'a mut Stats,
109127
pub show_fields: bool,
110128
pub overview_only: bool,
111129
}
@@ -138,6 +156,53 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<
138156
&mut has_parse_errors,
139157
)?;
140158

159+
let (count, total_adj_parse_time) = opts
160+
.parse_rates
161+
.iter()
162+
.flat_map(|(_, rates)| rates)
163+
.fold((0usize, 0.0f64), |(count, rate_accum), (_, adj_rate)| {
164+
(count + 1, rate_accum + adj_rate)
165+
});
166+
167+
let avg = total_adj_parse_time / count as f64;
168+
let std_dev = {
169+
let variance = opts
170+
.parse_rates
171+
.iter()
172+
.flat_map(|(_, rates)| rates)
173+
.map(|(_, rate_i)| (rate_i - avg).powi(2))
174+
.sum::<f64>()
175+
/ count as f64;
176+
variance.sqrt()
177+
};
178+
179+
for ((is_test, rates), out_line) in opts.parse_rates.iter().zip(opts.output.lines()) {
180+
let stat_display = if !is_test {
181+
// Test group, no actual parsing took place
182+
String::new()
183+
} else {
184+
match (opts.stat_display, rates) {
185+
(TestStats::TotalOnly, _) | (_, None) => String::new(),
186+
(display, Some((true_rate, adj_rate))) => {
187+
let mut stats = if display == TestStats::All {
188+
format!(" ({true_rate:.3} bytes/ms)")
189+
} else {
190+
String::new()
191+
};
192+
// 3 standard deviations below the mean, aka the "Empirical Rule"
193+
if *adj_rate < 3.0f64.mul_add(-std_dev, avg) {
194+
stats += &paint(
195+
opts.color.then_some(AnsiColor::Red),
196+
&format!(" -- Warning: Slow parse rate ({true_rate:.3} bytes/ms)"),
197+
);
198+
}
199+
stats
200+
}
201+
}
202+
};
203+
println!("{out_line}{stat_display}");
204+
}
205+
141206
parser.stop_printing_dot_graphs();
142207

143208
if failures.is_empty() {
@@ -354,24 +419,28 @@ fn run_tests(
354419
attributes_str,
355420
attributes,
356421
} => {
357-
print!("{}", " ".repeat(indent_level as usize));
422+
write!(opts.output, "{}", " ".repeat(indent_level as usize))?;
358423

359424
if attributes.skip {
360-
println!(
425+
writeln!(
426+
opts.output,
361427
"{:>3}. ⌀ {}",
362428
opts.test_num,
363429
paint(opts.color.then_some(AnsiColor::Yellow), &name),
364-
);
430+
)?;
431+
opts.parse_rates.push((true, None));
365432
opts.test_num += 1;
366433
return Ok(true);
367434
}
368435

369436
if !attributes.platform {
370-
println!(
437+
writeln!(
438+
opts.output,
371439
"{:>3}. ⌀ {}",
372440
opts.test_num,
373441
paint(opts.color.then_some(AnsiColor::Magenta), &name),
374-
);
442+
)?;
443+
opts.parse_rates.push((true, None));
375444
opts.test_num += 1;
376445
return Ok(true);
377446
}
@@ -384,15 +453,30 @@ fn run_tests(
384453
.ok_or_else(|| anyhow!("Language not found: {language_name}"))?;
385454
parser.set_language(language)?;
386455
}
456+
let start = std::time::Instant::now();
387457
let tree = parser.parse(&input, None).unwrap();
458+
{
459+
let parse_time = start.elapsed();
460+
let true_parse_rate = tree.root_node().byte_range().len() as f64
461+
/ (parse_time.as_nanos() as f64 / 1_000_000.0);
462+
let adj_parse_rate = adjusted_parse_rate(&tree, parse_time);
463+
464+
opts.parse_rates
465+
.push((true, Some((true_parse_rate, adj_parse_rate))));
466+
opts.stats.total_parses += 1;
467+
opts.stats.total_duration += parse_time;
468+
opts.stats.total_bytes += tree.root_node().byte_range().len();
469+
}
388470

389471
if attributes.error {
390472
if tree.root_node().has_error() {
391-
println!(
473+
writeln!(
474+
opts.output,
392475
"{:>3}. ✓ {}",
393476
opts.test_num,
394-
paint(opts.color.then_some(AnsiColor::Green), &name)
395-
);
477+
paint(opts.color.then_some(AnsiColor::Green), &name),
478+
)?;
479+
opts.stats.successful_parses += 1;
396480
if opts.update {
397481
let input = String::from_utf8(input.clone()).unwrap();
398482
let output = format_sexp(&output, 0);
@@ -419,11 +503,12 @@ fn run_tests(
419503
divider_delim_len,
420504
));
421505
}
422-
println!(
506+
writeln!(
507+
opts.output,
423508
"{:>3}. ✗ {}",
424509
opts.test_num,
425-
paint(opts.color.then_some(AnsiColor::Red), &name)
426-
);
510+
paint(opts.color.then_some(AnsiColor::Red), &name),
511+
)?;
427512
failures.push((
428513
name.clone(),
429514
tree.root_node().to_sexp(),
@@ -441,11 +526,13 @@ fn run_tests(
441526
}
442527

443528
if actual == output {
444-
println!(
529+
writeln!(
530+
opts.output,
445531
"{:>3}. ✓ {}",
446532
opts.test_num,
447-
paint(opts.color.then_some(AnsiColor::Green), &name)
448-
);
533+
paint(opts.color.then_some(AnsiColor::Green), &name),
534+
)?;
535+
opts.stats.successful_parses += 1;
449536
if opts.update {
450537
let input = String::from_utf8(input.clone()).unwrap();
451538
let output = format_sexp(&output, 0);
@@ -490,18 +577,20 @@ fn run_tests(
490577
header_delim_len,
491578
divider_delim_len,
492579
));
493-
println!(
580+
writeln!(
581+
opts.output,
494582
"{:>3}. ✓ {}",
495583
opts.test_num,
496584
paint(opts.color.then_some(AnsiColor::Blue), &name),
497-
);
585+
)?;
498586
}
499587
} else {
500-
println!(
588+
writeln!(
589+
opts.output,
501590
"{:>3}. ✗ {}",
502591
opts.test_num,
503592
paint(opts.color.then_some(AnsiColor::Red), &name),
504-
);
593+
)?;
505594
}
506595
failures.push((name.clone(), actual, output.clone()));
507596

@@ -583,8 +672,12 @@ fn run_tests(
583672
}
584673
if !has_printed && indent_level > 1 {
585674
has_printed = true;
586-
print!("{}", " ".repeat((indent_level - 1) as usize));
587-
println!("{name}:");
675+
writeln!(
676+
opts.output,
677+
"{}{name}:",
678+
" ".repeat((indent_level - 1) as usize)
679+
)?;
680+
opts.parse_rates.push((false, None));
588681
}
589682
if !run_tests(
590683
parser,
@@ -620,6 +713,18 @@ fn count_subtests(test_entry: &TestEntry) -> usize {
620713
}
621714
}
622715

716+
// Parse time is interpreted in ns before converting to ms to avoid truncation issues
717+
// Parse rates often have several outliers, leading to a large standard deviation. Taking
718+
// the log of these rates serves to "flatten" out the distribution, yielding a more
719+
// usable standard deviation for finding statistically significant slow parse rates
720+
// NOTE: This is just a heuristic
721+
#[must_use]
722+
pub fn adjusted_parse_rate(tree: &Tree, parse_time: Duration) -> f64 {
723+
f64::ln(
724+
tree.root_node().byte_range().len() as f64 / (parse_time.as_nanos() as f64 / 1_000_000.0),
725+
)
726+
}
727+
623728
fn write_tests(
624729
file_path: &Path,
625730
corrected_entries: &[(String, String, String, String, usize, usize)],

0 commit comments

Comments
 (0)