11use std:: {
22 collections:: BTreeMap ,
33 ffi:: OsStr ,
4+ fmt:: Write as _,
45 fs,
56 io:: { self , Write } ,
67 path:: { Path , PathBuf } ,
78 str,
9+ time:: Duration ,
810} ;
911
1012use anstyle:: { AnsiColor , Color , Style } ;
1113use anyhow:: { anyhow, Context , Result } ;
14+ use clap:: ValueEnum ;
1215use indoc:: indoc;
1316use lazy_static:: lazy_static;
1417use regex:: {
1518 bytes:: { Regex as ByteRegex , RegexBuilder as ByteRegexBuilder } ,
1619 Regex ,
1720} ;
1821use similar:: { ChangeTag , TextDiff } ;
19- use tree_sitter:: { format_sexp, Language , LogType , Parser , Query } ;
22+ use tree_sitter:: { format_sexp, Language , LogType , Parser , Query , Tree } ;
2023use walkdir:: WalkDir ;
2124
2225use super :: util;
26+ use crate :: parse:: Stats ;
2327
2428lazy_static ! {
2529 static ref HEADER_REGEX : ByteRegex = ByteRegexBuilder :: new(
@@ -95,7 +99,16 @@ impl Default for TestAttributes {
9599 }
96100}
97101
102+ #[ derive( ValueEnum , Default , Copy , Clone , PartialEq , Eq ) ]
103+ pub enum TestStats {
104+ All ,
105+ #[ default]
106+ OutliersAndTotal ,
107+ TotalOnly ,
108+ }
109+
98110pub struct TestOptions < ' a > {
111+ pub output : & ' a mut String ,
99112 pub path : PathBuf ,
100113 pub debug : bool ,
101114 pub debug_graph : bool ,
@@ -106,6 +119,11 @@ pub struct TestOptions<'a> {
106119 pub languages : BTreeMap < & ' a str , & ' a Language > ,
107120 pub color : bool ,
108121 pub test_num : usize ,
122+ /// Whether a test ran for the nth line in `output`, the true parse rate, and the adjusted
123+ /// parse rate
124+ pub parse_rates : & ' a mut Vec < ( bool , Option < ( f64 , f64 ) > ) > ,
125+ pub stat_display : TestStats ,
126+ pub stats : & ' a mut Stats ,
109127 pub show_fields : bool ,
110128 pub overview_only : bool ,
111129}
@@ -138,6 +156,53 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<
138156 & mut has_parse_errors,
139157 ) ?;
140158
159+ let ( count, total_adj_parse_time) = opts
160+ . parse_rates
161+ . iter ( )
162+ . flat_map ( |( _, rates) | rates)
163+ . fold ( ( 0usize , 0.0f64 ) , |( count, rate_accum) , ( _, adj_rate) | {
164+ ( count + 1 , rate_accum + adj_rate)
165+ } ) ;
166+
167+ let avg = total_adj_parse_time / count as f64 ;
168+ let std_dev = {
169+ let variance = opts
170+ . parse_rates
171+ . iter ( )
172+ . flat_map ( |( _, rates) | rates)
173+ . map ( |( _, rate_i) | ( rate_i - avg) . powi ( 2 ) )
174+ . sum :: < f64 > ( )
175+ / count as f64 ;
176+ variance. sqrt ( )
177+ } ;
178+
179+ for ( ( is_test, rates) , out_line) in opts. parse_rates . iter ( ) . zip ( opts. output . lines ( ) ) {
180+ let stat_display = if !is_test {
181+ // Test group, no actual parsing took place
182+ String :: new ( )
183+ } else {
184+ match ( opts. stat_display , rates) {
185+ ( TestStats :: TotalOnly , _) | ( _, None ) => String :: new ( ) ,
186+ ( display, Some ( ( true_rate, adj_rate) ) ) => {
187+ let mut stats = if display == TestStats :: All {
188+ format ! ( " ({true_rate:.3} bytes/ms)" )
189+ } else {
190+ String :: new ( )
191+ } ;
192+ // 3 standard deviations below the mean, aka the "Empirical Rule"
193+ if * adj_rate < 3.0f64 . mul_add ( -std_dev, avg) {
194+ stats += & paint (
195+ opts. color . then_some ( AnsiColor :: Red ) ,
196+ & format ! ( " -- Warning: Slow parse rate ({true_rate:.3} bytes/ms)" ) ,
197+ ) ;
198+ }
199+ stats
200+ }
201+ }
202+ } ;
203+ println ! ( "{out_line}{stat_display}" ) ;
204+ }
205+
141206 parser. stop_printing_dot_graphs ( ) ;
142207
143208 if failures. is_empty ( ) {
@@ -354,24 +419,28 @@ fn run_tests(
354419 attributes_str,
355420 attributes,
356421 } => {
357- print ! ( "{}" , " " . repeat( indent_level as usize ) ) ;
422+ write ! ( opts . output , "{}" , " " . repeat( indent_level as usize ) ) ? ;
358423
359424 if attributes. skip {
360- println ! (
425+ writeln ! (
426+ opts. output,
361427 "{:>3}. ⌀ {}" ,
362428 opts. test_num,
363429 paint( opts. color. then_some( AnsiColor :: Yellow ) , & name) ,
364- ) ;
430+ ) ?;
431+ opts. parse_rates . push ( ( true , None ) ) ;
365432 opts. test_num += 1 ;
366433 return Ok ( true ) ;
367434 }
368435
369436 if !attributes. platform {
370- println ! (
437+ writeln ! (
438+ opts. output,
371439 "{:>3}. ⌀ {}" ,
372440 opts. test_num,
373441 paint( opts. color. then_some( AnsiColor :: Magenta ) , & name) ,
374- ) ;
442+ ) ?;
443+ opts. parse_rates . push ( ( true , None ) ) ;
375444 opts. test_num += 1 ;
376445 return Ok ( true ) ;
377446 }
@@ -384,15 +453,30 @@ fn run_tests(
384453 . ok_or_else ( || anyhow ! ( "Language not found: {language_name}" ) ) ?;
385454 parser. set_language ( language) ?;
386455 }
456+ let start = std:: time:: Instant :: now ( ) ;
387457 let tree = parser. parse ( & input, None ) . unwrap ( ) ;
458+ {
459+ let parse_time = start. elapsed ( ) ;
460+ let true_parse_rate = tree. root_node ( ) . byte_range ( ) . len ( ) as f64
461+ / ( parse_time. as_nanos ( ) as f64 / 1_000_000.0 ) ;
462+ let adj_parse_rate = adjusted_parse_rate ( & tree, parse_time) ;
463+
464+ opts. parse_rates
465+ . push ( ( true , Some ( ( true_parse_rate, adj_parse_rate) ) ) ) ;
466+ opts. stats . total_parses += 1 ;
467+ opts. stats . total_duration += parse_time;
468+ opts. stats . total_bytes += tree. root_node ( ) . byte_range ( ) . len ( ) ;
469+ }
388470
389471 if attributes. error {
390472 if tree. root_node ( ) . has_error ( ) {
391- println ! (
473+ writeln ! (
474+ opts. output,
392475 "{:>3}. ✓ {}" ,
393476 opts. test_num,
394- paint( opts. color. then_some( AnsiColor :: Green ) , & name)
395- ) ;
477+ paint( opts. color. then_some( AnsiColor :: Green ) , & name) ,
478+ ) ?;
479+ opts. stats . successful_parses += 1 ;
396480 if opts. update {
397481 let input = String :: from_utf8 ( input. clone ( ) ) . unwrap ( ) ;
398482 let output = format_sexp ( & output, 0 ) ;
@@ -419,11 +503,12 @@ fn run_tests(
419503 divider_delim_len,
420504 ) ) ;
421505 }
422- println ! (
506+ writeln ! (
507+ opts. output,
423508 "{:>3}. ✗ {}" ,
424509 opts. test_num,
425- paint( opts. color. then_some( AnsiColor :: Red ) , & name)
426- ) ;
510+ paint( opts. color. then_some( AnsiColor :: Red ) , & name) ,
511+ ) ? ;
427512 failures. push ( (
428513 name. clone ( ) ,
429514 tree. root_node ( ) . to_sexp ( ) ,
@@ -441,11 +526,13 @@ fn run_tests(
441526 }
442527
443528 if actual == output {
444- println ! (
529+ writeln ! (
530+ opts. output,
445531 "{:>3}. ✓ {}" ,
446532 opts. test_num,
447- paint( opts. color. then_some( AnsiColor :: Green ) , & name)
448- ) ;
533+ paint( opts. color. then_some( AnsiColor :: Green ) , & name) ,
534+ ) ?;
535+ opts. stats . successful_parses += 1 ;
449536 if opts. update {
450537 let input = String :: from_utf8 ( input. clone ( ) ) . unwrap ( ) ;
451538 let output = format_sexp ( & output, 0 ) ;
@@ -490,18 +577,20 @@ fn run_tests(
490577 header_delim_len,
491578 divider_delim_len,
492579 ) ) ;
493- println ! (
580+ writeln ! (
581+ opts. output,
494582 "{:>3}. ✓ {}" ,
495583 opts. test_num,
496584 paint( opts. color. then_some( AnsiColor :: Blue ) , & name) ,
497- ) ;
585+ ) ? ;
498586 }
499587 } else {
500- println ! (
588+ writeln ! (
589+ opts. output,
501590 "{:>3}. ✗ {}" ,
502591 opts. test_num,
503592 paint( opts. color. then_some( AnsiColor :: Red ) , & name) ,
504- ) ;
593+ ) ? ;
505594 }
506595 failures. push ( ( name. clone ( ) , actual, output. clone ( ) ) ) ;
507596
@@ -583,8 +672,12 @@ fn run_tests(
583672 }
584673 if !has_printed && indent_level > 1 {
585674 has_printed = true ;
586- print ! ( "{}" , " " . repeat( ( indent_level - 1 ) as usize ) ) ;
587- println ! ( "{name}:" ) ;
675+ writeln ! (
676+ opts. output,
677+ "{}{name}:" ,
678+ " " . repeat( ( indent_level - 1 ) as usize )
679+ ) ?;
680+ opts. parse_rates . push ( ( false , None ) ) ;
588681 }
589682 if !run_tests (
590683 parser,
@@ -620,6 +713,18 @@ fn count_subtests(test_entry: &TestEntry) -> usize {
620713 }
621714}
622715
716+ // Parse time is interpreted in ns before converting to ms to avoid truncation issues
717+ // Parse rates often have several outliers, leading to a large standard deviation. Taking
718+ // the log of these rates serves to "flatten" out the distribution, yielding a more
719+ // usable standard deviation for finding statistically significant slow parse rates
720+ // NOTE: This is just a heuristic
721+ #[ must_use]
722+ pub fn adjusted_parse_rate ( tree : & Tree , parse_time : Duration ) -> f64 {
723+ f64:: ln (
724+ tree. root_node ( ) . byte_range ( ) . len ( ) as f64 / ( parse_time. as_nanos ( ) as f64 / 1_000_000.0 ) ,
725+ )
726+ }
727+
623728fn write_tests (
624729 file_path : & Path ,
625730 corrected_entries : & [ ( String , String , String , String , usize , usize ) ] ,
0 commit comments