Update documentation and terminology of runtime benchmarks

Kobzol · Kobzol · commit a987f3287a34 · 2022-10-14T09:42:10.000+02:00
diff --git a/collector/benchlib/src/benchmark.rs b/collector/benchlib/src/benchmark.rs
@@ -1,5 +1,5 @@
 use crate::cli::{parse_cli, Args, BenchmarkArgs};
-use crate::comm::messages::{BenchmarkMeasurement, BenchmarkMessage, BenchmarkStats};
+use crate::comm::messages::{BenchmarkMessage, BenchmarkResult, BenchmarkStats};
 use crate::comm::output_message;
 use crate::measure::benchmark_function;
 use crate::process::raise_process_priority;
@@ -14,9 +14,9 @@ pub fn benchmark_suite<F: FnOnce(&mut BenchmarkSuite)>(define_func: F) {
     suite.run().expect("Benchmark suite has failed");
 }
 
-/// Type-erased function that performs a benchmark.
+/// Type-erased function that executes a single benchmark.
 struct BenchmarkWrapper {
-    func: Box<dyn Fn() -> anyhow::Result<BenchmarkMeasurement>>,
+    func: Box<dyn Fn() -> anyhow::Result<BenchmarkStats>>,
 }
 
 type BenchmarkMap = HashMap<&'static str, BenchmarkWrapper>;
@@ -82,18 +82,17 @@ fn run_benchmark(args: BenchmarkArgs, benchmarks: BenchmarkMap) -> anyhow::Resul
     let mut stdout = std::io::stdout().lock();
 
     for (name, def) in items {
-        let mut measurements: Vec<BenchmarkMeasurement> =
-            Vec::with_capacity(args.iterations as usize);
+        let mut stats: Vec<BenchmarkStats> = Vec::with_capacity(args.iterations as usize);
         for i in 0..args.iterations {
-            let measurement = (def.func)()?;
-            log::info!("Benchmark (run {i}) `{name}` completed: {measurement:?}");
-            measurements.push(measurement);
+            let benchmark_stats = (def.func)()?;
+            log::info!("Benchmark (run {i}) `{name}` completed: {benchmark_stats:?}");
+            stats.push(benchmark_stats);
         }
         output_message(
             &mut stdout,
-            BenchmarkMessage::Stats(BenchmarkStats {
+            BenchmarkMessage::Result(BenchmarkResult {
                 name: name.to_string(),
-                measurements,
+                stats,
             }),
         )?;
     }
diff --git a/collector/benchlib/src/comm/messages.rs b/collector/benchlib/src/comm/messages.rs
@@ -5,19 +5,19 @@ use std::time::Duration;
 
 #[derive(Debug, serde::Serialize, serde::Deserialize)]
 pub enum BenchmarkMessage {
-    Stats(BenchmarkStats),
+    Result(BenchmarkResult),
 }
 
-/// Results of several measurements of a single benchmark.
+/// Stats gathered by several executions of a single benchmark.
 #[derive(Debug, serde::Serialize, serde::Deserialize)]
-pub struct BenchmarkStats {
+pub struct BenchmarkResult {
     pub name: String,
-    pub measurements: Vec<BenchmarkMeasurement>,
+    pub stats: Vec<BenchmarkStats>,
 }
 
-/// Results of a single benchmark execution.
+/// The stats gathered by a single benchmark execution.
 #[derive(Debug, serde::Serialize, serde::Deserialize)]
-pub struct BenchmarkMeasurement {
+pub struct BenchmarkStats {
     pub cycles: u64,
     pub instructions: u64,
     pub branch_misses: u64,
diff --git a/collector/benchlib/src/measure/perf_counter/unix.rs b/collector/benchlib/src/measure/perf_counter/unix.rs
@@ -1,5 +1,5 @@
 use crate::benchmark::black_box;
-use crate::comm::messages::BenchmarkMeasurement;
+use crate::comm::messages::BenchmarkStats;
 use perf_event::events::Hardware;
 use perf_event::{Builder, Counter, Group};
 use std::time::Instant;
@@ -18,7 +18,7 @@ struct Counters {
 pub fn benchmark_function<F: Fn() -> Bench + 'static, R, Bench: FnOnce() -> R + 'static>(
     name: &'static str,
     benchmark_constructor: F,
-) -> anyhow::Result<BenchmarkMeasurement> {
+) -> anyhow::Result<BenchmarkStats> {
     let mut group = create_group()?;
     let counters = prepare_counters(&mut group)?;
 
@@ -48,7 +48,7 @@ pub fn benchmark_function<F: Fn() -> Bench + 'static, R, Bench: FnOnce() -> R +
     // Try to avoid optimizing the result out.
     black_box(output);
 
-    let result = BenchmarkMeasurement {
+    let result = BenchmarkStats {
         cycles: measurement[&counters.cycles],
         instructions: measurement[&counters.instructions],
         branch_misses: measurement[&counters.branch_misses],
diff --git a/collector/benchlib/src/measure/perf_counter/windows.rs b/collector/benchlib/src/measure/perf_counter/windows.rs
@@ -1,5 +1,5 @@
-use crate::comm::messages::BenchmarkMeasurement;
+use crate::comm::messages::BenchmarkStats;
 
-pub fn benchmark_function<F: FnOnce() -> R, R>(_func: F) -> anyhow::Result<BenchmarkMeasurement> {
+pub fn benchmark_function<F: FnOnce() -> R, R>(_func: F) -> anyhow::Result<BenchmarkStats> {
     panic!("Runtime benchmarking is not supported on Windows");
 }
diff --git a/collector/src/runtime/benchmark.rs b/collector/src/runtime/benchmark.rs
@@ -6,21 +6,23 @@ use core::result::Result::Ok;
 use std::path::{Path, PathBuf};
 use std::process::Command;
 
+/// A binary that defines several benchmarks using the `benchmark_suite` function from `benchlib`.
 #[derive(Debug)]
-pub struct BenchmarkBinary {
-    pub path: PathBuf,
+pub struct BenchmarkSuite {
+    pub binary: PathBuf,
     pub benchmark_names: Vec<String>,
 }
 
-impl BenchmarkBinary {
+impl BenchmarkSuite {
     pub fn name(&self) -> &str {
-        self.path.file_name().unwrap().to_str().unwrap()
+        self.binary.file_name().unwrap().to_str().unwrap()
     }
 }
 
+/// A collection of benchmark suites gathered from a directory.
 #[derive(Debug)]
 pub struct BenchmarkDatabase {
-    pub binaries: Vec<BenchmarkBinary>,
+    pub suites: Vec<BenchmarkSuite>,
 }
 
 impl BenchmarkDatabase {
@@ -40,9 +42,9 @@ impl BenchmarkDatabase {
     }
 
     fn benchmark_names(&self) -> impl Iterator<Item = &str> {
-        self.binaries
+        self.suites
             .iter()
-            .flat_map(|binary| binary.benchmark_names.iter().map(|n| n.as_ref()))
+            .flat_map(|suite| suite.benchmark_names.iter().map(|n| n.as_ref()))
     }
 }
 
@@ -57,8 +59,10 @@ impl BenchmarkFilter {
     }
 }
 
-/// Parse Cargo JSON output and find all compiled binaries.
-/// Then execute each benchmark with the `list-benchmarks` command to find out its benchmark names.
+/// Parse Cargo JSON output to find all compiled binaries.
+/// We assume that each binary defines a benchmark suite using `benchlib`.
+/// We then execute each benchmark suite with the `list-benchmarks` command to find out its
+/// benchmark names.
 pub fn discover_benchmarks(cargo_stdout: &[u8]) -> anyhow::Result<BenchmarkDatabase> {
     let mut binaries = vec![];
 
@@ -75,8 +79,8 @@ pub fn discover_benchmarks(cargo_stdout: &[u8]) -> anyhow::Result<BenchmarkDatab
                                 path.display()
                             )
                         })?;
-                        binaries.push(BenchmarkBinary {
-                            path,
+                        binaries.push(BenchmarkSuite {
+                            binary: path,
                             benchmark_names: benchmarks,
                         });
                     }
@@ -86,10 +90,10 @@ pub fn discover_benchmarks(cargo_stdout: &[u8]) -> anyhow::Result<BenchmarkDatab
         }
     }
 
-    binaries.sort_unstable_by(|a, b| a.path.cmp(&b.path));
+    binaries.sort_unstable_by(|a, b| a.binary.cmp(&b.binary));
     log::debug!("Found binaries: {:?}", binaries);
 
-    Ok(BenchmarkDatabase { binaries })
+    Ok(BenchmarkDatabase { suites: binaries })
 }
 
 /// Uses the `list-benchmarks` command from `benchlib` to find the benchmark names from the given
diff --git a/collector/src/runtime/mod.rs b/collector/src/runtime/mod.rs
@@ -2,7 +2,7 @@ mod benchmark;
 
 use crate::benchmark::profile::Profile;
 use crate::toolchain::{get_local_toolchain, LocalToolchain};
-use benchlib::comm::messages::{BenchmarkMeasurement, BenchmarkMessage, BenchmarkStats};
+use benchlib::comm::messages::{BenchmarkMessage, BenchmarkResult, BenchmarkStats};
 use std::io::{BufRead, BufReader};
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
@@ -32,25 +32,25 @@ pub fn bench_runtime(
     );
 
     let mut benchmark_index = 0;
-    for binary in benchmark_db.binaries {
-        for message in execute_runtime_benchmark(&binary.path, &filter)? {
+    for binary in benchmark_db.suites {
+        for message in execute_runtime_benchmark(&binary.binary, &filter)? {
             let message = message.map_err(|err| {
                 anyhow::anyhow!(
                     "Cannot parse BenchmarkMessage from benchmark {}: {err:?}",
-                    binary.path.display()
+                    binary.binary.display()
                 )
             })?;
             match message {
-                BenchmarkMessage::Stats(stats) => {
+                BenchmarkMessage::Result(result) => {
                     benchmark_index += 1;
                     println!(
                         "Finished {}/{} ({}/{})",
                         binary.name(),
-                        stats.name,
+                        result.name,
                         benchmark_index,
                         filtered
                     );
-                    print_stats(&stats);
+                    print_stats(&result);
                 }
             }
         }
@@ -121,12 +121,12 @@ fn calculate_mean<I: Iterator<Item = f64> + Clone>(iter: I) -> f64 {
     sum / count as f64
 }
 
-fn print_stats(stats: &BenchmarkStats) {
-    fn print_metric<F: Fn(&BenchmarkMeasurement) -> u64>(stats: &BenchmarkStats, name: &str, f: F) {
-        let mean = calculate_mean(stats.measurements.iter().map(&f).map(|v| v as f64));
+fn print_stats(result: &BenchmarkResult) {
+    fn print_metric<F: Fn(&BenchmarkStats) -> u64>(result: &BenchmarkResult, name: &str, f: F) {
+        let mean = calculate_mean(result.stats.iter().map(&f).map(|v| v as f64));
         let stddev = calculate_mean(
-            stats
-                .measurements
+            result
+                .stats
                 .iter()
                 .map(&f)
                 .map(|v| (v as f64 - mean).powf(2.0)),
@@ -137,9 +137,9 @@ fn print_stats(stats: &BenchmarkStats) {
         println!("{name:>20}: {:>16} (+/- {:>8})", mean as u64, stddev as u64);
     }
 
-    print_metric(stats, "Instructions", |m| m.instructions);
-    print_metric(stats, "Cycles", |m| m.cycles);
-    print_metric(stats, "Wall time [us]", |m| m.wall_time.as_micros() as u64);
-    print_metric(stats, "Branch misses", |m| m.branch_misses);
-    print_metric(stats, "Cache misses", |m| m.cache_misses);
+    print_metric(result, "Instructions", |m| m.instructions);
+    print_metric(result, "Cycles", |m| m.cycles);
+    print_metric(result, "Wall time [us]", |m| m.wall_time.as_micros() as u64);
+    print_metric(result, "Branch misses", |m| m.branch_misses);
+    print_metric(result, "Cache misses", |m| m.cache_misses);
 }
diff --git a/docs/glossary.md b/docs/glossary.md
@@ -36,6 +36,9 @@ The following is a glossary of domain specific terminology. Although benchmarks
 * **relevant test result comparison**: a test result comparison can be significant but still not be relevant (i.e., worth paying attention to). Relevance is a factor of the test result comparison's significance and magnitude. Comparisons are considered relevant if they are significant and have at least a small magnitude .
 * **test result comparison magnitude**: how "large" the delta is between the two test result's under comparison. This is determined by the average of two factors: the absolute size of the change (i.e., a change of 5% is larger than a change of 1%) and the amount above the significance threshold (i.e., a change that is 5x the significance threshold is larger than a change 1.5x the significance threshold).
 
+## Runtime benchmarks
+* **benchmark suite**: a collection of individual benchmarks that is defined in a single binary.
+
 ## Other 
 
 * **bootstrap**: the process of building the compiler from a previous version of the compiler

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`		`-use crate::comm::messages::BenchmarkMeasurement;`
	`1`	`+use crate::comm::messages::BenchmarkStats;`
`2`	`2`
`3`		`-pub fn benchmark_function<F: FnOnce() -> R, R>(_func: F) -> anyhow::Result<BenchmarkMeasurement> {`
	`3`	`+pub fn benchmark_function<F: FnOnce() -> R, R>(_func: F) -> anyhow::Result<BenchmarkStats> {`
`4`	`4`	`panic!("Runtime benchmarking is not supported on Windows");`
`5`	`5`	`}`
Original file line number	Diff line number	Diff line change
`@@ -6,21 +6,23 @@ use core::result::Result::Ok;`
`6`	`6`	`use std::path::{Path, PathBuf};`
`7`	`7`	`use std::process::Command;`
`8`	`8`
	`9`	+/// A binary that defines several benchmarks using the `benchmark_suite` function from `benchlib`.
`9`	`10`	`#[derive(Debug)]`
`10`		`-pub struct BenchmarkBinary {`
`11`		`- pub path: PathBuf,`
	`11`	`+pub struct BenchmarkSuite {`
	`12`	`+ pub binary: PathBuf,`
`12`	`13`	`pub benchmark_names: Vec<String>,`
`13`	`14`	`}`
`14`	`15`
`15`		`-impl BenchmarkBinary {`
	`16`	`+impl BenchmarkSuite {`
`16`	`17`	`pub fn name(&self) -> &str {`
`17`		`- self.path.file_name().unwrap().to_str().unwrap()`
	`18`	`+ self.binary.file_name().unwrap().to_str().unwrap()`
`18`	`19`	`}`
`19`	`20`	`}`
`20`	`21`
	`22`	`+/// A collection of benchmark suites gathered from a directory.`
`21`	`23`	`#[derive(Debug)]`
`22`	`24`	`pub struct BenchmarkDatabase {`
`23`		`- pub binaries: Vec<BenchmarkBinary>,`
	`25`	`+ pub suites: Vec<BenchmarkSuite>,`
`24`	`26`	`}`
`25`	`27`
`26`	`28`	`impl BenchmarkDatabase {`
`@@ -40,9 +42,9 @@ impl BenchmarkDatabase {`
`40`	`42`	`}`
`41`	`43`
`42`	`44`	`fn benchmark_names(&self) -> impl Iterator<Item = &str> {`
`43`		`- self.binaries`
	`45`	`+ self.suites`
`44`	`46`	`.iter()`
`45`		`- .flat_map(\|binary\| binary.benchmark_names.iter().map(\|n\| n.as_ref()))`
	`47`	`+ .flat_map(\|suite\| suite.benchmark_names.iter().map(\|n\| n.as_ref()))`
`46`	`48`	`}`
`47`	`49`	`}`
`48`	`50`
`@@ -57,8 +59,10 @@ impl BenchmarkFilter {`
`57`	`59`	`}`
`58`	`60`	`}`
`59`	`61`
`60`		`-/// Parse Cargo JSON output and find all compiled binaries.`
`61`		-/// Then execute each benchmark with the `list-benchmarks` command to find out its benchmark names.
	`62`	`+/// Parse Cargo JSON output to find all compiled binaries.`
	`63`	+/// We assume that each binary defines a benchmark suite using `benchlib`.
	`64`	+/// We then execute each benchmark suite with the `list-benchmarks` command to find out its
	`65`	`+/// benchmark names.`
`62`	`66`	`pub fn discover_benchmarks(cargo_stdout: &[u8]) -> anyhow::Result<BenchmarkDatabase> {`
`63`	`67`	`let mut binaries = vec![];`
`64`	`68`
`@@ -75,8 +79,8 @@ pub fn discover_benchmarks(cargo_stdout: &[u8]) -> anyhow::Result<BenchmarkDatab`
`75`	`79`	`path.display()`
`76`	`80`	`)`
`77`	`81`	`})?;`
`78`		`- binaries.push(BenchmarkBinary {`
`79`		`- path,`
	`82`	`+ binaries.push(BenchmarkSuite {`
	`83`	`+ binary: path,`
`80`	`84`	`benchmark_names: benchmarks,`
`81`	`85`	`});`
`82`	`86`	`}`
`@@ -86,10 +90,10 @@ pub fn discover_benchmarks(cargo_stdout: &[u8]) -> anyhow::Result<BenchmarkDatab`
`86`	`90`	`}`
`87`	`91`	`}`
`88`	`92`
`89`		`- binaries.sort_unstable_by(\|a, b\| a.path.cmp(&b.path));`
	`93`	`+ binaries.sort_unstable_by(\|a, b\| a.binary.cmp(&b.binary));`
`90`	`94`	`log::debug!("Found binaries: {:?}", binaries);`
`91`	`95`
`92`		`- Ok(BenchmarkDatabase { binaries })`
	`96`	`+ Ok(BenchmarkDatabase { suites: binaries })`
`93`	`97`	`}`
`94`	`98`
`95`	`99`	/// Uses the `list-benchmarks` command from `benchlib` to find the benchmark names from the given