Skip to content

Commit 372c47a

Browse files
authored
Record and Use Engine Flags in Summarize/Effect-Size Tools (#293)
* Allow benchmark names to be overriden This can be useful when the engine filename is not useful or when wanting a concise way to differentiate benchmark runs that may use the same engine but with different flags. * Record engine_flags with measurement data Previously, only the engine name/path was stored for each measurement. The flags used represents valuable information for both understanding how a historical run was performed as well as determining how different configurations impact performance for the same engine. * Update summarize/effect-size to use engine flags Previously, recordings were updated to capture engine flag information. This change updates the code used for summaries and effect-size analysis to allow for comparing engine/flag combinations against each other which is useful for a class of comparisons. * Fix issues raised by the tests, improve coverage * Use single field for sightglass_data::Engine This simplifies consuming and building measurement and other related structures but at the cost of somewhat more complex serialization. This is largely due to our use of CSV as an input/output format and rust-csv's longstanding inability to handle flattened structures. See BurntSushi/rust-csv#98
1 parent c35548e commit 372c47a

File tree

12 files changed

+480
-87
lines changed

12 files changed

+480
-87
lines changed

crates/analysis/src/effect_size.rs

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::keys::KeyBuilder;
22
use anyhow::Result;
3-
use sightglass_data::{EffectSize, Measurement, Phase, Summary};
3+
use sightglass_data::{EffectSize, Engine, Measurement, Phase, Summary};
44
use std::{collections::BTreeSet, io::Write};
55

66
/// Find the effect size (and confidence interval) of between two different
@@ -25,7 +25,10 @@ pub fn calculate<'a>(
2525
significance_level,
2626
);
2727

28-
let keys = KeyBuilder::all().engine(false).keys(measurements);
28+
let keys = KeyBuilder::all()
29+
.engine(false)
30+
.engine_flags(false)
31+
.keys(measurements);
2932
let mut results = Vec::with_capacity(keys.len());
3033

3134
for key in keys {
@@ -46,12 +49,12 @@ pub fn calculate<'a>(
4649

4750
let a: behrens_fisher::Stats = key_measurements
4851
.iter()
49-
.filter(|m| m.engine.as_ref() == engine_a)
52+
.filter(|m| &m.engine == engine_a)
5053
.map(|m| m.count as f64)
5154
.collect();
5255
let b: behrens_fisher::Stats = key_measurements
5356
.iter()
54-
.filter(|m| m.engine.as_ref() == engine_b)
57+
.filter(|m| &m.engine == engine_b)
5558
.map(|m| m.count as f64)
5659
.collect();
5760

@@ -101,21 +104,12 @@ pub fn write(
101104
writeln!(output_file)?;
102105

103106
// For readability, trim the shared prefix from our two engine names.
104-
let end_of_shared_prefix = effect_size
105-
.a_engine
106-
.char_indices()
107-
.zip(effect_size.b_engine.char_indices())
108-
.find_map(|((i, a), (j, b))| {
109-
if a == b {
110-
None
111-
} else {
112-
debug_assert_eq!(i, j);
113-
Some(i)
114-
}
115-
})
116-
.unwrap_or(0);
117-
let a_engine = &effect_size.a_engine[end_of_shared_prefix..];
118-
let b_engine = &effect_size.b_engine[end_of_shared_prefix..];
107+
//
108+
// Furthermore, there are a few special cases:
109+
// 1. If the engines are the same, show just the flags.
110+
// 2. If not, show the computed full label with common prefix removed.
111+
let (a_eng_label, b_eng_label) =
112+
effect_size.a_engine.relative_labels(&effect_size.b_engine);
119113

120114
if effect_size.is_significant() {
121115
writeln!(
@@ -132,9 +126,7 @@ pub fn write(
132126
let ratio_ci = effect_size.half_width_confidence_interval / effect_size.a_mean;
133127
writeln!(
134128
output_file,
135-
" {a_engine} is {ratio_min:.2}x to {ratio_max:.2}x faster than {b_engine}!",
136-
a_engine = a_engine,
137-
b_engine = b_engine,
129+
" {a_eng_label} is {ratio_min:.2}x to {ratio_max:.2}x faster than {b_eng_label}!",
138130
ratio_min = ratio - ratio_ci,
139131
ratio_max = ratio + ratio_ci,
140132
)?;
@@ -143,9 +135,7 @@ pub fn write(
143135
let ratio_ci = effect_size.half_width_confidence_interval / effect_size.b_mean;
144136
writeln!(
145137
output_file,
146-
" {b_engine} is {ratio_min:.2}x to {ratio_max:.2}x faster than {a_engine}!",
147-
a_engine = a_engine,
148-
b_engine = b_engine,
138+
" {b_eng_label} is {ratio_min:.2}x to {ratio_max:.2}x faster than {a_eng_label}!",
149139
ratio_min = ratio - ratio_ci,
150140
ratio_max = ratio + ratio_ci,
151141
)?;
@@ -155,13 +145,13 @@ pub fn write(
155145
}
156146
writeln!(output_file)?;
157147

158-
let get_summary = |engine: &str, wasm: &str, phase: Phase, event: &str| {
148+
let get_summary = |engine: &Engine, wasm: &str, phase: Phase, event: &str| {
159149
// TODO this sorting is not using `arch` which is not guaranteed to be the same in
160150
// result sets; potentially this could re-use `Key` functionality.
161151
summaries
162152
.iter()
163153
.find(|s| {
164-
s.engine == engine && s.wasm == wasm && s.phase == phase && s.event == event
154+
&s.engine == engine && s.wasm == wasm && s.phase == phase && s.event == event
165155
})
166156
.unwrap()
167157
};
@@ -175,7 +165,7 @@ pub fn write(
175165
writeln!(
176166
output_file,
177167
" [{} {:.2} {}] {}",
178-
a_summary.min, a_summary.mean, a_summary.max, a_engine,
168+
a_summary.min, a_summary.mean, a_summary.max, a_eng_label,
179169
)?;
180170

181171
let b_summary = get_summary(
@@ -187,7 +177,7 @@ pub fn write(
187177
writeln!(
188178
output_file,
189179
" [{} {:.2} {}] {}",
190-
b_summary.min, b_summary.mean, b_summary.max, b_engine,
180+
b_summary.min, b_summary.mean, b_summary.max, b_eng_label,
191181
)?;
192182
}
193183

crates/analysis/src/keys.rs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
use sightglass_data::{Measurement, Phase};
1+
use sightglass_data::{Engine, Measurement, Phase};
22
use std::{borrow::Cow, collections::BTreeSet};
33

44
/// A builder for finding keys in a set of measurements.
55
#[derive(Copy, Clone)]
66
pub struct KeyBuilder {
77
arch: bool,
88
engine: bool,
9+
engine_flags: bool,
910
wasm: bool,
1011
phase: bool,
1112
event: bool,
@@ -20,6 +21,7 @@ impl KeyBuilder {
2021
wasm: true,
2122
phase: true,
2223
event: true,
24+
engine_flags: true,
2325
}
2426
}
2527

@@ -31,6 +33,7 @@ impl KeyBuilder {
3133
wasm: false,
3234
phase: false,
3335
event: false,
36+
engine_flags: false,
3437
}
3538
}
3639

@@ -52,6 +55,12 @@ impl KeyBuilder {
5255
self
5356
}
5457

58+
/// Whether to group keys by engine flags or not.
59+
pub fn engine_flags(mut self, engine_flags: bool) -> Self {
60+
self.engine_flags = engine_flags;
61+
self
62+
}
63+
5564
/// Whether to group keys by phase or not.
5665
pub fn phase(mut self, phase: bool) -> Self {
5766
self.phase = phase;
@@ -82,10 +91,10 @@ impl KeyBuilder {
8291
}
8392

8493
/// A key for grouping measurements together.
85-
#[derive(PartialOrd, Ord, PartialEq, Eq, Hash)]
94+
#[derive(PartialOrd, Ord, PartialEq, Eq, Hash, Debug)]
8695
pub struct Key<'a> {
8796
pub arch: Option<Cow<'a, str>>,
88-
pub engine: Option<Cow<'a, str>>,
97+
pub engine: Option<Engine<'a>>,
8998
pub wasm: Option<Cow<'a, str>>,
9099
pub phase: Option<Phase>,
91100
pub event: Option<Cow<'a, str>>,
@@ -111,7 +120,10 @@ mod tests {
111120
fn matching_fields() {
112121
let key = Key {
113122
arch: Some("x86".into()),
114-
engine: Some("wasmtime".into()),
123+
engine: Some(Engine {
124+
name: "wasmtime".into(),
125+
flags: None,
126+
}),
115127
wasm: Some("bench.wasm".into()),
116128
phase: Some(Phase::Compilation),
117129
event: Some("cycles".into()),

crates/analysis/src/summarize.rs

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use anyhow::Result;
33
use sightglass_data::{Measurement, Summary};
44
use std::io::Write;
55

6-
/// Summarize measurements grouped by: architecture, engine, benchmark file, phase and event.
6+
/// Summarize measurements grouped by: architecture, engine, flags, benchmark file, phase and event.
77
pub fn calculate<'a>(measurements: &[Measurement<'a>]) -> Vec<Summary<'a>> {
88
let mut summaries = Vec::new();
99
for k in KeyBuilder::all().keys(measurements) {
@@ -106,7 +106,7 @@ pub fn write(mut summaries: Vec<Summary<'_>>, output_file: &mut dyn Write) -> Re
106106
#[cfg(test)]
107107
mod tests {
108108
use super::*;
109-
use sightglass_data::Phase;
109+
use sightglass_data::{Engine, Phase};
110110

111111
#[test]
112112
fn simple_statistics() {
@@ -164,4 +164,33 @@ mod tests {
164164

165165
assert_eq!(calculate(&measurements).len(), 2);
166166
}
167+
168+
#[test]
169+
fn differing_engine_flags() {
170+
use std::borrow::Cow;
171+
172+
fn measurement<'a>(flags: Option<Cow<'a, str>>, count: u64) -> Measurement<'a> {
173+
Measurement {
174+
arch: "x86".into(),
175+
engine: Engine {
176+
name: "wasmtime".into(),
177+
flags,
178+
},
179+
wasm: "bench.wasm".into(),
180+
process: 42,
181+
iteration: 0,
182+
phase: Phase::Execution,
183+
event: "cycles".into(),
184+
count,
185+
}
186+
}
187+
let measurements = vec![
188+
measurement(Some("-Wfoo=bar".into()), 0),
189+
measurement(Some("-Wfoo=bar".into()), 1),
190+
measurement(Some("-Wdead=beeef".into()), 2),
191+
measurement(None, 3),
192+
];
193+
194+
assert_eq!(calculate(&measurements).len(), 3);
195+
}
167196
}

crates/cli/src/benchmark.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ pub struct BenchmarkCommand {
5757
#[structopt(long = "processes", default_value = "10", value_name = "PROCESSES")]
5858
processes: usize,
5959

60+
/// Override the "engine" name; this is useful if running experiments that might
61+
/// not have a differentiating engine name (e.g. if customizing the flags).
62+
///
63+
/// If multiple engines are provided, the order of names provided here should
64+
/// match the order of the engines specified.
65+
#[structopt(long = "name", short = "n")]
66+
names: Option<Vec<String>>,
67+
6068
/// How many times should we run a benchmark in a single process?
6169
#[structopt(
6270
long = "iterations-per-process",
@@ -166,14 +174,19 @@ impl BenchmarkCommand {
166174
.collect();
167175
let mut all_measurements = vec![];
168176

169-
for engine in &self.engines {
170-
let engine_path = check_engine_path(engine)?;
177+
for (i, engine_name) in self.engines.iter().enumerate() {
178+
let engine_path = check_engine_path(engine_name)?;
179+
let engine_name = self
180+
.names
181+
.as_ref()
182+
.and_then(|names| names.get(i).map(|s| s.as_str()))
183+
.unwrap_or(engine_name);
171184
log::info!("Using benchmark engine: {}", engine_path.display());
172185
let lib = unsafe { libloading::Library::new(&engine_path)? };
173186
let mut bench_api = unsafe { BenchApi::new(&lib)? };
174187

175188
for wasm_file in &wasm_files {
176-
log::info!("Using Wasm benchmark: {}", wasm_file);
189+
log::info!("Using Wasm benchmark: {wasm_file}");
177190

178191
// Use the provided --working-dir, otherwise find the Wasm file's parent directory.
179192
let working_dir = self.get_working_directory(&wasm_file)?;
@@ -196,6 +209,10 @@ impl BenchmarkCommand {
196209
let stderr = Path::new(&stderr);
197210
let stdin = None;
198211

212+
let engine = sightglass_data::Engine {
213+
name: engine_name.into(),
214+
flags: self.engine_flags.as_ref().map(|ef| ef.into()),
215+
};
199216
let mut measurements = Measurements::new(this_arch(), engine, wasm_file);
200217
let mut measure = if self.measures.len() <= 1 {
201218
let measure = self.measures.first().unwrap_or(&MeasureType::Cycles);
@@ -488,10 +505,10 @@ fn display_summaries(measurements: &[Measurement<'_>], output_file: &mut dyn Wri
488505
// engine's dylib.
489506
pub fn check_engine_path(engine: &str) -> Result<PathBuf> {
490507
if Path::new(engine).exists() {
491-
log::debug!("Using engine path: {}", engine);
508+
log::debug!("Using engine path: {engine}");
492509
Ok(PathBuf::from(engine))
493510
} else {
494-
Err(anyhow!("invalid path to engine: {}", engine))
511+
Err(anyhow!("invalid path to engine: {engine}"))
495512
}
496513
}
497514

crates/cli/tests/all/benchmark.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,11 +117,13 @@ fn benchmark_csv() {
117117

118118
assert
119119
.stdout(
120-
predicate::str::starts_with("arch,engine,wasm,process,iteration,phase,event,count\n")
121-
.and(predicate::str::contains(benchmark("noop")))
122-
.and(predicate::str::contains("Compilation"))
123-
.and(predicate::str::contains("Instantiation"))
124-
.and(predicate::str::contains("Execution")),
120+
predicate::str::starts_with(
121+
"arch,engine,engine_flags,wasm,process,iteration,phase,event,count\n",
122+
)
123+
.and(predicate::str::contains(benchmark("noop")))
124+
.and(predicate::str::contains("Compilation"))
125+
.and(predicate::str::contains("Instantiation"))
126+
.and(predicate::str::contains("Execution")),
125127
)
126128
.success();
127129
}

0 commit comments

Comments
 (0)