Skip to content

Commit 989927e

Browse files
committed
Update summarize/effect-size to use engine flags
Previously, recordings were updated to capture engine flag information. This change updates the code used for summaries and effect-size analysis to allow for comparing engine/flag combinations against each other which is useful for a class of comparisons.
1 parent 9663a64 commit 989927e

File tree

4 files changed

+136
-35
lines changed

4 files changed

+136
-35
lines changed

crates/analysis/src/effect_size.rs

Lines changed: 86 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::keys::KeyBuilder;
22
use anyhow::Result;
33
use sightglass_data::{EffectSize, Measurement, Phase, Summary};
4-
use std::{collections::BTreeSet, io::Write};
4+
use std::{borrow::Cow, collections::BTreeSet, io::Write};
55

66
/// Find the effect size (and confidence interval) of between two different
77
/// engines (i.e. two different commits of Wasmtime).
@@ -25,14 +25,20 @@ pub fn calculate<'a>(
2525
significance_level,
2626
);
2727

28-
let keys = KeyBuilder::all().engine(false).keys(measurements);
28+
let keys = KeyBuilder::all()
29+
.engine(false)
30+
.engine_flags(false)
31+
.keys(measurements);
2932
let mut results = Vec::with_capacity(keys.len());
3033

3134
for key in keys {
3235
let key_measurements: Vec<_> = measurements.iter().filter(|m| key.matches(m)).collect();
3336

3437
// NB: `BTreeSet` so they're always sorted.
35-
let engines: BTreeSet<_> = key_measurements.iter().map(|m| &m.engine).collect();
38+
let engines: BTreeSet<_> = key_measurements
39+
.iter()
40+
.map(|m| (&m.engine, &m.engine_flags))
41+
.collect();
3642
anyhow::ensure!(
3743
engines.len() == 2,
3844
"Can only test significance between exactly two different engines. Found {} \
@@ -41,17 +47,17 @@ pub fn calculate<'a>(
4147
);
4248

4349
let mut engines = engines.into_iter();
44-
let engine_a = engines.next().unwrap();
45-
let engine_b = engines.next().unwrap();
50+
let (engine_a, engine_a_flags) = engines.next().unwrap();
51+
let (engine_b, engine_b_flags) = engines.next().unwrap();
4652

4753
let a: behrens_fisher::Stats = key_measurements
4854
.iter()
49-
.filter(|m| m.engine.as_ref() == engine_a)
55+
.filter(|m| m.engine.as_ref() == engine_a && &m.engine_flags == engine_a_flags)
5056
.map(|m| m.count as f64)
5157
.collect();
5258
let b: behrens_fisher::Stats = key_measurements
5359
.iter()
54-
.filter(|m| m.engine.as_ref() == engine_b)
60+
.filter(|m| m.engine.as_ref() == engine_b && &m.engine_flags == engine_b_flags)
5561
.map(|m| m.count as f64)
5662
.collect();
5763

@@ -62,8 +68,10 @@ pub fn calculate<'a>(
6268
phase: key.phase.unwrap(),
6369
event: key.event.unwrap(),
6470
a_engine: engine_a.clone(),
71+
a_engine_flags: engine_a_flags.clone(),
6572
a_mean: a.mean,
6673
b_engine: engine_b.clone(),
74+
b_engine_flags: engine_b_flags.clone(),
6775
b_mean: b.mean,
6876
significance_level,
6977
half_width_confidence_interval: ci,
@@ -73,6 +81,18 @@ pub fn calculate<'a>(
7381
Ok(results)
7482
}
7583

84+
fn engine_label(engine: &str, engine_flags: &Option<Cow<str>>) -> String {
85+
format!(
86+
"{}{}",
87+
engine,
88+
if let Some(ef) = engine_flags {
89+
format!(" ({ef})")
90+
} else {
91+
"".into()
92+
}
93+
)
94+
}
95+
7696
/// Write a vector of [EffectSize] structures to the passed `output_file` in human-readable form.
7797
/// The `summaries` are needed
7898
pub fn write(
@@ -100,22 +120,50 @@ pub fn write(
100120
)?;
101121
writeln!(output_file)?;
102122

123+
let end_of_shared_prefix = |astr: &str, bstr: &str| {
124+
astr.char_indices()
125+
.zip(bstr.char_indices())
126+
.find_map(|((i, a), (j, b))| {
127+
if a == b {
128+
None
129+
} else {
130+
debug_assert_eq!(i, j);
131+
Some(i)
132+
}
133+
})
134+
.unwrap_or(0)
135+
};
136+
103137
// For readability, trim the shared prefix from our two engine names.
104-
let end_of_shared_prefix = effect_size
105-
.a_engine
106-
.char_indices()
107-
.zip(effect_size.b_engine.char_indices())
108-
.find_map(|((i, a), (j, b))| {
109-
if a == b {
110-
None
111-
} else {
112-
debug_assert_eq!(i, j);
113-
Some(i)
114-
}
115-
})
116-
.unwrap_or(0);
117-
let a_engine = &effect_size.a_engine[end_of_shared_prefix..];
118-
let b_engine = &effect_size.b_engine[end_of_shared_prefix..];
138+
//
139+
// Furthermore, there are a few special cases:
140+
// 1. If the engines are the same, show just the flags.
141+
// 2. If not, show the computed full label with common prefix removed.
142+
let (a_eng_label, b_eng_label) = if effect_size.a_engine == effect_size.b_engine {
143+
(
144+
effect_size
145+
.a_engine_flags
146+
.as_ref()
147+
.map(|ref ef| ef.to_string())
148+
.unwrap_or_else(|| "(no flags)".into())
149+
.to_string(),
150+
effect_size
151+
.b_engine_flags
152+
.as_ref()
153+
.map(|ref ef| ef.to_string())
154+
.unwrap_or_else(|| "(no flags)".into())
155+
.to_string(),
156+
)
157+
} else {
158+
let a_label = engine_label(&effect_size.a_engine, &effect_size.a_engine_flags);
159+
let b_label = engine_label(&effect_size.b_engine, &effect_size.b_engine_flags);
160+
let idx_end_of_shared = end_of_shared_prefix(&a_label, &b_label);
161+
162+
(
163+
a_label[idx_end_of_shared..].into(),
164+
b_label[idx_end_of_shared..].into(),
165+
)
166+
};
119167

120168
if effect_size.is_significant() {
121169
writeln!(
@@ -132,9 +180,7 @@ pub fn write(
132180
let ratio_ci = effect_size.half_width_confidence_interval / effect_size.a_mean;
133181
writeln!(
134182
output_file,
135-
" {a_engine} is {ratio_min:.2}x to {ratio_max:.2}x faster than {b_engine}!",
136-
a_engine = a_engine,
137-
b_engine = b_engine,
183+
" {a_eng_label} is {ratio_min:.2}x to {ratio_max:.2}x faster than {b_eng_label}!",
138184
ratio_min = ratio - ratio_ci,
139185
ratio_max = ratio + ratio_ci,
140186
)?;
@@ -143,9 +189,7 @@ pub fn write(
143189
let ratio_ci = effect_size.half_width_confidence_interval / effect_size.b_mean;
144190
writeln!(
145191
output_file,
146-
" {b_engine} is {ratio_min:.2}x to {ratio_max:.2}x faster than {a_engine}!",
147-
a_engine = a_engine,
148-
b_engine = b_engine,
192+
" {b_eng_label} is {ratio_min:.2}x to {ratio_max:.2}x faster than {a_eng_label}!",
149193
ratio_min = ratio - ratio_ci,
150194
ratio_max = ratio + ratio_ci,
151195
)?;
@@ -155,39 +199,49 @@ pub fn write(
155199
}
156200
writeln!(output_file)?;
157201

158-
let get_summary = |engine: &str, wasm: &str, phase: Phase, event: &str| {
202+
let get_summary = |engine: &str,
203+
engine_flags: Option<Cow<str>>,
204+
wasm: &str,
205+
phase: Phase,
206+
event: &str| {
159207
// TODO this sorting is not using `arch` which is not guaranteed to be the same in
160208
// result sets; potentially this could re-use `Key` functionality.
161209
summaries
162210
.iter()
163211
.find(|s| {
164-
s.engine == engine && s.wasm == wasm && s.phase == phase && s.event == event
212+
s.engine == engine
213+
&& s.engine_flags == engine_flags
214+
&& s.wasm == wasm
215+
&& s.phase == phase
216+
&& s.event == event
165217
})
166218
.unwrap()
167219
};
168220

169221
let a_summary = get_summary(
170222
&effect_size.a_engine,
223+
effect_size.a_engine_flags,
171224
&effect_size.wasm,
172225
effect_size.phase,
173226
&effect_size.event,
174227
);
175228
writeln!(
176229
output_file,
177230
" [{} {:.2} {}] {}",
178-
a_summary.min, a_summary.mean, a_summary.max, a_engine,
231+
a_summary.min, a_summary.mean, a_summary.max, a_eng_label,
179232
)?;
180233

181234
let b_summary = get_summary(
182235
&effect_size.b_engine,
236+
effect_size.b_engine_flags,
183237
&effect_size.wasm,
184238
effect_size.phase,
185239
&effect_size.event,
186240
);
187241
writeln!(
188242
output_file,
189243
" [{} {:.2} {}] {}",
190-
b_summary.min, b_summary.mean, b_summary.max, b_engine,
244+
b_summary.min, b_summary.mean, b_summary.max, b_eng_label,
191245
)?;
192246
}
193247

crates/analysis/src/keys.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::{borrow::Cow, collections::BTreeSet};
66
pub struct KeyBuilder {
77
arch: bool,
88
engine: bool,
9+
engine_flags: bool,
910
wasm: bool,
1011
phase: bool,
1112
event: bool,
@@ -20,6 +21,7 @@ impl KeyBuilder {
2021
wasm: true,
2122
phase: true,
2223
event: true,
24+
engine_flags: true,
2325
}
2426
}
2527

@@ -31,6 +33,7 @@ impl KeyBuilder {
3133
wasm: false,
3234
phase: false,
3335
event: false,
36+
engine_flags: false,
3437
}
3538
}
3639

@@ -52,6 +55,12 @@ impl KeyBuilder {
5255
self
5356
}
5457

58+
/// Whether to group keys by engine flags or not.
59+
pub fn engine_flags(mut self, engine_flags: bool) -> Self {
60+
self.engine_flags = engine_flags;
61+
self
62+
}
63+
5564
/// Whether to group keys by phase or not.
5665
pub fn phase(mut self, phase: bool) -> Self {
5766
self.phase = phase;
@@ -72,6 +81,11 @@ impl KeyBuilder {
7281
.map(|m| Key {
7382
arch: if self.arch { Some(m.arch) } else { None },
7483
engine: if self.engine { Some(m.engine) } else { None },
84+
engine_flags: if self.engine_flags {
85+
m.engine_flags
86+
} else {
87+
None
88+
},
7589
wasm: if self.wasm { Some(m.wasm) } else { None },
7690
phase: if self.phase { Some(m.phase) } else { None },
7791
event: if self.event { Some(m.event) } else { None },
@@ -89,6 +103,7 @@ pub struct Key<'a> {
89103
pub wasm: Option<Cow<'a, str>>,
90104
pub phase: Option<Phase>,
91105
pub event: Option<Cow<'a, str>>,
106+
pub engine_flags: Option<Cow<'a, str>>,
92107
}
93108

94109
impl Key<'_> {
@@ -99,6 +114,10 @@ impl Key<'_> {
99114
&& self.wasm.as_ref().is_none_or(|x| *x == m.wasm)
100115
&& self.phase.as_ref().is_none_or(|x| *x == m.phase)
101116
&& self.event.as_ref().is_none_or(|x| *x == m.event)
117+
&& self
118+
.engine_flags
119+
.as_ref()
120+
.is_none_or(|x| Some(x) == m.engine_flags.as_ref())
102121
}
103122
}
104123

@@ -115,6 +134,7 @@ mod tests {
115134
wasm: Some("bench.wasm".into()),
116135
phase: Some(Phase::Compilation),
117136
event: Some("cycles".into()),
137+
engine_flags: Some("-Wfoo=bar".into()),
118138
};
119139

120140
// More test cases are needed, but this provides a sanity check for the matched key and
@@ -128,7 +148,7 @@ mod tests {
128148
phase: Phase::Compilation,
129149
event: "cycles".into(),
130150
count: 42,
131-
engine_flags: None,
151+
engine_flags: Some("-Wfoo=bar".into()),
132152
}));
133153
}
134154
}

crates/analysis/src/summarize.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use anyhow::Result;
33
use sightglass_data::{Measurement, Summary};
44
use std::io::Write;
55

6-
/// Summarize measurements grouped by: architecture, engine, benchmark file, phase and event.
6+
/// Summarize measurements grouped by: architecture, engine, flags, benchmark file, phase and event.
77
pub fn calculate<'a>(measurements: &[Measurement<'a>]) -> Vec<Summary<'a>> {
88
let mut summaries = Vec::new();
99
for k in KeyBuilder::all().keys(measurements) {
@@ -15,6 +15,7 @@ pub fn calculate<'a>(measurements: &[Measurement<'a>]) -> Vec<Summary<'a>> {
1515
summaries.push(Summary {
1616
arch: k.arch.unwrap(),
1717
engine: k.engine.unwrap(),
18+
engine_flags: k.engine_flags,
1819
wasm: k.wasm.unwrap(),
1920
phase: k.phase.unwrap(),
2021
event: k.event.unwrap(),
@@ -69,6 +70,7 @@ pub fn write(mut summaries: Vec<Summary<'_>>, output_file: &mut dyn Write) -> Re
6970
.then_with(|| x.wasm.cmp(&y.wasm))
7071
.then_with(|| x.event.cmp(&y.event))
7172
.then_with(|| x.engine.cmp(&y.engine))
73+
.then_with(|| x.engine_flags.cmp(&y.engine_flags))
7274
});
7375

7476
let mut last_phase = None;
@@ -93,9 +95,16 @@ pub fn write(mut summaries: Vec<Summary<'_>>, output_file: &mut dyn Write) -> Re
9395
writeln!(output_file, " {}", summary.event)?;
9496
}
9597

98+
let engine_flags = match summary.engine_flags {
99+
None => "".into(),
100+
Some(ef) => {
101+
format!(" ({ef})")
102+
}
103+
};
104+
96105
writeln!(
97106
output_file,
98-
" [{} {:.2} {}] {}",
107+
" [{} {:.2} {}] {}{engine_flags}",
99108
summary.min, summary.mean, summary.max, summary.engine,
100109
)?;
101110
}
@@ -130,6 +139,7 @@ mod tests {
130139
vec![Summary {
131140
arch: "x86".into(),
132141
engine: "wasmtime".into(),
142+
engine_flags: None,
133143
wasm: "bench.wasm".into(),
134144
phase: Phase::Compilation,
135145
event: "cycles".into(),

crates/data/src/lib.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,9 @@ pub struct Summary<'a> {
108108
/// record this measurement.
109109
pub engine: Cow<'a, str>,
110110

111+
/// The flags, if any, used to record this measurement.
112+
pub engine_flags: Option<Cow<'a, str>>,
113+
111114
/// The file path of the Wasm benchmark program.
112115
pub wasm: Cow<'a, str>,
113116

@@ -164,6 +167,13 @@ pub struct EffectSize<'a> {
164167
/// to record this measurement.
165168
pub a_engine: Cow<'a, str>,
166169

170+
/// The first engine flags being compared.
171+
///
172+
/// When provided, this is a string capturing the engine flags passed
173+
/// to the benchmark invocation that are in turn used to configure
174+
/// wasmtime/cranelift.
175+
pub a_engine_flags: Option<Cow<'a, str>>,
176+
167177
/// The first engine's result's arithmetic mean of the `count` field.
168178
pub a_mean: f64,
169179

@@ -173,6 +183,13 @@ pub struct EffectSize<'a> {
173183
/// to record this measurement.
174184
pub b_engine: Cow<'a, str>,
175185

186+
/// The second engine flags being compared.
187+
///
188+
/// When provided, this is a string capturing the engine flags passed
189+
/// to the benchmark invocation that are in turn used to configure
190+
/// wasmtime/cranelift.
191+
pub b_engine_flags: Option<Cow<'a, str>>,
192+
176193
/// The second engine's result's arithmetic mean of the `count` field.
177194
pub b_mean: f64,
178195

0 commit comments

Comments
 (0)