bytecodealliance · abrown · Dec 8, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 3, 2025
diff --git a/crates/analysis/src/effect_size.rs b/crates/analysis/src/effect_size.rs
@@ -1,7 +1,7 @@
 use crate::keys::KeyBuilder;
 use anyhow::Result;
 use sightglass_data::{EffectSize, Measurement, Phase, Summary};
-use std::{collections::BTreeSet, io::Write};
+use std::{borrow::Cow, collections::BTreeSet, io::Write};
 
 /// Find the effect size (and confidence interval) of between two different
 /// engines (i.e. two different commits of Wasmtime).
@@ -25,14 +25,20 @@ pub fn calculate<'a>(
         significance_level,
     );
 
-    let keys = KeyBuilder::all().engine(false).keys(measurements);
+    let keys = KeyBuilder::all()
+        .engine(false)
+        .engine_flags(false)
+        .keys(measurements);
     let mut results = Vec::with_capacity(keys.len());
 
     for key in keys {
         let key_measurements: Vec<_> = measurements.iter().filter(|m| key.matches(m)).collect();
 
         // NB: `BTreeSet` so they're always sorted.
-        let engines: BTreeSet<_> = key_measurements.iter().map(|m| &m.engine).collect();
+        let engines: BTreeSet<_> = key_measurements
+            .iter()
+            .map(|m| (&m.engine, &m.engine_flags))
+            .collect();
         anyhow::ensure!(
             engines.len() == 2,
             "Can only test significance between exactly two different engines. Found {} \
@@ -41,17 +47,17 @@ pub fn calculate<'a>(
         );
 
         let mut engines = engines.into_iter();
-        let engine_a = engines.next().unwrap();
-        let engine_b = engines.next().unwrap();
+        let (engine_a, engine_a_flags) = engines.next().unwrap();
+        let (engine_b, engine_b_flags) = engines.next().unwrap();
 
         let a: behrens_fisher::Stats = key_measurements
             .iter()
-            .filter(|m| m.engine.as_ref() == engine_a)
+            .filter(|m| m.engine.as_ref() == engine_a && &m.engine_flags == engine_a_flags)
             .map(|m| m.count as f64)
             .collect();
         let b: behrens_fisher::Stats = key_measurements
             .iter()
-            .filter(|m| m.engine.as_ref() == engine_b)
+            .filter(|m| m.engine.as_ref() == engine_b && &m.engine_flags == engine_b_flags)
             .map(|m| m.count as f64)
             .collect();
 
@@ -62,8 +68,10 @@ pub fn calculate<'a>(
             phase: key.phase.unwrap(),
             event: key.event.unwrap(),
             a_engine: engine_a.clone(),
+            a_engine_flags: engine_a_flags.clone(),
             a_mean: a.mean,
             b_engine: engine_b.clone(),
+            b_engine_flags: engine_b_flags.clone(),
             b_mean: b.mean,
             significance_level,
             half_width_confidence_interval: ci,
@@ -73,6 +81,18 @@ pub fn calculate<'a>(
     Ok(results)
 }
 
+/// Build the human-readable label for one side of a comparison.
+///
+/// The label is the engine name alone when `engine_flags` is `None`; otherwise
+/// it is the engine name followed by a space and the flags in parentheses,
+/// e.g. `engine (flags)`.
+fn engine_label(engine: &str, engine_flags: &Option<Cow<str>>) -> String {
+    match engine_flags {
+        Some(flags) => format!("{engine} ({flags})"),
+        None => engine.to_string(),
+    }
+}
+
 /// Write a vector of [EffectSize] structures to the passed `output_file` in human-readable form.
 /// The `summaries` are needed
 pub fn write(
@@ -100,22 +120,50 @@ pub fn write(
         )?;
         writeln!(output_file)?;
 
+        // Byte offset of the first character at which the two strings differ, or
+        // 0 when no differing character is found before either string ends.
+        let end_of_shared_prefix = |astr: &str, bstr: &str| {
+            astr.char_indices()
+                .zip(bstr.char_indices())
+                .find(|((_, a), (_, b))| a != b)
+                .map(|((i, _), (j, _))| {
+                    // Every earlier character matched, so both offsets agree.
+                    debug_assert_eq!(i, j);
+                    i
+                })
+                .unwrap_or(0)
+        };
+
         // For readability, trim the shared prefix from our two engine names.
-        let end_of_shared_prefix = effect_size
-            .a_engine
-            .char_indices()
-            .zip(effect_size.b_engine.char_indices())
-            .find_map(|((i, a), (j, b))| {
-                if a == b {
-                    None
-                } else {
-                    debug_assert_eq!(i, j);
-                    Some(i)
-                }
-            })
-            .unwrap_or(0);
-        let a_engine = &effect_size.a_engine[end_of_shared_prefix..];
-        let b_engine = &effect_size.b_engine[end_of_shared_prefix..];
+        //
+        // Furthermore, there are a few special cases:
+        // 1. If the engines are the same, show just the flags.
+        // 2. If not, show the computed full label with common prefix removed.
+        let (a_eng_label, b_eng_label) = if effect_size.a_engine == effect_size.b_engine {
+            // Same engine on both sides: only the flags tell the two runs apart, so
+            // each side is labelled by its flags alone. A side without flags gets
+            // the placeholder text "(no flags)".
+            let flags_label = |flags: &Option<Cow<str>>| {
+                flags.as_deref().unwrap_or("(no flags)").to_string()
+            };
+
+            (
+                flags_label(&effect_size.a_engine_flags),
+                flags_label(&effect_size.b_engine_flags),
+            )
+        } else {
+            // Different engines: build the full label for each side (engine name,
+            // plus its flags when present), then drop the leading text that the
+            // two labels have in common.
+            let a_label = engine_label(&effect_size.a_engine, &effect_size.a_engine_flags);
+            let b_label = engine_label(&effect_size.b_engine, &effect_size.b_engine_flags);
+            let idx_end_of_shared = end_of_shared_prefix(&a_label, &b_label);
+
+            (
+                a_label[idx_end_of_shared..].to_string(),
+                b_label[idx_end_of_shared..].to_string(),
+            )
+        };
 
         if effect_size.is_significant() {
             writeln!(
@@ -132,9 +180,7 @@ pub fn write(
                 let ratio_ci = effect_size.half_width_confidence_interval / effect_size.a_mean;
                 writeln!(
                     output_file,
-                    "  {a_engine} is {ratio_min:.2}x to {ratio_max:.2}x faster than {b_engine}!",
-                    a_engine = a_engine,
-                    b_engine = b_engine,
+                    "  {a_eng_label} is {ratio_min:.2}x to {ratio_max:.2}x faster than {b_eng_label}!",
                     ratio_min = ratio - ratio_ci,
                     ratio_max = ratio + ratio_ci,
                 )?;
@@ -143,9 +189,7 @@ pub fn write(
                 let ratio_ci = effect_size.half_width_confidence_interval / effect_size.b_mean;
                 writeln!(
                     output_file,
-                    "  {b_engine} is {ratio_min:.2}x to {ratio_max:.2}x faster than {a_engine}!",
-                    a_engine = a_engine,
-                    b_engine = b_engine,
+                    "  {b_eng_label} is {ratio_min:.2}x to {ratio_max:.2}x faster than {a_eng_label}!",
                     ratio_min = ratio - ratio_ci,
                     ratio_max = ratio + ratio_ci,
                 )?;
@@ -155,39 +199,49 @@ pub fn write(
         }
         writeln!(output_file)?;
 
-        let get_summary = |engine: &str, wasm: &str, phase: Phase, event: &str| {
+        let get_summary = |engine: &str,
+                           engine_flags: &Option<Cow<str>>,
+                           wasm: &str,
+                           phase: Phase,
+                           event: &str| {
             // TODO this sorting is not using `arch` which is not guaranteed to be the same in
             // result sets; potentially this could re-use `Key` functionality.
             summaries
                 .iter()
                 .find(|s| {
-                    s.engine == engine && s.wasm == wasm && s.phase == phase && s.event == event
+                    s.engine == engine
+                        && s.engine_flags == *engine_flags
+                        && s.wasm == wasm
+                        && s.phase == phase
+                        && s.event == event
                 })
                 .unwrap()
         };
 
         let a_summary = get_summary(
             &effect_size.a_engine,
+            &effect_size.a_engine_flags,
             &effect_size.wasm,
             effect_size.phase,
             &effect_size.event,
         );
         writeln!(
             output_file,
             "  [{} {:.2} {}] {}",
-            a_summary.min, a_summary.mean, a_summary.max, a_engine,
+            a_summary.min, a_summary.mean, a_summary.max, a_eng_label,
         )?;
 
         let b_summary = get_summary(
             &effect_size.b_engine,
+            &effect_size.b_engine_flags,
             &effect_size.wasm,
             effect_size.phase,
             &effect_size.event,
         );
         writeln!(
             output_file,
             "  [{} {:.2} {}] {}",
-            b_summary.min, b_summary.mean, b_summary.max, b_engine,
+            b_summary.min, b_summary.mean, b_summary.max, b_eng_label,
         )?;
     }
 

diff --git a/crates/analysis/src/keys.rs b/crates/analysis/src/keys.rs
@@ -6,6 +6,7 @@ use std::{borrow::Cow, collections::BTreeSet};
 pub struct KeyBuilder {
     arch: bool,
     engine: bool,
+    engine_flags: bool,
     wasm: bool,
     phase: bool,
     event: bool,
@@ -20,6 +21,7 @@ impl KeyBuilder {
             wasm: true,
             phase: true,
             event: true,
+            engine_flags: true,
         }
     }
 
@@ -31,6 +33,7 @@ impl KeyBuilder {
             wasm: false,
             phase: false,
             event: false,
+            engine_flags: false,
         }
     }
 
@@ -52,6 +55,12 @@ impl KeyBuilder {
         self
     }
 
+    /// Whether to group keys by engine flags or not; returns the updated builder.
+    pub fn engine_flags(mut self, engine_flags: bool) -> Self {
+        self.engine_flags = engine_flags;
+        self
+    }
+
     /// Whether to group keys by phase or not.
     pub fn phase(mut self, phase: bool) -> Self {
         self.phase = phase;
@@ -72,6 +81,11 @@ impl KeyBuilder {
             .map(|m| Key {
                 arch: if self.arch { Some(m.arch) } else { None },
                 engine: if self.engine { Some(m.engine) } else { None },
+                engine_flags: if self.engine_flags {
+                    Some(m.engine_flags)
+                } else {
+                    None
+                },
                 wasm: if self.wasm { Some(m.wasm) } else { None },
                 phase: if self.phase { Some(m.phase) } else { None },
                 event: if self.event { Some(m.event) } else { None },
@@ -82,10 +96,11 @@ impl KeyBuilder {
 }
 
 /// A key for grouping measurements together.
-#[derive(PartialOrd, Ord, PartialEq, Eq, Hash)]
+#[derive(PartialOrd, Ord, PartialEq, Eq, Hash, Debug)]
 pub struct Key<'a> {
     pub arch: Option<Cow<'a, str>>,
     pub engine: Option<Cow<'a, str>>,
+    pub engine_flags: Option<Option<Cow<'a, str>>>,
     pub wasm: Option<Cow<'a, str>>,
     pub phase: Option<Phase>,
     pub event: Option<Cow<'a, str>>,
@@ -96,6 +111,10 @@ impl Key<'_> {
     pub fn matches(&self, m: &Measurement) -> bool {
         self.arch.as_ref().is_none_or(|x| *x == m.arch)
             && self.engine.as_ref().is_none_or(|x| *x == m.engine)
+            && self
+                .engine_flags
+                .as_ref()
+                .is_none_or(|x| *x == m.engine_flags)
             && self.wasm.as_ref().is_none_or(|x| *x == m.wasm)
             && self.phase.as_ref().is_none_or(|x| *x == m.phase)
             && self.event.as_ref().is_none_or(|x| *x == m.event)
@@ -115,6 +134,7 @@ mod tests {
             wasm: Some("bench.wasm".into()),
             phase: Some(Phase::Compilation),
             event: Some("cycles".into()),
+            engine_flags: Some(Some("-Wfoo=bar".into())),
         };
 
         // More test cases are needed, but this provides a sanity check for the matched key and
@@ -128,6 +148,7 @@ mod tests {
             phase: Phase::Compilation,
             event: "cycles".into(),
             count: 42,
+            engine_flags: Some("-Wfoo=bar".into()),
         }));
     }
 }