Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,11 +214,26 @@ If you don't want the results to be summarized and displayed in a human-readable
format, you can get raw JSON or CSV via the `--raw` flag:

```
$ cargo run -- benchmark --raw --output-format csv -- benchmark.wasm
$ cargo run -- benchmark --raw --output-format json -- benchmark.wasm
```

Then you can use your own R/Python/spreadsheets/etc. to analyze and visualize
the benchmark results.
the benchmark results. The JSON output is also compatible with the in-tree
`scripts/viz.py` visualization script.

### Benchmark Results Visualization (viz.py)

For results collected in the JSON format, an in-tree tool is provided that can
visualize benchmark results as an HTML document with embedded graphics. This
tool requires that the Python
[`uv`](https://docs.astral.sh/uv/getting-started/installation/) tool be
installed.

```
$ ./scripts/viz.py -o experiment.html baseline.json experiment1.json experiment2.json
```

See the `--help` option on the tool for additional usage options.

### Adding a New Benchmark

Expand Down
97 changes: 97 additions & 0 deletions scripts/viz.jinja
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
<!doctype html>
<html lang="en-us">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="x-ua-compatible" content="ie=edge">
    <title>Sightglass Benchmark Results</title>
    <meta name="description" content="Sightglass Benchmark Results Visualization">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <script src="https://cdn.jsdelivr.net/npm/vega@6"></script>
    <script src="https://cdn.jsdelivr.net/npm/vega-lite@6"></script>
    <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
    {# One embedded Vega-Lite spec per benchmark, rendered by renderViz() below. #}
    {% for benchmark in benchmarks %}
    <script type="application/json" id="vega-lite-{{ benchmark.chart.id }}">
      {{ benchmark.chart.json }}
    </script>
    {% endfor %}
    {# These styles are loop-invariant: emit them once, not once per benchmark. #}
    <style>
      .slower {
        background: lightcoral;
        font-weight: bold;
      }

      .faster {
        background: lightgreen;
        font-weight: bold;
      }

      .inconsistent {
        background: lightgray;
      }
    </style>
  </head>
  <body>
    <h1>Sightglass Benchmark Results</h1>
    <div>
      <span style="font-weight:bold">Key:</span>
      <span class="inconsistent">Baseline CV > 5% -- Inconsistent Benchmark</span>
      <span class="slower">Test Slower by CV%+ from baseline</span>
      <span class="faster">Test Faster by CV%+ from baseline</span>
    </div>
    <div>
      CV is the <a href="https://en.wikipedia.org/wiki/Coefficient_of_variation">Coefficient of Variation</a> for the baseline benchmark run.
    </div>
    {# Summary table: one row per benchmark, one column per prefix; the
       baseline column is marked with "*" and shown first. #}
    <table style="padding-top:20px">
      <thead>
        <tr>
          <th scope="col">Benchmark</th>
          {% for prefix in prefixes %}
          <th scope="col">{% if prefix == baseline %}*{% endif %}{{ prefix }}</th>
          {% endfor %}
        </tr>
      </thead>
      <tbody>
        {% for benchmark in benchmarks %}
        <tr>
          <td><a href="#{{ benchmark.name }}">{{ benchmark.name }}</a></td>
          {% for prefix in prefixes %}
          {% set stats = benchmark.stats if prefix == baseline else benchmark.stats.relative[prefix] %}
          {% if prefix == baseline %}
          {% set class = "inconsistent" if stats.cv > 5 else "" %}
          <td class="{{ class }}">{{ "%.2f"|format(stats.p25) }} +/- {{ "%.2f"|format(stats.cv) }}%</td>
          {% else %}
          {# A run is slower/faster only when its p25 delta exceeds the baseline's CV. #}
          {% set class = "slower" if stats.p25_delta_pct > benchmark.stats.cv else "faster" if stats.p25_delta_pct < -1 * benchmark.stats.cv else "" %}
          <td class="{{ class }}">{{ "%.2f"|format(stats.p25_delta_pct) }}%</td>
          {% endif %}
          {% endfor %}
        </tr>
        {% endfor %}
      </tbody>
    </table>

    <div id="benchmark-details">
      {% for benchmark in benchmarks %}
      <h2 id="{{ benchmark.name }}">{{ benchmark.name }}</h2>
      <ul>
        <li><strong {% if benchmark.stats.cv > 5 %}class="inconsistent"{% endif %}>Baseline CV:</strong> {{ "%.2f"|format(benchmark.stats.cv) }}%</li>
        {% for prefix, stats in benchmark.stats.relative.items() %}
        {% set class = "slower" if stats.p25_delta_pct > benchmark.stats.cv else "faster" if stats.p25_delta_pct < -1 * benchmark.stats.cv else "" %}
        <li>{{ prefix }}: <span class="{{ class }}">{{ "%.2f"|format(stats.p25_delta_pct) }}%</span></li>
        {% endfor %}
      </ul>
      <div id="viz-{{ benchmark.chart.id }}"></div>
      {% endfor %}
    </div>

    <script type="text/javascript">
      // Parse the embedded Vega-Lite JSON for `chart_id` and render it into
      // the matching "viz-<id>" placeholder div.
      function renderViz(chart_id) {
        let vl_text = document.getElementById(`vega-lite-${chart_id}`).textContent;
        let vl_data = JSON.parse(vl_text);
        vegaEmbed(`#viz-${chart_id}`, vl_data);
      }
      {% for benchmark in benchmarks %}
      renderViz("{{ benchmark.chart.id }}");
      {% endfor %}
    </script>
  </body>
</html>
216 changes: 216 additions & 0 deletions scripts/viz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
#!/usr/bin/env python3
#
# /// script
# dependencies = [
# "pandas",
# "altair",
# "jinja2"
# ]
# ///

import argparse
import os
import pandas as pd
import altair as alt
from dataclasses import dataclass
import re
import sys
import pathlib

# our usage isn't abusing this, suppress noisy warnings
pd.options.mode.chained_assignment = None


@dataclass
class RelativeStats:
    """Statistics for one non-baseline prefix, relative to the baseline run."""

    cv: float  # coefficient of variation of this prefix's counts, as a percent
    p25: float  # 25th percentile of this prefix's measured counts
    p25_delta_pct: float  # percent difference of p25 from the baseline p25


@dataclass
class Stats:
    """Baseline statistics for one benchmark, plus per-prefix relative stats."""

    cv: float  # coefficient of variation of the baseline counts, as a percent
    p25: float  # 25th percentile of the baseline counts
    relative: dict[str, RelativeStats]  # keyed by non-baseline prefix name


def wasm_path_to_benchmark_name(wasm_path: str) -> str:
    """Derive a human-readable benchmark name from a .wasm file path."""
    *parents, leaf = wasm_path.split("/")
    if leaf == "benchmark.wasm":
        # Generic file name: the parent directory names the benchmark,
        # e.g. noop/benchmark.wasm -> noop
        return parents[-1]
    # Specific file name: strip the extension,
    # e.g. libsodium/libsodium-box7.wasm -> libsodium-box7
    return leaf.replace(".wasm", "")


def parse_single_input(path, prefix, pass_num=1, measure="cycles"):
    """Load one benchmark-results JSON file into a DataFrame.

    Only rows whose "event" equals `measure` are kept; "pass" and
    "prefix" columns are added to identify the run.
    """
    frame = pd.read_json(path)
    frame = frame.loc[frame["event"] == measure]
    # "pass" is a Python keyword, so it can't be a plain assign() kwarg.
    return frame.assign(**{"pass": pass_num, "prefix": prefix})


def parse_inputs(inputs, measure="cycles"):
    """Parse benchmark-result JSON files into a single DataFrame.

    `inputs` is either a list of JSON file paths or a single directory
    containing them.  Files named `<prefix>-<N>.json` are grouped under
    `prefix` with pass number N; any other file's stem becomes its prefix
    with pass number 1.  Exits the process if a file is missing.
    """
    if len(inputs) == 1 and not inputs[0].endswith(".json"):
        # assume this is a directory; try to use all JSON files
        import glob

        file_inputs = list(sorted(glob.glob(f"{inputs[0]}/*.json")))
        return parse_inputs(file_inputs, measure)

    # list of files now; organize by prefix if detected
    RE_NUMBERED = re.compile(r"(?P<prefix>.+)-(?P<number>\d+).json")
    frames = []
    for path in inputs:
        path = pathlib.Path(path)
        if not path.exists():
            # Diagnostics belong on stderr; sys.exit() raises, no return needed.
            print(f"{path} not found!", file=sys.stderr)
            sys.exit(1)

        match = RE_NUMBERED.match(path.name)
        if match:
            prefix = match["prefix"]
            pass_num = int(match["number"])
        else:
            prefix = path.stem
            pass_num = 1
        # Forward `measure` (it was previously dropped, so a non-default
        # measure was silently ignored).
        frames.append(parse_single_input(path, prefix, pass_num, measure))

    # Concatenate once instead of growing a DataFrame per file (quadratic).
    return pd.concat(frames) if frames else pd.DataFrame()


def compute_stats(df, benchmark, baseline):
    """Compute baseline statistics for `benchmark` plus stats for every
    other prefix relative to the baseline prefix."""

    def _summary(counts):
        # 25th percentile and coefficient of variation (as a percent).
        return counts.quantile(0.25), (counts.std() / counts.mean()) * 100

    rows = df[df["benchmark"] == benchmark]

    base_counts = rows.loc[rows["prefix"] == baseline, "count"]
    base_p25, base_cv = _summary(base_counts)
    stats = Stats(cv=base_cv, p25=base_p25, relative={})

    for name in rows.loc[rows["prefix"] != baseline, "prefix"].unique():
        counts = rows.loc[rows["prefix"] == name, "count"]
        p25, cv = _summary(counts)
        # Symmetric percent difference of p25 against the baseline p25.
        delta_pct = ((p25 - base_p25) / ((base_p25 + p25) / 2)) * 100
        stats.relative[name] = RelativeStats(cv=cv, p25=p25, p25_delta_pct=delta_pct)

    return stats


def plot_benchmark(df, baseline, benchmark):
    """Build a pair of box plots for one benchmark: absolute counts and
    percent difference from the baseline's 25th percentile."""
    rows = df[df["benchmark"] == benchmark]

    absolute = (
        alt.Chart(rows)
        .mark_boxplot()
        .encode(
            y=alt.Y("prefix", title=None),
            x=alt.X("count:Q", title="Count (Cycles)"),
            color=alt.Color("prefix"),
        )
    )

    # Normalize every sample against the baseline's 25th percentile.
    base_p25 = rows[rows["prefix"] == baseline]["count"].quantile(0.25)
    rows["pct_diff_from_p25"] = (rows["count"] - base_p25) / base_p25 * 100

    relative = (
        alt.Chart(rows)
        .mark_boxplot()
        .encode(
            y=alt.Y("prefix", title=None),
            x=alt.X("pct_diff_from_p25", title="Percent Difference from Baseline p25"),
            color=alt.Color("prefix"),
        )
    )

    return alt.hconcat(absolute, relative).properties(title=f"{benchmark}")


def parse_args():
    """Parse and return the command-line arguments for the visualizer."""
    cli = argparse.ArgumentParser()
    cli.add_argument("inputs", nargs="+", help="Directory or JSON files to analyze")
    cli.add_argument(
        "-b",
        "--baseline",
        default=None,
        help="prefix of baseline to use (by default look for one containing 'baseline')",
    )
    cli.add_argument("-o", "--output", default="viz.html", help="HTML File to output")
    return cli.parse_args()


def render(args, tmpl_data):
    """Render the viz.jinja template (next to this script) with
    `tmpl_data` and write the resulting HTML to args.output."""
    from jinja2 import Environment, FileSystemLoader

    # Look up the template in the directory containing this script.
    script_dir = os.path.abspath(os.path.dirname(__file__))
    env = Environment(loader=FileSystemLoader(script_dir))
    html = env.get_template("viz.jinja").render(**tmpl_data)
    with open(args.output, "w") as out:
        out.write(html)
    print(f"Wrote {args.output}")


def main():
    """Entry point: load results, compute stats and charts per benchmark,
    and render the HTML report."""
    args = parse_args()
    df = parse_inputs(args.inputs)

    # add a column naming the benchmark each row belongs to
    df["benchmark"] = df.apply(
        lambda row: wasm_path_to_benchmark_name(row["wasm"]), axis=1
    )

    tmpl_data = {}
    benchmarks = df["benchmark"].unique()
    prefixes = list(df["prefix"].unique())

    # Pick the baseline prefix: an explicit --baseline wins; otherwise use
    # the first prefix containing "baseline", falling back to the first
    # prefix seen.  (Previously --baseline was ignored and the fallback was
    # benchmarks[0] -- a benchmark name, which can never match a prefix.)
    baseline = (
        args.baseline
        or next((p for p in prefixes if "baseline" in p), None)
        or prefixes[0]
    )

    # reorganize prefixes so that the baseline is first
    if baseline in prefixes:
        prefixes.remove(baseline)
        prefixes.insert(0, baseline)

    tmpl_data["prefixes"] = prefixes
    tmpl_data["baseline"] = baseline
    tmpl_data["benchmarks"] = []
    for benchmark in benchmarks:
        chart = plot_benchmark(df, baseline, benchmark)
        tmpl_data["benchmarks"].append(
            {
                "name": benchmark,
                "baseline": baseline,
                "stats": compute_stats(df, benchmark, baseline),
                "chart": {
                    "id": benchmark,
                    "json": chart.to_json(),
                },
            }
        )
    render(args, tmpl_data)


if __name__ == "__main__":
main()