Commit e2f47a1

SonSang and duburcqa authored

[MISC] Format benchmark params as table, add legend, and support download as csv. (#1965)
Co-authored-by: Alexis Duburcq <[email protected]>

1 parent d9c2a44 · commit e2f47a1
File tree: 1 file changed (+97, -21 lines)


.github/workflows/alarm.yml

Lines changed: 97 additions & 21 deletions
@@ -55,21 +55,24 @@ jobs:

           # --- Parameters ---
           MAX_VALID_REVISIONS: 5
-          MAX_FETCH_REVISIONS: 100
+          MAX_FETCH_REVISIONS: 50
           RUNTIME_REGRESSION_TOLERANCE_PCT: 10
           COMPILE_REGRESSION_TOLERANCE_PCT: 10

           # Input/Output paths
           ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }}
           PR_COMMENT_PATH: pr_comment.md
           CHECK_BODY_PATH: check_output.md
+          CSV_RUNTIME_PATH: runtime_fps.csv
+          CSV_COMPILE_PATH: compile_time.csv
           EXIT_CODE_REGRESSION: 42
         run: |
           { python - << 'PY'; EXIT_CODE=$?; } || true

           import os, sys, json, re, math, statistics
           import wandb
           from pathlib import Path
+          import csv

           # ----- arguments -----

@@ -86,6 +89,11 @@ jobs:
           pr_comment_path = Path(os.environ["PR_COMMENT_PATH"]).expanduser()
           check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser()

+          csv_files = {
+              "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(),
+              "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(),
+          }
+
           # ---------- helpers ----------

           METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor")
@@ -97,6 +105,16 @@ jobs:
               kv = dict(map(str.strip, token.split("=", 1)) for token in bid.split("-"))
               return _normalize_kv_id(kv)

+          def parse_norm_id(nbid: str) -> dict:
+              kv = {}
+              if nbid:
+                  for token in nbid.split("-"):
+                      token = token.strip()
+                      if token and "=" in token:
+                          k, v = token.split("=", 1)
+                          kv[k.strip()] = v.strip()
+              return kv
+
           def artifacts_parse_csv_summary(current_txt_path):
               out = {}
               for line in current_txt_path.read_text().splitlines():
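
For context, `parse_norm_id` is the inverse of the ID flattening performed by the pre-existing `_normalize_kv_id` helper: it recovers the key-value pairs encoded in a normalized benchmark ID. A minimal sketch of the parsing, using a made-up ID (the keys below are illustrative, not taken from the actual benchmark suite):

    # Hypothetical normalized benchmark ID; real ones come out of _normalize_kv_id.
    nbid = "solver=rigid-n_envs=4096"

    kv = {}
    for token in nbid.split("-"):
        token = token.strip()
        if token and "=" in token:
            k, v = token.split("=", 1)
            kv[k.strip()] = v.strip()

    assert kv == {"solver": "rigid", "n_envs": "4096"}

Since the parser splits on every `-`, it assumes neither keys nor values embed the separator; a value like `cuda-0` would silently lose its `-0` suffix.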
@@ -139,24 +157,19 @@ jobs:
           api = wandb.Api()
           runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")

-          is_complete = False
+          revs = set()
           records_by_rev = {}
           for i, run in enumerate(runs_iter):
               # Abort if still not complete after checking enough runs.
               # This would happen if a new benchmark has been added, and not enough past data is available yet.
-              if i == MAX_FETCH_REVISIONS:
+              if len(revs) == MAX_FETCH_REVISIONS:
                   break

               # Early return if enough complete records have been collected
               records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()]
-              is_complete = sum(records_is_complete) == MAX_VALID_REVISIONS
-              if is_complete:
+              if sum(records_is_complete) == MAX_VALID_REVISIONS:
                   break

-              # Skip runs that did not finish for some reason
-              if run.state != "finished":
-                  continue
-
               # Load config and summary, with support of legacy runs
               config, summary = run.config, run.summary
               if isinstance(config, str):
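
Note the semantic change in the stop condition above: MAX_FETCH_REVISIONS used to cap the number of wandb runs iterated, whereas it now caps the number of distinct revisions encountered, so repeated runs of the same commit no longer eat into the fetch budget (which presumably also allows the budget to drop from 100 to 50). A toy sketch of the difference, with made-up revision hashes standing in for fetched runs:

    # Each entry stands for the revision of one fetched run (hypothetical data).
    fetched = ["abc", "abc", "abc", "def", "def", "ghi"]
    MAX_FETCH_REVISIONS = 2

    revs = set()
    for i, rev in enumerate(fetched):
        # Old condition `i == MAX_FETCH_REVISIONS` would stop after two runs,
        # having seen only revision "abc"; the new one scans until a second
        # distinct revision has been collected.
        if len(revs) == MAX_FETCH_REVISIONS:
            break
        revs.add(rev)

    assert revs == {"abc", "def"}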
@@ -167,13 +180,18 @@ jobs:
               # Extract revision commit and branch
               try:
                   rev, branch = config["revision"].split("@", 1)
+                  revs.add(rev)
               except ValueError:
                   # Ignore this run if the revision has been corrupted for some unknown reason
                   continue
               # Ignore runs associated with a commit that is not part of the official repository
               if not branch.startswith('Genesis-Embodied-AI/'):
                   continue

+              # Skip runs that did not finish for some reason
+              if run.state != "finished":
+                  continue
+
               # Do not store new records if the desired number of revisions is already reached
               if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev:
                   continue
@@ -205,50 +223,87 @@ jobs:

           # ----- build TWO tables -----

+          # Parse benchmark IDs into key-value dicts
+          id2kv = {bid: parse_norm_id(bid) for bid in current_bm.keys()}
+          params_name = sorted({key for kv in id2kv.values() for key in kv})
+
           reg_found = False
           tables = {}
+          rows_for_csv = {"runtime_fps": [], "compile_time": []}
+          info = {}
           for metric, alias in (("runtime_fps", "FPS"), ("compile_time", "compile")):
-              rows = []
+              rows_md = []
+
+              header_cells = (
+                  "status",
+                  *params_name,
+                  f"current {alias}",
+                  f"baseline {alias} [last (mean ± std)] (*1)",
+                  f"Δ {alias} (*2)"
+              )
+              header = "| " + " | ".join(header_cells) + " |"
+              align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|"
+
               for bid in sorted(current_bm.keys()):
                   value_cur = current_bm[bid][metric]
                   is_int = isinstance(value_cur, int) or value_cur.is_integer()
                   value_repr = fmt_num(value_cur, is_int)

+                  kv = id2kv[bid]
+                  info = {
+                      **{k: kv.get(k, "-") for k in params_name},
+                      "current": value_cur,
+                      "baseline_last": None,
+                      "baseline_min": None,
+                      "baseline_max": None,
+                  }
+
                   values_prev = [
                       record[bid][metric]
                       for record in records_by_rev.values()
                       if bid in record
                   ]
                   if values_prev:
+                      value_last = values_prev[0]
                       value_ref = statistics.fmean(values_prev)
-                      delta = (value_cur - value_ref) / value_ref * 100.0
+                      delta = (value_cur - value_last) / value_last * 100.0
+
+                      info["baseline_last"] = int(value_last) if is_int else float(value_last)

-                      stats_repr = f"{fmt_num(values_prev[0], is_int)}"
+                      stats_repr = f"{fmt_num(value_last, is_int)}"
                       delta_repr = f"{delta:+.1f}%"
                       if len(values_prev) == MAX_VALID_REVISIONS:
+                          info["baseline_mean"] = int(value_ref) if is_int else float(value_ref)
+                          info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev))
+                          info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev))
+
                           value_std = statistics.stdev(values_prev)
                           stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})"
-                          if abs(delta) > METRICS_TOL[metrics]:
+                          if abs(delta) > METRICS_TOL[metric]:
+                              info["status"] = "alert"
+
                               delta_repr = f"**{delta_repr}**"
                               picto = "🔴"
                               reg_found = True
                           else:
+                              info["status"] = "ok"
+
                               picto = "✅"
                       else:
+                          info["status"] = "n/a"
+
                           picto = "ℹ️"
                   else:
                       picto, stats_repr, delta_repr = "ℹ️", "---", "---"

-                  rows.append([picto, f"`{bid}`", value_repr, stats_repr, delta_repr])
+                  key_cells = [kv.get(k, "-") for k in params_name]

-              header = [
-                  f"| status | benchmark ID | current {alias} | baseline {alias} [last (mean ± std)] | Δ {alias} |",
-                  "|:------:|:-------------|-----------:|-------------:|------:|",
-              ]
-              tables[metric] = header + ["| " + " | ".join(r) + " |" for r in rows]
+                  rows_md.append("| " + " | ".join([picto] + key_cells + [value_repr, stats_repr, delta_repr]) + " |")
+                  rows_for_csv[metric].append(info)

-          # ----- baseline commit list -----
+              tables[metric] = [header, align] + rows_md

+          # ----- baseline commit list (MD) -----
           blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)]
           baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist

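
Because the columns are now derived from the parsed parameter names, the header and alignment rows vary with the benchmark set. A quick sketch of what they evaluate to for two hypothetical parameters:

    # Illustrative inputs; the real values come from id2kv / params_name above.
    params_name = ["n_envs", "solver"]
    alias = "FPS"

    header_cells = (
        "status",
        *params_name,
        f"current {alias}",
        f"baseline {alias} [last (mean ± std)] (*1)",
        f"Δ {alias} (*2)",
    )
    header = "| " + " | ".join(header_cells) + " |"
    align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|"

    print(header)
    # | status | n_envs | solver | current FPS | baseline FPS [last (mean ± std)] (*1) | Δ FPS (*2) |
    print(align)
    # |:------:|:---|:---|---:|---:|---:|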
@@ -270,6 +325,9 @@ jobs:
                   "",
                   "### Compile Time",
                   *tables["compile_time"],
+                  "",
+                  f"- (*1) last: last commit on main; mean/std: stats over the previous {MAX_VALID_REVISIONS} commits if available.",
+                  "- (*2) Δ: relative difference between PR and last commit on main, i.e. (PR - main) / main * 100%.",
               ]
           )

@@ -280,7 +338,15 @@ jobs:
           else:
               comment_body = ""

-          # Write files
+          # CSV files
+          for metric in ("runtime_fps", "compile_time"):
+              with csv_files[metric].open("w", newline="", encoding="utf-8") as f:
+                  w = csv.DictWriter(f, fieldnames=list(dict.fromkeys(k for rec in rows_for_csv[metric] for k in rec)), restval="")
+                  w.writeheader()
+                  for rec in rows_for_csv[metric]:
+                      w.writerow(rec)
+
+          # Write MD results
           check_body_path.write_text(check_body + "\n", encoding="utf-8")
           pr_comment_path.write_text(comment_body + "\n", encoding="utf-8")

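
The CSVs are plain `csv.DictWriter` output, so they can be loaded back with nothing but the stdlib. A small sketch, assuming a `runtime_fps.csv` downloaded into the working directory:

    import csv
    from pathlib import Path

    with Path("runtime_fps.csv").open(newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    # DictReader yields strings only; numeric columns need explicit conversion.
    alerts = [row for row in rows if row.get("status") == "alert"]
    print(f"{len(alerts)} regressed benchmark(s) out of {len(rows)}")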
@@ -369,3 +435,13 @@ jobs:
                 text: process.env.CHECK_BODY || undefined
               }
             });
+
+      - name: Upload benchmark comparisons in CSV
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-comparison-tables
+          path: |
+            runtime_fps.csv
+            compile_time.csv
+          if-no-files-found: warn
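
For completeness: GitHub serves uploaded artifacts as zip archives, so once the artifact has been downloaded the tables are one extraction away; a sketch assuming it was saved as `benchmark-comparison-tables.zip`:

    import zipfile

    # Hypothetical local filename; Actions artifacts download as .zip.
    with zipfile.ZipFile("benchmark-comparison-tables.zip") as zf:
        zf.extractall(".")  # extracts runtime_fps.csv and compile_time.csv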
