@@ -55,21 +55,24 @@ jobs:
5555
5656 # --- Parameters ---
5757 MAX_VALID_REVISIONS : 5
58- MAX_FETCH_REVISIONS : 100
58+ MAX_FETCH_REVISIONS : 50
5959 RUNTIME_REGRESSION_TOLERANCE_PCT : 10
6060 COMPILE_REGRESSION_TOLERANCE_PCT : 10
6161
6262 # Input/Output paths
6363 ARTIFACTS_DIR : ${{ steps.dl.outputs.download-path }}
6464 PR_COMMENT_PATH : pr_comment.md
6565 CHECK_BODY_PATH : check_output.md
66+ CSV_RUNTIME_PATH : runtime_fps.csv
67+ CSV_COMPILE_PATH : compile_time.csv
6668 EXIT_CODE_REGRESSION : 42
6769 run : |
6870 { python - << 'PY'; EXIT_CODE=$?; } || true
6971
7072 import os, sys, json, re, math, statistics
7173 import wandb
7274 from pathlib import Path
75+ import csv
7376
7477 # ----- arguments -----
7578
8689 pr_comment_path = Path(os.environ["PR_COMMENT_PATH"]).expanduser()
8790 check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser()
8891
92+ csv_files = {
93+ "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(),
94+ "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(),
95+ }
96+
8997 # ---------- helpers ----------
9098
9199 METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor")
@@ -97,6 +105,16 @@ jobs:
97105 kv = dict(map(str.strip, token.split("=", 1)) for token in bid.split("-"))
98106 return _normalize_kv_id(kv)
99107
def parse_norm_id(nbid: str) -> dict:
    """Split a normalized benchmark ID ("k1=v1-k2=v2-...") into a dict.

    Chunks without an '=' (including empty ones) are silently skipped;
    keys and values are whitespace-stripped. A falsy input yields {}.
    """
    pairs = {}
    for chunk in (nbid or "").split("-"):
        chunk = chunk.strip()
        if "=" not in chunk:
            continue
        key, _, val = chunk.partition("=")
        pairs[key.strip()] = val.strip()
    return pairs
117+
100118 def artifacts_parse_csv_summary(current_txt_path):
101119 out = {}
102120 for line in current_txt_path.read_text().splitlines():
@@ -139,24 +157,19 @@ jobs:
139157 api = wandb.Api()
140158 runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")
141159
142- is_complete = False
160+ revs = set()
143161 records_by_rev = {}
144162 for i, run in enumerate(runs_iter):
145163 # Abort if still not complete after checking enough runs.
146164 # This would happen if a new benchmark has been added, and not enough past data is available yet.
147- if i == MAX_FETCH_REVISIONS:
165+ if len(revs) == MAX_FETCH_REVISIONS:
148166 break
149167
150168 # Early return if enough complete records have been collected
151169 records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()]
152- is_complete = sum(records_is_complete) == MAX_VALID_REVISIONS
153- if is_complete:
170+ if sum(records_is_complete) == MAX_VALID_REVISIONS:
154171 break
155172
156- # Skip runs did not finish for some reason
157- if run.state != "finished":
158- continue
159-
160173 # Load config and summary, with support of legacy runs
161174 config, summary = run.config, run.summary
162175 if isinstance(config, str):
@@ -167,13 +180,18 @@ jobs:
167180 # Extract revision commit and branch
168181 try:
169182 rev, branch = config["revision"].split("@", 1)
183+ revs.add(rev)
170184 except ValueError:
171185 # Ignore this run if the revision has been corrupted for some unknown reason
172186 continue
173187 # Ignore runs associated with a commit that is not part of the official repository
174188 if not branch.startswith('Genesis-Embodied-AI/'):
175189 continue
176190
191+ # Skip runs did not finish for some reason
192+ if run.state != "finished":
193+ continue
194+
177195 # Do not store new records if the desired number of revision is already reached
178196 if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev:
179197 continue
@@ -205,50 +223,87 @@ jobs:
205223
206224 # ----- build TWO tables -----
207225
# Parse benchmark IDs into key-value dicts
id2kv = {bid: parse_norm_id(bid) for bid in current_bm}
# Union of all parameter names across benchmark IDs, in sorted order.
# NOTE: a set *of* `kv.keys()` views would raise TypeError (dict_keys is
# unhashable) and would not yield individual parameter names anyway.
params_name = sorted({k for kv in id2kv.values() for k in kv})
229+
208230 reg_found = False
209231 tables = {}
232+ rows_for_csv = {"runtime_fps": [], "compile_time": []}
233+ info = {}
210234 for metric, alias in (("runtime_fps", "FPS"), ("compile_time", "compile")):
211- rows = []
235+ rows_md = []
236+
237+ header_cells = (
238+ "status",
239+ *params_name,
240+ f"current {alias}",
241+ f"baseline {alias} [last (mean ± std)] (*1)",
242+ f"Δ {alias} (*2)"
243+ )
244+ header = "| " + " | ".join(header_cells) + " |"
245+ align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|"
246+
212247 for bid in sorted(current_bm.keys()):
213248 value_cur = current_bm[bid][metric]
214249 is_int = isinstance(value_cur, int) or value_cur.is_integer()
215250 value_repr = fmt_num(value_cur, is_int)
216251
# Look up the parsed key/value pairs for this benchmark ID *before*
# building the row (previously `kv` was read here but only assigned
# further down the loop, raising NameError on the first iteration).
kv = id2kv[bid]
# Seed every column so all CSV rows share one key set; the branches
# below overwrite "status" and the baseline stats when available.
info = {
    "status": "n/a",
    **{k: kv.get(k, "-") for k in params_name},
    "current": value_cur,
    "baseline_last": None,
    "baseline_mean": None,
    "baseline_min": None,
    "baseline_max": None,
}
259+
217260 values_prev = [
218261 record[bid][metric]
219262 for record in records_by_rev.values()
220263 if bid in record
221264 ]
222265 if values_prev:
266+ value_last = values_prev[0]
223267 value_ref = statistics.fmean(values_prev)
224- delta = (value_cur - value_ref) / value_ref * 100.0
268+ delta = (value_cur - value_last) / value_last * 100.0
269+
270+ info["baseline_last"] = int(value_last) if is_int else float(value_last)
225271
226- stats_repr = f"{fmt_num(values_prev[0] , is_int)}"
272+ stats_repr = f"{fmt_num(value_last , is_int)}"
227273 delta_repr = f"{delta:+.1f}%"
228274 if len(values_prev) == MAX_VALID_REVISIONS:
275+ info["baseline_mean"] = int(value_ref) if is_int else float(value_ref)
276+ info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev))
277+ info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev))
278+
229279 value_std = statistics.stdev(values_prev)
230280 stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})"
231- if abs(delta) > METRICS_TOL[metrics]:
281+ if abs(delta) > METRICS_TOL[metric]:
282+ info["status"] = "alert"
283+
232284 delta_repr = f"**{delta_repr}**"
233285 picto = "🔴"
234286 reg_found = True
235287 else:
288+ info["status"] = "ok"
289+
236290 picto = "✅"
237291 else:
292+ info["status"] = "n/a"
293+
238294 picto = "ℹ️"
239295 else:
240296 picto, stats_repr, delta_repr = "ℹ️", "---", "---"
241297
242- rows.append([picto, f"`{bid}`", value_repr, stats_repr, delta_repr])
298+ kv = id2kv[bid]
299+ key_cells = [kv.get(k, "-") for k in params_name]
243300
244- header = [
245- f"| status | benchmark ID | current {alias} | baseline {alias} [last (mean ± std)] | Δ {alias} |",
246- "|:------:|:-------------|-----------:|-------------:|------:|",
247- ]
248- tables[metric] = header + ["| " + " | ".join(r) + " |" for r in rows]
301+ rows_md.append("| " + " | ".join([picto] + key_cells + [value_repr, stats_repr, delta_repr]) + " |")
302+ rows_for_csv[metric].append(info)
249303
250- # ----- baseline commit list -----
304+ tables[metric] = [header, align] + rows_md
251305
306+ # ----- baseline commit list (MD) -----
252307 blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)]
253308 baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist
254309
@@ -270,6 +325,9 @@ jobs:
270325 "",
271326 "### Compile Time",
272327 *tables["compile_time"],
328+ "",
329+ f"- (*1) last: last commit on main, mean/std: stats over revs {MAX_VALID_REVISIONS} commits if available.",
330+ f"- (*2) Δ: relative difference between PR and last commit on main, i.e. (PR - main) / main * 100%.",
273331 ]
274332 )
275333
@@ -280,7 +338,15 @@ jobs:
280338 else:
281339 comment_body = ""
282340
283- # Write files
# CSV files: one table per metric.
for metric in ("runtime_fps", "compile_time"):
    rows = rows_for_csv[metric]
    # Derive the header from the rows themselves (order-preserving union)
    # instead of the loop-leaked `info` dict, which could be empty (no
    # rows) or miss keys set only in some branches -> DictWriter would
    # raise ValueError on rows carrying extra keys.
    fieldnames = list(dict.fromkeys(key for rec in rows for key in rec))
    with csv_files[metric].open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(rows)
348+
349+ # write md results
284350 check_body_path.write_text(check_body + "\n", encoding="utf-8")
285351 pr_comment_path.write_text(comment_body + "\n", encoding="utf-8")
286352
@@ -369,3 +435,13 @@ jobs:
369435 text: process.env.CHECK_BODY || undefined
370436 }
371437 });
438+
# Publish the per-metric comparison tables so they can be downloaded
# from the workflow run even when the job fails. Only CSV files are
# produced by the script, so the step name no longer mentions JSONL.
- name : Upload benchmark comparison tables (CSV)
  if : always()
  uses : actions/upload-artifact@v4
  with :
    name : benchmark-comparison-tables
    path : |
      runtime_fps.csv
      compile_time.csv
    if-no-files-found : warn
0 commit comments