 
 - name : Install deps
 run : |
- python -m pip install --quiet --upgrade wandb
+ python -m pip install --quiet --upgrade wandb frozendict
 
 - name : Download artifacts from triggering run
 id : dl
@@ -55,21 +55,25 @@ jobs:
 
 # --- Parameters ---
 MAX_VALID_REVISIONS : 5
- MAX_FETCH_REVISIONS : 100
+ MAX_FETCH_REVISIONS : 40
 RUNTIME_REGRESSION_TOLERANCE_PCT : 10
 COMPILE_REGRESSION_TOLERANCE_PCT : 10
 
 # Input/Output paths
 ARTIFACTS_DIR : ${{ steps.dl.outputs.download-path }}
 PR_COMMENT_PATH : pr_comment.md
 CHECK_BODY_PATH : check_output.md
+ CSV_RUNTIME_PATH : runtime_fps.csv
+ CSV_COMPILE_PATH : compile_time.csv
 EXIT_CODE_REGRESSION : 42
 run : |
 { python - << 'PY'; EXIT_CODE=$?; } || true
 
 import os, sys, json, re, math, statistics
 import wandb
+ from frozendict import frozendict
 from pathlib import Path
+ import csv
 
 # ----- arguments -----
 
@@ -86,16 +90,52 @@ jobs:
 pr_comment_path = Path(os.environ["PR_COMMENT_PATH"]).expanduser()
 check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser()
 
+ csv_files = {
+ "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(),
+ "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(),
+ }
+
 # ---------- helpers ----------
 
 METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor")
 
- def _normalize_kv_id(kv: dict) -> str:
- return "-".join(f"{k}={v}" for k, v in sorted(kv.items()))
-
- def normalize_benchmark_id(bid: str) -> str:
- kv = dict(map(str.strip, token.split("=", 1)) for token in bid.split("-"))
- return _normalize_kv_id(kv)
+ def parse_benchmark_id(bid: str) -> dict:
+ kv = {}
+ if bid:
+ for token in bid.split("-"):
+ token = token.strip()
+ if token and "=" in token:
+ k, v = token.split("=", 1)
+ kv[k.strip()] = v.strip()
+ return kv
+
+ def normalize_benchmark_id(bid: str) -> frozendict[str, str]:
+ return frozendict(parse_benchmark_id(bid))
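+ # Illustrative example (hypothetical benchmark ID):
+ # normalize_benchmark_id("n_envs=4096-solver=Newton")
+ # -> frozendict({"n_envs": "4096", "solver": "Newton"})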
+
+ def get_param_names(bids: tuple[tuple[str, ...], ...]) -> tuple[str, ...]:
+ """
+ Merge a sequence of key tuples into a single tuple of keys that:
+ - Preserves the relative order of keys within each tuple
+ - Gives precedence to the ordering of the last tuple; remaining keys follow in first-seen order
+ """
+ merged = list(bids[-1])
+ merged_set = set(merged)
+ for tup in bids[:-1]:
+ for key in tup:
+ if key not in merged_set:
+ merged.append(key)
+ merged_set.add(key)
+ return tuple(merged)
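+ # e.g. get_param_names((("a", "b"), ("b", "c"))) returns ("b", "c", "a"):
+ # the last tuple comes first, then unseen keys from the earlier tuples.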
+
+ def sort_key(d):
+ # Order rows by the benchmark parameters, listing rows that lack a
+ # given parameter after those that define it.
+ key_list = []
+ for col in params_name:
+ if col in d:
+ val = d[col]
+ key_list.append((0, val))
+ else:
+ key_list.append((1, None))
+ return key_list
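+ # e.g. with params_name = ("n_envs", "solver") (illustrative), an ID that
+ # omits "solver" sorts after every ID that defines it.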
 
 def artifacts_parse_csv_summary(current_txt_path):
 out = {}
@@ -107,8 +147,8 @@ jobs:
 record[k] = float(kv.pop(k))
 except (ValueError, TypeError, KeyError):
 pass
- bid = _normalize_kv_id(kv)
- out[bid] = record
+ nbid = frozendict(kv)
+ out[nbid] = record
 return out
 
 def fmt_num(v, is_int: bool):
@@ -125,7 +165,7 @@ jobs:
 current_bm = {}
 for csv_path in current_csv_paths:
 current_bm |= artifacts_parse_csv_summary(csv_path)
- bids_set = set(current_bm.keys())
+ bids_set = frozenset(current_bm.keys())
 assert bids_set
 
 # ----- W&B baselines -----
@@ -139,24 +179,19 @@ jobs:
 api = wandb.Api()
 runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")
 
- is_complete = False
+ revs = set()
 records_by_rev = {}
 for i, run in enumerate(runs_iter):
 # Abort if still not complete after checking enough runs.
 # This would happen if a new benchmark has been added, and not enough past data is available yet.
- if i == MAX_FETCH_REVISIONS:
+ if len(revs) == MAX_FETCH_REVISIONS:
 break
 
 # Early return if enough complete records have been collected
 records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()]
- is_complete = sum(records_is_complete) == MAX_VALID_REVISIONS
- if is_complete:
+ if sum(records_is_complete) == MAX_VALID_REVISIONS:
 break
 
- # Skip runs did not finish for some reason
- if run.state != "finished":
- continue
-
 # Load config and summary, with support of legacy runs
 config, summary = run.config, run.summary
 if isinstance(config, str):
@@ -167,21 +202,25 @@ jobs:
 # Extract revision commit and branch
 try:
 rev, branch = config["revision"].split("@", 1)
+ revs.add(rev)
 except ValueError:
 # Ignore this run if the revision has been corrupted for some unknown reason
 continue
 # Ignore runs associated with a commit that is not part of the official repository
 if not branch.startswith('Genesis-Embodied-AI/'):
 continue
 
+ # Skip runs that did not finish for some reason
+ if run.state != "finished":
+ continue
+
 # Do not store new records if the desired number of revision is already reached
 if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev:
 continue
 
 # Extract benchmark ID and normalize it to make sure it does not depends on key ordering.
 # Note that the rigid body benchmark suite is the only one being supported for now.
 sid, bid = config["benchmark_id"].split("-", 1)
- nbid = normalize_benchmark_id(bid)
 if sid != "rigid_body":
 continue
 
@@ -199,56 +238,91 @@ jobs:
 continue
 
 # Store all the records into a dict
+ nbid = normalize_benchmark_id(bid)
 records_by_rev.setdefault(rev, {})[nbid] = {
 metric: summary[metric] for metric in METRIC_KEYS
 }
 
 # ----- build TWO tables -----
 
+ # Collect the ordered benchmark parameter names across all current IDs
+ params_name = get_param_names(tuple(tuple(kv.keys()) for kv in current_bm.keys()))
+
 reg_found = False
 tables = {}
+ rows_for_csv = {"runtime_fps": [], "compile_time": []}
+ info = {}
 for metric, alias in (("runtime_fps", "FPS"), ("compile_time", "compile")):
- rows = []
- for bid in sorted(current_bm.keys()):
+ rows_md = []
+
+ header_cells = (
+ "status",
+ *params_name,
+ f"current {alias}",
+ f"baseline {alias} [last (mean ± std)] (*1)",
+ f"Δ {alias} (*2)"
+ )
+ header = "| " + " | ".join(header_cells) + " |"
+ align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|"
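+ # e.g. with two parameter columns this yields "|:------:|:---|:---|---:|---:|---:|"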
+
+ for bid in sorted(current_bm.keys(), key=sort_key):
 value_cur = current_bm[bid][metric]
 is_int = isinstance(value_cur, int) or value_cur.is_integer()
 value_repr = fmt_num(value_cur, is_int)
 
+ params_repr = [bid.get(k, "-") for k in params_name]
+ # Initialize every CSV column up front so that all rows share the same
+ # schema; "status" defaults to "n/a" for benchmarks without any baseline.
+ info = {
+ "status": "n/a",
+ **dict(zip(params_name, params_repr)),
+ "current": value_cur,
+ "baseline_last": None,
+ "baseline_mean": None,
+ "baseline_min": None,
+ "baseline_max": None,
+ }
+
 values_prev = [
 record[bid][metric]
 for record in records_by_rev.values()
 if bid in record
 ]
 if values_prev:
+ value_last = values_prev[0]
 value_ref = statistics.fmean(values_prev)
- delta = (value_cur - value_ref) / value_ref * 100.0
+ delta = (value_cur - value_last) / value_last * 100.0
+
+ info["baseline_last"] = int(value_last) if is_int else float(value_last)
 
- stats_repr = f"{fmt_num(values_prev[0], is_int)}"
+ stats_repr = f"{fmt_num(value_last, is_int)}"
 delta_repr = f"{delta:+.1f}%"
 if len(values_prev) == MAX_VALID_REVISIONS:
+ info["baseline_mean"] = int(value_ref) if is_int else float(value_ref)
+ info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev))
+ info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev))
+
 value_std = statistics.stdev(values_prev)
 stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})"
- if abs(delta) > METRICS_TOL[metrics]:
+ if abs(delta) > METRICS_TOL[metric]:
+ info["status"] = "alert"
+
 delta_repr = f"**{delta_repr}**"
 picto = "🔴"
 reg_found = True
 else:
+ info["status"] = "ok"
+
 picto = "✅"
 else:
+ info["status"] = "n/a"
+
 picto = "ℹ️"
 else:
 picto, stats_repr, delta_repr = "ℹ️", "---", "---"
 
- rows.append([picto, f"`{bid}`", value_repr, stats_repr, delta_repr])
-
- header = [
- f"| status | benchmark ID | current {alias} | baseline {alias} [last (mean ± std)] | Δ {alias} |",
- "|:------:|:-------------|-----------:|-------------:|------:|",
- ]
- tables[metric] = header + ["| " + " | ".join(r) + " |" for r in rows]
+ rows_md.append("| " + " | ".join((picto, *params_repr, value_repr, stats_repr, delta_repr)) + " |")
+ rows_for_csv[metric].append(info)
 
- # ----- baseline commit list -----
+ tables[metric] = [header, align] + rows_md
 
+ # ----- baseline commit list (MD) -----
 blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)]
 baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist
 
@@ -270,6 +344,9 @@ jobs:
 "",
 "### Compile Time",
 *tables["compile_time"],
+ "",
+ f"- (*1) last: value for the most recent commit on main; mean/std: statistics over the last {MAX_VALID_REVISIONS} baseline commits, when available.",
+ "- (*2) Δ: relative difference between the PR and the last commit on main, i.e. (PR - main) / main * 100%.",
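+ # e.g. (illustrative) current = 110 FPS vs. 100 FPS on last main -> Δ = +10.0%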
 ]
 )
 
@@ -280,7 +357,15 @@ jobs:
 else:
 comment_body = ""
 
- # Write files
+ # CSV files (all rows share the same schema, so any row provides the fieldnames)
+ for metric in ("runtime_fps", "compile_time"):
+ with csv_files[metric].open("w", newline="", encoding="utf-8") as f:
+ w = csv.DictWriter(f, fieldnames=info.keys())
+ w.writeheader()
+ for rec in rows_for_csv[metric]:
+ w.writerow(rec)
+
+ # Write MD results
 check_body_path.write_text(check_body + "\n", encoding="utf-8")
 pr_comment_path.write_text(comment_body + "\n", encoding="utf-8")
 
@@ -317,6 +402,16 @@ jobs:
 echo "CONCLUSION=$([ "$EXIT_CODE" = "0" ] && echo 'success' || echo 'failure')" >> "$GITHUB_ENV"
 echo "HAS_REGRESSIONS=$([ "$EXIT_CODE" = "$EXIT_CODE_REGRESSION" ] && echo 1 || echo 0)" >> "$GITHUB_ENV"
 
+ - name : Upload benchmark comparison tables as CSV
+ id : upload
+ uses : actions/upload-artifact@v4
+ with :
+ name : benchmark-comparison-tables
+ path : |
+ runtime_fps.csv
+ compile_time.csv
+ if-no-files-found : warn
+
 - name : Add PR comment
 if : ${{ env.SCRIPT_OUTPUT != '' }}
 uses : actions/github-script@v8
@@ -344,15 +439,21 @@ jobs:
 });
 
 - name : Publish PR check
- if : always()
 uses : actions/github-script@v8
 env :
 CHECK_NAME : Benchmark Comparison
- CHECK_BODY : ${{ env.CHECK_OUTPUT }}
+ CHECK_OUTPUT : ${{ env.CHECK_OUTPUT }}
 CONCLUSION : ${{ env.CONCLUSION }}
 HAS_REGRESSIONS : ${{ env.HAS_REGRESSIONS }}
+ ARTIFACT_URL : ${{ steps.upload.outputs.artifact-url }}
 with :
 script : |
+ const artifactUrl = process.env.ARTIFACT_URL || '';
+ let body = process.env.CHECK_OUTPUT || '';
+ if (body && artifactUrl) {
+ body += `\n\n**Artifact:** [Download raw data](${artifactUrl})`;
+ }
+
 const summary = (process.env.HAS_REGRESSIONS || '0') === '1'
 ? '🔴 Regressions detected. See tables below.'
 : '✅ No regressions detected. See tables below.';
@@ -366,6 +467,6 @@ jobs:
 output: {
 title: process.env.CHECK_NAME,
 summary,
- text: process.env.CHECK_BODY || undefined
+ text: body || undefined
 }
 });