 
 - name : Install deps
 run : |
- python -m pip install --quiet --upgrade wandb
+ python -m pip install --quiet --upgrade wandb frozendict
 
 - name : Download artifacts from triggering run
 id : dl
@@ -55,21 +55,25 @@ jobs:
 
 # --- Parameters ---
 MAX_VALID_REVISIONS : 5
- MAX_FETCH_REVISIONS : 100
+ MAX_FETCH_REVISIONS : 40
 RUNTIME_REGRESSION_TOLERANCE_PCT : 10
 COMPILE_REGRESSION_TOLERANCE_PCT : 10
 
 # Input/Output paths
 ARTIFACTS_DIR : ${{ steps.dl.outputs.download-path }}
 PR_COMMENT_PATH : pr_comment.md
 CHECK_BODY_PATH : check_output.md
+ CSV_RUNTIME_PATH : runtime_fps.csv
+ CSV_COMPILE_PATH : compile_time.csv
 EXIT_CODE_REGRESSION : 42
 run : |
 { python - << 'PY'; EXIT_CODE=$?; } || true
 
 import os, sys, json, re, math, statistics
 import wandb
+ from frozendict import frozendict
 from pathlib import Path
+ import csv
 
 # ----- arguments -----
 
@@ -86,16 +90,52 @@ jobs:
 pr_comment_path = Path(os.environ["PR_COMMENT_PATH"]).expanduser()
 check_body_path = Path(os.environ["CHECK_BODY_PATH"]).expanduser()
 
+ csv_files = {
+ "runtime_fps": Path(os.environ["CSV_RUNTIME_PATH"]).expanduser().resolve(),
+ "compile_time": Path(os.environ["CSV_COMPILE_PATH"]).expanduser().resolve(),
+ }
+
 # ---------- helpers ----------
 
 METRIC_KEYS = ("compile_time", "runtime_fps", "realtime_factor")
 
- def _normalize_kv_id(kv: dict) -> str:
- return "-".join(f"{k}={v}" for k, v in sorted(kv.items()))
-
- def normalize_benchmark_id(bid: str) -> str:
- kv = dict(map(str.strip, token.split("=", 1)) for token in bid.split("-"))
- return _normalize_kv_id(kv)
+ def parse_benchmark_id(bid: str) -> dict:
+ kv = {}
+ if bid:
+ for token in bid.split("-"):
+ token = token.strip()
+ if token and "=" in token:
+ k, v = token.split("=", 1)
+ kv[k.strip()] = v.strip()
+ return kv
+
+ def normalize_benchmark_id(bid: str) -> frozendict[str, str]:
+ return frozendict(parse_benchmark_id(bid))
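+ # Illustrative example (hypothetical benchmark ID):
+ # normalize_benchmark_id("n_envs=4096-solver=Newton")
+ # -> frozendict({"n_envs": "4096", "solver": "Newton"})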
+
+ def get_param_names(bids: tuple[tuple[str, ...], ...]) -> tuple[str, ...]:
+ """
+ Merge a sequence of key tuples into a single tuple of keys that:
+ - Preserves the relative order of keys within each tuple
+ - Gives precedence to the ordering of the last tuple; remaining keys follow in first-seen order
+ """
+ merged = list(bids[-1])
+ merged_set = set(merged)
+ for tup in bids[:-1]:
+ for key in tup:
+ if key not in merged_set:
+ merged.append(key)
+ merged_set.add(key)
+ return tuple(merged)
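+ # e.g. get_param_names((("a", "b"), ("b", "c"))) returns ("b", "c", "a"):
+ # the last tuple comes first, then unseen keys from the earlier tuples.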
+
+ def sort_key(d):
+ # Order rows by the benchmark parameters, listing rows that lack a
+ # given parameter after those that define it.
+ key_list = []
+ for col in params_name:
+ if col in d:
+ val = d[col]
+ key_list.append((0, val))
+ else:
+ key_list.append((1, None))
+ return key_list
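+ # e.g. with params_name = ("n_envs", "solver") (illustrative), an ID that
+ # omits "solver" sorts after every ID that defines it.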
 
 def artifacts_parse_csv_summary(current_txt_path):
 out = {}
@@ -107,8 +147,8 @@ jobs:
 record[k] = float(kv.pop(k))
 except (ValueError, TypeError, KeyError):
 pass
- bid = _normalize_kv_id(kv)
- out[bid] = record
+ nbid = frozendict(kv)
+ out[nbid] = record
 return out
 
 def fmt_num(v, is_int: bool):
@@ -125,7 +165,7 @@ jobs:
 current_bm = {}
 for csv_path in current_csv_paths:
 current_bm |= artifacts_parse_csv_summary(csv_path)
- bids_set = set(current_bm.keys())
+ bids_set = frozenset(current_bm.keys())
 assert bids_set
 
 # ----- W&B baselines -----
@@ -139,24 +179,19 @@ jobs:
 api = wandb.Api()
 runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")
 
- is_complete = False
+ revs = set()
 records_by_rev = {}
 for i, run in enumerate(runs_iter):
 # Abort if still not complete after checking enough runs.
 # This would happen if a new benchmark has been added, and not enough past data is available yet.
- if i == MAX_FETCH_REVISIONS:
+ if len(revs) == MAX_FETCH_REVISIONS:
 break
 
 # Early return if enough complete records have been collected
 records_is_complete = [bids_set.issubset(record.keys()) for record in records_by_rev.values()]
- is_complete = sum(records_is_complete) == MAX_VALID_REVISIONS
- if is_complete:
+ if sum(records_is_complete) == MAX_VALID_REVISIONS:
 break
 
- # Skip runs did not finish for some reason
- if run.state != "finished":
- continue
-
 # Load config and summary, with support of legacy runs
 config, summary = run.config, run.summary
 if isinstance(config, str):
@@ -167,21 +202,25 @@ jobs:
 # Extract revision commit and branch
 try:
 rev, branch = config["revision"].split("@", 1)
+ revs.add(rev)
 except ValueError:
 # Ignore this run if the revision has been corrupted for some unknown reason
 continue
 # Ignore runs associated with a commit that is not part of the official repository
 if not branch.startswith('Genesis-Embodied-AI/'):
 continue
 
+ # Skip runs that did not finish for some reason
+ if run.state != "finished":
+ continue
+
 # Do not store new records if the desired number of revision is already reached
 if len(records_by_rev) == MAX_VALID_REVISIONS and rev not in records_by_rev:
 continue
 
 # Extract benchmark ID and normalize it to make sure it does not depends on key ordering.
 # Note that the rigid body benchmark suite is the only one being supported for now.
 sid, bid = config["benchmark_id"].split("-", 1)
- nbid = normalize_benchmark_id(bid)
 if sid != "rigid_body":
 continue
 
@@ -199,56 +238,91 @@ jobs:
 continue
 
 # Store all the records into a dict
+ nbid = normalize_benchmark_id(bid)
 records_by_rev.setdefault(rev, {})[nbid] = {
 metric: summary[metric] for metric in METRIC_KEYS
 }
 
 # ----- build TWO tables -----
 
+ # Collect the ordered benchmark parameter names across all current IDs
+ params_name = get_param_names(tuple(tuple(kv.keys()) for kv in current_bm.keys()))
+
 reg_found = False
 tables = {}
+ rows_for_csv = {"runtime_fps": [], "compile_time": []}
+ info = {}
 for metric, alias in (("runtime_fps", "FPS"), ("compile_time", "compile")):
- rows = []
- for bid in sorted(current_bm.keys()):
+ rows_md = []
+
+ header_cells = (
+ "status",
+ *params_name,
+ f"current {alias}",
+ f"baseline {alias} [last (mean ± std)] (*1)",
+ f"Δ {alias} (*2)"
+ )
+ header = "| " + " | ".join(header_cells) + " |"
+ align = "|:------:|" + "|".join([":---" for _ in params_name]) + "|---:|---:|---:|"
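+ # e.g. with two parameter columns this yields "|:------:|:---|:---|---:|---:|---:|"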
+
+ for bid in sorted(current_bm.keys(), key=sort_key):
 value_cur = current_bm[bid][metric]
 is_int = isinstance(value_cur, int) or value_cur.is_integer()
 value_repr = fmt_num(value_cur, is_int)
 
+ params_repr = [bid.get(k, "-") for k in params_name]
+ # Initialize every CSV column up front so that all rows share the same
+ # schema; "status" defaults to "n/a" for benchmarks without any baseline.
+ info = {
+ "status": "n/a",
+ **dict(zip(params_name, params_repr)),
+ "current": value_cur,
+ "baseline_last": None,
+ "baseline_mean": None,
+ "baseline_min": None,
+ "baseline_max": None,
+ }
+
 values_prev = [
 record[bid][metric]
 for record in records_by_rev.values()
 if bid in record
 ]
 if values_prev:
+ value_last = values_prev[0]
 value_ref = statistics.fmean(values_prev)
- delta = (value_cur - value_ref) / value_ref * 100.0
+ delta = (value_cur - value_last) / value_last * 100.0
+
+ info["baseline_last"] = int(value_last) if is_int else float(value_last)
 
- stats_repr = f"{fmt_num(values_prev[0], is_int)}"
+ stats_repr = f"{fmt_num(value_last, is_int)}"
 delta_repr = f"{delta:+.1f}%"
 if len(values_prev) == MAX_VALID_REVISIONS:
+ info["baseline_mean"] = int(value_ref) if is_int else float(value_ref)
+ info["baseline_min"] = int(min(values_prev)) if is_int else float(min(values_prev))
+ info["baseline_max"] = int(max(values_prev)) if is_int else float(max(values_prev))
+
 value_std = statistics.stdev(values_prev)
 stats_repr += f" ({fmt_num(value_ref, is_int)} ± {fmt_num(value_std, is_int)})"
- if abs(delta) > METRICS_TOL[metrics]:
+ if abs(delta) > METRICS_TOL[metric]:
+ info["status"] = "alert"
+
 delta_repr = f"**{delta_repr}**"
 picto = "🔴"
 reg_found = True
 else:
+ info["status"] = "ok"
+
 picto = "✅"
 else:
+ info["status"] = "n/a"
+
 picto = "ℹ️"
 else:
 picto, stats_repr, delta_repr = "ℹ️", "---", "---"
 
- rows.append([picto, f"`{bid}`", value_repr, stats_repr, delta_repr])
-
- header = [
- f"| status | benchmark ID | current {alias} | baseline {alias} [last (mean ± std)] | Δ {alias} |",
- "|:------:|:-------------|-----------:|-------------:|------:|",
- ]
- tables[metric] = header + ["| " + " | ".join(r) + " |" for r in rows]
+ rows_md.append("| " + " | ".join((picto, *params_repr, value_repr, stats_repr, delta_repr)) + " |")
+ rows_for_csv[metric].append(info)
 
- # ----- baseline commit list -----
+ tables[metric] = [header, align] + rows_md
 
+ # ----- baseline commit list (MD) -----
 blist = [f"- Commit {i}: {sha}" for i, sha in enumerate(records_by_rev.keys(), 1)]
 baseline_block = ["**Baselines considered:** " + f"**{len(records_by_rev)}** commits"] + blist
 
@@ -270,6 +344,9 @@ jobs:
 "",
 "### Compile Time",
 *tables["compile_time"],
+ "",
+ f"- (*1) last: value for the most recent commit on main; mean/std: statistics over the last {MAX_VALID_REVISIONS} baseline commits, when available.",
+ "- (*2) Δ: relative difference between the PR and the last commit on main, i.e. (PR - main) / main * 100%.",
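+ # e.g. (illustrative) current = 110 FPS vs. 100 FPS on last main -> Δ = +10.0%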
 ]
 )
 
@@ -280,7 +357,15 @@ jobs:
 else:
 comment_body = ""
 
- # Write files
+ # CSV files (all rows share the same schema, so any row provides the fieldnames)
+ for metric in ("runtime_fps", "compile_time"):
+ with csv_files[metric].open("w", newline="", encoding="utf-8") as f:
+ w = csv.DictWriter(f, fieldnames=info.keys())
+ w.writeheader()
+ for rec in rows_for_csv[metric]:
+ w.writerow(rec)
+
+ # Write MD results
 check_body_path.write_text(check_body + "\n", encoding="utf-8")
 pr_comment_path.write_text(comment_body + "\n", encoding="utf-8")
 
@@ -317,6 +402,16 @@ jobs:
 echo "CONCLUSION=$([ "$EXIT_CODE" = "0" ] && echo 'success' || echo 'failure')" >> "$GITHUB_ENV"
 echo "HAS_REGRESSIONS=$([ "$EXIT_CODE" = "$EXIT_CODE_REGRESSION" ] && echo 1 || echo 0)" >> "$GITHUB_ENV"
 
+ - name : Upload benchmark comparison tables as CSV
+ id : upload
+ uses : actions/upload-artifact@v4
+ with :
+ name : benchmark-comparison-tables
+ path : |
+ runtime_fps.csv
+ compile_time.csv
+ if-no-files-found : warn
+
 - name : Add PR comment
 if : ${{ env.SCRIPT_OUTPUT != '' }}
 uses : actions/github-script@v8
@@ -344,15 +439,21 @@ jobs:
 });
 
 - name : Publish PR check
- if : always()
 uses : actions/github-script@v8
 env :
 CHECK_NAME : Benchmark Comparison
- CHECK_BODY : ${{ env.CHECK_OUTPUT }}
+ CHECK_OUTPUT : ${{ env.CHECK_OUTPUT }}
 CONCLUSION : ${{ env.CONCLUSION }}
 HAS_REGRESSIONS : ${{ env.HAS_REGRESSIONS }}
+ ARTIFACT_URL : ${{ steps.upload.outputs.artifact-url }}
 with :
 script : |
+ const artifactUrl = process.env.ARTIFACT_URL || '';
+ let body = process.env.CHECK_OUTPUT || '';
+ if (body && artifactUrl) {
+ body += `\n\n**Artifact:** [Download raw data](${artifactUrl})`;
+ }
+
 const summary = (process.env.HAS_REGRESSIONS || '0') === '1'
 ? '🔴 Regressions detected. See tables below.'
 : '✅ No regressions detected. See tables below.';
@@ -366,6 +467,6 @@ jobs:
 output: {
 title: process.env.CHECK_NAME,
 summary,
- text: process.env.CHECK_BODY || undefined
+ text: body || undefined
 }
 });