Skip to content

Commit 3eff4cb

Browse files
author
Malmahrouqi3
committed
Improve bench_diff
1 parent 4f0704f commit 3eff4cb

File tree

2 files changed

+56
-5
lines changed

2 files changed

+56
-5
lines changed

toolchain/mfc/args.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ def add_common_arguments(p: argparse.ArgumentParser, mask = None):
138138
add_common_arguments(bench_diff, "t")
139139
bench_diff.add_argument("lhs", metavar="LHS", type=str, help="Path to a benchmark result YAML file.")
140140
bench_diff.add_argument("rhs", metavar="RHS", type=str, help="Path to a benchmark result YAML file.")
141+
bench_diff.add_argument("-f", "--file", metavar="FILE", type=str, required=False, default=None, help="Path to the data file (e.g., data.js).")
142+
bench_diff.add_argument("-n", "--name", metavar="NAME", nargs="+", type=str, required=False, default=[], help="Test name (e.g. GT Phoenix (CPU)).")
141143

142144
# COUNT
143145
add_common_arguments(count, "g")
@@ -170,9 +172,6 @@ def add_common_arguments(p: argparse.ArgumentParser, mask = None):
170172
parser.print_help()
171173
exit(-1)
172174

173-
# "Slugify" the name of the job
174-
args["name"] = re.sub(r'[\W_]+', '-', args["name"])
175-
176175
# We need to check for some invalid combinations of arguments because of
177176
# the limitations of argparse.
178177
if args["command"] == "build":
@@ -181,6 +180,11 @@ def add_common_arguments(p: argparse.ArgumentParser, mask = None):
181180
if args["command"] == "run":
182181
if args["binary"] is not None and args["engine"] != "interactive":
183182
raise MFCException("./mfc.sh run's --binary can only be used with --engine=interactive.")
183+
if isinstance(args["name"], str):
184+
# "Slugify" the name of the job
185+
args["name"] = re.sub(r'[\W_]+', '-', args["name"]).strip('-')
186+
elif args["command"] == "bench_diff" and len(args["name"]) > 0:
187+
args["name"] = " ".join(args["name"])
184188

185189
# Input files to absolute paths
186190
for e in ["input", "input1", "input2"]:

toolchain/mfc/bench.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import os, sys, uuid, subprocess, dataclasses, typing, math
1+
import os, sys, uuid, subprocess, dataclasses, typing, math, json
22

33
import rich.table
44

@@ -106,16 +106,51 @@ def diff():
106106
Using intersection: {slugs} with {len(slugs)} elements.
107107
""")
108108

109+
cb_stats = {}
110+
if ARG("file") is not None and ARG("name") is not None:
111+
try:
112+
with open(ARG("file"), 'r') as f:
113+
data_json = json.load(f)
114+
115+
cb_test = ARG("name")
116+
if "entries" in data_json and cb_test in data_json["entries"]:
117+
benchmark_runs = data_json["entries"][cb_test]
118+
case_times = {}
119+
for run in benchmark_runs:
120+
if "benches" not in run:
121+
continue
122+
for bench in run["benches"]:
123+
case_name = bench.get("name")
124+
grind_value = bench.get("value")
125+
if case_name is None or grind_value is None:
126+
continue
127+
if case_name not in case_times:
128+
case_times[case_name] = []
129+
case_times[case_name].append(grind_value)
130+
for case_name, values in case_times.items():
131+
if len(values) > 0:
132+
avg = sum(values) / len(values)
133+
cb_stats[case_name] = {"avg": avg, "count": len(values)}
134+
135+
cons.print(f"[bold]Loaded cb data for test: [bold]{cb_test}[/bold] ({len(cb_stats)} cases)[/bold]")
136+
else:
137+
cons.print(f"[bold yellow]Warning[/bold yellow]: Test '[bold]{cb_test}[/bold]' not found in data file.")
138+
except Exception as e:
139+
cons.print(f"[bold yellow]Warning[/bold yellow]: Could not load data file: {e}")
140+
109141
table = rich.table.Table(show_header=True, box=rich.table.box.SIMPLE)
110142
table.add_column("[bold]Case[/bold]", justify="left")
111143
table.add_column("[bold]Pre Process[/bold]", justify="right")
112144
table.add_column("[bold]Simulation[/bold]", justify="right")
113145
table.add_column("[bold]Post Process[/bold]", justify="right")
146+
if cb_stats:
147+
table.add_column("[bold] CB (Grind)[/bold]", justify="right")
114148

115149
err = 0
116150
for slug in slugs:
117151
lhs_summary, rhs_summary = lhs["cases"][slug]["output_summary"], rhs["cases"][slug]["output_summary"]
118152
speedups = ['N/A', 'N/A', 'N/A']
153+
grind_comparison = 'N/A'
119154

120155
for i, target in enumerate(sorted(DEFAULT_TARGETS, key=lambda t: t.runOrder)):
121156
if (target.name not in lhs_summary) or (target.name not in rhs_summary):
@@ -140,11 +175,23 @@ def diff():
140175
if grind_time_value <0.95:
141176
cons.print(f"[bold red]Error[/bold red]: Benchmarking failed since grind time speedup for {target.name} below acceptable threshold (<0.95) - Case: {slug}")
142177
err = 1
178+
if slug in cb_stats:
179+
rhs_grind = rhs_summary[target.name]["grind"]
180+
stats = cb_stats[slug]
181+
avg = stats["avg"]
182+
offset_pct = ((rhs_grind - avg) / avg * 100)
183+
color = "red" if (offset_pct) > 0 else "yellow" if abs(offset_pct) == 0 else "green"
184+
grind_comparison = f"[{color}]{offset_pct:+.2f}%[/{color}] (avg: {avg:.2f})"
185+
143186
except Exception as _:
144187
pass
145188

146-
table.add_row(f"[magenta]{slug}[/magenta]", *speedups)
189+
row = [f"[magenta]{slug}[/magenta]", *speedups]
190+
if cb_stats:
191+
row.append(grind_comparison)
192+
table.add_row(*row)
147193

148194
cons.raw.print(table)
149195
if err:
150196
raise MFCException("Benchmarking failed")
197+

0 commit comments

Comments (0)