
Commit b1f3aeb: "sbs"
4 files changed: +192 -50 lines

_unittests/ut_helpers/test_log_helper.py

Lines changed: 2 additions & 2 deletions

@@ -517,10 +517,10 @@ def test_cube_sbs(self):
         sbs, sbs_agg = cube.sbs(
             dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))
         )
-        self.assertEqual(sbs.shape, (4, 8))
+        self.assertEqual(sbs.shape, (4, 9))
         self.assertEqual(sbs.index.names, ["METRICS", "m_name"])
         self.assertEqual(sorted(sbs.columns.names), ["CONF", "exporter"])
-        self.assertEqual(sbs_agg.shape, (2, 8))
+        self.assertEqual(sbs_agg.shape, (2, 9))
         self.assertEqual(sbs_agg.index.names, ["METRICS"])
         self.assertEqual(sorted(sbs_agg.columns.names), ["CONF", "exporter"])

onnx_diagnostic/_command_lines_parser.py

Lines changed: 46 additions & 1 deletion

@@ -645,6 +645,27 @@ def _cmd_stats(argv: List[Any]):
     print("done.")


+class _ParseNamedDict(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        assert ":" in values, f"':' missing from {values!r}"
+        namespace_key, rest = values.split(":", 1)
+        pairs = rest.split(",")
+        inner_dict = {}
+
+        for pair in pairs:
+            if "=" not in pair:
+                raise argparse.ArgumentError(self, f"Expected '=' in pair '{pair}'")
+            key, value = pair.split("=", 1)
+            inner_dict[key] = value
+        assert inner_dict, f"Unable to parse {rest!r} into a dictionary"
+        if not hasattr(namespace, self.dest) or getattr(namespace, self.dest) is None:
+            setattr(namespace, self.dest, {})
+        assert isinstance(
+            getattr(namespace, self.dest), dict
+        ), f"Unexpected type for namespace.{self.dest}={getattr(namespace, self.dest)}"
+        getattr(namespace, self.dest).update({namespace_key: inner_dict})
+
+
 def get_parser_agg() -> ArgumentParser:
     parser = ArgumentParser(
         prog="agg",
@@ -653,6 +674,9 @@ def get_parser_agg() -> ArgumentParser:
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
+           The data can come from any CSV files produced by benchmarks;
+           it can concatenate many CSV files, or CSV files inside zip files.
+           It produces an Excel file with many tabs, one per view.
            """
        ),
        epilog=textwrap.dedent(
@@ -744,7 +768,15 @@ def get_parser_agg() -> ArgumentParser:
        "--views",
        default="agg-suite,agg-all,disc,speedup,time,time_export,err,cmd,"
        "bucket-speedup,raw-short,counts,peak-gpu,onnx",
-       help="Views to add to the output files.",
+       help=textwrap.dedent(
+           """
+           Views to add to the output files. Each view becomes a tab.
+           A view is defined by its name, among
+           agg-suite, agg-all, disc, speedup, time, time_export, err,
+           cmd, bucket-speedup, raw-short, counts, peak-gpu, onnx.
+           Their definition is part of class CubeLogsPerformance.
+           """
+       ),
    )
    parser.add_argument(
        "--csv",
@@ -764,6 +796,18 @@ def get_parser_agg() -> ArgumentParser:
        help="adds a filter to filter out data, syntax is\n"
        '``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
    )
+   parser.add_argument(
+       "--sbs",
+       help=textwrap.dedent(
+           """
+           Defines an exporter to compare to another; there must be at least
+           two configurations defined with --sbs. Example:
+           --sbs dynamo:exporter=onnx-dynamo,opt=ir,attn_impl=eager
+           --sbs custom:exporter=custom,opt=default,attn_impl=eager
+           """
+       ),
+       action=_ParseNamedDict,
+   )
    return parser


@@ -816,6 +860,7 @@ def _cmd_agg(argv: List[Any]):
        csv=args.csv.split(","),
        raw=args.raw,
        time_mask=True,
+       sbs=args.sbs,
    )
    if args.verbose:
        print(f"Wrote {args.output!r}")

onnx_diagnostic/helpers/_log_helper.py

Lines changed: 9 additions & 2 deletions

@@ -320,6 +320,7 @@ def apply_excel_style(
        Dict[str, Callable[[Any], "CubeViewDef.HighLightKind"]]  # noqa: F821
    ] = None,
    time_mask_view: Optional[Dict[str, pandas.DataFrame]] = None,
+   verbose: int = 0,
 ):
    """
    Applies styles on all sheets in a file unless the sheet is too big.
@@ -329,6 +330,7 @@ def apply_excel_style(
    :param time_mask_view: if specified, it contains dataframes with the same shape
        and values in {-1, 0, +1} indicating whether a value is unexpectedly
        lower (-1) or higher (+1); the background color changes accordingly
+   :param verbose: if positive, shows a progress loop
    """
    from openpyxl import load_workbook
    from openpyxl.styles import Alignment
@@ -353,8 +355,13 @@ def apply_excel_style(
        CubeViewDef.HighLightKind.GREEN: Font(color="00AA00"),
        CubeViewDef.HighLightKind.RED: Font(color="FF0000"),
    }
+   if verbose:
+       from tqdm import tqdm

-   for name in workbook.sheetnames:
+       sheet_names = tqdm(list(workbook.sheetnames))
+   else:
+       sheet_names = workbook.sheetnames
+   for name in sheet_names:
        if time_mask_view and name in time_mask_view:
            mask = time_mask_view[name]
            with pandas.ExcelWriter(io.BytesIO(), engine="openpyxl") as mask_writer:
@@ -367,7 +374,7 @@ def apply_excel_style(
        sheet = workbook[name]
        n_rows = sheet.max_row
        n_cols = sheet.max_column
-       if n_rows * n_cols > 2**18:
+       if n_rows * n_cols > 2**16 or n_rows > 2**13:
            # Too big.
            continue
        co: Dict[int, int] = {}
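The tightened guard now skips styling once a sheet exceeds 2**16 = 65,536 cells or 2**13 = 8,192 rows; the old limit was 2**18 = 262,144 cells with no row cap. A small illustration of the new rule, with hypothetical sheet sizes:

def too_big(n_rows: int, n_cols: int) -> bool:
    # new guard from the diff above
    return n_rows * n_cols > 2**16 or n_rows > 2**13

print(too_big(1_000, 20))   # False: 20,000 cells, 1,000 rows
print(too_big(500, 200))    # True: 100,000 cells exceed 2**16
print(too_big(10_000, 5))   # True: only 50,000 cells, but rows exceed 2**13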

onnx_diagnostic/helpers/log_helper.py

Lines changed: 135 additions & 45 deletions

@@ -932,6 +932,17 @@ def view(
        else:
            piv.sort_index(inplace=True, axis=1)

+       # final step, force columns with numerical values to be float
+       for c in list(piv.columns):
+           s = piv[c]
+           if not pandas.api.types.is_object_dtype(s):
+               continue
+           try:
+               sf = s.astype(float)
+           except (ValueError, TypeError):
+               continue
+           piv[c] = sf
+
        if verbose:
            print(f"[CubeLogs.view] levels {piv.index.names}, {piv.columns.names}")
            print(f"[CubeLogs.view] -- done view {view_def.name!r}")
@@ -974,7 +985,9 @@ def _dropna(
        for c in set(key_index) | set(key_columns):
            s = new_data[c]
            if s.isna().max():
-               if pandas.api.types.is_numeric_dtype(s):
+               if pandas.api.types.is_numeric_dtype(
+                   s
+               ) and not pandas.api.types.is_object_dtype(s):
                    min_v = s.dropna().min()
                    assert (
                        min_v >= 0
@@ -1011,7 +1024,7 @@ def describe(self) -> pandas.DataFrame:
            )
            if len(nonan) > 0:
                obs.update(dict(count=len(nonan)))
-               if is_numeric_dtype(nonan):
+               if is_numeric_dtype(nonan) and not pandas.api.types.is_object_dtype(nonan):
                    obs.update(
                        dict(
                            min=nonan.min(),
@@ -1048,6 +1061,7 @@ def to_excel(
        verbose: int = 0,
        csv: Optional[Sequence[str]] = None,
        time_mask: bool = False,
+       sbs: Optional[Dict[str, Dict[str, Any]]] = None,
    ):
        """
        Creates an excel file with a list of views.
@@ -1061,6 +1075,9 @@ def to_excel(
        :param time_mask: color the background of the cells if one
            of the values for the last date is unexpected,
            assuming they should remain stable
+       :param sbs: configurations to compare side-by-side, this adds two tabs,
+           one gathering raw data about the two configurations, the other one
+           aggregated by metrics
        """
        if verbose:
            print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
@@ -1175,6 +1192,36 @@ def to_excel(
                writer, sheet_name="raw", freeze_panes=(1, 1), index=True
            )

+       if sbs:
+           if verbose:
+               for k, v in sbs.items():
+                   print(f"[CubeLogs.to_excel] sbs {k}: {v}")
+           sbs_raw, sbs_agg = self.sbs(sbs)
+           name = "∧".join(sbs)
+           if verbose:
+               print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
+               print(
+                   f"[CubeLogs.to_excel] add sheet '{name}-AGG' "
+                   f"with shape {sbs_agg.shape}"
+               )
+           sbs_raw = sbs_raw.reset_index(drop=False)
+           sbs_raw.to_excel(
+               writer,
+               sheet_name=name,
+               freeze_panes=(
+                   sbs_raw.columns.nlevels + sbs_raw.index.nlevels,
+                   sbs_raw.index.nlevels,
+               ),
+           )
+           sbs_agg.to_excel(
+               writer,
+               sheet_name=f"{name}-AGG",
+               freeze_panes=(
+                   sbs_agg.columns.nlevels + sbs_agg.index.nlevels,
+                   sbs_agg.index.nlevels,
+               ),
+           )
+
        if plots:
            from openpyxl.drawing.image import Image

@@ -1206,7 +1253,9 @@ def to_excel(

        if verbose:
            print(f"[CubeLogs.to_excel] applies style to {output!r}")
-       apply_excel_style(writer, f_highlights, time_mask_view=time_mask_view)  # type: ignore[arg-type]
+       apply_excel_style(  # type: ignore[arg-type]
+           writer, f_highlights, time_mask_view=time_mask_view, verbose=verbose
+       )
        if verbose:
            print(f"[CubeLogs.to_excel] done with {len(views)} views")

@@ -1265,15 +1314,19 @@ def sbs(
        :param column_name: column to add with the name of the configuration
        :return: data and aggregated data
        """
+       assert (
+           len(configs) >= 2
+       ), f"A side by side needs at least two configs but configs={configs}"
        set_keys_time = set(self.keys_time)
        columns_index = None
        data_list = []
        for name_conf, conf in configs.items():
            if columns_index is None:
                columns_index = list(conf.keys())
-               assert (
-                   set(columns_index) <= set_keys_time
-               ), f"Configuration {conf} includes columns outside the keys."
+               assert set(columns_index) <= set_keys_time, (
+                   f"Configuration {conf} includes columns outside the keys "
+                   f"{', '.join(sorted(set_keys_time))}"
+               )
            else:
                assert set(columns_index) == set(conf), (
                    f"Every conf should share the same keys but conf={conf} "
@@ -1294,57 +1347,94 @@ def sbs(
        cube = self.clone(new_data, keys=[*self.keys_no_time, column_name])
        key_index = set(self.keys_time) - {*columns_index, column_name}  # type: ignore[misc]
        view = CubeViewDef(key_index=set(key_index), name="sbs", values=cube.values)  # type: ignore[arg-type]
-       res = cube.view(view)
-       res = res.stack("METRICS", future_stack=True)  # type: ignore[union-attr]
-       res = res.reorder_levels(
-           [res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
-       ).sort_index()
+       view_res = cube.view(view)

        # add metrics
-       index = list(res.columns.names).index(column_name)
+       index_column_name = list(view_res.columns.names).index(column_name)
+       index_metrics = list(view_res.columns.names).index("METRICS")

-       def _mkc(s, index=index):
-           c = ["" for c in res.columns.names]
-           c[index] = s
+       def _mkc(m, s):
+           c = ["" for c in view_res.columns.names]
+           c[index_column_name] = s
+           c[index_metrics] = m
            return tuple(c)

-       n_conf = res.shape[1]
-       mean_columns = list(res.columns)
+       list_configs = list(configs.items())
+       mean_columns = [
+           c
+           for c in view_res.columns
+           if pandas.api.types.is_numeric_dtype(view_res[c])
+           and not pandas.api.types.is_object_dtype(view_res[c])
+       ]
+       assert mean_columns, f"No numerical columns in {view_res.dtypes}"
+       view_res = view_res[mean_columns].copy()
+       metrics = sorted(set(c[index_metrics] for c in view_res.columns))
+       assert metrics, (
+           f"No numerical metrics detected in "
+           f"view_res.columns.names={view_res.columns.names}, "
+           f"columns={view_res.dtypes}"
+       )
        sum_columns = []
-       for i in range(n_conf):
-           c1 = res.columns[i]
-           n1 = c1[index]
-           if not pandas.api.types.is_numeric_dtype(res[c1].dtype):
-               continue
-           for j in range(i + 1, n_conf):
-               c2 = res.columns[j]
-               n2 = c2[index]
-               if not pandas.api.types.is_numeric_dtype(res[c2].dtype):
-                   continue
-               res[_mkc(f"∅{n1}∧∅{n2}")] = (res[c1].isna() & res[c2].isna()).astype(int)
-               res[_mkc(f"∅{n1}∧{n2}")] = (res[c1].isna() & ~res[c2].isna()).astype(int)
-               res[_mkc(f"{n1}∧∅{n2}")] = (~res[c1].isna() & res[c2].isna()).astype(int)
-               res[_mkc(f"{n1}∧{n2}")] = (~res[c1].isna() & ~res[c2].isna()).astype(int)
-               res[_mkc(f"{n1}<{n2}")] = (res[c1] < res[c2]).astype(int)
-               res[_mkc(f"{n1}>{n2}")] = (res[c1] > res[c2]).astype(int)
-               sum_columns.extend(
-                   [
-                       _mkc(f"∅{n1}∧∅{n2}"),
-                       _mkc(f"∅{n1}∧{n2}"),
-                       _mkc(f"{n1}∧∅{n2}"),
-                       _mkc(f"{n1}∧{n2}"),
-                       _mkc(f"{n1}<{n2}"),
-                       _mkc(f"{n1}>{n2}"),
-                   ]
-               )
+       columns_to_add = []
+       for i in range(len(list_configs)):
+           for j in range(i + 1, len(list_configs)):
+               for m in metrics:
+                   iname, ci = list_configs[i]
+                   jname, cj = list_configs[j]
+                   ci = ci.copy()
+                   cj = cj.copy()
+                   ci["METRICS"] = m
+                   cj["METRICS"] = m
+                   ci["CONF"] = iname
+                   cj["CONF"] = jname
+
+                   ci_name = tuple(ci[n] for n in view_res.columns.names)
+                   cj_name = tuple(cj[n] for n in view_res.columns.names)
+                   assert ci_name in view_res.columns or cj_name in view_res.columns, (
+                       f"Unable to find column {ci_name} or {cj_name} "
+                       f"in columns {view_res.columns}, metrics={metrics}"
+                   )
+                   if ci_name not in view_res.columns or cj_name not in view_res.columns:
+                       # One config does not have such a metric.
+                       continue
+
+                   si = view_res[ci_name]
+                   sj = view_res[cj_name]
+
+                   sinan = si.isna()
+                   sjnan = sj.isna()
+                   n1 = iname
+                   n2 = jname
+                   nas = pandas.DataFrame(
+                       {
+                           _mkc(m, f"∅{n1}∧∅{n2}"): (sinan & sjnan).astype(int),
+                           _mkc(m, f"∅{n1}∧{n2}"): (sinan & ~sjnan).astype(int),
+                           _mkc(m, f"{n1}∧∅{n2}"): (~sinan & sjnan).astype(int),
+                           _mkc(m, f"{n1}∧{n2}"): (~sinan & ~sjnan).astype(int),
+                           _mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
+                           _mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
+                           _mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
+                       }
+                   )
+                   nas.columns.names = view_res.columns.names
+                   columns_to_add.append(nas)
+                   sum_columns.extend(nas.columns)

        # aggregated metrics
        aggs = {
            **{k: "mean" for k in mean_columns},  # noqa: C420
            **{k: "sum" for k in sum_columns},  # noqa: C420
        }
-       agg = res.reset_index(level="METRICS").groupby("METRICS").agg(aggs)
-       return res, agg
+       view_res = pandas.concat([view_res, *columns_to_add], axis=1)
+       res = view_res.stack("METRICS", future_stack=True)  # type: ignore[union-attr]
+       res = res.reorder_levels(
+           [res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
+       ).sort_index()
+
+       view_res["GROUPBY"] = "A"
+       flat = view_res.groupby("GROUPBY").agg(aggs).reset_index(drop=True)
+       flat = flat.stack("METRICS", future_stack=True).droplevel(None, axis=0)
+       return res, flat


 class CubeLogsPerformance(CubeLogs):
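To make the new comparison concrete, here is a minimal standalone sketch of the indicator columns built above, on made-up metric values for two configurations A and B (names and data are illustrative, not from the commit):

import pandas as pd

si = pd.Series([1.0, None, 3.0, 4.0])  # metric values for config A
sj = pd.Series([2.0, 2.0, None, 4.0])  # metric values for config B
sinan, sjnan = si.isna(), sj.isna()
nas = pd.DataFrame(
    {
        "∅A∧∅B": (sinan & sjnan).astype(int),   # both values missing
        "∅A∧B": (sinan & ~sjnan).astype(int),   # only A missing
        "A∧∅B": (~sinan & sjnan).astype(int),   # only B missing
        "A∧B": (~sinan & ~sjnan).astype(int),   # both present
        "A<B": (si < sj).astype(int),
        "A==B": (si == sj).astype(int),
        "A>B": (si > sj).astype(int),
    }
)
# the aggregated tab sums these indicators, matching the "sum" entries of aggs
print(nas.sum())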
