add command lines

xadupre · xadupre · commit 049c17436f05 · 2025-11-23T19:57:16.000+01:00
diff --git a/_unittests/ut_torch_onnx/test_sbs.py b/_unittests/ut_torch_onnx/test_sbs.py
@@ -23,7 +23,7 @@ def setUpClass(cls):
 
     def test_run_aligned_record(self):
         r = RunAlignedRecord(
-            ep_id_node=-1,
+            ep_id_node=1,
             onnx_id_node=-1,
             ep_name="A",
             onnx_name="B",
diff --git a/_unittests/ut_xrun_doc/test_command_lines_exe.py b/_unittests/ut_xrun_doc/test_command_lines_exe.py
@@ -112,6 +112,7 @@ def forward(self, x):
         input_file = self.get_dump_file("test_h_parser_sbs.inputs.pt")
         ep_file = self.get_dump_file("test_h_parser_sbs.ep")
         onnx_file = self.get_dump_file("test_h_parser_sbs.model.onnx")
+        replay_foler = self.get_dump_folder("test_h_parser_sbs.replay")
         torch.save(inputs, input_file)
         to_onnx(
             Model(),
@@ -139,6 +140,10 @@ def forward(self, x):
                     output,
                     "-m",
                     onnx_file,
+                    "-t",
+                    "Gemm",
+                    "-f",
+                    replay_foler,
                 ]
             )
         text = st.getvalue()
diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py
@@ -1140,6 +1140,14 @@ def get_parser_sbs() -> ArgumentParser:
             - torch.export.save(ep: torch.export.ExportedProgram)
             - torch.save(**inputs)
             - onnx.save(...)
+
+            The Replay functionality is just a way to investigate a part of a model.
+            It saves torch and onnx inputs, the torch outputs, and the minimal onnx model
+            which shares its inputs with the exported program.
+            This is used to investigate the discrepancies between the torch
+            model (through the exported program) and its onnx conversion.
+            This functionality dumps everything it can to disk
+            so that it can be replayed in a separate process.
             """
         ),
     )
@@ -1222,10 +1230,33 @@ def get_parser_sbs() -> ArgumentParser:
         ),
     )
     parser.add_argument(
-        "--gemmlinear",
-        action=BooleanOptionalAction,
-        default=False,
-        help="Replaces Gemm(A,X.T,B) by torch...linear(A,X,B) on onnx side",
+        "-s",
+        "--replay-threshold",
+        type=float,
+        required=False,
+        default=1e6,
+        help="Triggers the replay if the discrepancies are higher than this value.",
+    )
+    parser.add_argument(
+        "-n",
+        "--replay-names",
+        required=False,
+        default="",
+        help="Triggers the replay if a result name is in this set of values (comma separated)",
+    )
+    parser.add_argument(
+        "-t",
+        "--replay-op-types",
+        required=False,
+        default="",
+        help="Triggers the replay if an onnx type is in this set of values (comma separated)",
+    )
+    parser.add_argument(
+        "-f",
+        "--replay-folder",
+        required=False,
+        default="replay",
+        help="If the replay is triggered, this defines the folder where everything is dumped.",
     )
 
     return parser
@@ -1235,7 +1266,7 @@ def _cmd_sbs(argv: List[Any]):
     import pandas
     import torch
     from .helpers import flatten_object, max_diff, string_diff, string_type
-    from .torch_onnx.sbs import run_aligned
+    from .torch_onnx.sbs import run_aligned, ReplayConfiguration
     from .reference import OnnxruntimeEvaluator
 
     parser = get_parser_sbs()
@@ -1306,6 +1337,17 @@ def _size(name):
     onx = onnx.load(args.onnx)
     print(f"-- done in {time.perf_counter() - begin:1.1f}s")
 
+    replay_configuration = None
+    if args.replay_threshold < 1e6 or args.replay_names or args.replay_op_types:
+        replay_configuration = ReplayConfiguration(
+            threshold=args.replay_threshold,
+            selected_names=set(args.replay_names.split(",")) if args.replay_names else None,
+            selected_op_types=(
+                set(args.replay_op_types.split(",")) if args.replay_op_types else None
+            ),
+            dump_folder=args.replay_folder,
+        )
+
     print("-- starts side-by-side")
     ratio = int(args.ratio)
     data = []
@@ -1321,6 +1363,7 @@ def _size(name):
         use_tensor=True,
         reset_names=args.reset.split(","),
         exc=False,
+        replay_configuration=replay_configuration,
     ):
         data.append(obs)
         if (
diff --git a/onnx_diagnostic/torch_onnx/sbs.py b/onnx_diagnostic/torch_onnx/sbs.py
@@ -184,14 +184,17 @@ class ReplayConfiguration:
         pieces to investigate
     :param selected_names: list of results names to dump
     :param selected_op_types: list of onnx operators to dump
-    :param threshold: only keep thoses whose discrepancies is greater than that threshold
+    :param threshold: only keep those whose discrepancies are greater than that threshold
     """
 
     dump_folder: str
     selected_names: Optional[Set[str]] = None
     selected_op_types: Optional[Set[str]] = None
     threshold: float = 0.1
 
+    def __post_init__(self):
+        assert self.dump_folder, "dump_folder is empty and this is not allowed for the replay"
+
     def select(
         self,
         name: Optional[str] = None,