Commit 1e0b64f

doc
1 parent 1da11e2 commit 1e0b64f

4 files changed: +63, -4 lines changed

_doc/cmds/sbs.rst

Lines changed: 37 additions & 0 deletions

@@ -20,3 +20,40 @@ CPU, CUDA
 
 Inputs are saved with :func:`torch.save`. The execution runs on CUDA
 if the device of the inputs is CUDA, and on CPU otherwise.
+
+Example
++++++++
+
+.. code-block::
+
+    python -m onnx_diagnostic sbs \
+        -i qwen_2_5_vl_instruct_visual.inputs.pt \
+        --ep test_imagetext2text_qwen_2_5_vl_instruct_visual.cuda.float16.custom.graph.ep.pt2 \
+        -m test_imagetext2text_qwen_2_5_vl_instruct_visual.cuda.float16.custom.onnx \
+        -o results.dynamo.float16.xlsx \
+        -v 1 --atol=0.1 --rtol=1 \
+        --replay-names conv3d,rsqrt,to_4,mul_48,linear,linear_2,linear_84,linear_89,mul_172,linear_156,linear_159 \
+        -2 --reset conv3d
+
+A snippet of the table it produces:
+
+::
+
+    ep_name       onnx_name     ep_target               onnx_op_type                      onnx_id_output  ep_shape_type      onnx_shape_type    err_abs
+    transpose_18  transpose_18  aten.transpose.int      Transpose                         0               GT10s16x1292x80    GT10s16x1292x80    0.0083
+    unsqueeze_50  unsqueeze_50  aten.unsqueeze.default  Unsqueeze                         0               GT10s1x16x1292x80  GT10s1x16x1292x80  0.0083
+    eq_20         eq_20         aten.eq.Scalar          Equal                             0               GT9s1292x1292      GT9s1292x1292      0
+    unsqueeze_56  unsqueeze_56  aten.unsqueeze.default  Unsqueeze                         0               GT9s1x1x1292x1292  GT9s1x1x1292x1292  0
+    slice_29      slice_29      aten.slice.Tensor       Slice                             0               GT9s1x1x1292x1292  GT9s1x1x1292x1292  0
+    transpose_19  transpose_19  aten.transpose.int      Transpose                         0               GT10s1x1292x16x80  GT10s1x1292x16x80  0.0071
+    reshape_20    reshape_20    aten.reshape.default    Reshape                           0               GT10s1292x1280     GT10s1292x1280     0.0071
+    linear_21     linear_21     aten.linear.default     Gemm                              0               GT10s1292x1280     GT10s1292x1280     0.0015
+    mul_54        mul_54        aten.mul.Tensor         SkipSimplifiedLayerNormalization  0               GT10s1292x1280     GT10s1292x1280     0.0098
+    add_32        add_32        aten.add.Tensor         SkipSimplifiedLayerNormalization  3               GT10s1292x1280     GT10s1292x1280     0.0313
+    linear_22     linear_22     aten.linear.default     Gemm                              0               GT10s1292x3420     GT10s1292x3420     0.0078
+    silu_4        silu_4        aten.silu.default       QuickGelu                         0               GT10s1292x3420     GT10s1292x3420     0.0059
+
+The available columns are described by
+:class:`RunAlignedRecord <onnx_diagnostic.torch_onnx.sbs_dataclasses.RunAlignedRecord>`.
+It is possible to dump pieces of the model to study a particular input
+with :class:`ReplayConfiguration <onnx_diagnostic.torch_onnx.sbs_dataclasses.ReplayConfiguration>`.
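
The spreadsheet written by the command above can then be explored with pandas to focus on the rows with the largest discrepancies. This is a hedged post-processing sketch, not part of the commit; it only assumes the output file name and the err_abs / ep_name / onnx_op_type columns shown in the snippet above::

    import pandas as pd

    # load the side-by-side report written by the command above
    df = pd.read_excel("results.dynamo.float16.xlsx")

    # keep the rows whose absolute error exceeds the tolerance given on the command line
    worst = df[df["err_abs"] > 0.1].sort_values("err_abs", ascending=False)

    # which operators dominate the discrepancies?
    print(worst[["ep_name", "onnx_op_type", "err_abs"]].head(20))
    print(worst["onnx_op_type"].value_counts())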

_unittests/ut_torch_onnx/test_sbs.py

Lines changed: 6 additions & 1 deletion

@@ -390,6 +390,7 @@ def forward(self, x):
                 "ep_time_run",
                 "err_abs",
                 "err_dev",
+                "err_h001",
                 "err_h01",
                 "err_rel",
                 "onnx_id_node",
@@ -455,6 +456,7 @@ def forward(self, x):
                 "ep_time_run",
                 "err_abs",
                 "err_dev",
+                "err_h001",
                 "err_h01",
                 "err_rel",
                 "onnx_id_node",
@@ -593,7 +595,7 @@ def forward(self, x):
         )
         df = pandas.DataFrame(list(results)).dropna(axis=1, how="all")
         df.to_excel(self.get_dump_file("test_sbs_replay.xlsx"))
-        self.assertEqual(df.shape, (8, 15))
+        self.assertEqual(df.shape, (8, 16))
         self.clean_dump()
 
     @hide_stdout()
@@ -637,6 +639,7 @@ def forward(self, x):
         df.to_excel(self.get_dump_file("test_sbs_run_onnx_with_torch_inputs.xlsx"))
         self.assertEqual(
             [
+                "comment",
                 "ep_id_node",
                 "ep_name",
                 "ep_shape_type",
@@ -646,6 +649,8 @@ def forward(self, x):
                 "err_abs2",
                 "err_dev",
                 "err_dev2",
+                "err_h001",
+                "err_h0012",
                 "err_h01",
                 "err_h012",
                 "err_rel",

onnx_diagnostic/torch_onnx/sbs.py

Lines changed: 3 additions & 3 deletions

@@ -63,7 +63,7 @@ def _loop_cmp(
 
     to = mapping_onnx_to_torch.get(onnx_name, onnx_name)
     if to in torch_results:
-        d = max_diff(torch_results[to], onnx_result, hist=[0.1])
+        d = max_diff(torch_results[to], onnx_result, hist=[0.1, 0.01])
         if verbose > 1:
             if onnx_name == to:
                 print(f"[run_aligned-==] cmp {to}: {string_diff(d)}")
@@ -92,7 +92,7 @@ def _loop_cmp(
         )
         r.set_diff(d)
         if second_onnx_result is not None:
-            d2 = max_diff(torch_results[to], second_onnx_result, hist=[0.1])
+            d2 = max_diff(torch_results[to], second_onnx_result, hist=[0.1, 0.01])
             r.set_diff2(d2)
     mapping_onnx_to_torch[onnx_name] = to
     return r
@@ -942,7 +942,7 @@ def forward(self, x):
                 max_diff(
                     t,
                     onnx_results[torch_names_to_onnx_names[node.name]],
-                    hist=[0.1],
+                    hist=[0.1, 0.01],
                 )
             )
         yield record.check(already_yielded)
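
These three calls now pass hist=[0.1, 0.01] so that max_diff also reports how many values differ by more than 0.01, which feeds the new err_h001 (and err_h0012) columns. A minimal, self-contained illustration of what those ">0.1" / ">0.01" histogram entries count, not the library's max_diff implementation::

    import torch


    def histogram_counts(expected: torch.Tensor, got: torch.Tensor, thresholds=(0.1, 0.01)) -> dict:
        # count, per threshold, how many elements differ by more than that threshold;
        # this is what the ">0.1" / ">0.01" entries of the diff dictionary represent
        err = (expected.to(torch.float32) - got.to(torch.float32)).abs()
        return {f">{t}": int((err > t).sum()) for t in thresholds}


    expected = torch.randn(1292, 1280)
    got = expected + 0.02 * torch.randn_like(expected)  # simulated low-precision noise
    print(histogram_counts(expected, got))  # e.g. {'>0.1': 0, '>0.01': ...}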

onnx_diagnostic/torch_onnx/sbs_dataclasses.py

Lines changed: 17 additions & 0 deletions

@@ -205,6 +205,17 @@ def get_replay_code(self) -> str:
             print()
             print("-- end --")
             print()
+
+            if False:
+                # CUDA profiling
+                with torch.profiler.profile(
+                    activities=[torch.profiler.ProfilerActivity.CUDA],
+                    record_shapes=True,
+                    with_stack=True,
+                ) as prof:
+                    sess.run(None, ep_feeds)
+                obj = prof.key_averages()
+                print(obj.table())
             """
         )
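
The profiling block added to the generated replay code is emitted disabled (if False:); enabling it in the generated script prints a kernel-level CUDA timing table for the replayed session. A standalone sketch of the same torch.profiler pattern, with the table sorted by CUDA time; the model and inputs below are placeholders and a CUDA device is required::

    import torch

    # standalone sketch of the same profiling pattern; model and inputs are placeholders
    model = torch.nn.Linear(1280, 1280).cuda().half()
    inputs = torch.randn(1292, 1280, device="cuda", dtype=torch.float16)

    with torch.profiler.profile(
        activities=[torch.profiler.ProfilerActivity.CUDA],
        record_shapes=True,
        with_stack=True,
    ) as prof:
        model(inputs)

    # sorting by CUDA time puts the most expensive kernels first
    print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))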

@@ -329,6 +340,7 @@ class RunAlignedRecord:
     :param err_dev: 0 if the device is the same, 1 if not
     :param err_nan: number of nan values disagreeing
     :param err_h01: number of values for which the discrepancy is above 0.1
+    :param err_h001: number of values for which the discrepancy is above 0.01
     :param ep_time_run: execution time for the exported program
     :param onnx_time_run: execution time for the onnx model, that includes
         the creation of the onnx model so that's probably not very usable
@@ -337,6 +349,7 @@ class RunAlignedRecord:
     :param err_dev2: same as `err_dev` if onnx kernel is run with torch results
     :param err_nan2: same as `err_nan` if onnx kernel is run with torch results
     :param err_h012: same as `err_h01` if onnx kernel is run with torch results
+    :param err_h0012: same as `err_h001` if onnx kernel is run with torch results
     :param comment: any additional information
     """
 
@@ -354,13 +367,15 @@ class RunAlignedRecord:
     err_dev: Optional[float] = None
     err_nan: Optional[float] = None
     err_h01: Optional[float] = None
+    err_h001: Optional[float] = None
     ep_time_run: Optional[float] = None
     onnx_time_run: Optional[float] = None
     err_abs2: Optional[float] = None
     err_rel2: Optional[float] = None
     err_dev2: Optional[float] = None
     err_nan2: Optional[float] = None
     err_h012: Optional[float] = None
+    err_h0012: Optional[float] = None
     comment: Optional[float] = None
 
     def __post_init__(self):
@@ -384,6 +399,7 @@ def set_diff(self, diff: Dict[str, Any]) -> Self:
         self.err_nan = diff["nan"]
         if "rep" in diff:
             self.err_h01 = diff["rep"][">0.1"]
+            self.err_h001 = diff["rep"][">0.01"]
         return self
 
     def set_diff2(self, diff: Dict[str, Any]) -> Self:
@@ -400,6 +416,7 @@ def set_diff2(self, diff: Dict[str, Any]) -> Self:
         self.err_nan2 = diff["nan"]
         if "rep" in diff:
             self.err_h012 = diff["rep"][">0.1"]
+            self.err_h0012 = diff["rep"][">0.01"]
         return self
 
     @property
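
For reference, set_diff / set_diff2 simply copy the histogram entries of the diff dictionary into the record. A self-contained sketch with a hypothetical stand-in dataclass (the real RunAlignedRecord has many more fields)::

    from dataclasses import dataclass
    from typing import Any, Dict, Optional


    @dataclass
    class _Record:
        # hypothetical stand-in reduced to the fields touched by this commit
        err_nan: Optional[float] = None
        err_h01: Optional[float] = None
        err_h001: Optional[float] = None

        def set_diff(self, diff: Dict[str, Any]) -> "_Record":
            # mirrors the mapping added in sbs_dataclasses.py
            self.err_nan = diff["nan"]
            if "rep" in diff:
                self.err_h01 = diff["rep"][">0.1"]
                self.err_h001 = diff["rep"][">0.01"]
            return self


    # the keys mimic the dictionary returned by max_diff(..., hist=[0.1, 0.01])
    print(_Record().set_diff({"nan": 0, "rep": {">0.1": 12, ">0.01": 340}}))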
