Commit 1e0b64f

doc
1 parent 1da11e2 commit 1e0b64f

4 files changed: +63, -4 lines changed

_doc/cmds/sbs.rst

Lines changed: 37 additions & 0 deletions

@@ -20,3 +20,40 @@ CPU, CUDA
 
 Inputs are saved with :func:`torch.save`. The execution runs on CUDA
 if the device of the inputs is CUDA, and on CPU otherwise.
+
+Example
++++++++
+
+.. code-block::
+
+    python -m onnx_diagnostic sbs \
+        -i qwen_2_5_vl_instruct_visual.inputs.pt \
+        --ep test_imagetext2text_qwen_2_5_vl_instruct_visual.cuda.float16.custom.graph.ep.pt2 \
+        -m test_imagetext2text_qwen_2_5_vl_instruct_visual.cuda.float16.custom.onnx \
+        -o results.dynamo.float16.xlsx \
+        -v 1 --atol=0.1 --rtol=1 \
+        --replay-names conv3d,rsqrt,to_4,mul_48,linear,linear_2,linear_84,linear_89,mul_172,linear_156,linear_159 \
+        -2 --reset conv3d
+
+A snippet of the table it produces:
+
+::
+
+    ep_name       onnx_name     ep_target               onnx_op_type                      onnx_id_output  ep_shape_type      onnx_shape_type    err_abs
+    transpose_18  transpose_18  aten.transpose.int      Transpose                         0               GT10s16x1292x80    GT10s16x1292x80    0.0083
+    unsqueeze_50  unsqueeze_50  aten.unsqueeze.default  Unsqueeze                         0               GT10s1x16x1292x80  GT10s1x16x1292x80  0.0083
+    eq_20         eq_20         aten.eq.Scalar          Equal                             0               GT9s1292x1292      GT9s1292x1292      0
+    unsqueeze_56  unsqueeze_56  aten.unsqueeze.default  Unsqueeze                         0               GT9s1x1x1292x1292  GT9s1x1x1292x1292  0
+    slice_29      slice_29      aten.slice.Tensor       Slice                             0               GT9s1x1x1292x1292  GT9s1x1x1292x1292  0
+    transpose_19  transpose_19  aten.transpose.int      Transpose                         0               GT10s1x1292x16x80  GT10s1x1292x16x80  0.0071
+    reshape_20    reshape_20    aten.reshape.default    Reshape                           0               GT10s1292x1280     GT10s1292x1280     0.0071
+    linear_21     linear_21     aten.linear.default     Gemm                              0               GT10s1292x1280     GT10s1292x1280     0.0015
+    mul_54        mul_54        aten.mul.Tensor         SkipSimplifiedLayerNormalization  0               GT10s1292x1280     GT10s1292x1280     0.0098
+    add_32        add_32        aten.add.Tensor         SkipSimplifiedLayerNormalization  3               GT10s1292x1280     GT10s1292x1280     0.0313
+    linear_22     linear_22     aten.linear.default     Gemm                              0               GT10s1292x3420     GT10s1292x3420     0.0078
+    silu_4        silu_4        aten.silu.default       QuickGelu                         0               GT10s1292x3420     GT10s1292x3420     0.0059
+
+The available columns are described by
+:class:`RunAlignedRecord <onnx_diagnostic.torch_onnx.sbs_dataclasses.RunAlignedRecord>`.
+It is possible to dump pieces of the model to study a particular input
+with :class:`ReplayConfiguration <onnx_diagnostic.torch_onnx.sbs_dataclasses.ReplayConfiguration>`.
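
The spreadsheet written by the command above can then be explored with pandas to focus on the rows with the largest discrepancies. This is a hedged post-processing sketch, not part of the commit; it only assumes the output file name and the err_abs / ep_name / onnx_op_type columns shown in the snippet above::

    import pandas as pd

    # load the side-by-side report written by the command above
    df = pd.read_excel("results.dynamo.float16.xlsx")

    # keep the rows whose absolute error exceeds the tolerance given on the command line
    worst = df[df["err_abs"] > 0.1].sort_values("err_abs", ascending=False)

    # which operators dominate the discrepancies?
    print(worst[["ep_name", "onnx_op_type", "err_abs"]].head(20))
    print(worst["onnx_op_type"].value_counts())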

_unittests/ut_torch_onnx/test_sbs.py

Lines changed: 6 additions & 1 deletion

@@ -390,6 +390,7 @@ def forward(self, x):
                 "ep_time_run",
                 "err_abs",
                 "err_dev",
+                "err_h001",
                 "err_h01",
                 "err_rel",
                 "onnx_id_node",
@@ -455,6 +456,7 @@ def forward(self, x):
                 "ep_time_run",
                 "err_abs",
                 "err_dev",
+                "err_h001",
                 "err_h01",
                 "err_rel",
                 "onnx_id_node",
@@ -593,7 +595,7 @@ def forward(self, x):
         )
         df = pandas.DataFrame(list(results)).dropna(axis=1, how="all")
         df.to_excel(self.get_dump_file("test_sbs_replay.xlsx"))
-        self.assertEqual(df.shape, (8, 15))
+        self.assertEqual(df.shape, (8, 16))
         self.clean_dump()
 
     @hide_stdout()
@@ -637,6 +639,7 @@ def forward(self, x):
         df.to_excel(self.get_dump_file("test_sbs_run_onnx_with_torch_inputs.xlsx"))
         self.assertEqual(
             [
+                "comment",
                 "ep_id_node",
                 "ep_name",
                 "ep_shape_type",
@@ -646,6 +649,8 @@ def forward(self, x):
                 "err_abs2",
                 "err_dev",
                 "err_dev2",
+                "err_h001",
+                "err_h0012",
                 "err_h01",
                 "err_h012",
                 "err_rel",

onnx_diagnostic/torch_onnx/sbs.py

Lines changed: 3 additions & 3 deletions

@@ -63,7 +63,7 @@ def _loop_cmp(
 
     to = mapping_onnx_to_torch.get(onnx_name, onnx_name)
     if to in torch_results:
-        d = max_diff(torch_results[to], onnx_result, hist=[0.1])
+        d = max_diff(torch_results[to], onnx_result, hist=[0.1, 0.01])
         if verbose > 1:
             if onnx_name == to:
                 print(f"[run_aligned-==] cmp {to}: {string_diff(d)}")
@@ -92,7 +92,7 @@ def _loop_cmp(
         )
         r.set_diff(d)
         if second_onnx_result is not None:
-            d2 = max_diff(torch_results[to], second_onnx_result, hist=[0.1])
+            d2 = max_diff(torch_results[to], second_onnx_result, hist=[0.1, 0.01])
             r.set_diff2(d2)
     mapping_onnx_to_torch[onnx_name] = to
     return r
@@ -942,7 +942,7 @@ def forward(self, x):
                 max_diff(
                     t,
                     onnx_results[torch_names_to_onnx_names[node.name]],
-                    hist=[0.1],
+                    hist=[0.1, 0.01],
                 )
             )
         yield record.check(already_yielded)
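
These three calls now pass hist=[0.1, 0.01] so that max_diff also reports how many values differ by more than 0.01, which feeds the new err_h001 (and err_h0012) columns. A minimal, self-contained illustration of what those ">0.1" / ">0.01" histogram entries count, not the library's max_diff implementation::

    import torch


    def histogram_counts(expected: torch.Tensor, got: torch.Tensor, thresholds=(0.1, 0.01)) -> dict:
        # count, per threshold, how many elements differ by more than that threshold;
        # this is what the ">0.1" / ">0.01" entries of the diff dictionary represent
        err = (expected.to(torch.float32) - got.to(torch.float32)).abs()
        return {f">{t}": int((err > t).sum()) for t in thresholds}


    expected = torch.randn(1292, 1280)
    got = expected + 0.02 * torch.randn_like(expected)  # simulated low-precision noise
    print(histogram_counts(expected, got))  # e.g. {'>0.1': 0, '>0.01': ...}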

onnx_diagnostic/torch_onnx/sbs_dataclasses.py

Lines changed: 17 additions & 0 deletions

@@ -205,6 +205,17 @@ def get_replay_code(self) -> str:
             print()
             print("-- end --")
             print()
+
+            if False:
+                # CUDA profiling
+                with torch.profiler.profile(
+                    activities=[torch.profiler.ProfilerActivity.CUDA],
+                    record_shapes=True,
+                    with_stack=True,
+                ) as prof:
+                    sess.run(None, ep_feeds)
+                obj = prof.key_averages()
+                print(obj.table())
             """
         )
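
The profiling block added to the generated replay code is emitted disabled (if False:); enabling it in the generated script prints a kernel-level CUDA timing table for the replayed session. A standalone sketch of the same torch.profiler pattern, with the table sorted by CUDA time; the model and inputs below are placeholders and a CUDA device is required::

    import torch

    # standalone sketch of the same profiling pattern; model and inputs are placeholders
    model = torch.nn.Linear(1280, 1280).cuda().half()
    inputs = torch.randn(1292, 1280, device="cuda", dtype=torch.float16)

    with torch.profiler.profile(
        activities=[torch.profiler.ProfilerActivity.CUDA],
        record_shapes=True,
        with_stack=True,
    ) as prof:
        model(inputs)

    # sorting by CUDA time puts the most expensive kernels first
    print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=10))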

@@ -329,6 +340,7 @@ class RunAlignedRecord:
     :param err_dev: 0 if the device is the same, 1 if not
     :param err_nan: number of nan values disagreeing
     :param err_h01: number of values for which the discrepancy is above 0.1
+    :param err_h001: number of values for which the discrepancy is above 0.01
     :param ep_time_run: execution time for the exported program
     :param onnx_time_run: execution time for the onnx model, that includes
         the creation of the onnx model so that's probably not very usable
@@ -337,6 +349,7 @@ class RunAlignedRecord:
     :param err_dev2: same as `err_dev` if onnx kernel is run with torch results
     :param err_nan2: same as `err_nan` if onnx kernel is run with torch results
     :param err_h012: same as `err_h01` if onnx kernel is run with torch results
+    :param err_h0012: same as `err_h001` if onnx kernel is run with torch results
     :param comment: any additional information
     """
 
@@ -354,13 +367,15 @@ class RunAlignedRecord:
     err_dev: Optional[float] = None
     err_nan: Optional[float] = None
     err_h01: Optional[float] = None
+    err_h001: Optional[float] = None
     ep_time_run: Optional[float] = None
     onnx_time_run: Optional[float] = None
     err_abs2: Optional[float] = None
     err_rel2: Optional[float] = None
     err_dev2: Optional[float] = None
     err_nan2: Optional[float] = None
     err_h012: Optional[float] = None
+    err_h0012: Optional[float] = None
     comment: Optional[float] = None
 
     def __post_init__(self):
@@ -384,6 +399,7 @@ def set_diff(self, diff: Dict[str, Any]) -> Self:
         self.err_nan = diff["nan"]
         if "rep" in diff:
             self.err_h01 = diff["rep"][">0.1"]
+            self.err_h001 = diff["rep"][">0.01"]
         return self
 
     def set_diff2(self, diff: Dict[str, Any]) -> Self:
@@ -400,6 +416,7 @@ def set_diff2(self, diff: Dict[str, Any]) -> Self:
         self.err_nan2 = diff["nan"]
         if "rep" in diff:
             self.err_h012 = diff["rep"][">0.1"]
+            self.err_h0012 = diff["rep"][">0.01"]
         return self
 
     @property
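
For reference, set_diff / set_diff2 simply copy the histogram entries of the diff dictionary into the record. A self-contained sketch with a hypothetical stand-in dataclass (the real RunAlignedRecord has many more fields)::

    from dataclasses import dataclass
    from typing import Any, Dict, Optional


    @dataclass
    class _Record:
        # hypothetical stand-in reduced to the fields touched by this commit
        err_nan: Optional[float] = None
        err_h01: Optional[float] = None
        err_h001: Optional[float] = None

        def set_diff(self, diff: Dict[str, Any]) -> "_Record":
            # mirrors the mapping added in sbs_dataclasses.py
            self.err_nan = diff["nan"]
            if "rep" in diff:
                self.err_h01 = diff["rep"][">0.1"]
                self.err_h001 = diff["rep"][">0.01"]
            return self


    # the keys mimic the dictionary returned by max_diff(..., hist=[0.1, 0.01])
    print(_Record().set_diff({"nan": 0, "rep": {">0.1": 12, ">0.01": 340}}))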
