Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Change Logs
0.8.3
+++++

* :pr:`322`: support rerunning onnx kernels with torch intermediate results in side-by-side
* :pr:`314`: fix modelbuilder download needed after this change https://github.com/microsoft/onnxruntime-genai/pull/1862
* :pr:`311`: use custom and local function to use PackedMultiHeadAttention from onnxruntime
* :pr:`310`: splits patches into multiple files
Expand Down
37 changes: 37 additions & 0 deletions _doc/cmds/sbs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,40 @@ CPU, CUDA

Inputs are saved :func:`torch.save`. The execution will run on CUDA
if the device of the inputs is CUDA, same goes on CPU.

Example
+++++++

.. code-block::

    python -m onnx_diagnostic sbs \
        -i qwen_2_5_vl_instruct_visual.inputs.pt \
        --ep test_imagetext2text_qwen_2_5_vl_instruct_visual.cuda.float16.custom.graph.ep.pt2 \
        -m test_imagetext2text_qwen_2_5_vl_instruct_visual.cuda.float16.custom.onnx \
        -o results.dynamo.float16.xlsx \
        -v 1 --atol=0.1 --rtol=1 \
        --replay-names conv3d,rsqrt,to_4,mul_48,linear,linear_2,linear_84,linear_89,mul_172,linear_156,linear_159 \
        -2 --reset conv3d

A snippet of the table it produces:

::

ep_name onnx_name ep_target onnx_op_type onnx_id_output ep_shape_type onnx_shape_type err_abs
transpose_18 transpose_18 aten.transpose.int Transpose 0 GT10s16x1292x80 GT10s16x1292x80 0.0083
unsqueeze_50 unsqueeze_50 aten.unsqueeze.default Unsqueeze 0 GT10s1x16x1292x80 GT10s1x16x1292x80 0.0083
eq_20 eq_20 aten.eq.Scalar Equal 0 GT9s1292x1292 GT9s1292x1292 0
unsqueeze_56 unsqueeze_56 aten.unsqueeze.default Unsqueeze 0 GT9s1x1x1292x1292 GT9s1x1x1292x1292 0
slice_29 slice_29 aten.slice.Tensor Slice 0 GT9s1x1x1292x1292 GT9s1x1x1292x1292 0
transpose_19 transpose_19 aten.transpose.int Transpose 0 GT10s1x1292x16x80 GT10s1x1292x16x80 0.0071
reshape_20 reshape_20 aten.reshape.default Reshape 0 GT10s1292x1280 GT10s1292x1280 0.0071
linear_21 linear_21 aten.linear.default Gemm 0 GT10s1292x1280 GT10s1292x1280 0.0015
mul_54 mul_54 aten.mul.Tensor SkipSimplifiedLayerNormalization 0 GT10s1292x1280 GT10s1292x1280 0.0098
add_32 add_32 aten.add.Tensor SkipSimplifiedLayerNormalization 3 GT10s1292x1280 GT10s1292x1280 0.0313
linear_22 linear_22 aten.linear.default Gemm 0 GT10s1292x3420 GT10s1292x3420 0.0078
silu_4 silu_4 aten.silu.default QuickGelu 0 GT10s1292x3420 GT10s1292x3420 0.0059

The available columns are described by
:class:`RunAlignedRecord <onnx_diagnostic.torch_onnx.sbs_dataclasses.RunAlignedRecord>`.
It is possible to dump pieces of the model to study some particular input
with :class:`ReplayConfiguration <onnx_diagnostic.torch_onnx.sbs_dataclasses.ReplayConfiguration>`.
89 changes: 81 additions & 8 deletions _unittests/ut_torch_onnx/test_sbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ def forward(self, x):
use_tensor=True,
),
)
df = pandas.DataFrame(list(results))
df = pandas.DataFrame(list(results)).dropna(axis=1, how="all")
df.to_excel(self.get_dump_file("test_sbs_model_with_weights_custom.xlsx"))
self.assertEqual(
[
Expand All @@ -390,8 +390,8 @@ def forward(self, x):
"ep_time_run",
"err_abs",
"err_dev",
"err_h001",
"err_h01",
"err_nan",
"err_rel",
"onnx_id_node",
"onnx_id_output",
Expand Down Expand Up @@ -445,7 +445,7 @@ def forward(self, x):
use_tensor=True,
),
)
df = pandas.DataFrame(list(results))
df = pandas.DataFrame(list(results)).dropna(axis=1, how="all")
df.to_excel(self.get_dump_file("test_sbs_model_with_weights_dynamo.xlsx"))
self.assertEqual(
[
Expand All @@ -456,8 +456,8 @@ def forward(self, x):
"ep_time_run",
"err_abs",
"err_dev",
"err_h001",
"err_h01",
"err_nan",
"err_rel",
"onnx_id_node",
"onnx_id_output",
Expand Down Expand Up @@ -542,7 +542,7 @@ def forward(self, x):
reset_names=["linear"],
),
)
df = pandas.DataFrame(list(results))
df = pandas.DataFrame(list(results)).dropna(axis=1, how="all")
df.to_excel(self.get_dump_file("test_sbs_model_with_weights_custom_reset.xlsx"))
onnx_op_type = df["onnx_op_type"].tolist()
self.assertEqual(onnx_op_type.count("reset"), 1)
Expand Down Expand Up @@ -593,10 +593,83 @@ def forward(self, x):
),
),
)
df = pandas.DataFrame(list(results))
df = pandas.DataFrame(list(results)).dropna(axis=1, how="all")
df.to_excel(self.get_dump_file("test_sbs_replay.xlsx"))
print(df)
# self.clean_dump()
self.assertEqual(df.shape, (8, 16))
self.clean_dump()

@hide_stdout()
@ignore_warnings((DeprecationWarning, FutureWarning, UserWarning))
def test_sbs_run_onnx_with_torch_inputs(self):
    """Checks ``run_aligned`` when ``run_onnx_with_torch_inputs=True``.

    A small two-layer perceptron is exported with :func:`torch.export.export`,
    converted with the custom exporter and then compared side by side.
    The test verifies that the second set of error columns
    (``err_abs2``, ``err_dev2``, ``err_h0012``, ``err_h012``, ``err_rel2``)
    shows up next to the regular ones, that eight records are produced,
    and that the onnx node indices follow the expected sequence.
    """
    torch = self.torch

    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            # 10 input features -> 32 hidden units -> 1 output
            self.fc1 = torch.nn.Linear(10, 32)
            self.relu = torch.nn.ReLU()
            self.fc2 = torch.nn.Linear(32, 1)

        def forward(self, x):
            hidden = self.relu(self.fc1(x))
            return self.fc2(hidden)

    inputs = {"x": torch.randn((5, 10))}
    dynamic_shapes = {"x": {0: "batch"}}

    # Eager sanity check on a throw-away instance before exporting a fresh one.
    Model()(**inputs)
    ep = torch.export.export(
        Model(), (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(dynamic_shapes)
    )

    filename = self.get_dump_file("test_sbs_run_onnx_with_torch_inputs.onnx")
    to_onnx(ep, exporter="custom", filename=filename)
    onx = onnx.load(filename)

    results = list(
        run_aligned(
            ep,
            onx,
            kwargs=inputs,
            run_cls=OnnxruntimeEvaluator,
            verbose=11,
            use_tensor=True,
            run_onnx_with_torch_inputs=True,
        )
    )

    # Columns that hold no value at all are dropped before comparing the names.
    df = pandas.DataFrame(results).dropna(axis=1, how="all")
    df.to_excel(self.get_dump_file("test_sbs_run_onnx_with_torch_inputs.xlsx"))

    expected_columns = [
        "comment",
        "ep_id_node",
        "ep_name",
        "ep_shape_type",
        "ep_target",
        "ep_time_run",
        "err_abs",
        "err_abs2",
        "err_dev",
        "err_dev2",
        "err_h001",
        "err_h0012",
        "err_h01",
        "err_h012",
        "err_rel",
        "err_rel2",
        "onnx_id_node",
        "onnx_id_output",
        "onnx_name",
        "onnx_op_type",
        "onnx_shape_type",
        "onnx_time_run",
    ]
    self.assertEqual(expected_columns, sorted(df.columns))
    self.assertEqual(len(results), 8)

    observed_err_dev = [record.err_dev for record in results]
    self.assertEqual([0, 0, 0, 0, None, 0, 0, 0], observed_err_dev)

    # Missing node ids are replaced by -10 so the comparison never involves NaN.
    observed_node_ids = df["onnx_id_node"].fillna(-10).tolist()
    self.assertEqual([-1, -1, -1, -1, -1, 0, 1, 2], observed_node_ids)
    self.clean_dump()


if __name__ == "__main__":
Expand Down
20 changes: 18 additions & 2 deletions onnx_diagnostic/_command_lines_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,19 @@ def get_parser_sbs() -> ArgumentParser:
default=False,
help="First runs the whole model.",
)
parser.add_argument(
"-2",
"--second-run",
action=BooleanOptionalAction,
default=False,
help=textwrap.dedent(
"""
Tries to run all onnx nodes with torch results produced by the exported
program. It then measures the discrepancies again. It can be used
to identify kernel introduces discrepancies from other just propagating them.
"""
),
)
parser.add_argument(
"--reset",
required=False,
Expand Down Expand Up @@ -1365,6 +1378,7 @@ def _size(name):
reset_names=args.reset.split(","),
exc=False,
replay_configuration=replay_configuration,
run_onnx_with_torch_inputs=args.second_run,
):
data.append(obs)
if (
Expand All @@ -1377,8 +1391,10 @@ def _size(name):
)
df.to_excel(args.output)
print(f"-- final saves into {args.output!r}")
df = pandas.DataFrame(data).apply(
lambda col: col.fillna("") if col.dtype == "object" else col
df = (
pandas.DataFrame(data)
.apply(lambda col: col.fillna("") if col.dtype == "object" else col)
.dropna(axis=1, how="all")
)
df.to_excel(args.output, index=False)
print("-- done")
Expand Down
Loading
Loading