1 change: 1 addition & 0 deletions CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
0.8.2
+++++

* :pr:`302`: adds helpers to analyse onnxruntime profiling
* :pr:`297`: experiment around a higher ops ``loop_for``
* :pr:`292`, :pr:`293`, :pr:`294`, :pr:`295`: new patches for Qwen models

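A minimal sketch of how the profiling helpers from :pr:`302` fit together, mirroring the tests below; ``model.onnx`` is a placeholder for any model whose input is named ``X``:

    import numpy as np
    import matplotlib.pyplot as plt
    import onnxruntime
    from onnx_diagnostic.helpers.rt_helper import js_profile_to_dataframe, plot_ort_profile

    sess_options = onnxruntime.SessionOptions()
    sess_options.enable_profiling = True  # ask onnxruntime to write a JSON trace
    sess = onnxruntime.InferenceSession(
        "model.onnx", sess_options, providers=["CPUExecutionProvider"]
    )
    for _ in range(11):  # several runs so the trace records multiple iterations
        sess.run(None, dict(X=np.arange(10).astype(np.float32)))
    prof = sess.end_profiling()  # returns the path of the JSON trace

    df = js_profile_to_dataframe(prof, first_it_out=True)  # one row per profiling event
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    plot_ort_profile(df, ax[0], ax[1], "profile")  # per-op durations and providers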
259 changes: 258 additions & 1 deletion _unittests/ut_helpers/test_rt_helper.py
@@ -1,19 +1,28 @@
import os
import unittest
import numpy as np
import onnx
import onnx.helper as oh
import onnx.numpy_helper as onh
import torch
from onnx_diagnostic.ext_test_case import (
ExtTestCase,
has_onnxruntime_genai,
hide_stdout,
ignore_warnings,
requires_torch,
requires_transformers,
skipif_ci_windows,
)
from onnx_diagnostic.helpers.rt_helper import (
onnx_generate,
generate_and_validate,
onnx_generate_with_genai,
name_type_to_onnx_dtype,
js_profile_to_dataframe,
plot_ort_profile_timeline,
plot_ort_profile,
_process_shape,
)
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches
@@ -24,6 +33,7 @@ class TestRtSession(ExtTestCase):
@requires_transformers("4.55")
@requires_torch("2.9")
@hide_stdout()
@ignore_warnings(FutureWarning)
def test_onnx_generate(self):
mid = "arnir0/Tiny-LLM"
print("-- test_onnx_generate: get model")
@@ -84,6 +94,253 @@ def test_name_type_to_onnx_dtype(self):
expected = getattr(onnx.TensorProto, name.upper())
self.assertEqual(expected, name_type_to_onnx_dtype(look))

def test_shapes(self):
tests = [
(
"U8[1x128x768]+F+U8",
[{"uint8": [1, 128, 768]}, {"float": []}, {"uint8": []}],
),
("F[1x128x768]", [{"float": [1, 128, 768]}]),
]
for expected, shapes in tests:
with self.subTest(shapes=shapes):
out = _process_shape(shapes)
self.assertEqual(expected, out)
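# Note: from the two cases above, _process_shape appears to map each
# {dtype: shape} entry to a short dtype code (uint8 -> U8, float -> F),
# render a non-empty shape as [d1xd2x...], and join entries with "+".
# A hypothetical third case, extrapolated from that pattern:
#     self.assertEqual("F[2x2]+U8[4]", _process_shape([{"float": [2, 2]}, {"uint8": [4]}]))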

def _get_model(self):  # tiny Add/Abs/Mul/Sub graph used by the profiling tests
model_def0 = oh.make_model(
oh.make_graph(
[
oh.make_node("Add", ["X", "init1"], ["X1"]),
oh.make_node("Abs", ["X"], ["X2"]),
oh.make_node("Add", ["X", "init3"], ["inter"]),
oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
oh.make_node("Sub", ["X2", "Xm"], ["final"]),
],
"test",
[oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [None])],
[oh.make_tensor_value_info("final", onnx.TensorProto.FLOAT, [None])],
[
onh.from_array(np.array([1], dtype=np.float32), name="init1"),
onh.from_array(np.array([3], dtype=np.float32), name="init3"),
],
),
opset_imports=[oh.make_opsetid("", 18)],
ir_version=9,
)
return model_def0

def test_js_profile_to_dataframe(self):
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.enable_profiling = True
sess = onnxruntime.InferenceSession(
self._get_model().SerializeToString(),
sess_options,
providers=["CPUExecutionProvider"],
)
for _ in range(11):  # run several times so the profile records multiple iterations
sess.run(None, dict(X=np.arange(10).astype(np.float32)))
prof = sess.end_profiling()

df = js_profile_to_dataframe(prof, first_it_out=True)
self.assertEqual(df.shape, (79, 18))
self.assertEqual(
set(df.columns),
{
"cat",
"pid",
"tid",
"dur",
"ts",
"ph",
"name",
"args_op_name",
"op_name",
"args_thread_scheduling_stats",
"args_output_size",
"args_parameter_size",
"args_activation_size",
"args_node_index",
"args_provider",
"event_name",
"iteration",
"it==0",
},
)

df = js_profile_to_dataframe(prof, agg=True)
self.assertEqual(df.shape, (9, 1))
self.assertEqual(list(df.columns), ["dur"])

df = js_profile_to_dataframe(prof, agg_op_name=True)
self.assertEqual(df.shape, (79, 17))
self.assertEqual(
set(df.columns),
{
"cat",
"pid",
"tid",
"dur",
"ts",
"ph",
"name",
"args_op_name",
"op_name",
"args_thread_scheduling_stats",
"args_output_size",
"args_parameter_size",
"args_activation_size",
"args_node_index",
"args_provider",
"event_name",
"iteration",
},
)

os.remove(prof)
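# Recap of the three modes exercised above (column counts as asserted):
#   first_it_out=True -> per-event rows, 18 columns incl. "iteration" and "it==0"
#   agg=True          -> aggregated durations only, a single "dur" column
#   agg_op_name=True  -> per-event rows, 17 columns (no "it==0" flag)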

@ignore_warnings(UserWarning)
@skipif_ci_windows("failing because of tkinter?")
def test_plot_profile_2(self):
import matplotlib.pyplot as plt
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.enable_profiling = True
sess = onnxruntime.InferenceSession(
self._get_model().SerializeToString(),
sess_options,
providers=["CPUExecutionProvider"],
)
for _ in range(11):
sess.run(None, dict(X=np.arange(10).astype(np.float32)))
prof = sess.end_profiling()

df = js_profile_to_dataframe(prof, first_it_out=True)

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
plot_ort_profile(df, ax[0], ax[1], "test_title")
# fig.savefig("graph1.png")
self.assertNotEmpty(fig)

os.remove(prof)

@ignore_warnings(UserWarning)
@skipif_ci_windows("failing because of tkinter?")
def test_plot_profile_2_shape(self):
import matplotlib.pyplot as plt
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.enable_profiling = True
sess = onnxruntime.InferenceSession(
self._get_model().SerializeToString(),
sess_options,
providers=["CPUExecutionProvider"],
)
for _ in range(11):
sess.run(None, dict(X=np.arange(10).astype(np.float32)))
prof = sess.end_profiling()

df = js_profile_to_dataframe(prof, first_it_out=True, with_shape=True)

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
plot_ort_profile(df, ax[0], ax[1], "test_title")
# fig.savefig("graph1.png")
self.assertNotEmpty(fig)

os.remove(prof)

@ignore_warnings(UserWarning)
@skipif_ci_windows("failing because of tkinter?")
def test_plot_profile_agg(self):
import matplotlib.pyplot as plt
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.enable_profiling = True
sess = onnxruntime.InferenceSession(
self._get_model().SerializeToString(),
sess_options,
providers=["CPUExecutionProvider"],
)
for _ in range(11):
sess.run(None, dict(X=np.arange(10).astype(np.float32)))
prof = sess.end_profiling()

df = js_profile_to_dataframe(prof, first_it_out=True, agg=True)

fig, ax = plt.subplots(1, 1, figsize=(10, 5))
plot_ort_profile(df, ax, title="test_title")
fig.tight_layout()
# fig.savefig("graph2.png")
self.assertNotEmpty(fig)

os.remove(prof)

def _get_model2(self):  # like _get_model but with a MatMul on 2-D inputs, for the timeline test
model_def0 = oh.make_model(
oh.make_graph(
[
oh.make_node("Add", ["X", "init1"], ["X1"]),
oh.make_node("Abs", ["X"], ["X2"]),
oh.make_node("Add", ["X", "init3"], ["inter"]),
oh.make_node("Mul", ["X1", "inter"], ["Xm"]),
oh.make_node("MatMul", ["X1", "Xm"], ["Xm2"]),
oh.make_node("Sub", ["X2", "Xm2"], ["final"]),
],
"test",
[oh.make_tensor_value_info("X", onnx.TensorProto.FLOAT, [None, None])],
[oh.make_tensor_value_info("final", onnx.TensorProto.FLOAT, [None, None])],
[
onh.from_array(np.array([1], dtype=np.float32), name="init1"),
onh.from_array(np.array([3], dtype=np.float32), name="init3"),
],
),
opset_imports=[oh.make_opsetid("", 18)],
ir_version=9,
)
return model_def0

@ignore_warnings(UserWarning)
@skipif_ci_windows("failing because of tkinter?")
def test_plot_profile_timeline(self):
import matplotlib.pyplot as plt
import onnxruntime

sess_options = onnxruntime.SessionOptions()
sess_options.enable_profiling = True
sess = onnxruntime.InferenceSession(
self._get_model2().SerializeToString(),
sess_options,
providers=["CPUExecutionProvider"],
)
for _ in range(11):
sess.run(None, dict(X=np.random.rand(2**10, 2**10).astype(np.float32)))
prof = sess.end_profiling()

df = js_profile_to_dataframe(prof, first_it_out=True)

fig, ax = plt.subplots(1, 1, figsize=(5, 10))
plot_ort_profile_timeline(df, ax, title="test_timeline", quantile=0.5)
fig.tight_layout()
fig.savefig("test_plot_profile_timeline.png")
self.assertNotEmpty(fig)

os.remove(prof)


if __name__ == "__main__":
import logging

for name in [  # silence noisy matplotlib/PIL loggers during the test run
"matplotlib.font_manager",
"PIL.PngImagePlugin",
"matplotlib",
"matplotlib.pyplot",
]:
log = logging.getLogger(name)
log.setLevel(logging.ERROR)
unittest.main(verbosity=2)