Add OnnxruntimeEvaluator

xadupre · xadupre · commit 3371069b2bf8 · 2025-03-22T19:18:42.000+01:00
diff --git a/_doc/api/reference/index.rst b/_doc/api/reference/index.rst
@@ -13,13 +13,20 @@ onnx_diagnostic.reference
 
     evaluator
     quantized_tensor
+    ort_evaluator
 
 ExtendedReferenceEvaluator
 ++++++++++++++++++++++++++
 
 .. autoclass:: onnx_diagnostic.reference.ExtendedReferenceEvaluator
     :members:
 
+OnnxruntimeEvaluator
+++++++++++++++++++++
+
+.. autoclass:: onnx_diagnostic.reference.OnnxruntimeEvaluator
+    :members:
+
 Other functions
 +++++++++++++++
 
diff --git a/_doc/api/reference/ort_evaluator.rst b/_doc/api/reference/ort_evaluator.rst
@@ -0,0 +1,8 @@
+
+onnx_diagnostic.reference.ort_evaluator
+=======================================
+
+.. automodule:: onnx_diagnostic.reference.ort_evaluator
+    :members:
+    :no-undoc-members:
+    :exclude-members: OnnxruntimeEvaluator
diff --git a/_doc/examples/plot_failing_onnxruntime_evaluator.py b/_doc/examples/plot_failing_onnxruntime_evaluator.py
@@ -0,0 +1,90 @@
+"""
+.. _l-plot-failing-onnxruntime-evaluator:
+
+Running OnnxruntimeEvaluator on a failing model
+===============================================
+
+Example :ref:`l-plot-failing-reference-evaluator` demonstrated
+how to run a python runtime on a model but it may be very slow sometimes
+and it could show some discrepancies if the only provider is not CPU.
+Let's use :class:`OnnxruntimeEvaluator <onnx_diagnostic.reference.OnnxruntimeEvaluator>`.
+It splits the model into nodes and runs them independently until it succeeds
+or fails. This class converts every node into a model based on the types
+discovered during the execution. It relies on :class:`InferenceSessionForTorch
+<onnx_diagnostic.ort_session.InferenceSessionForTorch>` or
+:class:`InferenceSessionForNumpy
+<onnx_diagnostic.ort_session.InferenceSessionForNumpy>`
+for the execution. This example uses torch tensor and
+bfloat16.
+
+A failing model
++++++++++++++++
+
+The issue here is an operator ``Cast`` trying to convert a result
+into a non-existing type.
+"""
+
+import onnx
+import onnx.helper as oh
+import torch
+import onnxruntime
+from onnx_diagnostic.helpers import from_array_extended
+from onnx_diagnostic.reference import OnnxruntimeEvaluator
+
+# Element type used for the two graph inputs and the graph output.
+TBFLOAT16 = onnx.TensorProto.BFLOAT16
+
+# The graph chains five nodes: Mul -> Sigmoid -> Add -> Cast -> CastLike.
+# The node named "failing" is the one this example is about.
+model = oh.make_model(
+    oh.make_graph(
+        [
+            oh.make_node("Mul", ["X", "Y"], ["xy"], name="n0"),
+            oh.make_node("Sigmoid", ["xy"], ["sy"], name="n1"),
+            oh.make_node("Add", ["sy", "one"], ["C"], name="n2"),
+            # to=999 is the non-existing type: this Cast is invalid on purpose.
+            oh.make_node("Cast", ["C"], ["X999"], to=999, name="failing"),
+            oh.make_node("CastLike", ["X999", "Y"], ["Z"], name="n4"),
+        ],
+        "nd",
+        [
+            oh.make_tensor_value_info("X", TBFLOAT16, ["a", "b", "c"]),
+            oh.make_tensor_value_info("Y", TBFLOAT16, ["a", "b", "c"]),
+        ],
+        [oh.make_tensor_value_info("Z", TBFLOAT16, ["a", "b", "c"])],
+        # Initializer "one" (bfloat16), second input of the Add node.
+        [from_array_extended(torch.tensor([1], dtype=torch.bfloat16), name="one")],
+    ),
+    opset_imports=[oh.make_opsetid("", 18)],
+    ir_version=9,
+)
+
+# %%
+# We check it is failing.
+
+try:
+    onnxruntime.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
+except onnxruntime.capi.onnxruntime_pybind11_state.Fail as e:
+    print(e)
+
+
+# %%
+# OnnxruntimeEvaluator
+# ++++++++++++++++++++++++++
+#
+# This class extends :class:`onnx.reference.ReferenceEvaluator`
+# with operators outside the standard but defined by :epkg:`onnxruntime`.
+# `verbose=10` tells the class to print as much as possible,
+# `verbose=0` prints nothing. Intermediate values for more or less verbosity.
+
+ref = OnnxruntimeEvaluator(model, verbose=10)
+feeds = dict(
+    X=torch.rand((3, 4), dtype=torch.blofat16), Y=torch.rand((3, 4), dtype=torch.blofat16)
+)
+try:
+    ref.run(None, feeds)
+except Exception as e:
+    print("ERROR", type(e), e)
+
+# %%
+# We can see it runs until it reaches `Cast` and stops.
+# The error message is not always obvious to interpret.
+# It gets improved from time to time.
+# This runtime is useful when it fails for a numerical reason.
+# It is possible to insert prints in the python code to print
+# more information or debug if needed.
diff --git a/_doc/index.rst b/_doc/index.rst
@@ -51,6 +51,7 @@ Source are `sdpython/onnx-diagnostic
 * :ref:`l-plot-sxport-with-dynamio-shapes-auto`
 * :ref:`l-plot-tiny-llm-export`
 * :ref:`l-plot-failing-reference-evaluator`
+* :ref:`l-plot-failing-onnxruntime-evaluator`
 * :ref:`l-plot-failing-model-extract`
 
 **Some Usefuls Tools**
diff --git a/_unittests/ut_reference/test_ort_evaluator.py b/_unittests/ut_reference/test_ort_evaluator.py
@@ -0,0 +1,168 @@
+import unittest
+from typing import Optional
+import numpy as np
+from onnx import ModelProto, TensorProto
+from onnx.checker import check_model
+import onnx.helper as oh
+import onnx.numpy_helper as onh
+import torch
+from onnx_diagnostic.ext_test_case import (
+    ExtTestCase,
+    hide_stdout,
+    ignore_warnings,
+    requires_cuda,
+)
+from onnx_diagnostic.reference import ExtendedReferenceEvaluator, OnnxruntimeEvaluator
+
+TFLOAT = TensorProto.FLOAT
+
+
+# Compares OnnxruntimeEvaluator with ExtendedReferenceEvaluator on small ONNX models.
+# Comments are used instead of docstrings on purpose: with verbosity=2, unittest
+# displays the first docstring line instead of the test name.
+class TestOnnxruntimeEvaluatoruator(ExtTestCase):
+    # Returns a float32 array of the requested shape holding arange(n) / n,
+    # n being the number of elements. *bias* is added when it is truthy
+    # (None and 0 leave the values unchanged).
+    def _range(self, *shape, bias: Optional[float] = None):
+        n = np.prod(shape)
+        x = np.arange(n).astype(np.float32) / n
+        if bias:
+            x = x + bias
+        return x.reshape(tuple(shape)).astype(np.float32)
+
+    # Builds the model shared by most tests:
+    # Unsqueeze, Unsqueeze, Reshape on X, Reshape + Cast on Y, then MatMul and
+    # a final Reshape producing Z. The model is validated with check_model.
+    def _get_model(self) -> ModelProto:
+        model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node("Unsqueeze", ["X", "zero"], ["xu1"]),
+                    oh.make_node("Unsqueeze", ["xu1", "un"], ["xu2"]),
+                    oh.make_node("Reshape", ["xu2", "shape1"], ["xm1"]),
+                    oh.make_node("Reshape", ["Y", "shape2"], ["xm2c"]),
+                    oh.make_node("Cast", ["xm2c"], ["xm2"], to=1),
+                    oh.make_node("MatMul", ["xm1", "xm2"], ["xm"]),
+                    oh.make_node("Reshape", ["xm", "shape3"], ["Z"]),
+                ],
+                "dummy",
+                [
+                    oh.make_tensor_value_info("X", TFLOAT, [32, 128]),
+                    oh.make_tensor_value_info("Y", TFLOAT, [3, 5, 128, 64]),
+                ],
+                [oh.make_tensor_value_info("Z", TFLOAT, [3, 5, 32, 64])],
+                [
+                    onh.from_array(np.array([0], dtype=np.int64), name="zero"),
+                    onh.from_array(np.array([1], dtype=np.int64), name="un"),
+                    onh.from_array(np.array([1, 32, 128], dtype=np.int64), name="shape1"),
+                    onh.from_array(np.array([15, 128, 64], dtype=np.int64), name="shape2"),
+                    onh.from_array(np.array([3, 5, 32, 64], dtype=np.int64), name="shape3"),
+                ],
+            ),
+            ir_version=9,
+            opset_imports=[oh.make_opsetid("", 18)],
+        )
+        check_model(model)
+        return model
+
+    # Runs the whole model with both evaluators (verbose=10), checks the outputs
+    # agree up to atol=1e-4 and that the last node shows up in the second value
+    # returned by self.capture (presumably the captured stdout, to be confirmed
+    # in ExtTestCase).
+    @ignore_warnings(DeprecationWarning)
+    def test_ort_eval(self):
+        model = self._get_model()
+
+        feeds = {"X": self._range(32, 128), "Y": self._range(3, 5, 128, 64)}
+        ref = ExtendedReferenceEvaluator(model, verbose=10)
+        expected, out, _ = self.capture(lambda: ref.run(None, feeds)[0])
+        self.assertIn("Reshape(xm, shape3) -> Z", out)
+
+        ort_eval = OnnxruntimeEvaluator(model, verbose=10, opsets=20)
+        got, out, _ = self.capture(lambda: ort_eval.run(None, feeds)[0])
+        self.assertEqualArray(expected, got, atol=1e-4)
+        self.assertIn("Reshape(xm, shape3) -> Z", out)
+
+    # Same comparison with providers="cuda" and a looser tolerance (atol=1e-1).
+    @ignore_warnings(DeprecationWarning)
+    @requires_cuda()
+    @hide_stdout()
+    def test_ort_eval_cuda(self):
+        model = self._get_model()
+
+        feeds = {"X": self._range(32, 128), "Y": self._range(3, 5, 128, 64)}
+        ref = ExtendedReferenceEvaluator(model, verbose=10)
+        expected = ref.run(None, feeds)[0]
+
+        ort_eval = OnnxruntimeEvaluator(model, verbose=10, opsets=20, providers="cuda")
+        got = ort_eval.run(None, feeds)[0]
+        self.assertEqualArray(expected, got, atol=1e-1)
+
+    # Evaluates a single node (the first Unsqueeze) instead of a full model,
+    # with numpy feeds; both evaluators must return numpy arrays.
+    @ignore_warnings(DeprecationWarning)
+    @hide_stdout()
+    def test_ort_eval_node_proto(self):
+        model = self._get_model()
+
+        feeds = {"X": self._range(32, 128), "zero": np.array([0], dtype=np.int64)}
+        ref = ExtendedReferenceEvaluator(model.graph.node[0], verbose=10)
+        expected = ref.run(None, feeds)
+
+        ort_eval = OnnxruntimeEvaluator(model.graph.node[0], verbose=10, opsets=20)
+        got = ort_eval.run(None, feeds)
+        self.assertEqualArrayAny(expected, got, atol=1e-4)
+        self.assertIsInstance(expected[0], np.ndarray)
+        self.assertIsInstance(got[0], np.ndarray)
+
+    # Same single node, but OnnxruntimeEvaluator receives torch tensors and
+    # must return a torch tensor; the reference still runs on the numpy feeds.
+    @ignore_warnings(DeprecationWarning)
+    @hide_stdout()
+    def test_ort_eval_node_proto_torch(self):
+        model = self._get_model()
+
+        feeds_np = {"X": self._range(32, 128), "zero": np.array([0], dtype=np.int64)}
+        feeds = {k: torch.from_numpy(v) for k, v in feeds_np.items()}
+        ref = ExtendedReferenceEvaluator(model.graph.node[0], verbose=10)
+        expected = ref.run(None, feeds_np)
+
+        ort_eval = OnnxruntimeEvaluator(model.graph.node[0], verbose=10, opsets=20)
+        got = ort_eval.run(None, feeds)
+        self.assertIsInstance(got[0], torch.Tensor)
+        self.assertEqualArray(expected[0], got[0], atol=1e-4)
+
+    # Checks a model calling a local function (LinearRegression = MatMul + Add,
+    # domain "custom") followed by Abs gives the same first output with both
+    # evaluators on random float32 inputs.
+    @hide_stdout()
+    def test_local_function(self):
+        new_domain = "custom"
+
+        linear_regression = oh.make_function(
+            new_domain,
+            "LinearRegression",
+            ["x", "a", "b"],
+            ["y"],
+            [
+                oh.make_node("MatMul", ["x", "a"], ["xa"]),
+                oh.make_node("Add", ["xa", "b"], ["y"]),
+            ],
+            [oh.make_opsetid("", 14)],
+            [],
+        )
+
+        graph = oh.make_graph(
+            [
+                oh.make_node("LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain),
+                oh.make_node("Abs", ["Y1"], ["Y"]),
+            ],
+            "example",
+            [
+                oh.make_tensor_value_info("X", TFLOAT, [None, None]),
+                oh.make_tensor_value_info("A", TFLOAT, [None, None]),
+                oh.make_tensor_value_info("B", TFLOAT, [None, None]),
+            ],
+            [oh.make_tensor_value_info("Y", TFLOAT, None)],
+        )
+
+        onnx_model = oh.make_model(
+            graph,
+            opset_imports=[oh.make_opsetid("", 14), oh.make_opsetid(new_domain, 1)],
+            functions=[linear_regression],
+            ir_version=10,
+        )
+        feeds = {
+            "X": np.random.randn(3, 3).astype(np.float32),
+            "A": np.random.randn(3, 3).astype(np.float32),
+            "B": np.random.randn(3, 3).astype(np.float32),
+        }
+        ref = ExtendedReferenceEvaluator(onnx_model)
+        ort_eval = OnnxruntimeEvaluator(onnx_model, verbose=10, opsets=20)
+        expected = ref.run(None, feeds)
+        got = ort_eval.run(None, feeds)
+        self.assertEqualArray(expected[0], got[0])
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_helpers.py b/_unittests/ut_xrun_doc/test_helpers.py
@@ -407,6 +407,27 @@ def test_rename_dynamic_expression(self):
         text = rename_dynamic_expression("a * 10 - a", {"a": "x"})
         self.assertEqual(text, "x * 10 - x")
 
+    def test_from_tensor(self):
+        for dt in {
+            torch.float32,
+            torch.float64,
+            torch.bfloat16,
+            torch.float16,
+            torch.int32,
+            torch.int64,
+            torch.int8,
+            torch.int16,
+            torch.uint8,
+            torch.uint16,
+            torch.uint32,
+            torch.uint64,
+        }:
+            t = torch.rand((4, 3), dtype=torch.dtype)
+            proto = from_array_extended(t)
+            self.assertIsInstance(proto, onnx.TensorProto)
+            convert_endian(proto)
+            dtype_to_tensor_dtype(dt)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_diagnostic/helpers.py b/onnx_diagnostic/helpers.py
@@ -717,6 +717,13 @@ def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> Te
     :param name: name
     :return: TensorProto
     """
+    try:
+        import torch
+    except ImportError:
+        torch = None
+    if torch is not None and isinstance(tensor, torch.Tensor):
+        raise NotImplementedError()
+
     from onnx.reference.ops.op_cast import (
         bfloat16,
         float8e4m3fn,
diff --git a/onnx_diagnostic/reference/__init__.py b/onnx_diagnostic/reference/__init__.py
@@ -1 +1,2 @@
 from .evaluator import ExtendedReferenceEvaluator
+from .ort_evaluator import OnnxruntimeEvaluator
diff --git a/onnx_diagnostic/reference/ort_evaluator.py b/onnx_diagnostic/reference/ort_evaluator.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
`1`	`1`	`from .evaluator import ExtendedReferenceEvaluator`
	`2`	`+from .ort_evaluator import OnnxruntimeEvaluator`