Commit 536c658

Commit message: doc
1 parent 31a9234 · commit 536c658

File tree: 8 files changed (+136 −31 lines)

_doc/conf.py

Lines changed: 1 addition & 0 deletions
@@ -186,6 +186,7 @@
     "onnxrt backend": "https://pytorch.org/docs/stable/onnx_dynamo_onnxruntime_backend.html",
     "onnxruntime": "https://onnxruntime.ai/",
     "onnxruntime-training": "https://onnxruntime.ai/docs/get-started/training-on-device.html",
+    "onnxruntime kernels": "https://onnxruntime.ai/docs/reference/operators/OperatorKernels.html",
     "onnx-array-api": "https://sdpython.github.io/doc/onnx-array-api/dev/",
     "onnx-diagnostic": "https://sdpython.github.io/doc/onnx-diagnostic/dev/",
     "onnx-extended": "https://sdpython.github.io/doc/onnx-extended/dev/",

_doc/examples/plot_failing_onnxruntime_evaluator.py

Lines changed: 16 additions & 0 deletions
@@ -28,6 +28,7 @@
 import onnx.helper as oh
 import torch
 import onnxruntime
+from onnx_diagnostic.ext_test_case import has_cuda
 from onnx_diagnostic.helpers import from_array_extended
 from onnx_diagnostic.reference import OnnxruntimeEvaluator
@@ -81,6 +82,21 @@
 except Exception as e:
     print("ERROR", type(e), e)
 
+
+# %%
+# :epkg:`onnxruntime` may not support bfloat16 on CPU.
+# See :epkg:`onnxruntime kernels`.
+
+if has_cuda():
+    ref = OnnxruntimeEvaluator(model, providers="cuda", verbose=10)
+    feeds = dict(
+        X=torch.rand((3, 4), dtype=torch.bfloat16), Y=torch.rand((3, 4), dtype=torch.bfloat16)
+    )
+    try:
+        ref.run(None, feeds)
+    except Exception as e:
+        print("ERROR", type(e), e)
+
 # %%
 # We can see it run until it reaches `Cast` and stops.
 # The error message is not always obvious to interpret.
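
Note: whether the CPU provider has a bfloat16 kernel for a given operator depends on the build; the :epkg:`onnxruntime kernels` page added to conf.py above lists the registered kernels. A quick local check, as a minimal sketch:

import onnxruntime

# Lists the execution providers compiled into this onnxruntime build;
# "CUDAExecutionProvider" must appear for the CUDA path in the example above to run.
print(onnxruntime.get_available_providers())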

_unittests/ut_reference/test_array_tensor.py

Lines changed: 1 addition & 2 deletions
@@ -2,9 +2,8 @@
 import numpy as np
 from onnx import TensorProto
 from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info
-from onnx.reference.op_run import to_array_extended
 from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings
-from onnx_diagnostic.helpers import from_array_extended
+from onnx_diagnostic.helpers import from_array_extended, to_array_extended
 from onnx_diagnostic.reference import ExtendedReferenceEvaluator
 
_unittests/ut_reference/test_ort_evaluator.py

Lines changed: 86 additions & 1 deletion
@@ -1,6 +1,7 @@
 import unittest
-from typing import Optional
+from typing import Any, Dict, Optional, Tuple
 import numpy as np
+import ml_dtypes
 from onnx import ModelProto, TensorProto
 from onnx.checker import check_model
 import onnx.helper as oh
@@ -12,6 +13,11 @@
     ignore_warnings,
     requires_cuda,
 )
+from onnx_diagnostic.helpers import (
+    from_array_extended,
+    onnx_dtype_to_torch_dtype,
+    onnx_dtype_to_np_dtype,
+)
 from onnx_diagnostic.reference import ExtendedReferenceEvaluator, OnnxruntimeEvaluator
 
 TFLOAT = TensorProto.FLOAT
@@ -163,6 +169,85 @@ def test_local_function(self):
         got = ort_eval.run(None, feeds)
         self.assertEqualArray(expected[0], got[0])
 
+    @classmethod
+    def _trange(cls, *shape, bias: Optional[float] = None):
+        n = np.prod(shape)
+        x = np.arange(n).astype(np.float32) / n
+        if bias:
+            x = x + bias
+        return torch.from_numpy(x.reshape(tuple(shape)).astype(np.float32))
+
+    @classmethod
+    def _get_model_init(cls, itype) -> Tuple[ModelProto, Dict[str, Any], Tuple[Any, ...]]:
+        dtype = onnx_dtype_to_np_dtype(itype)
+        ttype = onnx_dtype_to_torch_dtype(itype)
+        cst = np.arange(6).astype(dtype)
+        model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node("IsNaN", ["x"], ["xi"]),
+                    oh.make_node("IsNaN", ["y"], ["yi"]),
+                    oh.make_node("Cast", ["xi"], ["xii"], to=TensorProto.INT64),
+                    oh.make_node("Cast", ["yi"], ["yii"], to=TensorProto.INT64),
+                    oh.make_node("Add", ["xii", "yii"], ["gggg"]),
+                    oh.make_node("Cast", ["gggg"], ["final"], to=itype),
+                ],
+                "dummy",
+                [oh.make_tensor_value_info("x", itype, [None, None])],
+                [oh.make_tensor_value_info("final", itype, [None, None])],
+                [from_array_extended(cst, name="y")],
+            ),
+            opset_imports=[oh.make_opsetid("", 20)],
+            ir_version=10,
+        )
+        feeds = {"x": cls._trange(5, 6).to(ttype)}
+        expected = torch.isnan(feeds["x"]).to(int) + torch.isnan(
+            torch.from_numpy(cst.astype(float))
+        ).to(int)
+        return (model, feeds, (expected.to(ttype),))
+
+    @hide_stdout()
+    def test_init_numpy_afloat32(self):
+        model, feeds, expected = self._get_model_init(TensorProto.FLOAT)
+        wrap = OnnxruntimeEvaluator(
+            model, providers="cpu", graph_optimization_level=False, verbose=10
+        )
+        got = wrap.run(None, {k: v.numpy() for k, v in feeds.items()})
+        self.assertIsInstance(got[0], np.ndarray)
+        self.assertEqualArray(expected[0], got[0])
+
+    @hide_stdout()
+    def test_init_numpy_bfloat16(self):
+        model, feeds, expected = self._get_model_init(TensorProto.BFLOAT16)
+        wrap = OnnxruntimeEvaluator(
+            model, providers="cpu", graph_optimization_level=False, verbose=10
+        )
+        got = wrap.run(
+            None, {k: v.to(float).numpy().astype(ml_dtypes.bfloat16) for k, v in feeds.items()}
+        )
+        self.assertIsInstance(got[0], np.ndarray)
+        self.assertEqualArray(expected[0], got[0])
+
+    @hide_stdout()
+    def test_init_torch_afloat32(self):
+        model, feeds, expected = self._get_model_init(TensorProto.FLOAT)
+        wrap = OnnxruntimeEvaluator(
+            model, providers="cpu", graph_optimization_level=False, verbose=10
+        )
+        got = wrap.run(None, feeds)
+        self.assertIsInstance(got[0], torch.Tensor)
+        self.assertEqualArray(expected[0], got[0])
+
+    @hide_stdout()
+    def test_init_torch_bfloat16(self):
+        model, feeds, expected = self._get_model_init(TensorProto.BFLOAT16)
+        wrap = OnnxruntimeEvaluator(
+            model, providers="cpu", graph_optimization_level=False, verbose=10
+        )
+        got = wrap.run(None, feeds)
+        self.assertIsInstance(got[0], torch.Tensor)
+        self.assertEqualArray(expected[0], got[0])
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
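
Side note on test_init_numpy_bfloat16: torch refuses to export a bfloat16 tensor through .numpy(), which is why the feed goes through float first and is narrowed back on the numpy side. A minimal sketch of that conversion (values are converted, not reinterpreted):

import ml_dtypes
import torch

t = torch.rand(3, 4, dtype=torch.bfloat16)
# .numpy() raises TypeError on bfloat16, so upcast to float first,
# then cast back down to ml_dtypes.bfloat16 on the numpy side.
a = t.to(float).numpy().astype(ml_dtypes.bfloat16)
assert a.dtype == ml_dtypes.bfloat16 and a.shape == (3, 4)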

_unittests/ut_xrun_doc/test_helpers.py

Lines changed: 7 additions & 2 deletions
@@ -23,6 +23,7 @@
     np_dtype_to_tensor_dtype,
     torch_dtype_to_onnx_dtype,
     from_array_extended,
+    to_array_extended,
     convert_endian,
     from_array_ml_dtypes,
     dtype_to_tensor_dtype,
@@ -250,16 +251,20 @@ def test_from_array(self):
             t = np.random.rand(4, 3).astype(dt)
             proto = from_array_extended(t)
             self.assertIsInstance(proto, onnx.TensorProto)
-            convert_endian(proto)
             dtype_to_tensor_dtype(dt)
+            arr = to_array_extended(proto)
+            self.assertEqualArray(t, arr)
+            convert_endian(proto)
 
     def test_from_array_ml_dtypes(self):
         for dt in {
             ml_dtypes.bfloat16,
         }:
             t = np.random.rand(4, 3).astype(dt)
-            from_array_ml_dtypes(t)
+            proto = from_array_ml_dtypes(t)
             from_array_extended(t)
+            arr = to_array_extended(proto)
+            self.assertEqualArray(t, arr)
 
     def test_size_type_mldtypes(self):
         for dt in {
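
The round-trip these tests now assert can be reproduced standalone; a minimal sketch, assuming onnx-diagnostic and ml_dtypes are installed:

import ml_dtypes
import numpy as np
from onnx_diagnostic.helpers import from_array_extended, to_array_extended

t = np.random.rand(4, 3).astype(ml_dtypes.bfloat16)
proto = from_array_extended(t)   # numpy (ml_dtypes) -> TensorProto
arr = to_array_extended(proto)   # TensorProto -> numpy, bfloat16 preserved
assert arr.dtype == t.dtype
np.testing.assert_array_equal(t.astype(np.float32), arr.astype(np.float32))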

onnx_diagnostic/helpers.py

Lines changed: 11 additions & 2 deletions
@@ -22,8 +22,7 @@
     np_dtype_to_tensor_dtype as onnx_np_dtype_to_tensor_dtype,
     tensor_dtype_to_np_dtype as onnx_tensor_dtype_to_np_dtype,
 )
-from onnx.numpy_helper import from_array as onnx_from_array
-from onnx.reference.op_run import to_array_extended
+from onnx.numpy_helper import from_array as onnx_from_array, to_array
 
 
 def size_type(dtype: Any) -> int:
@@ -845,6 +844,16 @@ def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> Te
     return t
 
 
+def to_array_extended(proto: TensorProto) -> npt.ArrayLike:
+    """Converts :class:`onnx.TensorProto` into a numpy array."""
+    arr = to_array(proto)
+    if proto.data_type >= onnx.TensorProto.BFLOAT16:
+        # Types not supported by numpy
+        ml_dtypes = onnx_dtype_to_np_dtype(proto.data_type)
+        return arr.view(ml_dtypes)
+    return arr
+
+
 def onnx_dtype_to_torch_dtype(itype: int) -> "torch.dtype":  # noqa: F821
     """
     Converts an onnx type into a torch dtype.
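
The .view call at the heart of to_array_extended reinterprets bytes rather than converting values: for BFLOAT16 and the types above it, the same-width storage is re-labeled with the matching ml_dtypes dtype. A minimal illustration of that reinterpretation with plain numpy; the bit patterns in the comment are the standard bfloat16 encodings:

import ml_dtypes
import numpy as np

# 0x3F80, 0x4000, 0x4040 are the upper 16 bits of float32 1.0, 2.0, 3.0,
# i.e. their bfloat16 bit patterns; view() re-labels the buffer, no copy.
raw = np.array([0x3F80, 0x4000, 0x4040], dtype=np.uint16)
print(raw.view(ml_dtypes.bfloat16))  # [1 2 3]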

onnx_diagnostic/ort_session.py

Lines changed: 9 additions & 14 deletions
@@ -190,18 +190,9 @@ def run(
         self, output_names: Optional[List[str]], feeds: Dict[str, npt.ArrayLike]
     ) -> List[npt.ArrayLike]:
         """Calls :meth:`onnxruntime.InferenceSession.run`."""
-        if any(
-            (np_dtype_to_tensor_dtype(v.dtype) >= onnx.TensorProto.BFLOAT16)
-            for v in feeds.values()
-        ):
-            # bfloat16 not supported by onnxruntime
-            return self.run_dlpack(output_names, feeds)
-        if self.nvtx:
-            self.torch.cuda.nvtx.range_push("run")
-        res = self.sess.run(output_names, feeds)
-        if self.nvtx:
-            self.torch.cuda.nvtx.range_pop()
-        return res
+        # sess.run does not support bfloat16
+        # res = self.sess.run(output_names, feeds)
+        return self.run_dlpack(output_names, feeds)
 
     def run_dlpack(
         self, output_names: Optional[List[str]], feeds: Dict[str, np.ndarray]
@@ -213,8 +204,12 @@ def run_dlpack(
         """
         new_feeds = {}
         for k, v in feeds.items():
-            new_feeds[k] = ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type(
-                v, np_dtype_to_tensor_dtype(v.dtype)
+            new_feeds[k] = (
+                ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type(
+                    v, np_dtype_to_tensor_dtype(v.dtype)
+                )
+                if isinstance(v, np.ndarray)
+                else ORTC.OrtValue.from_dlpack(v.__dlpack__(), v.dtype == torch.bool)
             )
         if self.nvtx:
             self.torch.cuda.nvtx.range_push("run_with_ort_values")
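
The dlpack branch added here wraps a torch tensor's memory in an OrtValue without copying, which is what lets bfloat16 feeds through even though sess.run rejects them. A minimal sketch of that wrapping; the ORTC import path is an assumption (onnxruntime's C bindings, as the alias suggests in this module):

import torch
from onnxruntime.capi import _pybind_state as ORTC  # assumed ORTC alias

t = torch.rand(3, 4, dtype=torch.bfloat16)
# Wrap the tensor through the dlpack protocol; the second argument tells
# onnxruntime whether the buffer holds booleans.
ov = ORTC.OrtValue.from_dlpack(t.__dlpack__(), t.dtype == torch.bool)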

onnx_diagnostic/reference/ort_evaluator.py

Lines changed: 5 additions & 10 deletions
@@ -11,9 +11,8 @@
     load,
 )
 from onnx.defs import onnx_opset_version
-from onnx.numpy_helper import to_array
 import onnxruntime
-from ..helpers import pretty_onnx, dtype_to_tensor_dtype, string_type
+from ..helpers import pretty_onnx, dtype_to_tensor_dtype, string_type, to_array_extended
 from ..ort_session import InferenceSessionForTorch, InferenceSessionForNumpy, _InferenceSession
 
 PROTO = (FunctionProto, ModelProto, GraphProto, NodeProto)
@@ -54,7 +53,7 @@ def __init__(
         log_verbosity_level: Optional[int] = None,
         optimized_model_filepath: Optional[str] = None,
         disable_aot_function_inlining: Optional[bool] = None,
-        use_training_api: Optional[bool] = None,
+        use_training_api: bool = False,
         verbose: int = 0,
         local_functions: Optional[
             Dict[Tuple[str, str], Union[Proto, "OnnxruntimeEvaluator"]]
@@ -103,7 +102,7 @@ def __init__(
             )
         )
         self.rt_inits_ = (
-            {init.name: to_array(init) for init in self.proto.graph.initializer}
+            {init.name: to_array_extended(init) for init in self.proto.graph.initializer}
            if hasattr(self.proto, "graph")
             else {}
         )
@@ -144,12 +143,8 @@ def output_names(self) -> List[str]:
     def _log_arg(self, a: Any) -> Any:
         if isinstance(a, (str, int, float)):
             return a
-        if hasattr(a, "detach"):
-            device = f"D{a.get_device()}:"
-            a = a.detach().cpu().numpy()
-        else:
-            device = ""
-        if isinstance(a, np.ndarray):
+        device = f"D{a.get_device()}:" if hasattr(a, "detach") else ""
+        if hasattr(a, "shape"):
             if self.verbose < 4:  # noqa: PLR2004
                 return f"{device}{a.dtype}:{a.shape} in [{a.min()}, {a.max()}]"
             elements = a.ravel().tolist()
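
The rewritten _log_arg leans on duck typing: numpy arrays and torch tensors both expose dtype, shape, min() and max(), so the hasattr(a, "shape") branch serves both without the old detach/cpu/numpy round-trip. A small sketch with a hypothetical summarize helper (illustration only, not part of the module):

import numpy as np
import torch

def summarize(a):
    # Mirrors _log_arg's duck typing: one branch covers ndarray and Tensor.
    return f"{a.dtype}:{tuple(a.shape)} in [{a.min()}, {a.max()}]"

print(summarize(np.arange(6, dtype=np.float32).reshape(2, 3)))
print(summarize(torch.arange(6).to(torch.bfloat16).reshape(2, 3)))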
