diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0bd23b68..9dca38dc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -56,12 +56,19 @@ jobs: run: | pip install pytest export PYTHONPATH=. - UNITTEST_GOING=1 pytest --durations=10 _unittests --ignore _unittests/ut_reference/test_backend_extended_reference_evaluator.py + UNITTEST_GOING=1 pytest --durations=10 _unittests --ignore _unittests/ut_reference/test_backend_extended_reference_evaluator.py --ignore _unittests/ut_reference/test_backend_onnxruntime_evaluator.py export PYTHONPATH= - - name: run backend tests + - name: run backend tests python run: | pip install pytest export PYTHONPATH=. UNITTEST_GOING=1 pytest --durations=10 _unittests/ut_reference/test_backend_extended_reference_evaluator.py export PYTHONPATH= + + - name: run backend tests onnxruntime + run: | + pip install pytest + export PYTHONPATH=. + UNITTEST_GOING=1 pytest --durations=10 _unittests/ut_reference/test_backend_onnxruntime_evaluator.py --maxfail=15 + export PYTHONPATH= diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml index b99a1372..d3dfbaae 100644 --- a/.github/workflows/documentation.yml +++ b/.github/workflows/documentation.yml @@ -59,7 +59,7 @@ jobs: pip install pytest pip install pytest-cov export PYTHONPATH=. - UNITTEST_GOING=1 pytest --cov=./onnx_diagnostic/ --cov-report=xml --durations=10 _unittests --ignore _unittests/ut_reference/test_backend_extended_reference_evaluator.py + UNITTEST_GOING=1 pytest --cov=./onnx_diagnostic/ --cov-report=xml --durations=10 _unittests --ignore _unittests/ut_reference/test_backend_extended_reference_evaluator.py --ignore _unittests/ut_reference/test_backend_onnxruntime_evaluator.py export PYTHONPATH= - name: Upload coverage reports to Codecov diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index 9fa3adb0..42e207cd 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -4,6 +4,7 @@ Change Logs 0.2.0 +++++ +* :pr:`9`: adds ``OnnxruntimeEvaluator`` * :pr:`8`: adds ``ExtendedReferenceEvaluator`` * :pr:`7`: improves function ``investigate_onnxruntime_issue`` diff --git a/README.rst b/README.rst index 5a797b82..bb2bad34 100644 --- a/README.rst +++ b/README.rst @@ -11,10 +11,6 @@ onnx-diagnostic: investigate onnx models .. image:: https://badge.fury.io/py/onnx-diagnostic.svg :target: http://badge.fury.io/py/onnx-diagnostic -.. image:: http://img.shields.io/github/issues/sdpython/onnx-diagnostic.png - :alt: GitHub Issues - :target: https://github.com/sdpython/onnx-diagnostic/issues - .. image:: https://img.shields.io/badge/license-MIT-blue.svg :alt: MIT License :target: https://opensource.org/license/MIT/ diff --git a/_doc/api/reference/index.rst b/_doc/api/reference/index.rst index 0f617e77..1bd03683 100644 --- a/_doc/api/reference/index.rst +++ b/_doc/api/reference/index.rst @@ -13,6 +13,7 @@ onnx_diagnostic.reference evaluator quantized_tensor + ort_evaluator ExtendedReferenceEvaluator ++++++++++++++++++++++++++ @@ -20,6 +21,12 @@ ExtendedReferenceEvaluator .. autoclass:: onnx_diagnostic.reference.ExtendedReferenceEvaluator :members: +OnnxruntimeEvaluator +++++++++++++++++++++ + +.. 
autoclass:: onnx_diagnostic.reference.OnnxruntimeEvaluator
+    :members:
+
 Other functions
 +++++++++++++++
diff --git a/_doc/api/reference/ort_evaluator.rst b/_doc/api/reference/ort_evaluator.rst
new file mode 100644
index 00000000..9b161bc8
--- /dev/null
+++ b/_doc/api/reference/ort_evaluator.rst
@@ -0,0 +1,8 @@
+
+onnx_diagnostic.reference.ort_evaluator
+=======================================
+
+.. automodule:: onnx_diagnostic.reference.ort_evaluator
+    :members:
+    :no-undoc-members:
+    :exclude-members: OnnxruntimeEvaluator
diff --git a/_doc/conf.py b/_doc/conf.py
index e46703f1..f5503109 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -104,11 +104,12 @@
     ("py:class", "False"),
     ("py:class", "True"),
     ("py:class", "Argument"),
-    ("py:class", "onnxscript.ir.Tuple"),
-    ("py:class", "pipeline.Pipeline"),
     ("py:class", "default=sklearn.utils.metadata_routing.UNCHANGED"),
     ("py:class", "ModelProto"),
     ("py:class", "Module"),
+    ("py:class", "np.ndarray"),
+    ("py:class", "onnxscript.ir.Tuple"),
+    ("py:class", "pipeline.Pipeline"),
     ("py:class", "torch.fx.passes.operator_support.OperatorSupport"),
     ("py:class", "torch.fx.proxy.TracerBase"),
     ("py:class", "torch.utils._pytree.Context"),
@@ -177,6 +178,7 @@
     "GraphModule": "https://pytorch.org/docs/stable/fx.html#torch.fx.GraphModule",
     "HuggingFace": "https://huggingface.co/docs/hub/en/index",
     "Linux": "https://www.linux.org/",
+    "ml_dtypes": "https://github.com/jax-ml/ml_dtypes",
     "monai": "https://monai.io/",
     "numpy": "https://numpy.org/",
     "onnx": "https://onnx.ai/onnx/",
@@ -186,6 +188,7 @@
     "onnxrt backend": "https://pytorch.org/docs/stable/onnx_dynamo_onnxruntime_backend.html",
     "onnxruntime": "https://onnxruntime.ai/",
     "onnxruntime-training": "https://onnxruntime.ai/docs/get-started/training-on-device.html",
+    "onnxruntime kernels": "https://onnxruntime.ai/docs/reference/operators/OperatorKernels.html",
     "onnx-array-api": "https://sdpython.github.io/doc/onnx-array-api/dev/",
     "onnx-diagnostic": "https://sdpython.github.io/doc/onnx-diagnostic/dev/",
     "onnx-extended": "https://sdpython.github.io/doc/onnx-extended/dev/",
diff --git a/_doc/examples/plot_failing_onnxruntime_evaluator.py b/_doc/examples/plot_failing_onnxruntime_evaluator.py
new file mode 100644
index 00000000..be274919
--- /dev/null
+++ b/_doc/examples/plot_failing_onnxruntime_evaluator.py
@@ -0,0 +1,106 @@
+"""
+.. _l-plot-failing-onnxruntime-evaluator:
+
+Running OnnxruntimeEvaluator on a failing model
+===============================================
+
+Example :ref:`l-plot-failing-reference-evaluator` demonstrated
+how to run a python runtime on a model, but it may be very slow sometimes
+and it could show some discrepancies if the intended provider is not CPU.
+Let's use :class:`OnnxruntimeEvaluator <onnx_diagnostic.reference.OnnxruntimeEvaluator>`.
+It splits the model into nodes and runs them independently until it succeeds
+or fails. This class converts every node into a model based on the types
+discovered during the execution. It relies on :class:`InferenceSessionForTorch
+<onnx_diagnostic.ort_session.InferenceSessionForTorch>` or
+:class:`InferenceSessionForNumpy
+<onnx_diagnostic.ort_session.InferenceSessionForNumpy>`
+for the execution. This example uses torch tensors and
+bfloat16.
+
+A failing model
++++++++++++++++
+
+The issue here is an operator ``Cast`` trying to convert a result
+into a non-existing type.
+"""
+
+import onnx
+import onnx.helper as oh
+import torch
+import onnxruntime
+from onnx_diagnostic.ext_test_case import has_cuda
+from onnx_diagnostic.helpers import from_array_extended
+from onnx_diagnostic.reference import OnnxruntimeEvaluator
+
+TBFLOAT16 = onnx.TensorProto.BFLOAT16
+
+model = oh.make_model(
+    oh.make_graph(
+        [
+            oh.make_node("Mul", ["X", "Y"], ["xy"], name="n0"),
+            oh.make_node("Sigmoid", ["xy"], ["sy"], name="n1"),
+            oh.make_node("Add", ["sy", "one"], ["C"], name="n2"),
+            oh.make_node("Cast", ["C"], ["X999"], to=999, name="failing"),
+            oh.make_node("CastLike", ["X999", "Y"], ["Z"], name="n4"),
+        ],
+        "nd",
+        [
+            oh.make_tensor_value_info("X", TBFLOAT16, ["a", "b", "c"]),
+            oh.make_tensor_value_info("Y", TBFLOAT16, ["a", "b", "c"]),
+        ],
+        [oh.make_tensor_value_info("Z", TBFLOAT16, ["a", "b", "c"])],
+        [from_array_extended(torch.tensor([1], dtype=torch.bfloat16), name="one")],
+    ),
+    opset_imports=[oh.make_opsetid("", 18)],
+    ir_version=9,
+)
+
+# %%
+# We check it is failing.
+
+try:
+    onnxruntime.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
+except onnxruntime.capi.onnxruntime_pybind11_state.Fail as e:
+    print(e)
+
+
+# %%
+# OnnxruntimeEvaluator
+# ++++++++++++++++++++
+#
+# This class mimics :class:`onnx.reference.ReferenceEvaluator` but runs every
+# node with :epkg:`onnxruntime`, which also implements operators outside the standard.
+# `verbose=10` tells the class to print as much as possible,
+# `verbose=0` prints nothing, and intermediate values print more or less information.
+
+ref = OnnxruntimeEvaluator(model, verbose=10)
+feeds = dict(
+    X=torch.rand((3, 4), dtype=torch.bfloat16), Y=torch.rand((3, 4), dtype=torch.bfloat16)
+)
+try:
+    ref.run(None, feeds)
+except Exception as e:
+    print("ERROR", type(e), e)
+
+
+# %%
+# :epkg:`onnxruntime` may not support bfloat16 on CPU.
+# See :epkg:`onnxruntime kernels`.
+
+if has_cuda():
+    ref = OnnxruntimeEvaluator(model, providers="cuda", verbose=10)
+    feeds = dict(
+        X=torch.rand((3, 4), dtype=torch.bfloat16), Y=torch.rand((3, 4), dtype=torch.bfloat16)
+    )
+    try:
+        ref.run(None, feeds)
+    except Exception as e:
+        print("ERROR", type(e), e)
+
+# %%
+# We can see it runs until it reaches `Cast` and stops.
+# The error message is not always obvious to interpret.
+# It gets improved from time to time.
+# This runtime is useful when the execution fails for a numerical reason.
+# It is possible to insert prints in the python code to display
+# more information or to debug if needed.
diff --git a/_doc/index.rst b/_doc/index.rst
index 01b63d2c..e6231eb6 100644
--- a/_doc/index.rst
+++ b/_doc/index.rst
@@ -8,18 +8,10 @@ onnx-diagnostic: investigate onnx models
 .. image:: https://badge.fury.io/py/onnx-diagnostic.svg
     :target: http://badge.fury.io/py/onnx-diagnostic
 
-.. image:: http://img.shields.io/github/issues/sdpython/onnx-diagnostic.png
-    :alt: GitHub Issues
-    :target: https://github.com/sdpython/onnx-diagnostic/issues
-
 .. image:: https://img.shields.io/badge/license-MIT-blue.svg
     :alt: MIT License
     :target: https://opensource.org/license/MIT/
 
-.. image:: https://img.shields.io/github/repo-size/sdpython/onnx-diagnostic
-    :target: https://github.com/sdpython/onnx-diagnostic/
-    :alt: size
-
 ..
image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/psf/black @@ -51,6 +43,7 @@ Source are `sdpython/onnx-diagnostic * :ref:`l-plot-sxport-with-dynamio-shapes-auto` * :ref:`l-plot-tiny-llm-export` * :ref:`l-plot-failing-reference-evaluator` +* :ref:`l-plot-failing-onnxruntime-evaluator` * :ref:`l-plot-failing-model-extract` **Some Usefuls Tools** diff --git a/_unittests/ut_reference/test_array_tensor.py b/_unittests/ut_reference/test_array_tensor.py index c6983427..8c10f124 100644 --- a/_unittests/ut_reference/test_array_tensor.py +++ b/_unittests/ut_reference/test_array_tensor.py @@ -2,9 +2,8 @@ import numpy as np from onnx import TensorProto from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info -from onnx.reference.op_run import to_array_extended from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings -from onnx_diagnostic.helpers import from_array_extended +from onnx_diagnostic.helpers import from_array_extended, to_array_extended from onnx_diagnostic.reference import ExtendedReferenceEvaluator diff --git a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py index 2a4d7768..5410dd9b 100644 --- a/_unittests/ut_reference/test_backend_extended_reference_evaluator.py +++ b/_unittests/ut_reference/test_backend_extended_reference_evaluator.py @@ -43,13 +43,13 @@ def run(self, inputs, **kwargs): class ExtendedReferenceEvaluatorBackend(onnx.backend.base.Backend): @classmethod - def is_opset_supported(cls, model): # pylint: disable=unused-argument - return True, "" + def is_compatible(cls, model) -> bool: + return True @classmethod def supports_device(cls, device: str) -> bool: d = Device(device) - return d.type == DeviceType.CPU # type: ignore[no-any-return] + return d.type == DeviceType.CPU @classmethod def create_inference_session(cls, model): diff --git a/_unittests/ut_reference/test_backend_onnxruntime_evaluator.py b/_unittests/ut_reference/test_backend_onnxruntime_evaluator.py new file mode 100644 index 00000000..0fcef585 --- /dev/null +++ b/_unittests/ut_reference/test_backend_onnxruntime_evaluator.py @@ -0,0 +1,248 @@ +import unittest +import warnings +from typing import Any +import numpy +import onnx.backend.base +import onnx.backend.test +import onnx.shape_inference +import onnx.version_converter +from onnx import ModelProto +from onnx.backend.base import Device, DeviceType +from onnx.defs import onnx_opset_version +from onnx_diagnostic.reference import OnnxruntimeEvaluator + +ORT_OPSET = max(21, onnx_opset_version() - 2) + + +class OnnxruntimeEvaluatorBackendRep(onnx.backend.base.BackendRep): + def __init__(self, session): + self._session = session + + def run(self, inputs, **kwargs): + if isinstance(inputs, numpy.ndarray): + inputs = [inputs] + if isinstance(inputs, list): + if len(inputs) == len(self._session.input_names): + feeds = dict(zip(self._session.input_names, inputs)) + else: + feeds = {} + pos_inputs = 0 + for inp, tshape in zip(self._session.input_names, self._session.input_types): + shape = tuple(d.dim_value for d in tshape.tensor_type.shape.dim) + if shape == inputs[pos_inputs].shape: + feeds[inp] = inputs[pos_inputs] + pos_inputs += 1 + if pos_inputs >= len(inputs): + break + elif isinstance(inputs, dict): + feeds = inputs + else: + raise TypeError(f"Unexpected input type {type(inputs)!r}.") + outs = self._session.run(None, feeds) + return outs + + +class 
OnnxruntimeEvaluatorBackend(onnx.backend.base.Backend): + @classmethod + def is_compatible(cls, model) -> bool: + return all(not (d.domain == "" and d.version > ORT_OPSET) for d in model.opset_import) + + @classmethod + def supports_device(cls, device: str) -> bool: + d = Device(device) + return d.type == DeviceType.CPU + + @classmethod + def create_inference_session(cls, model): + return OnnxruntimeEvaluator(model) + + @classmethod + def prepare( + cls, model: Any, device: str = "CPU", **kwargs: Any + ) -> OnnxruntimeEvaluatorBackendRep: + if isinstance(model, OnnxruntimeEvaluator): + return OnnxruntimeEvaluatorBackendRep(model) + if isinstance(model, (str, bytes, ModelProto)): + inf = cls.create_inference_session(model) + return cls.prepare(inf, device, **kwargs) + raise TypeError(f"Unexpected type {type(model)} for model.") + + @classmethod + def run_model(cls, model, inputs, device=None, **kwargs): + rep = cls.prepare(model, device, **kwargs) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return rep.run(inputs, **kwargs) + + @classmethod + def run_node(cls, node, inputs, device=None, outputs_info=None, **kwargs): + raise NotImplementedError("Unable to run the model node by node.") + + +dft_atol = 1e-3 +stft_atol = 1e-4 +ql_atol = 1e-5 +backend_test = onnx.backend.test.BackendTest( + OnnxruntimeEvaluatorBackend, + __name__, + test_kwargs={ + "test_dft": {"atol": dft_atol, "rtol": numpy.inf}, + "test_dft_axis": {"atol": dft_atol, "rtol": numpy.inf}, + "test_dft_axis_opset19": {"atol": dft_atol, "rtol": numpy.inf}, + "test_dft_inverse": {"atol": dft_atol, "rtol": numpy.inf}, + "test_dft_inverse_opset19": {"atol": dft_atol, "rtol": numpy.inf}, + "test_dft_opset19": {"atol": dft_atol, "rtol": numpy.inf}, + "test_stft": {"atol": stft_atol, "rtol": numpy.inf}, + "test_stft_with_window": {"atol": stft_atol, "rtol": numpy.inf}, + "test_qlinearmatmul_2D_int8_float32": {"atol": ql_atol}, + "test_qlinearmatmul_3D_int8_float32": {"atol": ql_atol}, + }, +) + +# rtol=inf does not work +backend_test.exclude("(test_dft|test_stft)") + +# The following tests are too slow with the reference implementation (Conv). +backend_test.exclude( + "(test_bvlc_alexnet" + "|test_densenet121" + "|test_inception_v1" + "|test_inception_v2" + "|test_resnet50" + "|test_shufflenet" + "|test_squeezenet" + "|test_vgg19" + "|test_zfnet512)" +) + +# The following tests cannot pass because they consists in generating random number. +backend_test.exclude("(test_bernoulli|test_PoissonNLLLLoss)") + +# The following tests are not supported. 
+backend_test.exclude( + "(test_gradient" + "|test_if_opt" + "|test_loop16_seq_none" + "|test_range_float_type_positive_delta_expanded" + "|test_range_int32_type_negative_delta_expanded" + "|test_scan_sum)" +) + +if onnx_opset_version() < 21: + backend_test.exclude( + "(test_averagepool_2d_dilations" + "|test_if*" + "|test_loop*" + "|test_scan*" + "|test_sequence_map*" + "|test_cast_FLOAT_to_STRING|" + "test_castlike_FLOAT_to_STRING|test_strnorm|" + "test_center_crop_pad_crop_axes_hwc_expanded|" + "test_lppool_2d_dilations|test_eyelike_without_dtype)" + ) + +# Disable test about float 8 +backend_test.exclude( + "(test_castlike_BFLOAT16*" + "|test_cast_BFLOAT16*" + "|test_cast_no_saturate*" + "|test_cast_FLOAT_to_FLOAT8*" + "|test_cast_FLOAT16_to_FLOAT8*" + "|test_cast_FLOAT8_to_*" + "|test_castlike_BFLOAT16*" + "|test_castlike_no_saturate*" + "|test_castlike_FLOAT_to_FLOAT8*" + "|test_castlike_FLOAT16_to_FLOAT8*" + "|test_castlike_FLOAT8_to_*" + "|test_quantizelinear_e*)" +) + +# Disable test about INT 4 +backend_test.exclude( + "(test_cast_FLOAT_to_INT4" + "|test_cast_FLOAT16_to_INT4" + "|test_cast_INT4_to_" + "|test_castlike_INT4_to_" + "|test_cast_FLOAT_to_UINT4" + "|test_cast_FLOAT16_to_UINT4" + "|test_cast_UINT4_to_" + "|test_castlike_UINT4_to_)" +) + +backend_test.exclude( + "(test_regex_full_match|" + "test_adagrad|" + "test_adam|" + "test_add_uint8|" + "test_ai_onnx_ml_label_encoder_string|" + "test_ai_onnx_ml_label_encoder_tensor_mapping|" + "test_ai_onnx_ml_label_encoder_tensor_value_only_mapping|" + "test_AvgPool|" + "test_BatchNorm|" + "test_bitshift_[a-z]+_uint16|" + "test_center_crop_pad_crop|" + "test_clip_[0-9a-z_]*expanded|" + "test_elu_[0-9a-z_]*expanded|" + "test_equal_string|" + "test_GLU_|" + "test_identity_opt|" + "test_if|" + "test_image|" + "test_leakyrelu|" + "test_((less)|(greater))_equal_bcast|" + "test_((less)|(greater))[a-z_]*expanded|" + "test_Linear|" + "test_loop13|" + "test_momentum|" + "test_nesterov|" + "test_((mul)|(min)|(max)|(div))_u?int((8)|(16))|" + "test_operator|" + "test_optional_|" + "test_pow_types_float32_uint|" + "test_qlinearmatmul|" + "test_prelu|" + "test_PReLU|" + "test_reduce_max_empty|" + "test_resize_downsample_scales|" + "test_scatter_with_axis|" + "test_scatter_without_axis" + "|test_selu" + "|test_sequence" + "|test_shrink_" + "|test_Softsign" + "|test_split_to_sequence" + "|test_string_concat" + "|test_string_split" + "|test_strnorm_model" + "|test_strnormalizer" + "|test_sub_uint8" + "|test_thresholdedrelu" + "|test_top_k_uint64" + ")" +) + +# failing on CI only +backend_test.exclude( + "(_to_STRING|to_BFLOAT16|STRING_to|BFLOAT16_to|" + "test_constant|test_(de)?quantizelinear_u?int4" + "|test_identity_sequence" + ")" +) + + +# import all test cases at global scope to make them visible to python.unittest +globals().update(backend_test.test_cases) + +if __name__ == "__main__": + res = unittest.main(verbosity=2, exit=False) + tests_run = res.result.testsRun + errors = len(res.result.errors) + skipped = len(res.result.skipped) + unexpected_successes = len(res.result.unexpectedSuccesses) + expected_failures = len(res.result.expectedFailures) + print("---------------------------------") + print( + f"tests_run={tests_run} errors={errors} skipped={skipped} " + f"unexpected_successes={unexpected_successes} " + f"expected_failures={expected_failures}" + ) diff --git a/_unittests/ut_reference/test_ort_evaluator.py b/_unittests/ut_reference/test_ort_evaluator.py new file mode 100644 index 00000000..74994f0b --- /dev/null +++ 
b/_unittests/ut_reference/test_ort_evaluator.py @@ -0,0 +1,253 @@ +import unittest +from typing import Any, Dict, Optional, Tuple +import numpy as np +import ml_dtypes +from onnx import ModelProto, TensorProto +from onnx.checker import check_model +import onnx.helper as oh +import onnx.numpy_helper as onh +import torch +from onnx_diagnostic.ext_test_case import ( + ExtTestCase, + hide_stdout, + ignore_warnings, + requires_cuda, +) +from onnx_diagnostic.helpers import ( + from_array_extended, + onnx_dtype_to_torch_dtype, + onnx_dtype_to_np_dtype, +) +from onnx_diagnostic.reference import ExtendedReferenceEvaluator, OnnxruntimeEvaluator + +TFLOAT = TensorProto.FLOAT + + +class TestOnnxruntimeEvaluatoruator(ExtTestCase): + def _range(self, *shape, bias: Optional[float] = None): + n = np.prod(shape) + x = np.arange(n).astype(np.float32) / n + if bias: + x = x + bias + return x.reshape(tuple(shape)).astype(np.float32) + + def _get_model(self) -> ModelProto: + model = oh.make_model( + oh.make_graph( + [ + oh.make_node("Unsqueeze", ["X", "zero"], ["xu1"]), + oh.make_node("Unsqueeze", ["xu1", "un"], ["xu2"]), + oh.make_node("Reshape", ["xu2", "shape1"], ["xm1"]), + oh.make_node("Reshape", ["Y", "shape2"], ["xm2c"]), + oh.make_node("Cast", ["xm2c"], ["xm2"], to=1), + oh.make_node("MatMul", ["xm1", "xm2"], ["xm"]), + oh.make_node("Reshape", ["xm", "shape3"], ["Z"]), + ], + "dummy", + [ + oh.make_tensor_value_info("X", TFLOAT, [32, 128]), + oh.make_tensor_value_info("Y", TFLOAT, [3, 5, 128, 64]), + ], + [oh.make_tensor_value_info("Z", TFLOAT, [3, 5, 32, 64])], + [ + onh.from_array(np.array([0], dtype=np.int64), name="zero"), + onh.from_array(np.array([1], dtype=np.int64), name="un"), + onh.from_array(np.array([1, 32, 128], dtype=np.int64), name="shape1"), + onh.from_array(np.array([15, 128, 64], dtype=np.int64), name="shape2"), + onh.from_array(np.array([3, 5, 32, 64], dtype=np.int64), name="shape3"), + ], + ), + ir_version=9, + opset_imports=[oh.make_opsetid("", 18)], + ) + check_model(model) + return model + + @ignore_warnings(DeprecationWarning) + def test_ort_eval(self): + model = self._get_model() + + feeds = {"X": self._range(32, 128), "Y": self._range(3, 5, 128, 64)} + ref = ExtendedReferenceEvaluator(model, verbose=10) + expected, out, _ = self.capture(lambda: ref.run(None, feeds)[0]) + self.assertIn("Reshape(xm, shape3) -> Z", out) + + ort_eval = OnnxruntimeEvaluator(model, verbose=10, opsets=20) + got, out, _ = self.capture(lambda: ort_eval.run(None, feeds)[0]) + self.assertEqualArray(expected, got, atol=1e-4) + self.assertIn("Reshape(xm, shape3) -> Z", out) + + @ignore_warnings(DeprecationWarning) + @requires_cuda() + @hide_stdout() + def test_ort_eval_cuda(self): + model = self._get_model() + + feeds = {"X": self._range(32, 128), "Y": self._range(3, 5, 128, 64)} + ref = ExtendedReferenceEvaluator(model, verbose=10) + expected = ref.run(None, feeds)[0] + + ort_eval = OnnxruntimeEvaluator(model, verbose=10, opsets=20, providers="cuda") + got = ort_eval.run(None, feeds)[0] + self.assertEqualArray(expected, got, atol=1e-1) + + @ignore_warnings(DeprecationWarning) + @hide_stdout() + def test_ort_eval_node_proto(self): + model = self._get_model() + + feeds = {"X": self._range(32, 128), "zero": np.array([0], dtype=np.int64)} + ref = ExtendedReferenceEvaluator(model.graph.node[0], verbose=10) + expected = ref.run(None, feeds) + + ort_eval = OnnxruntimeEvaluator(model.graph.node[0], verbose=10, opsets=20) + got = ort_eval.run(None, feeds) + self.assertEqualArrayAny(expected, got, atol=1e-4) + 
self.assertIsInstance(expected[0], np.ndarray) + self.assertIsInstance(got[0], np.ndarray) + + @ignore_warnings(DeprecationWarning) + @hide_stdout() + def test_ort_eval_node_proto_torch(self): + model = self._get_model() + + feeds_np = {"X": self._range(32, 128), "zero": np.array([0], dtype=np.int64)} + feeds = {k: torch.from_numpy(v) for k, v in feeds_np.items()} + ref = ExtendedReferenceEvaluator(model.graph.node[0], verbose=10) + expected = ref.run(None, feeds_np) + + ort_eval = OnnxruntimeEvaluator(model.graph.node[0], verbose=10, opsets=20) + got = ort_eval.run(None, feeds) + self.assertIsInstance(got[0], torch.Tensor) + self.assertEqualArray(expected[0], got[0], atol=1e-4) + + @hide_stdout() + def test_local_function(self): + new_domain = "custom" + + linear_regression = oh.make_function( + new_domain, + "LinearRegression", + ["x", "a", "b"], + ["y"], + [ + oh.make_node("MatMul", ["x", "a"], ["xa"]), + oh.make_node("Add", ["xa", "b"], ["y"]), + ], + [oh.make_opsetid("", 14)], + [], + ) + + graph = oh.make_graph( + [ + oh.make_node("LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain), + oh.make_node("Abs", ["Y1"], ["Y"]), + ], + "example", + [ + oh.make_tensor_value_info("X", TFLOAT, [None, None]), + oh.make_tensor_value_info("A", TFLOAT, [None, None]), + oh.make_tensor_value_info("B", TFLOAT, [None, None]), + ], + [oh.make_tensor_value_info("Y", TFLOAT, None)], + ) + + onnx_model = oh.make_model( + graph, + opset_imports=[oh.make_opsetid("", 14), oh.make_opsetid(new_domain, 1)], + functions=[linear_regression], + ir_version=10, + ) + feeds = { + "X": np.random.randn(3, 3).astype(np.float32), + "A": np.random.randn(3, 3).astype(np.float32), + "B": np.random.randn(3, 3).astype(np.float32), + } + ref = ExtendedReferenceEvaluator(onnx_model) + ort_eval = OnnxruntimeEvaluator(onnx_model, verbose=10, opsets=20) + expected = ref.run(None, feeds) + got = ort_eval.run(None, feeds) + self.assertEqualArray(expected[0], got[0]) + + @classmethod + def _trange(cls, *shape, bias: Optional[float] = None): + n = np.prod(shape) + x = np.arange(n).astype(np.float32) / n + if bias: + x = x + bias + return torch.from_numpy(x.reshape(tuple(shape)).astype(np.float32)) + + @classmethod + def _get_model_init(cls, itype) -> Tuple[ModelProto, Dict[str, Any], Tuple[Any, ...]]: + dtype = onnx_dtype_to_np_dtype(itype) + ttype = onnx_dtype_to_torch_dtype(itype) + cst = np.arange(6).astype(dtype) + model = oh.make_model( + oh.make_graph( + [ + oh.make_node("IsNaN", ["x"], ["xi"]), + oh.make_node("IsNaN", ["y"], ["yi"]), + oh.make_node("Cast", ["xi"], ["xii"], to=TensorProto.INT64), + oh.make_node("Cast", ["yi"], ["yii"], to=TensorProto.INT64), + oh.make_node("Add", ["xii", "yii"], ["gggg"]), + oh.make_node("Cast", ["gggg"], ["final"], to=itype), + ], + "dummy", + [oh.make_tensor_value_info("x", itype, [None, None])], + [oh.make_tensor_value_info("final", itype, [None, None])], + [from_array_extended(cst, name="y")], + ), + opset_imports=[oh.make_opsetid("", 20)], + ir_version=10, + ) + feeds = {"x": cls._trange(5, 6).to(ttype)} + expected = torch.isnan(feeds["x"]).to(int) + torch.isnan( + torch.from_numpy(cst.astype(float)) + ).to(int) + return (model, feeds, (expected.to(ttype),)) + + @hide_stdout() + def test_init_numpy_afloat32(self): + model, feeds, expected = self._get_model_init(TensorProto.FLOAT) + wrap = OnnxruntimeEvaluator( + model, providers="cpu", graph_optimization_level=False, verbose=10 + ) + got = wrap.run(None, {k: v.numpy() for k, v in feeds.items()}) + self.assertIsInstance(got[0], 
np.ndarray) + self.assertEqualArray(expected[0], got[0]) + + @hide_stdout() + def test_init_numpy_bfloat16(self): + model, feeds, expected = self._get_model_init(TensorProto.BFLOAT16) + wrap = OnnxruntimeEvaluator( + model, providers="cpu", graph_optimization_level=False, verbose=10 + ) + got = wrap.run( + None, {k: v.to(float).numpy().astype(ml_dtypes.bfloat16) for k, v in feeds.items()} + ) + self.assertIsInstance(got[0], np.ndarray) + self.assertEqualArray(expected[0], got[0]) + + @hide_stdout() + def test_init_torch_afloat32(self): + model, feeds, expected = self._get_model_init(TensorProto.FLOAT) + wrap = OnnxruntimeEvaluator( + model, providers="cpu", graph_optimization_level=False, verbose=10 + ) + got = wrap.run(None, feeds) + self.assertIsInstance(got[0], (torch.Tensor, np.ndarray)) + self.assertEqualArray(expected[0], got[0]) + + @hide_stdout() + def test_init_torch_bfloat16(self): + model, feeds, expected = self._get_model_init(TensorProto.BFLOAT16) + wrap = OnnxruntimeEvaluator( + model, providers="cpu", graph_optimization_level=False, verbose=10 + ) + got = wrap.run(None, feeds) + self.assertIsInstance(got[0], (torch.Tensor, np.ndarray)) + self.assertEqualArray(expected[0], got[0]) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/_unittests/ut_xrun_doc/test_helpers.py b/_unittests/ut_xrun_doc/test_helpers.py index 4033e5b3..0fef8bd1 100644 --- a/_unittests/ut_xrun_doc/test_helpers.py +++ b/_unittests/ut_xrun_doc/test_helpers.py @@ -5,7 +5,12 @@ import onnx import onnx.helper as oh import torch -from onnx_diagnostic.ext_test_case import ExtTestCase, skipif_ci_windows, hide_stdout +from onnx_diagnostic.ext_test_case import ( + ExtTestCase, + skipif_ci_windows, + hide_stdout, + requires_onnx, +) from onnx_diagnostic.helpers import ( string_type, string_sig, @@ -19,9 +24,11 @@ string_signature, make_hash, onnx_dtype_to_torch_dtype, + onnx_dtype_to_np_dtype, np_dtype_to_tensor_dtype, torch_dtype_to_onnx_dtype, from_array_extended, + to_array_extended, convert_endian, from_array_ml_dtypes, dtype_to_tensor_dtype, @@ -213,6 +220,7 @@ def test_size_type_onnx(self): "FLOAT8E4M3FNUZ", }: onnx_dtype_to_torch_dtype(i) + onnx_dtype_to_np_dtype(i) def test_size_type_numpy(self): for dt in { @@ -248,16 +256,21 @@ def test_from_array(self): t = np.random.rand(4, 3).astype(dt) proto = from_array_extended(t) self.assertIsInstance(proto, onnx.TensorProto) - convert_endian(proto) dtype_to_tensor_dtype(dt) + arr = to_array_extended(proto) + self.assertEqualArray(t, arr) + convert_endian(proto) + @requires_onnx("1.18.0") def test_from_array_ml_dtypes(self): for dt in { ml_dtypes.bfloat16, }: t = np.random.rand(4, 3).astype(dt) - from_array_ml_dtypes(t) + proto = from_array_ml_dtypes(t) from_array_extended(t) + arr = to_array_extended(proto) + self.assertEqualArray(t, arr) def test_size_type_mldtypes(self): for dt in { @@ -407,6 +420,28 @@ def test_rename_dynamic_expression(self): text = rename_dynamic_expression("a * 10 - a", {"a": "x"}) self.assertEqual(text, "x * 10 - x") + def test_from_tensor(self): + for dt in { + torch.float32, + torch.float64, + torch.bfloat16, + torch.float16, + torch.int32, + torch.int64, + torch.int8, + torch.int16, + torch.uint8, + torch.uint16, + torch.uint32, + torch.uint64, + }: + t = torch.arange(12).reshape((4, 3)).to(dt) + from_array_extended(t) + proto = from_array_extended(t, name="a") + self.assertIsInstance(proto, onnx.TensorProto) + convert_endian(proto) + dtype_to_tensor_dtype(dt) + if __name__ == "__main__": unittest.main(verbosity=2) 
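The bfloat16 tests above work because ml_dtypes provides a 2-byte bfloat16 dtype for numpy, obtained by reinterpreting the raw storage of a torch tensor. A minimal sketch of that round trip, independent of the helpers added in this PR (tensor values chosen for illustration):

import numpy as np
import ml_dtypes
import torch

t = torch.tensor([1.5, -2.0, 3.25], dtype=torch.bfloat16)
# bfloat16 and int16 share the same 2-byte storage, so the view is lossless.
raw = t.view(torch.int16).numpy()
# Reinterpret the same bytes as bfloat16 on the numpy side.
bf16 = raw.view(ml_dtypes.bfloat16)
print(bf16.astype(np.float32))  # back to a dtype numpy fully supports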
diff --git a/_unittests/ut_xrun_doc/test_ort_session.py b/_unittests/ut_xrun_doc/test_ort_session.py index c6860faf..e76297d0 100644 --- a/_unittests/ut_xrun_doc/test_ort_session.py +++ b/_unittests/ut_xrun_doc/test_ort_session.py @@ -1,6 +1,7 @@ import unittest -from typing import Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple import numpy as np +import ml_dtypes import onnx import onnx.helper as oh import torch @@ -11,6 +12,11 @@ requires_onnxruntime_training, requires_cuda, ) +from onnx_diagnostic.helpers import ( + from_array_extended, + onnx_dtype_to_np_dtype, + onnx_dtype_to_torch_dtype, +) from onnx_diagnostic.ort_session import ( InferenceSessionForNumpy, InferenceSessionForTorch, @@ -232,6 +238,66 @@ def test_investigate_onnxruntime_issue_callable_str(self): onnx_to_session="cpu_session", ) + @classmethod + def _get_model_init(cls, itype) -> Tuple[onnx.ModelProto, Dict[str, Any], Tuple[Any, ...]]: + dtype = onnx_dtype_to_np_dtype(itype) + ttype = onnx_dtype_to_torch_dtype(itype) + cst = np.arange(6).astype(dtype) + model = oh.make_model( + oh.make_graph( + [ + oh.make_node("IsNaN", ["x"], ["xi"]), + oh.make_node("IsNaN", ["y"], ["yi"]), + oh.make_node("Cast", ["xi"], ["xii"], to=onnx.TensorProto.INT64), + oh.make_node("Cast", ["yi"], ["yii"], to=onnx.TensorProto.INT64), + oh.make_node("Add", ["xii", "yii"], ["gggg"]), + oh.make_node("Cast", ["gggg"], ["final"], to=itype), + ], + "dummy", + [oh.make_tensor_value_info("x", itype, [None, None])], + [oh.make_tensor_value_info("final", itype, [None, None])], + [from_array_extended(cst, name="y")], + ), + opset_imports=[oh.make_opsetid("", 20)], + ir_version=10, + ) + onnx.checker.check_model(model) + feeds = {"x": cls._range(5, 6).to(ttype)} + expected = torch.isnan(feeds["x"]).to(int) + torch.isnan( + torch.from_numpy(cst.astype(float)) + ).to(int) + return (model, feeds, (expected.to(ttype),)) + + def test_init_numpy_afloat32(self): + model, feeds, expected = self._get_model_init(onnx.TensorProto.FLOAT) + wrap = InferenceSessionForNumpy(model, providers="cpu", graph_optimization_level=False) + got = wrap.run(None, {k: v.numpy() for k, v in feeds.items()}) + self.assertIsInstance(got[0], np.ndarray) + self.assertEqualArray(expected[0], got[0]) + + def test_init_numpy_bfloat16(self): + model, feeds, expected = self._get_model_init(onnx.TensorProto.BFLOAT16) + wrap = InferenceSessionForNumpy(model, providers="cpu", graph_optimization_level=False) + got = wrap.run( + None, {k: v.to(float).numpy().astype(ml_dtypes.bfloat16) for k, v in feeds.items()} + ) + self.assertIsInstance(got[0], np.ndarray) + self.assertEqualArray(expected[0], got[0]) + + def test_init_torch_afloat32(self): + model, feeds, expected = self._get_model_init(onnx.TensorProto.FLOAT) + wrap = InferenceSessionForTorch(model, providers="cpu", graph_optimization_level=False) + got = wrap.run(None, feeds) + self.assertIsInstance(got[0], torch.Tensor) + self.assertEqualArray(expected[0], got[0]) + + def test_init_torch_bfloat16(self): + model, feeds, expected = self._get_model_init(onnx.TensorProto.BFLOAT16) + wrap = InferenceSessionForTorch(model, providers="cpu", graph_optimization_level=False) + got = wrap.run(None, feeds) + self.assertIsInstance(got[0], torch.Tensor) + self.assertEqualArray(expected[0], got[0]) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/onnx_diagnostic/helpers.py b/onnx_diagnostic/helpers.py index 31dc7f5f..b3fbe468 100644 --- a/onnx_diagnostic/helpers.py +++ b/onnx_diagnostic/helpers.py @@ -1,4 +1,5 @@ 
import ast +import ctypes import enum import functools import inspect @@ -21,8 +22,7 @@ np_dtype_to_tensor_dtype as onnx_np_dtype_to_tensor_dtype, tensor_dtype_to_np_dtype as onnx_tensor_dtype_to_np_dtype, ) -from onnx.numpy_helper import from_array as onnx_from_array -from onnx.reference.op_run import to_array_extended +from onnx.numpy_helper import from_array as onnx_from_array, to_array def size_type(dtype: Any) -> int: @@ -317,6 +317,7 @@ def string_type( if not with_shape: return f"{prefix}F{i}r{len(obj.shape)}" return f"{prefix}F{i}s{'x'.join(map(str, obj.shape))}" + if isinstance(obj, torch.Tensor): if with_min_max: s = string_type(obj, with_shape=with_shape, with_device=with_device) @@ -341,6 +342,25 @@ def string_type( if not with_shape: return f"{prefix}T{i}r{len(obj.shape)}" return f"{prefix}T{i}s{'x'.join(map(str, obj.shape))}" + + if obj.__class__.__name__ == "OrtValue": + if not obj.has_value(): + return "OV()" + if not obj.is_tensor(): + return "OV(NOTENSOR)" + if with_min_max: + try: + t = obj.numpy() + except Exception: + # pass unable to convert into numpy (bfloat16, ...) + return "OV(NO-NUMPY:FIXIT)" + return f"OV({string_type(t, with_shape=with_shape, with_min_max=with_min_max)})" + dt = obj.element_type() + shape = obj.shape() + if with_shape: + return f"OV{dt}s{'x'.join(map(str, shape))}" + return f"OV{dt}r{len(shape)}" + if isinstance(obj, bool): if with_min_max: return f"bool={obj}" @@ -442,6 +462,7 @@ def string_type( if ignore: return f"{obj.__class__.__name__}(...)" + raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}") @@ -709,14 +730,97 @@ def from_array_ml_dtypes(arr: npt.ArrayLike, name: Optional[str] = None) -> Tens return tensor +_STORAGE_TYPE = { + TensorProto.FLOAT16: np.int16, + TensorProto.BFLOAT16: np.int16, +} + + +def proto_from_tensor( + arr: "torch.Tensor", # noqa: F821 + name: Optional[str] = None, + verbose: int = 0, +) -> TensorProto: + """ + Converts a torch Tensor into a TensorProto. + + :param arr: tensor + :param verbose: display the type and shape + :return: a TensorProto + """ + import torch + + if not isinstance(arr, torch.Tensor): + raise TypeError(f"Unexpected type {type(arr)}.") + if arr.is_sparse: + raise NotImplementedError( + f"Sparse tensor is not supported yet but initializer {name!r} is." + ) + + # arr.contiguous() is slow after a transpose, maybe there is a way to optimize this. 
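+    # A transposed or sliced tensor is a non-contiguous view, so .contiguous()
+    # materializes a copy on the original device before .cpu() can run.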
+ if arr.is_contiguous(): + arr_cpu = arr.cpu() + else: + arr_cpu = arr.contiguous().cpu() + + numel = torch.numel(arr_cpu) + element_size = arr_cpu.element_size() + + if arr_cpu.dtype in {torch.bfloat16}: + np_arr = arr_cpu + elif arr_cpu.data_ptr() == arr.data_ptr(): + copy = arr_cpu.clone().detach().requires_grad_(False) + assert ( + arr_cpu.data_ptr() == 0 or arr_cpu.data_ptr() != copy.data_ptr() + ), f"Pointers are not null and different {arr_cpu.data_ptr()} != {copy.data_ptr()}" + np_arr = np.from_dlpack(copy) + else: + np_arr = np.from_dlpack(arr_cpu.detach()) + + tensor = TensorProto() + tensor.dims.extend(arr_cpu.shape) + if name: + tensor.name = name + itype = torch_dtype_to_onnx_dtype(arr_cpu.dtype) + assert not hasattr(TensorProto, "INT4") or itype not in { + TensorProto.INT4, + TensorProto.UINT4, + }, f"Type {arr.dtype} is not supported yet for name={name!r}" + tensor.data_type = itype + + if verbose > 1 and numel > 100: + print(f"[proto_from_array] {tensor.data_type}[{arr_cpu.shape}]") + + if isinstance(np_arr, torch.Tensor): + byte_data = (ctypes.c_ubyte * numel * element_size).from_address(np_arr.data_ptr()) + tensor.raw_data = bytes(byte_data) + if sys.byteorder == "big": + np_dtype = _STORAGE_TYPE[tensor.data_type] # type: ignore + np.byteswap(np.frombuffer(tensor.raw_data, dtype=np_dtype), inplace=True) + else: + tensor.raw_data = np_arr.tobytes() + if sys.byteorder == "big": + np_dtype = tensor_dtype_to_np_dtype(tensor.data_type) + np.byteswap(np.frombuffer(tensor.raw_data, dtype=np_dtype), inplace=True) + + return tensor + + def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> TensorProto: """ Converts an array into a :class:`onnx.TensorProto`. - :param tensor: numpy array + :param tensor: numpy array or torch tensor :param name: name :return: TensorProto """ + try: + import torch + except ImportError: + torch = None + if torch is not None and isinstance(tensor, torch.Tensor): + return proto_from_tensor(tensor, name=name) + from onnx.reference.ops.op_cast import ( bfloat16, float8e4m3fn, @@ -761,6 +865,16 @@ def from_array_extended(tensor: npt.ArrayLike, name: Optional[str] = None) -> Te return t +def to_array_extended(proto: TensorProto) -> npt.ArrayLike: + """Converts :class:`onnx.TensorProto` into a numpy array.""" + arr = to_array(proto) + if proto.data_type >= onnx.TensorProto.BFLOAT16: + # Types not supported by numpy + ml_dtypes = onnx_dtype_to_np_dtype(proto.data_type) + return arr.view(ml_dtypes) + return arr + + def onnx_dtype_to_torch_dtype(itype: int) -> "torch.dtype": # noqa: F821 """ Converts an onnx type into a torch dtype. @@ -805,6 +919,51 @@ def onnx_dtype_to_torch_dtype(itype: int) -> "torch.dtype": # noqa: F821 ) +def onnx_dtype_to_np_dtype(itype: int) -> Any: + """ + Converts an onnx type into a to numpy dtype. + That includes :epkg:`ml_dtypes` dtypes. 
+
+    :param itype: onnx dtype
+    :return: numpy dtype
+    """
+    if itype == TensorProto.FLOAT:
+        return np.float32
+    if itype == TensorProto.FLOAT16:
+        return np.float16
+    if itype == TensorProto.BFLOAT16:
+        import ml_dtypes
+
+        return ml_dtypes.bfloat16
+    if itype == TensorProto.DOUBLE:
+        return np.float64
+    if itype == TensorProto.INT32:
+        return np.int32
+    if itype == TensorProto.INT64:
+        return np.int64
+    if itype == TensorProto.UINT32:
+        return np.uint32
+    if itype == TensorProto.UINT64:
+        return np.uint64
+    if itype == TensorProto.BOOL:
+        return np.bool_
+    if itype == TensorProto.INT16:
+        return np.int16
+    if itype == TensorProto.UINT16:
+        return np.uint16
+    if itype == TensorProto.INT8:
+        return np.int8
+    if itype == TensorProto.UINT8:
+        return np.uint8
+    if itype == TensorProto.COMPLEX64:
+        return np.complex64
+    if itype == TensorProto.COMPLEX128:
+        return np.complex128
+    raise NotImplementedError(
+        f"Unable to convert onnx type {onnx_dtype_name(itype)} to a numpy dtype."
+    )
+
+
 def torch_dtype_to_onnx_dtype(to: "torch.dtype") -> int:  # noqa: F821
     """
     Converts a torch dtype into a onnx element type.
diff --git a/onnx_diagnostic/ort_session.py b/onnx_diagnostic/ort_session.py
index c8135cb5..8b8141e1 100644
--- a/onnx_diagnostic/ort_session.py
+++ b/onnx_diagnostic/ort_session.py
@@ -6,6 +6,13 @@
 from torch._C import _from_dlpack
 import onnxruntime
 from onnxruntime.capi import _pybind_state as ORTC
+from .helpers import (
+    torch_dtype_to_onnx_dtype,
+    onnx_dtype_to_np_dtype,
+    np_dtype_to_tensor_dtype,
+    onnx_dtype_name,
+    size_type,
+)
 
 DEVICES = {-1: ORTC.OrtDevice(ORTC.OrtDevice.cpu(), ORTC.OrtDevice.default_memory(), 0)}
@@ -48,7 +55,14 @@ def __init__(
     ):
         # onnxruntime is importing when needed as it takes a
         # couple of seconds if it contains CUDA EP.
+        can_use_training_api = True
         if isinstance(sess, (onnx.ModelProto, str)):
+            if isinstance(sess, onnx.ModelProto):
+                for i in sess.graph.initializer:
+                    if i.data_type >= onnx.TensorProto.BFLOAT16:
+                        # Cannot use training api as it relies too much on numpy.
+                        can_use_training_api = False
+                        break
             assert session_options is None or (
                 providers is None
                 and graph_optimization_level is None
@@ -113,7 +127,7 @@
         if log_verbosity_level is not None:
             self.run_options.log_verbosity_level = log_verbosity_level
 
-        self.use_training_api = (
+        self.use_training_api = can_use_training_api and (
             self.has_onnxruntime_training() if use_training_api is None else use_training_api
         )
@@ -174,9 +188,75 @@ def __init__(
 
     def run(
         self, output_names: Optional[List[str]], feeds: Dict[str, npt.ArrayLike]
-    ) -> List[npt.ArrayLike]:
+    ) -> List[Optional[npt.ArrayLike]]:
         """Calls :meth:`onnxruntime.InferenceSession.run`."""
-        return self.sess.run(output_names, feeds)
+        # sess.run does not support bfloat16
+        # res = self.sess.run(output_names, feeds)
+        return list(self.run_dlpack(output_names, feeds))
+
+    def run_dlpack(
+        self, output_names: Optional[List[str]], feeds: Dict[str, npt.ArrayLike]
+    ) -> Tuple[Optional[npt.ArrayLike], ...]:
+        """
+        Same as :meth:`onnxruntime.InferenceSession.run` except that
+        feeds is a dictionary of :class:`np.ndarray`.
+        The output device is CPU even if the outputs are on CUDA.
+ """ + new_feeds = {} + for k, v in feeds.items(): + if not k: + continue + new_feeds[k] = ( + ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type( + v, np_dtype_to_tensor_dtype(v.dtype) + ) + if isinstance(v, np.ndarray) + else ORTC.OrtValue.from_dlpack(v.__dlpack__(), v.dtype == torch.bool) + ) + if self.nvtx: + self.torch.cuda.nvtx.range_push("run_with_ort_values") + ort_outputs = self.sess._sess.run_with_ort_values( + new_feeds, output_names or self.output_names, self.run_options + ) + if self.nvtx: + self.torch.cuda.nvtx.range_pop() + pth_outputs = self._ortvalues_to_numpy_tensor(ort_outputs) + return pth_outputs + + def _ortvalues_to_numpy_tensor( + self, + ortvalues: Union[List[ORTC.OrtValue], ORTC.OrtValueVector], + ) -> Tuple[Optional[npt.ArrayLike], ...]: + if len(ortvalues) == 0: + return tuple() + + if self.nvtx: + self.torch.cuda.nvtx.range_push("_ortvalues_to_numpy_tensor") + res: List[Optional[npt.ArrayLike]] = [] # noqa: F823 + for i in range(len(ortvalues)): + if not ortvalues[i].has_value(): + res.append(None) + continue + + el_type = ortvalues[i].element_type() + if el_type < onnx.TensorProto.BFLOAT16: + res.append(np.from_dlpack(ortvalues[i])) + continue + + # no easy conversion, let's use torch + tch = torch.from_dlpack(ortvalues[i].to_dlpack()) + size = size_type(el_type) + assert size == 2, f"Not implemented for type {onnx_dtype_name(el_type)}" + it = torch.uint16 + itch = tch.view(it) + npt = itch.numpy() + + dtype = onnx_dtype_to_np_dtype(el_type) + res.append(npt.view(dtype)) + + if self.nvtx: + self.torch.cuda.nvtx.range_pop() + return tuple(res) class InferenceSessionForTorch(_InferenceSession): @@ -225,33 +305,6 @@ def __init__( use_training_api=use_training_api, ) - self.TORCH_DTYPE_TO_ONNX_DTYPE = { - torch.float16: onnx.TensorProto.FLOAT16, - torch.bfloat16: onnx.TensorProto.BFLOAT16, - torch.float32: onnx.TensorProto.FLOAT, - torch.float64: onnx.TensorProto.DOUBLE, - torch.uint32: onnx.TensorProto.UINT32, - torch.uint16: onnx.TensorProto.UINT16, - torch.uint8: onnx.TensorProto.UINT8, - torch.int8: onnx.TensorProto.INT8, - torch.int16: onnx.TensorProto.INT16, - torch.int32: onnx.TensorProto.INT32, - torch.int64: onnx.TensorProto.INT64, - torch.bool: onnx.TensorProto.BOOL, - } - - self.TORCH_DTYPE_TO_NUMPY_DTYPE = { - torch.float16: np.float16, - torch.float32: np.float32, - torch.float64: np.float64, - torch.uint8: np.uint8, - torch.int8: np.int8, - torch.int16: np.int16, - torch.int32: np.int32, - torch.int64: np.int64, - torch.bool: np.bool_, - } - def _get_ortvalues_from_torch_tensors( self, tensors: Tuple[torch.Tensor, ...], n_outputs: int ) -> Tuple[ORTC.OrtValueVector, List[onnxruntime.OrtDevice]]: @@ -269,7 +322,7 @@ def _get_ortvalues_from_torch_tensors( new_tensors = [] for tensor in tensors: assert isinstance(tensor, self.torch.Tensor), f"Unexpected type {type(tensor)}" - dtypes.append(self.TORCH_DTYPE_TO_NUMPY_DTYPE[tensor.dtype]) + dtypes.append(onnx_dtype_to_np_dtype(torch_dtype_to_onnx_dtype(tensor.dtype))) shapes.append(tensor.size()) data_ptrs.append(tensor.data_ptr()) d = tensor.get_device() diff --git a/onnx_diagnostic/reference/__init__.py b/onnx_diagnostic/reference/__init__.py index e4db27cc..7a4d7128 100644 --- a/onnx_diagnostic/reference/__init__.py +++ b/onnx_diagnostic/reference/__init__.py @@ -1 +1,2 @@ from .evaluator import ExtendedReferenceEvaluator +from .ort_evaluator import OnnxruntimeEvaluator diff --git a/onnx_diagnostic/reference/ort_evaluator.py b/onnx_diagnostic/reference/ort_evaluator.py new file mode 100644 index 
00000000..501017f5
--- /dev/null
+++ b/onnx_diagnostic/reference/ort_evaluator.py
@@ -0,0 +1,420 @@
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+import numpy as np
+from onnx import (
+    GraphProto,
+    FunctionProto,
+    ModelProto,
+    NodeProto,
+    TypeProto,
+    ValueInfoProto,
+    helper as oh,
+    load,
+)
+from onnx.defs import onnx_opset_version
+import onnxruntime
+from ..helpers import pretty_onnx, dtype_to_tensor_dtype, string_type, to_array_extended
+from ..ort_session import InferenceSessionForTorch, InferenceSessionForNumpy, _InferenceSession
+
+PROTO = (FunctionProto, ModelProto, GraphProto, NodeProto)
+Proto = Union[FunctionProto, ModelProto, GraphProto, NodeProto]
+
+
+class OnnxruntimeEvaluator:
+    """
+    This class loads an onnx model and then executes the nodes one by one
+    with onnxruntime. This class is mostly meant for debugging.
+
+    :param proto: proto or filename
+    :param session_options: options
+    :param providers: `None`, `"CPU"`, `"CUDA"` or a list of providers
+    :param nvtx: enable nvidia events
+    :param enable_profiling: enables profiling
+    :param graph_optimization_level: see :class:`onnxruntime.SessionOptions`
+    :param log_severity_level: see :class:`onnxruntime.SessionOptions`
+    :param log_verbosity_level: see :class:`onnxruntime.SessionOptions`
+    :param optimized_model_filepath: see :class:`onnxruntime.SessionOptions`
+    :param disable_aot_function_inlining: see :class:`onnxruntime.SessionOptions`
+    :param use_training_api: use the onnxruntime-training API
+    :param verbose: verbosity
+    :param local_functions: additional local functions
+    :param ir_version: ir version to use when it is unknown
+    :param opsets: opsets to use when they are unknown
+    """
+
+    def __init__(
+        self,
+        proto: Union[str, Proto, "OnnxruntimeEvaluator"],
+        session_options: Optional[onnxruntime.SessionOptions] = None,
+        providers: Optional[Union[str, List[str]]] = None,
+        nvtx: bool = False,
+        enable_profiling: bool = False,
+        graph_optimization_level: Union[onnxruntime.GraphOptimizationLevel, bool] = None,
+        log_severity_level: Optional[int] = None,
+        log_verbosity_level: Optional[int] = None,
+        optimized_model_filepath: Optional[str] = None,
+        disable_aot_function_inlining: Optional[bool] = None,
+        use_training_api: bool = False,
+        verbose: int = 0,
+        local_functions: Optional[
+            Dict[Tuple[str, str], Union[Proto, "OnnxruntimeEvaluator"]]
+        ] = None,
+        ir_version: int = 10,
+        opsets: Optional[Union[int, Dict[str, int]]] = None,
+    ):
+        if isinstance(proto, str):
+            self.proto: Proto = load(proto)
+        elif isinstance(proto, OnnxruntimeEvaluator):
+            assert isinstance(
+                proto.proto, PROTO
+            ), f"Unexpected type for proto.proto {type(proto.proto)}"
+            self.proto = proto.proto
+        else:
+            self.proto = proto
+        assert isinstance(
+            self.proto, PROTO
+        ), f"Unexpected type for self.proto {type(self.proto)}"
+
+        self._cache: Dict[
+            Any, Tuple[Proto, Union["OnnxruntimeEvaluator", _InferenceSession]]  # noqa: UP037
+        ] = {}
+        self.ir_version = ir_version
+        self.opsets = opsets
+        self.session_kwargs: Dict[str, Any] = dict(
+            session_options=session_options,
+            providers=providers,
+            nvtx=nvtx,
+            enable_profiling=enable_profiling,
+            graph_optimization_level=graph_optimization_level,
+            log_severity_level=log_severity_level,
+            log_verbosity_level=log_verbosity_level,
+            optimized_model_filepath=optimized_model_filepath,
+            disable_aot_function_inlining=disable_aot_function_inlining,
+            use_training_api=use_training_api,
+        )
+
+        self.nodes = (
+            [self.proto]
+            if isinstance(self.proto, NodeProto)
+            else (
+                list(
+
self.proto.graph.node if hasattr(self.proto, "graph") else self.proto.node + ) + ) + ) + self.rt_inits_ = ( + {init.name: to_array_extended(init) for init in self.proto.graph.initializer} + if hasattr(self.proto, "graph") + else {} + ) + self.rt_nodes_ = self.nodes.copy() + self.verbose = verbose + self.local_functions: Dict[Tuple[str, str], "OnnxruntimeEvaluator"] = ( # noqa: UP037 + {(f.domain, f.name): self.__class__(f) for f in self.proto.functions} + if hasattr(self.proto, "functions") + else {} + ) + if local_functions: + self.local_functions.update(local_functions) + + @property + def input_names(self) -> List[str]: + "Returns input names." + if isinstance(self.proto, NodeProto): + return self.nodes[0].input + return [ + getattr(o, "name", o) + for o in ( + self.proto.graph.input if hasattr(self.proto, "graph") else self.proto.input + ) + ] + + @property + def output_names(self) -> List[str]: + "Returns output names." + if isinstance(self.proto, NodeProto): + return self.nodes[0].output + return [ + getattr(o, "name", o) + for o in ( + self.proto.graph.output if hasattr(self.proto, "graph") else self.proto.output + ) + ] + + @property + def input_types(self) -> List[TypeProto]: + "Returns input types." + if not isinstance(self.proto, (ModelProto, GraphProto)): + raise ValueError(f"Cannot guess input types for type {type(self.proto)}") + g = self.proto.graph if hasattr(self.proto, "graph") else self.proto + return [i.type for i in g.input] + + @property + def output_types(self) -> List[TypeProto]: + "Returns output types." + if not isinstance(self.proto, (ModelProto, GraphProto)): + raise ValueError(f"Cannot guess output types for type {type(self.proto)}") + g = self.proto.graph if hasattr(self.proto, "graph") else self.proto + return [i.type for i in g.output] + + def _log_arg(self, a: Any) -> Any: + if isinstance(a, (str, int, float)): + return a + device = f"D{a.get_device()}:" if hasattr(a, "detach") else "" + if hasattr(a, "shape"): + if self.verbose < 4: # noqa: PLR2004 + return f"{device}{a.dtype}:{a.shape} in [{a.min()}, {a.max()}]" + elements = a.ravel().tolist() + if len(elements) > 10: # noqa: PLR2004 + elements = elements[:10] + return f"{device}{a.dtype}:{a.shape}:{','.join(map(str, elements))}..." + return f"{device}{a.dtype}:{a.shape}:{elements}" + if hasattr(a, "append"): + return ", ".join(map(self._log_arg, a)) + return a + + def _log(self, level: int, pattern: str, *args: Any) -> None: + if level < self.verbose: + new_args = [self._log_arg(a) for a in args] + print(pattern % tuple(new_args)) + + def _is_local_function(self, node: NodeProto) -> bool: + return (node.domain, node.op_type) in self.local_functions + + def run( + self, + outputs: Optional[List[str]], + feed_inputs: Dict[str, Any], + intermediate: bool = False, + ) -> Union[Dict[str, Any], List[Any]]: + """ + Runs the model. + It only works with numpy arrays. 
+ + :param outputs: required outputs or None for all + :param feed_inputs: inputs + :param intermediate: returns all output instead of the last ones + :return: outputs, as a list if return_all is False, + as a dictionary if return_all is True + """ + if outputs is None: + outputs = self.output_names + results: Dict[str, Any] = self.rt_inits_.copy() + + for k, v in self.rt_inits_.items(): + self._log(2, " +C %s: %s", k, v) + for k, v in feed_inputs.items(): + self._log(2, " +I %s: %s", k, v) + results[k] = v + + for node in self.rt_nodes_: + self._log(1, "%s(%s) -> %s", node.op_type, node.input, node.output) + for i in node.input: + if i != "" and i not in results: + raise RuntimeError( + f"Unable to find input {i!r} in known results {sorted(results)}, " + f"self.rt_inits_ has {sorted(self.rt_inits_)}, " + f"feed_inputs has {sorted(feed_inputs)}." + ) + inputs = [(results[i] if i != "" else None) for i in node.input] + if node.op_type == "If" and node.domain == "": + outputs = self._run_if(node, inputs, results) + elif self._is_local_function(node): + outputs = self._run_local(node, inputs, results) + else: + outputs = self._run(node, inputs, results) + for name, value in zip(node.output, outputs): + if name == "": + continue + self._log(2, " + %s: %s", name, value) # type: ignore[arg-type] + assert isinstance(name, str), f"unexpected type for name {type(name)}" + results[name] = value + + if intermediate: + return results + output_names = self.output_names + for name in output_names: + if name == "": + continue + if name not in results: + raise RuntimeError( + f"Unable to find output name {name!r} " + f"in {sorted(results)}, proto is\n{pretty_onnx(self.proto)}" + ) + return [results[name] for name in output_names if name != ""] + + def _make_model_proto( + self, + nodes: Sequence[NodeProto], + vinputs: Sequence[ValueInfoProto], + voutputs: Sequence[ValueInfoProto], + ) -> ModelProto: + onx = oh.make_model( + oh.make_graph(nodes, "-", vinputs, voutputs), + ir_version=getattr(self.proto, "ir_version", self.ir_version), + functions=getattr(self.proto, "functions", None), + ) + del onx.opset_import[:] + if hasattr(self.proto, "opset_import"): + onx.opset_import.extend(self.proto.opset_import) + elif self.opsets: + if isinstance(self.opsets, int): + onx.opset_import.append(oh.make_opsetid("", self.opsets)) + else: + onx.opset_import.extend( + [oh.make_opsetid(k, v) for k, v in self.opsets.items()] + ) + else: + onx.opset_import.append(oh.make_opsetid("", onnx_opset_version())) + + return onx + + def _get_sess( + self, node: NodeProto, inputs: List[Any] + ) -> Tuple[ModelProto, _InferenceSession]: + unique_names = set() + vinputs = [] + for i, it in zip(node.input, inputs): + if i == "" or i in unique_names: + continue + unique_names.add(i) + value = oh.make_tensor_value_info(i, dtype_to_tensor_dtype(it.dtype), it.shape) + vinputs.append(value) + + # no need to run shape inference + voutputs = [oh.make_value_info(o, TypeProto()) for o in node.output] + onx = self._make_model_proto([node], vinputs, voutputs) + + cls = ( + InferenceSessionForNumpy + if any(isinstance(i, np.ndarray) for i in inputs) + else InferenceSessionForTorch + ) + try: + sess = cls(onx, **self.session_kwargs) + except ( + onnxruntime.capi.onnxruntime_pybind11_state.Fail, + onnxruntime.capi.onnxruntime_pybind11_state.InvalidGraph, + onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument, + ) as e: + raise RuntimeError( + f"Unable to infer a session with inputs\n{string_type(inputs)}" + f"\ndue to {e}\n{pretty_onnx(onx)}" 
+ ) from e + return onx, sess + + def _get_sess_if( + self, node: NodeProto, branch: str, inputs: List[Any], context: Dict[str, Any] + ) -> Tuple[ModelProto, "OnnxruntimeEvaluator"]: + unique_names = set() + vinputs = [] + for i, it in zip(node.input, inputs): + if i == "" or i in unique_names: + continue + unique_names.add(i) + value = oh.make_tensor_value_info(i, dtype_to_tensor_dtype(it.dtype), it.shape) + vinputs.append(value) + + for i, v in context.items(): + if i not in unique_names: + unique_names.add(i) + value = oh.make_tensor_value_info(i, dtype_to_tensor_dtype(v.dtype), v.shape) + vinputs.append(value) + + for att in node.attribute: + if att.name == branch: + g = att.g + + voutputs = g.output + + onx = self._make_model_proto(g.node, vinputs, voutputs) + sess = OnnxruntimeEvaluator( + onx, + local_functions=self.local_functions, + verbose=self.verbose, + ir_version=self.ir_version, + opsets=self.opsets, + **self.session_kwargs, + ) + return onx, sess + + def _get_sess_local( + self, node: NodeProto, inputs: List[Any] + ) -> Tuple[FunctionProto, "OnnxruntimeEvaluator"]: + ev = self.local_functions[node.domain, node.op_type] + sess = OnnxruntimeEvaluator( + ev, + local_functions=self.local_functions, + verbose=self.verbose, + ir_version=self.ir_version, + opsets=self.opsets, + **self.session_kwargs, + ) + return ev.proto, sess + + def _run(self, node: NodeProto, inputs: List[Any], results: Dict[str, Any]) -> List[Any]: + """Runs a node.""" + types = [(None if a is None else (a.dtype, a.shape)) for a in inputs] + key = (id(node), *types) + if key in self._cache: + sess = self._cache[key][1] + else: + onx, sess = self._get_sess(node, inputs) + self._cache[key] = onx, sess + + feeds = dict(zip(node.input, inputs)) + if "" in feeds: + feeds[""] = np.array([0], dtype=np.float32) + + assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}" + outputs = list(sess.run(None, feeds)) + assert isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}" + return outputs + + def _run_if( + self, node: NodeProto, inputs: List[Any], results: Dict[str, Any] + ) -> List[Any]: + """Runs a node if.""" + feeds = dict(zip(node.input, inputs)) + feeds.update(results) + if feeds[node.input[0]]: + name = "then_branch" + else: + name = "else_branch" + + key = (id(node), name) + if key in self._cache: + sess = self._cache[key][1] + else: + self._cache[key] = onx, sess = self._get_sess_if(node, name, inputs, results) + + assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}" + outputs = sess.run(None, feeds) + assert isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}" + return outputs + + def _run_local( + self, node: NodeProto, inputs: List[Any], results: Dict[str, Any] + ) -> List[Any]: + """Runs a node.""" + types = [(None if a is None else (a.dtype, a.shape)) for a in inputs] + key = (id(node), *types) + if key in self._cache: + sess = self._cache[key][1] + else: + onx, sess = self._get_sess_local(node, inputs) + self._cache[key] = onx, sess + + replace = dict(zip(node.input, sess.input_names)) + assert len(node.input) == len(sess.input_names), ( + f"Input mismatch: input_names={sess.input_names}, " + f"replace={replace}, " + f"type(self.proto)={type(self.proto)}, and node=\n{node}" + ) + feeds = {replace[i]: v for i, v in zip(node.input, inputs)} + if "" in feeds: + feeds[""] = np.array([0], dtype=np.float32) + + assert hasattr(sess, "run"), f"Missing method run for type {type(sess)}" + outputs = sess.run(None, feeds) + assert 
isinstance(outputs, list), f"Unexpected type for outputs {type(outputs)}" + return outputs
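To make the new evaluator concrete, here is a minimal usage sketch. It is not part of the PR; the model path and the feeds are placeholders that must be adapted to the model being debugged:

import numpy as np
from onnx_diagnostic.reference import OnnxruntimeEvaluator

# "model.onnx" is a placeholder path; the feeds must match the model's inputs.
ref = OnnxruntimeEvaluator("model.onnx", providers="cpu", verbose=1)
feeds = {"X": np.random.rand(2, 3).astype(np.float32)}
# intermediate=True returns a dictionary with every computed result,
# not only the model outputs, which helps locate the first failing node.
results = ref.run(None, feeds, intermediate=True)
for name, value in results.items():
    print(name, getattr(value, "dtype", None), getattr(value, "shape", None))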