
Commit 225a3ac
Commit message: final
1 parent: e8db2d8

File tree: 4 files changed, +73 additions, -9 deletions


_doc/examples/plot_export_tiny_phi2.py

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@
 # Let's make sure the ONNX model produces the same outputs.
 # It takes flatten inputs.

-feeds = make_feeds(onx, copy.deepcopy(inputs), use_numpy=True)
+feeds = make_feeds(onx, copy.deepcopy(inputs), use_numpy=True, copy=True)

 print(f"torch inputs: {string_type(inputs)}")
 print(f"onxrt inputs: {string_type(feeds)}")

_unittests/ut_helpers/test_ort_session_tinyllm.py

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import copy
22
import unittest
3+
import numpy as np
4+
import onnx
35
import torch
46
import onnxruntime
7+
from onnxruntime.capi import _pybind_state as ORTC
58
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, ignore_warnings
69
from onnx_diagnostic.helpers import max_diff
710
from onnx_diagnostic.helpers.ort_session import (
@@ -12,10 +15,61 @@
1215
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
1316
from onnx_diagnostic.torch_models.llms import get_tiny_llm
1417
from onnx_diagnostic.reference import ExtendedReferenceEvaluator
18+
from onnx_diagnostic.helpers.onnx_helper import np_dtype_to_tensor_dtype
1519

1620

1721
class TestOrtSessionTinyLLM(ExtTestCase):
1822

23+
def test_ort_value(self):
24+
val = np.array([30, 31, 32], dtype=np.int64)
25+
ort = ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type(val, onnx.TensorProto.INT64)
26+
self.assertEqual(np_dtype_to_tensor_dtype(val.dtype), onnx.TensorProto.INT64)
27+
val2 = ort.numpy()
28+
self.assertEqualArray(val, val2)
29+
ort = ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type(
30+
val, np_dtype_to_tensor_dtype(val.dtype)
31+
)
32+
val2 = ort.numpy()
33+
self.assertEqualArray(val, val2)
34+
35+
def test_ort_value_py(self):
36+
data = get_tiny_llm()
37+
inputs = data["inputs"]
38+
feeds = make_feeds(
39+
["input_ids", "attention_mask", "position_ids", "key0", "value0"],
40+
inputs,
41+
use_numpy=True,
42+
copy=True,
43+
)
44+
new_feeds = {}
45+
for k, v in feeds.items():
46+
new_feeds[k] = onnxruntime.OrtValue.ortvalue_from_numpy_with_onnx_type(
47+
v, np_dtype_to_tensor_dtype(v.dtype)
48+
)
49+
other_feeds = {k: v.numpy() for k, v in new_feeds.items()}
50+
self.assertEqualAny(feeds, other_feeds)
51+
52+
def test_ort_value_more(self):
53+
data = get_tiny_llm()
54+
inputs = data["inputs"]
55+
feeds = make_feeds(
56+
["input_ids", "attention_mask", "position_ids", "key0", "value0"],
57+
inputs,
58+
use_numpy=True,
59+
copy=True,
60+
)
61+
feeds = {
62+
k: feeds[k].copy()
63+
for k in ["input_ids", "attention_mask", "key0", "value0", "position_ids"]
64+
}
65+
new_feeds = {}
66+
for k, v in feeds.items():
67+
new_feeds[k] = ORTC.OrtValue.ortvalue_from_numpy_with_onnx_type(
68+
v, np_dtype_to_tensor_dtype(v.dtype)
69+
)
70+
other_feeds = {k: v.numpy() for k, v in new_feeds.items()}
71+
self.assertEqualAny(feeds, other_feeds)
72+
1973
@ignore_warnings((UserWarning, DeprecationWarning, FutureWarning))
2074
@hide_stdout()
2175
def test_check_allruntimes_on_tiny_llm(self):
@@ -30,7 +84,7 @@ def test_check_allruntimes_on_tiny_llm(self):
3084

3185
proto = ep.model_proto
3286
self.dump_onnx("test_check_allruntimes_on_tiny_llm.onnx", proto)
33-
feeds = make_feeds(proto, inputs, use_numpy=True)
87+
feeds = make_feeds(proto, inputs, use_numpy=True, copy=True)
3488
sess = onnxruntime.InferenceSession(
3589
proto.SerializeToString(), providers=["CPUExecutionProvider"]
3690
)
@@ -45,10 +99,10 @@ def test_check_allruntimes_on_tiny_llm(self):
4599
self.assertEqualArray(got[0], all_outputs["linear_7"])
46100

47101
sess = InferenceSessionForNumpy(proto)
48-
got = sess.run(None, feeds, expected=all_outputs)
102+
got = sess.run(None, feeds)
49103
self.assertLess(max_diff(expected, got, flatten=True)["abs"], 1e-5)
50104

51-
feeds = make_feeds(proto, inputs)
105+
feeds = make_feeds(proto, inputs, copy=True)
52106
sess = InferenceSessionForTorch(proto)
53107
got = sess.run(None, feeds)
54108
self.assertLess(max_diff(expected, got, flatten=True)["abs"], 1e-5)
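
A condensed, hedged version of the round trip these new tests exercise, assuming an onnxruntime build that exposes OrtValue.ortvalue_from_numpy_with_onnx_type (the same call the tests above rely on): build an OrtValue from a numpy array with an explicit ONNX element type, then read it back.

import numpy as np
import onnx
import onnxruntime

val = np.array([30, 31, 32], dtype=np.int64)
# The ONNX element type is given explicitly instead of being inferred
# from the numpy dtype.
ov = onnxruntime.OrtValue.ortvalue_from_numpy_with_onnx_type(val, onnx.TensorProto.INT64)
assert np.array_equal(ov.numpy(), val)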

onnx_diagnostic/ext_test_case.py

Lines changed: 2 additions & 2 deletions
@@ -1066,7 +1066,7 @@ def assert_onnx_disc(
         if verbose:
             print(f"[{vname}] make feeds {string_type(inputs, **kws)}")
         if use_ort:
-            feeds = make_feeds(proto, inputs, use_numpy=True)
+            feeds = make_feeds(proto, inputs, use_numpy=True, copy=True)
             if verbose:
                 print(f"[{vname}] feeds {string_type(feeds, **kws)}")
             import onnxruntime
@@ -1076,7 +1076,7 @@ def assert_onnx_disc(
             )
             got = sess.run(None, feeds)
         else:
-            feeds = make_feeds(proto, inputs)
+            feeds = make_feeds(proto, inputs, copy=True)
             if verbose:
                 print(f"[{vname}] feeds {string_type(feeds, **kws)}")
             sess = InferenceSessionForTorch(proto, **kwargs)

onnx_diagnostic/helpers/ort_session.py

Lines changed: 13 additions & 3 deletions
@@ -19,15 +19,20 @@


 def make_feeds(
-    proto: onnx.ModelProto, inputs: Any, use_numpy: bool = False
+    proto: Union[onnx.ModelProto, List[str]],
+    inputs: Any,
+    use_numpy: bool = False,
+    copy: bool = False,
 ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
     """
     Serializes the inputs to produce feeds expected
     by :class:`onnxruntime.InferenceSession`.

-    :param proto: onnx model
+    :param proto: onnx model or list of names
     :param inputs: any kind of inputs
     :param use_numpy: if True, converts torch tensors into numpy arrays
+    :param copy: a copy is made, this should be the case if the inputs is ingested
+        by ``OrtValue``
     :return: feeds dictionary
     """
     flat = flatten_object(inputs, drop_keys=True)
@@ -42,7 +47,11 @@ def make_feeds(
     )
     if use_numpy:
         flat = [t.detach().cpu().numpy() if isinstance(t, torch.Tensor) else t for t in flat]
-    names = [i.name for i in proto.graph.input]
+    names = (
+        [i.name for i in proto.graph.input] if isinstance(proto, onnx.ModelProto) else proto
+    )
+    if copy:
+        flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
     return dict(zip(names, flat))


@@ -242,6 +251,7 @@ def run_dlpack(
             if isinstance(v, np.ndarray)
             else ORTC.OrtValue.from_dlpack(v.__dlpack__(), v.dtype == torch.bool)
         )
+
         if self.nvtx:
             self.torch.cuda.nvtx.range_push("run_with_ort_values")
         ort_outputs = self.sess._sess.run_with_ort_values(
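
For reference, a standalone sketch of the dispatch the updated make_feeds now performs on its first argument (resolve_input_names is a hypothetical helper written for illustration, not part of the library): a ModelProto contributes its graph input names, while a plain list of names is used as-is.

from typing import List, Union
import onnx

def resolve_input_names(proto: Union[onnx.ModelProto, List[str]]) -> List[str]:
    # Mirrors the new branch in make_feeds: read names from the graph
    # inputs of a ModelProto, otherwise trust the caller-provided list.
    if isinstance(proto, onnx.ModelProto):
        return [i.name for i in proto.graph.input]
    return list(proto)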
