fix ser

xadupre · xadupre · commit 022a7661663f · 2025-04-10T17:42:11.000+02:00
diff --git a/_doc/api/helpers/index.rst b/_doc/api/helpers/index.rst
@@ -13,6 +13,7 @@ onnx_diagnostic.helpers
     memory_peak
     onnx_helper
     ort_session
+    rt_helper
     torch_test_helper
 
 .. autofunction:: onnx_diagnostic.helpers.max_diff
diff --git a/_doc/api/helpers/rt_helper.rst b/_doc/api/helpers/rt_helper.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.helpers.rt_helper
+=================================
+
+.. automodule:: onnx_diagnostic.helpers.rt_helper
+    :members:
+    :no-undoc-members:
diff --git a/_doc/examples/plot_export_tiny_phi2.py b/_doc/examples/plot_export_tiny_phi2.py
@@ -25,7 +25,7 @@
 from onnx_diagnostic import doc
 from onnx_diagnostic.helpers import max_diff, string_diff, string_type
 from onnx_diagnostic.helpers.cache_helper import is_cache_dynamic_registered
-from onnx_diagnostic.helpers.ort_session import make_feeds
+from onnx_diagnostic.helpers.rt_helper import make_feeds
 from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
 from onnx_diagnostic.torch_models.hghub import (
     get_untrained_model_with_inputs,
diff --git a/_unittests/ut_helpers/test_ort_session_tinyllm.py b/_unittests/ut_helpers/test_ort_session_tinyllm.py
@@ -7,10 +7,10 @@
 from onnxruntime.capi import _pybind_state as ORTC
 from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, ignore_warnings
 from onnx_diagnostic.helpers import max_diff
+from onnx_diagnostic.helpers.rt_helper import make_feeds
 from onnx_diagnostic.helpers.ort_session import (
     InferenceSessionForNumpy,
     InferenceSessionForTorch,
-    make_feeds,
 )
 from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
 from onnx_diagnostic.torch_models.llms import get_tiny_llm
diff --git a/onnx_diagnostic/ext_test_case.py b/onnx_diagnostic/ext_test_case.py
@@ -1081,7 +1081,8 @@ def assert_onnx_disc(
             :class:`onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch`
         """
         from .helpers import string_type, string_diff, max_diff
-        from .helpers.ort_session import InferenceSessionForTorch, make_feeds
+        from .helpers.rt_helper import make_feeds
+        from .helpers.ort_session import InferenceSessionForTorch
 
         kws = dict(with_shape=True, with_min_max=verbose > 1)
         if verbose:
diff --git a/onnx_diagnostic/helpers/ort_session.py b/onnx_diagnostic/helpers/ort_session.py
@@ -6,8 +6,7 @@
 from torch._C import _from_dlpack
 import onnxruntime
 from onnxruntime.capi import _pybind_state as ORTC
-from .cache_helper import is_cache_dynamic_registered
-from .helper import size_type, string_type, flatten_object
+from .helper import size_type
 from .onnx_helper import (
     torch_dtype_to_onnx_dtype,
     onnx_dtype_to_np_dtype,
@@ -18,43 +17,6 @@
 DEVICES = {-1: ORTC.OrtDevice(ORTC.OrtDevice.cpu(), ORTC.OrtDevice.default_memory(), 0)}
 
 
-def make_feeds(
-    proto: Union[onnx.ModelProto, List[str]],
-    inputs: Any,
-    use_numpy: bool = False,
-    copy: bool = False,
-) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
-    """
-    Serializes the inputs to produce feeds expected
-    by :class:`onnxruntime.InferenceSession`.
-
-    :param proto: onnx model or list of names
-    :param inputs: any kind of inputs
-    :param use_numpy: if True, converts torch tensors into numpy arrays
-    :param copy: a copy is made, this should be the case if the inputs is ingested
-        by ``OrtValue``
-    :return: feeds dictionary
-    """
-    flat = flatten_object(inputs, drop_keys=True)
-    assert (
-        not all(isinstance(obj, torch.Tensor) for obj in flat)
-        or not is_cache_dynamic_registered(fast=True)
-        or len(flat) == len(torch.utils._pytree.tree_flatten(inputs)[0])
-    ), (
-        f"Unexpected number of flattened objects, "
-        f"{string_type(flat, with_shape=True, limit=20)} != "
-        f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True,limit=20)}"
-    )
-    if use_numpy:
-        flat = [t.detach().cpu().numpy() if isinstance(t, torch.Tensor) else t for t in flat]
-    names = (
-        [i.name for i in proto.graph.input] if isinstance(proto, onnx.ModelProto) else proto
-    )
-    if copy:
-        flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
-    return dict(zip(names, flat))
-
-
 class _InferenceSession:
 
     @classmethod
diff --git a/onnx_diagnostic/helpers/rt_helper.py b/onnx_diagnostic/helpers/rt_helper.py
@@ -0,0 +1,43 @@
+from typing import Any, Dict, List, Union
+import numpy as np
+import onnx
+import torch
+from .helper import string_type, flatten_object
+from .cache_helper import is_cache_dynamic_registered
+
+
+def make_feeds(
+    proto: Union[onnx.ModelProto, List[str]],
+    inputs: Any,
+    use_numpy: bool = False,
+    copy: bool = False,
+) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
+    """
+    Serializes the inputs to produce feeds expected
+    by :class:`onnxruntime.InferenceSession`.
+
+    :param proto: onnx model or list of names
+    :param inputs: any kind of inputs
+    :param use_numpy: if True, converts torch tensors into numpy arrays
+    :param copy: a copy is made, this should be the case if the inputs are ingested
+        by ``OrtValue``
+    :return: feeds dictionary
+    """
+    flat = flatten_object(inputs, drop_keys=True)
+    assert (
+        not all(isinstance(obj, torch.Tensor) for obj in flat)
+        or not is_cache_dynamic_registered(fast=True)
+        or len(flat) == len(torch.utils._pytree.tree_flatten(inputs)[0])
+    ), (
+        f"Unexpected number of flattened objects, "
+        f"{string_type(flat, with_shape=True, limit=20)} != "
+        f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True,limit=20)}"
+    )
+    if use_numpy:
+        flat = [t.detach().cpu().numpy() if isinstance(t, torch.Tensor) else t for t in flat]
+    names = (
+        [i.name for i in proto.graph.input] if isinstance(proto, onnx.ModelProto) else proto
+    )
+    if copy:
+        flat = [t.copy() if hasattr(t, "copy") else t.clone() for t in flat]
+    return dict(zip(names, flat))
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -298,14 +298,17 @@ def random_input_kwargs(config: Any, task: str) -> Tuple[Dict[str, Any], Callabl
         kwargs = dict(
             batch_size=2,
             sequence_length=30,
-            dummy_max_token_id=config.vocab_size,
-            max_source_positions=config.max_source_positions,
-            d_model=config.d_model,
+            dummy_max_token_id=31000 if config is None else config.vocab_size,
+            max_source_positions=1500 if config is None else config.max_source_positions,
+            d_model=384 if config is None else config.d_model,
             num_hidden_layers=4 if config is None else config.num_hidden_layers,
-            encoder_attention_heads=config.encoder_attention_heads,
-            encoder_layers=config.encoder_layers,
-            decoder_layers=config.decoder_layers,
-            head_dim=config.d_model // config.encoder_attention_heads,
+            encoder_attention_heads=6 if config is None else config.encoder_attention_heads,
+            encoder_layers=4 if config is None else config.encoder_layers,
+            decoder_attention_heads=6 if config is None else config.decoder_attention_heads,
+            decoder_layers=4 if config is None else config.decoder_layers,
+            head_dim=(
+                64 if config is None else (config.d_model // config.encoder_attention_heads)
+            ),
         )
         fct = get_inputs_for_speech_automatic_recognition  # type: ignore
     else:
diff --git a/onnx_diagnostic/torch_models/test_helper.py b/onnx_diagnostic/torch_models/test_helper.py
@@ -6,7 +6,7 @@
 import torch
 from ..helpers import max_diff, string_type, string_diff
 from ..helpers.helper import flatten_object
-from ..helpers.ort_session import make_feeds
+from ..helpers.rt_helper import make_feeds
 from ..helpers.torch_test_helper import to_any, torch_deepcopy
 from ..torch_export_patches import bypass_export_some_errors
 from .hghub import get_untrained_model_with_inputs