Fix iterator_initializer_constant: move it to onnx_helper and iterate over constants, functions and subgraphs

xadupre · xadupre · commit 11434ab963d5 · 2025-05-13T11:01:49.000+02:00
diff --git a/_unittests/ut_helpers/test_helper.py b/_unittests/ut_helpers/test_helper.py
@@ -28,16 +28,18 @@
     get_onnx_signature,
     type_info,
     onnx_dtype_name,
-    onnx_dtype_to_torch_dtype,
     onnx_dtype_to_np_dtype,
     np_dtype_to_tensor_dtype,
-    torch_dtype_to_onnx_dtype,
     from_array_extended,
     to_array_extended,
     convert_endian,
     from_array_ml_dtypes,
     dtype_to_tensor_dtype,
 )
+from onnx_diagnostic.helpers.torch_helper import (
+    onnx_dtype_to_torch_dtype,
+    torch_dtype_to_onnx_dtype,
+)
 from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config
 
diff --git a/_unittests/ut_helpers/test_onnx_helper.py b/_unittests/ut_helpers/test_onnx_helper.py
@@ -1,16 +1,20 @@
 import unittest
+from typing import Any, Dict, List
 import numpy as np
 import onnx.helper as oh
 import onnx.numpy_helper as onh
-from onnx import TensorProto
+from onnx import TensorProto, FunctionProto, ValueInfoProto
 from onnx.checker import check_model
+import torch
 from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
 from onnx_diagnostic.helpers.onnx_helper import (
     onnx_lighten,
     onnx_unlighten,
     onnx_find,
     _validate_function,
     check_model_ort,
+    iterator_initializer_constant,
+    from_array_extended,
 )
 
 
@@ -122,6 +126,122 @@ def test_check_model_ort(self):
         )
         check_model_ort(model)
 
+    def test_iterate_init(self):
+        itype = TensorProto.FLOAT
+        cst = np.arange(6).astype(np.float32)
+        model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node("IsNaN", ["x"], ["xi"]),
+                    oh.make_node("IsNaN", ["y"], ["yi"]),
+                    oh.make_node("Cast", ["xi"], ["xii"], to=TensorProto.INT64),
+                    oh.make_node("Cast", ["yi"], ["yii"], to=TensorProto.INT64),
+                    oh.make_node("Add", ["xii", "yii"], ["gggg"]),
+                    oh.make_node("Cast", ["gggg"], ["final"], to=itype),
+                ],
+                "dummy",
+                [oh.make_tensor_value_info("x", itype, [None, None])],
+                [oh.make_tensor_value_info("final", itype, [None, None])],
+                [from_array_extended(cst, name="y")],
+            ),
+            opset_imports=[oh.make_opsetid("", 20)],
+            ir_version=10,
+        )
+        li = list(iterator_initializer_constant(model))
+        self.assertEqual(len(li), 1)
+        self.assertEqual(li[0][0], "y")
+        self.assertEqualArray(li[0][1], cst)
+        li = list(iterator_initializer_constant(model, use_numpy=False))
+        self.assertEqual(len(li), 1)
+        self.assertEqual(li[0][0], "y")
+        self.assertEqualArray(li[0][1], cst)
+        self.assertIsInstance(li[0][1], torch.Tensor)
+
+    def _get_cdist_implementation(
+        self,
+        node_inputs: List[str],
+        node_outputs: List[str],
+        opsets: Dict[str, int],
+        **kwargs: Any,
+    ) -> FunctionProto:
+        """
+        Returns the CDist implementation as a function.
+        """
+        assert len(node_inputs) == 2
+        assert len(node_outputs) == 1
+        assert opsets
+        assert "" in opsets
+        assert set(kwargs) == {"metric"}, f"kwargs={kwargs}"
+        metric = kwargs["metric"]
+        assert metric in ("euclidean", "sqeuclidean")
+        # subgraph
+        nodes = [
+            oh.make_node("Sub", ["next", "next_in"], ["diff"]),
+            oh.make_node("Constant", [], ["axis"], value_ints=[1]),
+            oh.make_node("ReduceSumSquare", ["diff", "axis"], ["scan_out"], keepdims=0),
+            oh.make_node("Identity", ["next_in"], ["next_out"]),
+        ]
+
+        def make_value(name):
+            value = ValueInfoProto()
+            value.name = name
+            return value
+
+        graph = oh.make_graph(
+            nodes,
+            "loop",
+            [make_value("next_in"), make_value("next")],
+            [make_value("next_out"), make_value("scan_out")],
+        )
+
+        scan = oh.make_node(
+            "Scan", ["xb", "xa"], ["next_out", "zout"], num_scan_inputs=1, body=graph
+        )
+        final = (
+            oh.make_node("Sqrt", ["zout"], ["z"])
+            if metric == "euclidean"
+            else oh.make_node("Identity", ["zout"], ["z"])
+        )
+        return oh.make_function(
+            "npx",
+            f"CDist_{metric}",
+            ["xa", "xb"],
+            ["z"],
+            [scan, final],
+            [oh.make_opsetid("", opsets[""])],
+        )
+
+    def test_iterate_function(self):
+        itype = TensorProto.FLOAT
+        proto = self._get_cdist_implementation(
+            ["X", "Y"], ["Z"], opsets={"": 18}, metric="euclidean"
+        )
+        model = oh.make_model(
+            oh.make_graph(
+                [
+                    oh.make_node(proto.name, ["X", "Y"], ["Z"]),
+                ],
+                "dummy",
+                [
+                    oh.make_tensor_value_info("X", itype, [None, None]),
+                    oh.make_tensor_value_info("Y", itype, [None, None]),
+                ],
+                [oh.make_tensor_value_info("final", itype, [None, None])],
+            ),
+            opset_imports=[oh.make_opsetid("", 18)],
+            ir_version=10,
+        )
+        model.functions.append(proto)
+        li = list(iterator_initializer_constant(model))
+        self.assertEqual(len(li), 1)
+        self.assertEqual(li[0][0], "CDist_euclideanCDist_euclidean.axis")
+        self.assertEqualArray(li[0][1], np.array([1], dtype=np.int64))
+        li = list(iterator_initializer_constant(model, use_numpy=False))
+        self.assertEqual(len(li), 1)
+        self.assertEqual(li[0][0], "CDist_euclideanCDist_euclidean.axis")
+        self.assertEqualArray(li[0][1], np.array([1], dtype=np.int64))
+        self.assertIsInstance(li[0][1], torch.Tensor)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_helpers/test_ort_session.py b/_unittests/ut_helpers/test_ort_session.py
@@ -12,11 +12,8 @@
     requires_onnxruntime_training,
     requires_cuda,
 )
-from onnx_diagnostic.helpers.onnx_helper import (
-    from_array_extended,
-    onnx_dtype_to_np_dtype,
-    onnx_dtype_to_torch_dtype,
-)
+from onnx_diagnostic.helpers.onnx_helper import from_array_extended, onnx_dtype_to_np_dtype
+from onnx_diagnostic.helpers.torch_helper import onnx_dtype_to_torch_dtype
 from onnx_diagnostic.helpers.ort_session import (
     InferenceSessionForNumpy,
     InferenceSessionForTorch,
diff --git a/_unittests/ut_reference/test_ort_evaluator.py b/_unittests/ut_reference/test_ort_evaluator.py
@@ -14,11 +14,8 @@
     ignore_warnings,
     requires_cuda,
 )
-from onnx_diagnostic.helpers.onnx_helper import (
-    from_array_extended,
-    onnx_dtype_to_torch_dtype,
-    onnx_dtype_to_np_dtype,
-)
+from onnx_diagnostic.helpers.onnx_helper import from_array_extended, onnx_dtype_to_np_dtype
+from onnx_diagnostic.helpers.torch_helper import onnx_dtype_to_torch_dtype
 from onnx_diagnostic.reference import ExtendedReferenceEvaluator, OnnxruntimeEvaluator
 from onnx_diagnostic.helpers.ort_session import _InferenceSession
 
diff --git a/onnx_diagnostic/helpers/onnx_helper.py b/onnx_diagnostic/helpers/onnx_helper.py
@@ -2,7 +2,7 @@
 import json
 import os
 import sys
-from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 import numpy.typing as npt
 import onnx
@@ -762,3 +762,59 @@ def tensor_dtype_to_np_dtype(tensor_dtype: int) -> np.dtype:
         return mapping[tensor_dtype]
 
     return oh.tensor_dtype_to_np_dtype(tensor_dtype)
+
+
+def iterator_initializer_constant(
+    model: Union[onnx.FunctionProto, onnx.GraphProto, onnx.ModelProto],
+    use_numpy: bool = True,
+    prefix: str = "",
+) -> Iterator[Tuple[str, Union["torch.Tensor", np.ndarray]]]:  # noqa: F821
+    """
+    Iterates over the initializers and constants in an onnx model.
+
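+    :param model: model, graph or function to walk through
+    :param use_numpy: if True, yields numpy arrays, otherwise torch tensors
+    :param prefix: prefix added to the yielded names (functions, subgraphs)
+    :return: iterator over pairs ``(name, value)``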
+    """
+    if not isinstance(model, onnx.FunctionProto):
+        graph = model if isinstance(model, onnx.GraphProto) else model.graph
+        if not use_numpy:
+            from .torch_helper import to_tensor
+        if prefix:
+            prefix += "."
+        for init in graph.initializer:
+            yield f"{prefix}{init.name}", (
+                to_array_extended(init) if use_numpy else to_tensor(init)
+            )
+        nodes = graph.node
+        name = graph.name
+        if isinstance(model, onnx.ModelProto):
+            for f in model.functions:
+                yield from iterator_initializer_constant(
+                    f, use_numpy=use_numpy, prefix=f"{prefix}{f.name}"
+                )
+    else:
+        nodes = model.node
+        name = model.name
+    for node in nodes:
+        if node.op_type == "Constant" and node.domain == "":
+            from ..reference import ExtendedReferenceEvaluator as Inference
+
+            if not use_numpy:
+                import torch
+            sess = Inference(node)
+            value = sess.run(None, {})[0]
+            yield f"{prefix}{node.output[0]}", (
+                value if use_numpy else torch.from_numpy(value)
+            )
+
+        if node.op_type in {"Loop", "Body", "Scan"}:
+            for att in node.attribute:
+                assert (
+                    att.type != onnx.AttributeProto.GRAPHS
+                ), "Not implemented for type AttributeProto.GRAPHS."
+                if att.type == onnx.AttributeProto.GRAPH:
+                    yield from iterator_initializer_constant(
+                        att.g, use_numpy=use_numpy, prefix=f"{prefix}{name}"
+                    )
diff --git a/onnx_diagnostic/helpers/torch_helper.py b/onnx_diagnostic/helpers/torch_helper.py
@@ -5,7 +5,7 @@
 import sys
 import warnings
 from collections.abc import Iterable
-from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import numpy as np
 import onnx
 from onnx.external_data_helper import load_external_data_for_tensor, uses_external_data
@@ -858,19 +858,5 @@ def to_tensor(tensor: onnx.TensorProto, base_dir: str = "") -> torch.Tensor:
             return torch.frombuffer(raw_data, dtype=torch_dtype).reshape(dims)
 
     # Other cases, it should be small tensor. We use numpy.
-    np_tensor = to_array_extended(tensor, base_dir)
+    np_tensor = to_array_extended(tensor)
     return torch.from_numpy(np_tensor)
-
-
-def iterator_initializer_constant(
-    model: onnx.ModelProto, use_numpy: bool = True
-) -> Iterator[Tuple[str, Union[torch.Tensor, np.ndarray]]]:
-    """
-    Iterates on iniatialiers and constant in an onnx model.
-
-    :param model: model
-    :param use_numpy: use numpy or pytorch
-    :return: iterator
-    """
-    for init in model.graph.initializer:
-        yield init.name, (to_array_extended(init) if use_numpy else to_tensor(init))