Handles parameters in OnnxruntimeEvaluator for FunctionProto

xadupre · xadupre · commit 1272495263e5 · 2025-11-26T15:17:02.000+01:00
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
 0.8.3
 +++++
 
+* :pr:`324`: supports FunctionProto with arguments in OnnxruntimeEvaluator
 * :pr:`323`: drops torch 2.8 on CI
 * :pr:`322`: support rerunning onnx kernels with torch intermediate results in side-by-side
 * :pr:`314`: fix modelbuilder download needed after this change https://github.com/microsoft/onnxruntime-genai/pull/1862
diff --git a/_unittests/ut_reference/test_onnxruntime_evaluator.py b/_unittests/ut_reference/test_onnxruntime_evaluator.py
@@ -284,6 +284,41 @@ def test_skip_simplified_layer_normalization(self):
         self.assertEqual(got[1].shape, feeds["x"].shape)
         self.assertEqual(got[1].dtype, feeds["x"].dtype)
 
+    def test_function_proto_with_kwargs(self):
+        linear_function = oh.make_function(
+            "test_domain",
+            "LinearRegression",
+            ["x", "a", "b"],
+            ["y"],
+            [
+                oh.make_node("Constant", [], ["eps"]),
+                oh.make_node("Constant", [], ["zero"], value_ints=[0]),
+                oh.make_node("Unsqueeze", ["eps", "zero"], ["eps1d"]),
+                oh.make_node("MatMul", ["x", "a"], ["xa"]),
+                oh.make_node("Add", ["b", "eps1d"], ["beps"]),
+                oh.make_node("Add", ["xa", "beps"], ["y"]),
+            ],
+            [oh.make_opsetid("", 14)],
+            ["epsilon"],
+        )
+        att = onnx.AttributeProto()
+        att.name = "value_float"
+        att.ref_attr_name = "epsilon"
+        att.type = onnx.AttributeProto.FLOAT
+        linear_function.node[0].attribute.append(att)
+        feeds = dict(
+            x=np.random.rand(4, 4).astype(np.float32),
+            a=np.random.rand(4, 2).astype(np.float32),
+            b=np.random.rand(1, 2).astype(np.float32),
+        )
+        epsilon = 15.6
+        expected = feeds["x"] @ feeds["a"] + feeds["b"] + epsilon
+        sess = OnnxruntimeEvaluator(
+            linear_function, whole=True, function_kwargs=dict(epsilon=epsilon)
+        )
+        got = sess.run(None, feeds)
+        self.assertEqualArray(expected, got[0], atol=1e-5)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_torch_export_patches/test_patch_transformers.py b/_unittests/ut_torch_export_patches/test_patch_transformers.py
@@ -562,9 +562,27 @@ def test_plug_packed_multi_head_attention_qwen25(self):
                 dtype=torch.int64,
             ).to("cuda"),
         )
-        qwen_sdpa_attention_versatile.verify(
+
+        results = qwen_sdpa_attention_versatile.verify(
+            *inputs,
+            scaling=0.5,
+            num_heads=16,
+            dump_onnx_model=self.get_dump_file(
+                "test_plug_packed_multi_head_attention_qwen25.onnx"
+            ),
+        )
+        self.assertEqual(len(results.eager_outputs), len(results.onnx_outputs))
+        self.assertEqual(len(results.eager_outputs), len(results.diffs))
+        self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.01)
+        self.assertLess(results.diffs[0]["abs"], 0.01)
+
+        results = qwen_sdpa_attention_versatile.verify(
             *inputs, scaling=0.11180339887498948, num_heads=16
         )
+        self.assertEqual(len(results.eager_outputs), len(results.onnx_outputs))
+        self.assertEqual(len(results.eager_outputs), len(results.diffs))
+        self.assertEqualArray(results.eager_outputs[0], results.onnx_outputs[0], atol=0.01)
+        self.assertLess(results.diffs[0]["abs"], 0.01)
 
 
 if __name__ == "__main__":
diff --git a/onnx_diagnostic/export/onnx_plug.py b/onnx_diagnostic/export/onnx_plug.py
@@ -27,7 +27,7 @@ class VerifyResult:
     """
 
     eager_outputs: TUPLE_TENSORS
-    onnx_output: TUPLE_TENSORS
+    onnx_outputs: TUPLE_TENSORS
     diffs: Tuple[Dict[str, float], ...]
 
 
@@ -238,7 +238,13 @@ def _register(self):
         custom_def.register_kernel(None)(self.eager_fn)
         custom_def._abstract_fn = self.shape_fn
 
-    def verify(self, *args, engine: Optional[Callable] = None, **kwargs) -> VerifyResult:
+    def verify(
+        self,
+        *args,
+        engine: Optional[Callable] = None,
+        dump_onnx_model: Optional[str] = None,
+        **kwargs,
+    ) -> VerifyResult:
         """
         Verifies that the eager mode is equivalent to the onnx function given
         as a replacements. This function evaluates `eager_fn`, checks that the shapes
@@ -249,6 +255,9 @@ def verify(self, *args, engine: Optional[Callable] = None, **kwargs) -> VerifyRe
         :param kwargs: arguments for eager_fn
         :param engine: by default an instance of
             :class:`onnx_diagnostic.reference.OnnxruntimeEvaluator`.
+        :param dump_onnx_model: name of the file where the onnx model used
+            to check that eager and onnx produce the same results is saved,
+            nothing is saved when it is None
         :return: outputs of :func:`onnx_diagnostic.helpers.max_diff`
         """
         expected = self.eager_fn(*args, **kwargs)
@@ -280,11 +289,23 @@ def verify(self, *args, engine: Optional[Callable] = None, **kwargs) -> VerifyRe
 
         # Now the ONNX execution.
         assert engine is None, f"Not implemented yet with engine={engine!r}"
-        sess = OnnxruntimeEvaluator(self.function_proto, whole=True)
-        feeds = dict(zip(sess.input_names, args))
+        ags, kws = self._make_args_kwargs(*args, **kwargs)
+        sess = OnnxruntimeEvaluator(
+            self.function_proto,
+            whole=True,
+            dump_onnx_model=dump_onnx_model,
+            function_kwargs=kws,
+        )
+        feeds = dict(zip(sess.input_names, ags))
         got = sess.run(None, feeds)
-        diffs = tuple(max_diff(e, g) for e, g in zip(expected, got))
-        return VerifyResult(eager_outputs=expected, onnx_output=tuple(got), diffs=diffs)  # type: ignore[arg-type]
+        diffs = tuple(max_diff(e, g, hist=[0.1, 0.01]) for e, g in zip(expected, got))
+        return VerifyResult(eager_outputs=expected, onnx_outputs=tuple(got), diffs=diffs)  # type: ignore[arg-type]
+
+    def _make_args_kwargs(self, *args, **kwargs):
+        ags = args[: len(self.args_name)]
+        kws = dict(zip(self.kwargs_name, args[len(self.args_name) :]))
+        kws.update(kwargs)
+        return ags, kws
 
     def custom_converter(
         self,
@@ -307,9 +328,7 @@ def converter(
                 self.function_proto.name, domain=self.function_proto.domain
             ):
                 g.add_function(self.function_proto)
-            ags = args[: len(self.args_name)]
-            kws = dict(zip(self.kwargs_name, args[len(self.args_name) :]))
-            kws.update(kwargs)
+            ags, kws = self._make_args_kwargs(*args, **kwargs)
             res = g.make_node(
                 self.function_proto.name,
                 ags,
@@ -370,7 +389,8 @@ def onnx_dynamo_converter(self) -> Callable:
             onnx.defs.register_schema(schema)
         op = onnxscript.values.Op(onnx_plug_op, self.function_proto.name, schema)
 
-        def converter(*cargs):
-            return op(*cargs, n_outputs=self.n_outputs)
+        def converter(*cargs, **ckwargs):
+            ags, kws = self._make_args_kwargs(*cargs, **ckwargs)
+            return op(*ags, n_outputs=self.n_outputs, **kws)
 
         return onnxscript.values.TracedOnnxFunction(onnx_plug_op, converter)
diff --git a/onnx_diagnostic/reference/ort_evaluator.py b/onnx_diagnostic/reference/ort_evaluator.py
@@ -54,6 +54,9 @@ class OnnxruntimeEvaluator:
     :param whole: if True, do not split node by node
     :param torch_or_numpy: force the use of one of them, True for torch,
         False for numpy, None to let the class choose
+    :param dump_onnx_model: name of the file where the temporary onnx model
+        created when *whole* is True is saved (requires ``whole=True``)
+    :param function_kwargs: values of the parameters (attributes) a FunctionProto expects
     """
 
     def __init__(
@@ -77,6 +80,8 @@ def __init__(
         opsets: Optional[Union[int, Dict[str, int]]] = None,
         whole: bool = False,
         torch_or_numpy: Optional[bool] = None,
+        function_kwargs: Optional[Dict[str, Any]] = None,
+        dump_onnx_model: Optional[str] = None,
     ):
         if isinstance(proto, str):
             self.proto: Proto = load(proto)
@@ -90,6 +95,9 @@ def __init__(
         assert isinstance(
             self.proto, PROTO
         ), f"Unexpected type for self.proto {type(self.proto)}"
+        assert (
+            whole or not dump_onnx_model
+        ), f"whole must be True for dump_onnx_model={dump_onnx_model!r}"
 
         self._cache: Dict[
             Any, Tuple[Proto, Union["OnnxruntimeEvaluator", _InferenceSession]]  # noqa: UP037
@@ -109,6 +117,8 @@ def __init__(
             use_training_api=use_training_api,
         )
         self.to_tensor_or_array = to_array_extended if not torch_or_numpy else to_tensor
+        self.function_kwargs = function_kwargs
+        self.dump_onnx_model = dump_onnx_model
 
         self.verbose = verbose
         self.torch_or_numpy = torch_or_numpy
@@ -357,11 +367,12 @@ def _make_model_proto(
         nodes: Sequence[NodeProto],
         vinputs: Sequence[ValueInfoProto],
         voutputs: Sequence[ValueInfoProto],
+        functions: Optional[Sequence[FunctionProto]] = None,
     ) -> ModelProto:
         onx = oh.make_model(
             oh.make_graph(nodes, "-", vinputs, voutputs),
             ir_version=getattr(self.proto, "ir_version", self.ir_version),
-            functions=getattr(self.proto, "functions", None),
+            functions=[*getattr(self.proto, "functions", []), *(functions or [])],
         )
         del onx.opset_import[:]
         if hasattr(self.proto, "opset_import"):
@@ -430,8 +441,18 @@ def _get_sess(
         if isinstance(node, ModelProto):
             onx = node
         else:
+            functions = []
+            if isinstance(node, FunctionProto):
+                functions.append(node)
+                node = oh.make_node(
+                    node.name,
+                    list(node.input),
+                    list(node.output),
+                    domain=node.domain,
+                    **(self.function_kwargs or {}),
+                )
             assert isinstance(node, NodeProto), f"Unexpected type {type(node)} for node"
-            if node.op_type == "Constant":
+            if node.op_type == "Constant" and node.domain == "":
                 # We force the type to be a boolean.
                 ref = ExtendedReferenceEvaluator(node)
                 cst = ref.run(None, {})[0]
@@ -457,10 +478,16 @@ def _get_sess(
                 # no need to run shape inference
                 prenodes, voutputs = self._make_model_outputs(node, vinputs)
 
-            onx = self._make_model_proto([*prenodes, node], vinputs, voutputs)
+            onx = self._make_model_proto(
+                [*prenodes, node], vinputs, voutputs, functions=functions
+            )
             if node.op_type in {"Shape", "Size"}:
                 on_cpu = True
 
+        if self.dump_onnx_model:
+            onnx_save(
+                onx, self.dump_onnx_model, save_as_external_data=len(onx.graph.node) > 100
+            )
         cls = (
             InferenceSessionForNumpy
             if any(isinstance(i, np.ndarray) for i in inputs)