
Commit b5aa71b

fix
1 parent 2d27c1a commit b5aa71b

3 files changed: +131 −26 lines

_unittests/ut_export/test_onnx_plug.py

Lines changed: 92 additions & 3 deletions
@@ -1,4 +1,5 @@
 import unittest
+import onnx
 import onnx.helper as oh
 import torch
 from onnx_diagnostic.ext_test_case import ExtTestCase, has_torch, hide_stdout, ignore_warnings
@@ -38,7 +39,7 @@ def make_function_proto():
 
     @hide_stdout()
     @ignore_warnings(FutureWarning)
-    def test_onnx_plug_export(self):
+    def test_onnx_plug_export_nokwargs(self):
         def _test_customsub(x, y):
             return x - y
 
@@ -85,7 +86,95 @@ def forward(self, x):
                 onnx_plugs=replacements,
                 target_opset=22,
             )
-            self.assert_onnx_disc("test_onnx_plug_export_custom", onx.model_proto, model, (x,))
+            self.assert_onnx_disc(
+                "test_onnx_plug_export_nokwargs_custom", onx.model_proto, model, (x,)
+            )
+
+        if not has_torch("2.9"):
+            raise unittest.SkipTest("onnx-dynamo + custom op not fully working on 2.8")
+        with self.subTest(exporter="onnx-dynamo"):
+            onx = to_onnx(
+                model,
+                (x,),
+                dynamic_shapes=ds,
+                exporter="onnx-dynamo",
+                onnx_plugs=replacements,
+                target_opset=22,
+            )
+            self.assert_onnx_disc(
+                "test_onnx_plug_export_nokwargs_onnx_dynamo", onx.model_proto, model, (x,)
+            )
+
+    @unittest.skip("not ready yet")
+    @hide_stdout()
+    @ignore_warnings(FutureWarning)
+    def test_onnx_plug_export_kwargs(self):
+        def _test_customdiv(x, y, epsilon: float = 1e-5):
+            return x / (y + epsilon)
+
+        def _test_customdiv_shape(x, y, *args, **kwargs):
+            return torch.empty(torch.broadcast_shapes(x.shape, y.shape), dtype=x.dtype)
+
+        def make_function_proto():
+            f = oh.make_function(
+                "onnx_plug",
+                "_test_customdiv",
+                ["x", "y"],
+                ["z"],
+                [
+                    oh.make_node("Constant", [], ["eps"]),
+                    oh.make_node("Add", ["y", "eps"], ["yeps"]),
+                    oh.make_node("Div", ["x", "yeps"], ["z"]),
+                ],
+                opset_imports=[oh.make_opsetid("", 22)],
+                attributes=["epsilon"],
+            )
+            att = onnx.AttributeProto()
+            att.name = "value_float"
+            att.ref_attr_name = "epsilon"
+            att.type = onnx.AttributeProto.FLOAT
+            f.node[0].attribute.append(att)
+            return f
+
+        class Model(torch.nn.Module):
+            def forward(self, x):
+                y = x.sum(axis=1, keepdim=True)
+                d = torch.ops.onnx_plug._test_customdiv(x, y, epsilon=3.5)
+                return torch.abs(d)
+
+        replacements = [
+            EagerDirectReplacementWithOnnx(
+                _test_customdiv,
+                _test_customdiv_shape,
+                make_function_proto(),
+                2,
+                1,
+                kwargs=dict(epsilon=1e-5),
+                verbose=1,
+            )
+        ]
+
+        x = torch.randn((3, 4), dtype=torch.float32)
+        model = Model()
+        expected = model(x)
+        ds = ({0: "d1", 1: "d2"},)
+        ep = torch.export.export(model, (x,), dynamic_shapes=self.use_dyn_not_str(ds))
+        self.assertIn("torch.ops.onnx_plug._test_customdiv.default", str(ep))
+        got = ep.module()(x)
+        self.assertEqualArray(expected, got)
+
+        with self.subTest(exporter="custom"):
+            onx = to_onnx(
+                model,
+                (x,),
+                dynamic_shapes=ds,
+                exporter="custom",
+                onnx_plugs=replacements,
+                target_opset=22,
+            )
+            self.assert_onnx_disc(
+                "test_onnx_plug_export_kwargs_custom", onx.model_proto, model, (x,)
+            )
 
         if not has_torch("2.9"):
             raise unittest.SkipTest("onnx-dynamo + custom op not fully working on 2.8")
@@ -99,7 +188,7 @@ def forward(self, x):
                 target_opset=22,
             )
             self.assert_onnx_disc(
-                "test_onnx_plug_export_onnx_dynamo", onx.model_proto, model, (x,)
+                "test_onnx_plug_export_kwargs_onnx_dynamo", onx.model_proto, model, (x,)
             )

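Note: the new (still skipped) kwargs test relies on ONNX attribute references: the function's Constant node carries no literal value, only ref_attr_name = "epsilon", so each calling node supplies epsilon as an attribute. The following standalone sketch of that mechanism is not part of the commit; the names toy_domain and customdiv are hypothetical, and it assumes the onnx reference evaluator resolves attribute references inside local functions.

import numpy as np
import onnx
import onnx.helper as oh
from onnx.reference import ReferenceEvaluator

# Toy function (hypothetical names): the Constant node has no value of its own,
# only a reference to the function attribute "epsilon".
f = oh.make_function(
    "toy_domain",
    "customdiv",
    ["x", "y"],
    ["z"],
    [
        oh.make_node("Constant", [], ["eps"]),
        oh.make_node("Add", ["y", "eps"], ["yeps"]),
        oh.make_node("Div", ["x", "yeps"], ["z"]),
    ],
    opset_imports=[oh.make_opsetid("", 22)],
    attributes=["epsilon"],
)
att = onnx.AttributeProto()
att.name = "value_float"
att.ref_attr_name = "epsilon"  # resolved from the calling node's epsilon attribute
att.type = onnx.AttributeProto.FLOAT
f.node[0].attribute.append(att)

# A model calling the function with epsilon=3.5, as the test does in eager mode.
graph = oh.make_graph(
    [oh.make_node("customdiv", ["x", "y"], ["z"], domain="toy_domain", epsilon=3.5)],
    "g",
    [
        oh.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [None]),
        oh.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [None]),
    ],
    [oh.make_tensor_value_info("z", onnx.TensorProto.FLOAT, [None])],
)
model = oh.make_model(
    graph,
    opset_imports=[oh.make_opsetid("", 22), oh.make_opsetid("toy_domain", 1)],
    functions=[f],
)

x = np.random.rand(4).astype(np.float32)
y = np.random.rand(4).astype(np.float32)
got = ReferenceEvaluator(model).run(None, {"x": x, "y": y})[0]
assert np.allclose(got, x / (y + 3.5))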
onnx_diagnostic/export/onnx_plug.py

Lines changed: 22 additions & 6 deletions
@@ -49,7 +49,7 @@ class EagerDirectReplacementWithOnnx:
     :param n_outputs: same for the number of outputs,
         only tensors must be counted
     :param name: the name of the custom op, the function name if not specified
-    :param kwargs: constants
+    :param kwargs: constant parameters with their default values
     :param verbose: verbose level
 
     Here is an example:
@@ -163,8 +163,8 @@ def __init__(
             .replace("<lambda>", "l")
             .replace(".", "_")
         )
-        self.kwargs = kwargs
-        assert kwargs is None or all(isinstance(v, (int, float)) for v in kwargs.values()), (
+        self.kwargs = kwargs or {}
+        assert all(isinstance(v, (int, float)) for v in self.kwargs.values()), (
             f"Only int or floats are allowed for kwargs={kwargs}, one of them "
             f"does not respect that constraint."
         )
@@ -184,7 +184,8 @@ def __init__(
         assert (
             function_proto.domain == self.domain
         ), f"Function domain must be {self.domain!r} but it is {function_proto.domain!r}"
-        self.arg_names = params
+        self.args_name = [p for p in params if p not in self.kwargs]
+        self.kwargs_name = [p for p in params if p in self.kwargs]
         self.verbose = verbose
         self.custom_op = self._register()
 
@@ -211,7 +212,19 @@ def __call__(self, *args):
 
     def _register(self):
        """Registers the custom op."""
-        inputs = ", ".join([f"Tensor {p}" for p in self.arg_names])
+        input_args = [f"Tensor {p}" for p in self.args_name]
+        for p in self.kwargs_name:
+            val = self.kwargs[p]
+            if isinstance(val, int):
+                input_args.append(f"int {p}={val}")
+            elif isinstance(val, float):
+                input_args.append(f"float {p}={val}")
+            else:
+                raise NotImplementedError(
+                    f"kwargs {p!r} has a default value of unsupported type {type(val)}"
+                )
+
+        inputs = ", ".join(input_args)
         schema = f"({inputs}) -> Tensor"
         if self.n_outputs > 1:
             schema += "[]"
@@ -292,12 +305,15 @@ def converter(
             self.function_proto.name, domain=self.function_proto.domain
         ):
             g.add_function(self.function_proto)
+        ags = args[: len(self.args_name)]
+        kws = dict(zip(self.kwargs_name, args[len(self.args_name) :]))
         res = g.make_node(
             self.function_proto.name,
-            args,
+            ags,
             outputs,
             domain=self.function_proto.domain,
             name=self.target_name,
+            **kws,
         )
         if not sts:
             new_shapes = self.shape_fn(*args)

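For reference, `_register` now emits a torch schema such as `(Tensor x, Tensor y, float epsilon=1e-05) -> Tensor`, with defaults taken from `kwargs`. Below is a minimal sketch of the kind of registration such a schema enables; the namespace `toy_plug` and the op `customdiv` are hypothetical, and the actual code inside `EagerDirectReplacementWithOnnx` may differ.

import torch

# Hypothetical namespace; the schema string mirrors what _register builds from
# args_name (tensors) and kwargs_name (int/float with defaults).
lib = torch.library.Library("toy_plug", "DEF")
lib.define("customdiv(Tensor x, Tensor y, float epsilon=1e-05) -> Tensor")

@torch.library.impl(lib, "customdiv", "CPU")
def customdiv(x, y, epsilon=1e-5):
    # eager kernel
    return x / (y + epsilon)

@torch.library.register_fake("toy_plug::customdiv")
def customdiv_fake(x, y, epsilon=1e-5):
    # shape/dtype propagation used by torch.export
    return torch.empty(torch.broadcast_shapes(x.shape, y.shape), dtype=x.dtype)

x, y = torch.randn(3, 4), torch.randn(3, 1)
print(torch.ops.toy_plug.customdiv(x, y))               # uses the default epsilon=1e-05
print(torch.ops.toy_plug.customdiv(x, y, epsilon=3.5))  # overrides it, as in the new test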
onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py

Lines changed: 17 additions & 17 deletions
@@ -27,7 +27,12 @@
 
 @onnxscript.script(opset=onnx_plugs_op)
 def LoopMHAAttention(
-    query_states, key_states, value_states, cu_seqlens, scale: float, num_heads: int
+    query_states,
+    key_states,
+    value_states,
+    cu_seqlens,
+    scaling: float = 0.11180339887498948,
+    num_heads: int = 16,
 ):
     to_3d_shape = op.Constant(value_ints=[0, 0, -1])
     query_transposed = op.Transpose(query_states, perm=[0, 2, 1, 3])
@@ -52,7 +57,7 @@ def LoopMHAAttention(
             key_i,
             value_i,
             num_heads=num_heads,
-            scale=scale,
+            scale=scaling,
         )
         attn_output = op.Concat(attn_output, mha_output, axis=1)
     attn_output_4d = op.Reshape(attn_output, output_shape)
@@ -64,7 +69,7 @@ def PackedAttention(
     key,
     value,
     cu_seqlens,
-    scale: float = 0.11180339887498948,
+    scaling: float = 0.11180339887498948,
     num_heads: int = 16,
 ):
     num_patches = op.Cast(op.Size(cu_seqlens), to=onnx.TensorProto.INT32) - 1
@@ -102,7 +107,7 @@ def PackedAttention(
         None,
         op.Cast(token_offset, to=onnx.TensorProto.INT32),
         op.Cast(cu_seqlens, to=onnx.TensorProto.INT32),
-        scale=scale,
+        scale=scaling,
         num_heads=num_heads,
     )
     packed_attn_output_3d = op.Reshape(packed_attn_output_2d, shape_3d)
@@ -139,10 +144,8 @@ def qwen_sdpa_attention(
 
 # not ideal
 qwen_sdpa_attention_versatile = EagerDirectReplacementWithOnnx(
-    lambda qs, ks, vs, cuseq: qwen_sdpa_attention(
-        qs, ks, vs, cuseq, scaling=0.11180339887498948
-    ),
-    lambda qs, *args: torch.empty(
+    qwen_sdpa_attention,
+    lambda qs, *args, **kwargs: torch.empty(
         (qs.shape[0], qs.shape[2], qs.shape[1], qs.shape[3]),
         dtype=qs.dtype,
         device=qs.device,
@@ -489,16 +492,13 @@ def forward(
             is transformers.integrations.sdpa_attention.sdpa_attention_forward
             or attention_interface is patched_sdpa_attention_forward
         ) and strategy_for_attention_in_qwen_2_5 == "PACKED":
-            torch._check(
-                qwen_sdpa_attention_versatile.kwargs["scaling"] == self.scaling,
-                lambda: f"Not implemented for scaling={self.scaling}",
-            )
-            torch._check(
-                qwen_sdpa_attention_versatile.kwargs["num_heads"] == self.num_heads,
-                lambda: f"Not implemented for num_heads={self.num_heads}",
-            )
             attn_output = qwen_sdpa_attention_versatile(
-                query_states, key_states, value_states, cu_seqlens
+                query_states,
+                key_states,
+                value_states,
+                cu_seqlens,
+                scaling=self.scaling,
+                num_heads=self.num_heads,
             )
         elif _is_torchdynamo_exporting():
             if self.config._attn_implementation == "flash_attention_2":

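As a side note, onnxscript turns annotated non-tensor parameters such as `scaling: float = 0.11180339887498948` into ONNX function attributes with default values, which is what lets the signatures above drop the hard-coded lambda. A toy sketch of that behavior follows; `ScaledSoftmax` is hypothetical, and the exact proto fields holding the defaults depend on the onnx/onnxscript versions.

import onnxscript
from onnxscript import opset18 as op

# Hypothetical toy function: axis and scaling become function attributes with defaults,
# mirroring scaling/num_heads in LoopMHAAttention and PackedAttention above.
@onnxscript.script()
def ScaledSoftmax(x, axis: int = -1, scaling: float = 1.0):
    scaled = op.Mul(x, op.Constant(value_float=scaling))
    return op.Softmax(scaled, axis=axis)

proto = ScaledSoftmax.to_function_proto()
# Depending on the IR version, the attribute names show up in proto.attribute or,
# together with their default values, in proto.attribute_proto.
print(proto.name, list(proto.attribute), list(proto.attribute_proto))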