fix

xadupre · xadupre · commit e559a5690d44 · 2025-11-20T12:48:10.000+01:00
diff --git a/_unittests/ut_torch_export_patches/test_patch_transformers.py b/_unittests/ut_torch_export_patches/test_patch_transformers.py
@@ -407,7 +407,7 @@ def test_patched_qwen2_5_vl_vision_attention_forward(self):
                 _is_torchdynamo_exporting()
             ), f"exporting is not set to true? {torch.compiler.is_exporting_flag}"
             got = patched_Qwen2_5_VLVisionAttention.forward(instance, **inputs)
-            self.assertEqualArray(expected, got, atol=1e-5)
+            self.assertEqualArray(expected, got, atol=1e-2)
 
         class Model(patched_class):
             def forward(
diff --git a/onnx_diagnostic/export/onnx_plug.py b/onnx_diagnostic/export/onnx_plug.py
@@ -300,13 +300,15 @@ def converter(
             sts: Optional[Dict[str, Any]],
             outputs: List[str],
             *args,
+            **kwargs,
         ) -> Any:
             if not g.has_local_function(
                 self.function_proto.name, domain=self.function_proto.domain
             ):
                 g.add_function(self.function_proto)
             ags = args[: len(self.args_name)]
             kws = dict(zip(self.kwargs_name, args[len(self.args_name) :]))
+            kws.update(kwargs)
             res = g.make_node(
                 self.function_proto.name,
                 ags,
diff --git a/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py b/onnx_diagnostic/torch_export_patches/patches/_patch_transformers_qwen2_5.py
@@ -119,6 +119,7 @@ def qwen_sdpa_attention(
         value_states: torch.Tensor,  # F10s1x16xs47x80
         cu_seqlens: torch.Tensor,  # F7su19
         scaling: float = 0,
+        num_heads: int = 16,
     ) -> torch.Tensor:
         lengths = cu_seqlens[1:] - cu_seqlens[:-1]
         splits = [
@@ -497,8 +498,8 @@ def forward(
                     key_states,
                     value_states,
                     cu_seqlens,
-                    scaling=self.scaling,
-                    num_heads=self.num_heads,
+                    self.scaling,
+                    self.num_heads,
                 )
             elif _is_torchdynamo_exporting():
                 if self.config._attn_implementation == "flash_attention_2":