@@ -55,20 +55,35 @@ class patched_AttentionMaskConverter:
 
     @staticmethod
     def _make_causal_mask(
-        input_ids_shape: torch.Size,
-        dtype: torch.dtype,
-        device: torch.device,
-        past_key_values_length: int = 0,
-        sliding_window: Optional[int] = None,
+        *args,
+        **kwargs,
+        # input_ids_shape: torch.Size,
+        # dtype: torch.dtype,
+        # device: torch.device,
+        # past_key_values_length: int = 0,
+        # sliding_window: Optional[int] = None,
     ):
-        """Patched method."""
-        return _patch_make_causal_mask(
-            input_ids_shape=input_ids_shape,
-            dtype=dtype,
-            device=device,
-            past_key_values_length=past_key_values_length,
-            sliding_window=sliding_window,
-        )
66+ """
67+ Patched method.
68+
69+ This static method may be called with ``AttentionMaskConverter._make_causal_mask``
70+ or ``self._make_causal_mask``. That changes this argument is receives.
71+ That should not matter but...
72+ """
+        if args:
+            index = 0 if isinstance(args[0], (tuple, torch.Size)) else 1
+            names = [
+                "input_ids_shape",
+                "dtype",
+                "device",
+                "past_key_values_length",
+                "sliding_window",
+            ]
+            for i, a in enumerate(args):
+                if i < index:
+                    continue
+                kwargs[names[i - index]] = a
+        return _patch_make_causal_mask(**kwargs)
 
 
 class patched_DynamicCache:
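A minimal, self-contained sketch of what the realignment buys. It assumes only the five keyword parameters visible in the diff; make_causal_mask, DummyConverter, and the stub body of _patch_make_causal_mask are illustrative, not the real patch.

import torch
from typing import Optional


def _patch_make_causal_mask(
    input_ids_shape: torch.Size,
    dtype: torch.dtype,
    device: torch.device,
    past_key_values_length: int = 0,
    sliding_window: Optional[int] = None,
):
    # Stub standing in for the real replacement function: it only echoes
    # the keyword arguments so the realignment can be verified.
    return (input_ids_shape, dtype, device, past_key_values_length, sliding_window)


def make_causal_mask(*args, **kwargs):
    # Same realignment logic as in the patch above.
    if args:
        # Skip the first positional argument when it is not a shape:
        # it is then assumed to be the converter instance (``self``).
        index = 0 if isinstance(args[0], (tuple, torch.Size)) else 1
        names = [
            "input_ids_shape",
            "dtype",
            "device",
            "past_key_values_length",
            "sliding_window",
        ]
        for i, a in enumerate(args):
            if i < index:
                continue
            kwargs[names[i - index]] = a
    return _patch_make_causal_mask(**kwargs)


class DummyConverter:
    # Illustrative stand-in for AttentionMaskConverter.
    pass


shape = torch.Size([2, 8])
# Staticmethod-style call: the shape is the first positional argument.
out1 = make_causal_mask(shape, torch.float32, torch.device("cpu"))
# Instance-first call: the extra leading argument is detected and skipped.
out2 = make_causal_mask(DummyConverter(), shape, torch.float32, torch.device("cpu"))
assert out1 == out2

Both call shapes end up with the same keyword arguments, which is why the patched signature can afford to take *args and **kwargs instead of the original explicit parameters.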