Skip to content

Commit 8cadd99

Browse files
committed
fix patches
1 parent a26916d commit 8cadd99

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

_unittests/ut_torch_export_patches/test_dynamic_class.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import unittest
44
from typing import Any, Dict, List, Tuple
55
import torch
6+
import transformers
67
from onnx_diagnostic.ext_test_case import (
78
ExtTestCase,
89
ignore_warnings,
@@ -16,6 +17,7 @@
1617
)
1718
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
1819
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
20+
import onnx_diagnostic.torch_export_patches.patches.patch_transformers as patch_transformers
1921

2022

2123
class TestOnnxExportErrors(ExtTestCase):
@@ -339,7 +341,11 @@ def test_phi2_export_interpreter(self):
339341
str_inputs, string_type(inputs_copied, with_shape=True, with_min_max=True)
340342
)
341343

342-
with torch_export_patches(patch_transformers=True):
344+
with torch_export_patches(patch_transformers=True, verbose=1):
345+
self.assertEqual(
346+
transformers.masking_utils.ALL_MASK_ATTENTION_FUNCTIONS["sdpa"],
347+
patch_transformers.patched_sdpa_mask_recent_torch,
348+
)
343349
ep = torch.export.export(
344350
model,
345351
(),

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
# Introduced in 4.52
4040
from transformers.masking_utils import (
4141
causal_mask_function,
42-
sdpa_mask,
4342
padding_mask_function,
4443
and_masks,
4544
_ignore_causal_mask_sdpa,
@@ -112,7 +111,7 @@ def patched_eager_mask(
112111
"""manual patch for function ``transformers.masking_utils.eager_mask``."""
113112
# The masks for eager attention are simply boolean mask from sdpa, casted to 0 and -inf
114113
_ = kwargs.pop("allow_is_causal_skip", None)
115-
mask = sdpa_mask(
114+
mask = patched_sdpa_mask_recent_torch(
116115
batch_size=batch_size,
117116
cache_position=cache_position,
118117
kv_length=kv_length,

0 commit comments

Comments (0)