
Commit 5a4c01c

fix patch for other version of transformers
1 parent f06d479 commit 5a4c01c

File tree: 4 files changed, +45 -8 lines changed

_unittests/ut_tasks/test_tasks_text_generation.py

Lines changed: 1 addition & 1 deletion

@@ -48,7 +48,7 @@ def test_text_generation_phi_3_mini_128k_instruct(self):

     @hide_stdout()
     @requires_transformers("4.53")
-    @requires_torch("2.7.99")
+    @requires_torch("2.8.99")  # check_guards not supported
     def test_text_generation_tiny_llm(self):
         mid = "arnir0/Tiny-LLM"
         data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
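For context, `requires_torch("2.8.99")` skips the test on older torch builds. Below is a minimal sketch of such a version gate; it is illustrative only and not the actual `onnx_diagnostic.ext_test_case` implementation (the decorator name `requires_torch_at_least` is made up here):

    import unittest

    import torch
    from packaging.version import Version


    def requires_torch_at_least(version: str):
        # Skip the decorated test when the installed torch is older than `version`.
        installed = Version(torch.__version__.split("+")[0])
        return unittest.skipUnless(
            installed >= Version(version),
            f"torch>={version} required, found {torch.__version__}",
        )


    class ExampleVersionGate(unittest.TestCase):
        @requires_torch_at_least("2.8.99")  # mirrors the bump in this commit
        def test_needs_recent_torch(self):
            self.assertTrue(torch.is_tensor(torch.ones(1)))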

_unittests/ut_torch_export_patches/test_patch_transformers.py

Lines changed: 2 additions & 2 deletions

@@ -3,7 +3,7 @@
 import transformers
 import transformers.integrations.sdpa_attention as sdpa_attention
 import onnx_diagnostic.torch_export_patches.patches.patch_transformers as patch_transformers
-from onnx_diagnostic.ext_test_case import ExtTestCase, requires_transformers
+from onnx_diagnostic.ext_test_case import ExtTestCase, requires_transformers, ignore_warnings
 from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
 from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions
 from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str

@@ -123,7 +123,7 @@ def test_causal_mask_in_scaled_dot_product_attention(self):
         attn_causal_bias.masked_fill_(temp_mask.logical_not(), float("-inf"))
         self.assertEqual(attn_causal_bias.min().item(), -float("inf"))

-    # @ignore_warnings(UserWarning)
+    @ignore_warnings(UserWarning)
     def test_causal_mask_in_scaled_dot_product_attention_export(self):
         sdpa_attention_forward = sdpa_attention.sdpa_attention_forward
         patched_sdpa_attention_forward = patch_transformers.patched_sdpa_attention_forward
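These tests compare an explicit causal bias against SDPA's built-in causal mode. A self-contained sketch of that equivalence (shapes chosen arbitrarily here, not taken from the test file):

    import torch
    import torch.nn.functional as F

    # (batch, heads, seq_len, head_dim)
    q = torch.randn(1, 2, 4, 8)
    k = torch.randn(1, 2, 4, 8)
    v = torch.randn(1, 2, 4, 8)

    # Build an additive causal bias: upper triangle filled with -inf.
    temp_mask = torch.ones(4, 4, dtype=torch.bool).tril(diagonal=0)
    attn_causal_bias = torch.zeros(4, 4)
    attn_causal_bias.masked_fill_(temp_mask.logical_not(), float("-inf"))
    assert attn_causal_bias.min().item() == -float("inf")

    # The explicit bias and is_causal=True produce the same attention output.
    out_mask = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_causal_bias)
    out_causal = F.scaled_dot_product_attention(q, k, v, is_causal=True)
    torch.testing.assert_close(out_mask, out_causal)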

onnx_diagnostic/export/api.py

Lines changed: 32 additions & 1 deletion

@@ -15,7 +15,38 @@ def to_onnx(
     output_dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     exporter: str = "onnx-dynamo",
 ) -> Any:
-    """Common API for exporters."""
+    """
+    Common API for exporters. By default, the models are optimized to use the
+    most efficient kernels implemented in :epkg:`onnxruntime`.
+
+    :param mod: torch model
+    :param args: unnamed arguments
+    :param kwargs: named arguments
+    :param input_names: input names for the onnx model (optional)
+    :param target_opset: opset to target; if not specified, each converter
+        keeps its default value
+    :param verbose: verbosity level
+    :param dynamic_shapes: dynamic shapes, usually a nested structure
+        including a dictionary for each tensor
+    :param filename: output filename
+    :param output_names: to change the output names of the onnx model
+    :param output_dynamic_shapes: to overwrite the dynamic shape names
+    :param exporter: exporter to use (``onnx-dynamo``, ``modelbuilder``, ``custom``)
+    :return: the output of the selected exporter, usually a structure including
+        an onnx model
+
+    A simple example:
+
+    .. code-block:: python
+
+        to_onnx(
+            model,
+            kwargs=inputs,
+            dynamic_shapes=ds,
+            exporter=exporter,
+            filename=filename,
+        )
+    """
     if exporter == "custom":
         from experimental_experiment.torch_interpreter import to_onnx as _to_onnx
         from experimental_experiment.xbuilder import OptimizationOptions
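A hedged usage sketch of the documented API follows; the model, inputs, and dynamic-shape spec are invented for illustration, and only the `to_onnx` keyword names come from the docstring above:

    import torch
    from onnx_diagnostic.export.api import to_onnx


    class TinyModel(torch.nn.Module):
        def forward(self, x):
            return torch.nn.functional.relu(x) + 1


    # Hypothetical inputs and dynamic shapes; dimension names are given as
    # strings here, which recent torch.export versions accept.
    inputs = {"x": torch.randn(2, 8)}
    ds = {"x": {0: "batch"}}

    to_onnx(
        TinyModel(),
        kwargs=inputs,
        dynamic_shapes=ds,
        exporter="onnx-dynamo",
        filename="tiny_model.onnx",
    )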

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 10 additions & 4 deletions

@@ -66,6 +66,7 @@ def _is_torchdynamo_exporting() -> bool:
         return False


+patch_is_causal = _has_transformers("4.55")
 patch_is_initialized = _has_transformers("4.56.99")


@@ -1365,10 +1366,15 @@ def patched_sdpa_attention_forward(
     if attention_mask is not None and attention_mask.ndim == 4:
         attention_mask = attention_mask[:, :, :, : key.shape[-2]]

-    is_causal = is_causal if is_causal is not None else getattr(module, "is_causal", True)
-    # PATCHED: remove the test query.shape[2] > 1
-    # is_causal = query.shape[2] > 1 and attention_mask is None and is_causal
-    is_causal = attention_mask is None and is_causal
+    if patch_is_causal:
+        is_causal = is_causal if is_causal is not None else getattr(module, "is_causal", True)
+
+        # PATCHED: remove the test query.shape[2] > 1
+        # is_causal = query.shape[2] > 1 and attention_mask is None and is_causal
+        # and we split the test to keep the minimum in torch.cond
+        is_causal = attention_mask is None and is_causal
+    elif is_causal is None:
+        is_causal = attention_mask is None

     torch._check(
         attention_mask is None or attention_mask.shape[3] == key.shape[2],
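To make the new branch easier to read outside the diff, here is a standalone restatement of the patched decision; `decide_is_causal` and the constant stand-in for `_has_transformers("4.55")` are introduced only for illustration:

    from typing import Optional

    import torch

    patch_is_causal = True  # stands in for _has_transformers("4.55")


    def decide_is_causal(
        module: torch.nn.Module,
        attention_mask: Optional[torch.Tensor],
        is_causal: Optional[bool],
    ) -> bool:
        if patch_is_causal:
            if is_causal is None:
                is_causal = getattr(module, "is_causal", True)
            # Patched behaviour: no data-dependent test on query.shape[2] > 1,
            # keeping the condition traced by torch.cond to the mask check only.
            return attention_mask is None and is_causal
        if is_causal is None:
            return attention_mask is None
        return is_causal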
