
Commit cb27ace ("fix issues")
Parent: 61b19f7

File tree: 3 files changed, 42 insertions(+), 1 deletion(-)

_unittests/ut_tasks/test_tasks.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
     has_transformers,
     requires_transformers,
 )
-from onnx_diagnostic.helpers.torch_helper import to_any
+from onnx_diagnostic.helpers.torch_helper import to_any, torch_deepcopy
 from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
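
The test module now also imports torch_deepcopy. As a hedged illustration (not part of the commit, and assuming the helper deep-copies nested containers of tensors), the point of such a helper is to keep the original inputs untouched when a copy is mutated or consumed:

import torch
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy

# Hypothetical usage sketch: copy a dict of tensors, mutate the copy,
# and check that the original inputs stay unchanged (assumed behavior).
inputs = {"input_ids": torch.ones((1, 4), dtype=torch.int64)}
copy = torch_deepcopy(inputs)
copy["input_ids"] += 1
print(inputs["input_ids"][0, 0].item())  # 1: the original tensor is untouched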

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 36 additions & 0 deletions
@@ -864,6 +864,42 @@ def wrapper(self, x, position_ids):
     return wrapper
 
 
+def patched_model_bart_eager_attention_forward(
+    module: torch.nn.Module,
+    query: torch.Tensor,
+    key: torch.Tensor,
+    value: torch.Tensor,
+    attention_mask: Optional[torch.Tensor],
+    scaling: Optional[float] = None,
+    dropout: float = 0.0,
+    head_mask: Optional[torch.Tensor] = None,
+    **kwargs,
+):
+    """[patch:transformers.models.bart.modeling_bart.eager_attention_forward]"""
+    if scaling is None:
+        scaling = query.size(-1) ** -0.5
+
+    attn_weights = torch.matmul(query, key.transpose(2, 3)) * scaling
+    if attention_mask is not None:
+        # The two following lines were added.
+        if attention_mask is not None and attention_mask.ndim == 4:
+            attention_mask = attention_mask[:, :, :, : key.shape[-2]]
+        attn_weights = attn_weights + attention_mask
+
+    attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
+
+    if head_mask is not None:
+        attn_weights = attn_weights * head_mask.view(1, -1, 1, 1)
+
+    attn_weights = torch.nn.functional.dropout(
+        attn_weights, p=dropout, training=module.training
+    )
+    attn_output = torch.matmul(attn_weights, value)
+    attn_output = attn_output.transpose(1, 2).contiguous()
+
+    return attn_output, attn_weights
+
+
 class common_RotaryEmbedding(torch.nn.Module):
     @torch.no_grad()
     @patched_dynamic_rope_update
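
The patched BART eager attention slices a 4-D attention mask down to the key length before adding it to the attention scores, so a mask padded to a longer cache no longer breaks the addition. A minimal standalone sketch of that slicing step, with illustrative shapes that are not taken from the commit:

import torch

# Illustrative shapes only: the mask covers 8 positions but the keys only
# cover 5, so the mask is cut to key.shape[-2] before being added.
batch, heads, q_len, kv_len, mask_len = 2, 4, 3, 5, 8
attn_weights = torch.zeros(batch, heads, q_len, kv_len)
attention_mask = torch.zeros(batch, 1, q_len, mask_len)

attention_mask = attention_mask[:, :, :, :kv_len]  # the added truncation
attn_weights = attn_weights + attention_mask       # broadcasts to (2, 4, 3, 5)
print(attn_weights.shape)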

onnx_diagnostic/torch_models/hghub/model_inputs.py

Lines changed: 5 additions & 0 deletions
@@ -144,6 +144,11 @@ def get_untrained_model_with_inputs(
             f"[get_untrained_model_with_inputs] config._attn_implementation="
             f"{config._attn_implementation!r}"  # type: ignore[union-attr]
         )
+    elif verbose:
+        print(
+            f"[get_untrained_model_with_inputs] default config._attn_implementation="
+            f"{config._attn_implementation!r}"  # type: ignore[union-attr]
+        )
 
     if type(config) is dict and "_diffusers_version" in config:
         import diffusers
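
With the new elif branch, get_untrained_model_with_inputs also reports the default _attn_implementation when it was not explicitly overridden. A hedged usage sketch; the model id and the printed value are assumptions, not taken from the commit:

from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs

# Hypothetical call: any Hugging Face model id works; verbose=1 enables the logging.
data = get_untrained_model_with_inputs("hf-internal-testing/tiny-random-BartModel", verbose=1)
# Expected kind of log line (assumed value):
# [get_untrained_model_with_inputs] default config._attn_implementation='eager'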
