@@ -1401,6 +1401,18 @@ def patched_sdpa_attention_forward(
     is_causal = attention_mask is None and is_causal
 
     if not is_causal:
+        torch._check(query.shape[0] > 0)
+        torch._check(query.shape[1] > 0)
+        torch._check(query.shape[2] > 0)
+        torch._check(query.shape[3] > 0)
+        torch._check(key.shape[0] > 0)
+        torch._check(key.shape[1] > 0)
+        torch._check(key.shape[2] > 0)
+        torch._check(key.shape[3] > 0)
+        torch._check(value.shape[0] > 0)
+        torch._check(value.shape[1] > 0)
+        torch._check(value.shape[2] > 0)
+        torch._check(value.shape[3] > 0)
         return (
             torch.nn.functional.scaled_dot_product_attention(
                 query,
@@ -2342,25 +2354,29 @@ def forward(
                 **kwargs,
             )
         elif _is_torchdynamo_exporting():
+            if (
+                attention_interface
+                is transformers.integrations.sdpa_attention.sdpa_attention_forward
+            ):
+                attention_interface = patched_sdpa_attention_forward
 
             def _iteration(start_end, query_states, key_states, value_states):
-                a, b = start_end
+                a = start_end[0]
+                b = start_end[1]
                 q = query_states[:, :, a:b, :]
                 k = key_states[:, :, a:b, :]
                 v = value_states[:, :, a:b, :]
-                return (
-                    attention_interface(
-                        self,
-                        q,
-                        k,
-                        v,
-                        attention_mask=None,
-                        scaling=self.scaling,
-                        dropout=0.0 if not self.training else self.attention_dropout,
-                        is_causal=False,
-                        **kwargs,
-                    )[0],
-                )
+                return attention_interface(
+                    self,
+                    q,
+                    k,
+                    v,
+                    attention_mask=None,
+                    scaling=self.scaling,
+                    dropout=0.0 if not self.training else self.attention_dropout,
+                    is_causal=False,
+                    **kwargs,
+                )[0]
 
             starts = cu_seqlens[:-1]
             ends = cu_seqlens[1:]
@@ -2369,6 +2385,13 @@ def _iteration(start_end, query_states, key_states, value_states):
                 _iteration(start_end, query_states, key_states, value_states)
                 for start_end in starts_ends
             ]
+            # attn_outputs = torch._higher_order_ops.while_loop(
+            # attn_outputs = torch.ops.higher_order.while_loop(
+            #     (lambda it, starts_ends, *_args: it < starts_ends.shape[0]),
+            #     _iteration,
+            #     (torch.tensor(0),
+            #      starts_ends, query_states, key_states, value_states), tuple(),
+            # )
             attn_output = torch.cat(attn_outputs, dim=1)
         else:
             # Other implementations: Process each chunk separately
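
The `torch._check` calls in the first hunk tell `torch.export` that every dimension of `query`, `key`, and `value` is strictly positive, so the exporter can treat the attention inputs as non-empty instead of failing on guards like `shape[2] > 0` when the sequence dimensions are dynamic. A minimal, self-contained sketch of that pattern (not part of the patch; the module and dimension names are illustrative, and it assumes a recent PyTorch with `torch.export`):

```python
import torch


class TinyAttention(torch.nn.Module):
    def forward(self, query, key, value):
        # No-ops in eager mode; under torch.export these checks refine the
        # value range of each symbolic dimension to "strictly positive".
        for t in (query, key, value):
            for d in range(t.ndim):
                torch._check(t.shape[d] > 0)
        return torch.nn.functional.scaled_dot_product_attention(
            query, key, value, is_causal=False
        )


q = k = v = torch.randn(1, 2, 8, 16)
seq = torch.export.Dim("seq")  # hypothetical dynamic sequence dimension
ep = torch.export.export(
    TinyAttention(),
    (q, k, v),
    dynamic_shapes=({2: seq}, {2: seq}, {2: seq}),
)
print(ep.graph)
```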