
Commit 9b86d3e
fix patch
1 parent 5a4c01c

File tree: 3 files changed, +83 / -6 lines changed

_doc/recipes/plot_export_dim1.py
32 additions, 4 deletions
@@ -13,6 +13,8 @@
 
 import torch
 from onnx_diagnostic import doc
+from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.torch_export_patches import torch_export_patches
 
 
 class Model(torch.nn.Module):
@@ -29,21 +31,28 @@ def forward(self, x, y, z):
 DYN = torch.export.Dim.DYNAMIC
 ds = {0: DYN, 1: DYN}
 
+print("-- export shape:", string_type((x, y, z), with_shape=True))
+print("-- dynamic shapes:", string_type((ds, ds, ds)))
+
 ep = torch.export.export(model, (x, y, z), dynamic_shapes=(ds, ds, ds))
-print(ep.graph)
+print(ep)
 
 # %%
 # Same model, a dynamic dimension = 1
 # +++++++++++++++++++++++++++++++++++
 
+
 z = z[:1]
 
 DYN = torch.export.Dim.DYNAMIC
 ds = {0: DYN, 1: DYN}
 
+print("-- export shape:", string_type((x, y, z), with_shape=True))
+print("-- dynamic shapes:", string_type((ds, ds, ds)))
+
 try:
     ep = torch.export.export(model, (x, y, z), dynamic_shapes=(ds, ds, ds))
-    print(ep.graph)
+    print(ep)
 except Exception as e:
     print("ERROR", e)
 

@@ -54,14 +63,33 @@ def forward(self, x, y, z):
 # Same model, a dynamic dimension = 1 and backed_size_oblivious=True
 # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
+print("-- export shape:", string_type((x, y, z), with_shape=True))
+print("-- dynamic shapes:", string_type((ds, ds, ds)))
+
 try:
     with torch.fx.experimental._config.patch(backed_size_oblivious=True):
         ep = torch.export.export(model, (x, y, z), dynamic_shapes=(ds, ds, ds))
-        print(ep.graph)
+        print(ep)
 except RuntimeError as e:
     print("ERROR", e)
 
+
+# %%
+# Final try with patches...
+# ++++++++++++++++++++++++
+
+print("-- export shape:", string_type((x, y, z), with_shape=True))
+print("-- dynamic shapes:", string_type((ds, ds, ds)))
+
+with torch_export_patches(patch_torch=1):
+    try:
+        ep = torch.export.export(model, (x, y, z), dynamic_shapes=(ds, ds, ds))
+        print(ep)
+    except RuntimeError as e:
+        print("ERROR", e)
+
 # %%
-# It worked.
+# It is difficult to find the right option. It is possible on a simple model
+# but sometimes impossible on a bigger model mixing different shapes.
 
 doc.plot_legend("dynamic dimension\nworking with\n0 or 1", "torch.export.export", "green")
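For readers who want to reproduce the failure mode outside this recipe, here is a minimal, hedged sketch (the toy model and shapes below are illustrative and not part of the commit): export with Dim.DYNAMIC goes through when the traced dimension is greater than 1, a dimension equal to 1 may be specialized or rejected, and backed_size_oblivious=True is one way to ask the exporter to keep it symbolic.

import torch


class TinyModel(torch.nn.Module):  # illustrative toy, not the recipe's Model
    def forward(self, x):
        return torch.sin(x) + 1


model = TinyModel()
DYN = torch.export.Dim.DYNAMIC
ds = {0: DYN, 1: DYN}

# A sample dimension greater than 1 exports cleanly.
ep = torch.export.export(model, (torch.rand((3, 4)),), dynamic_shapes=(ds,))
print(ep)

# With a sample dimension equal to 1, the exporter may specialize it to 1
# or raise; backed_size_oblivious=True asks it to keep the size symbolic.
try:
    with torch.fx.experimental._config.patch(backed_size_oblivious=True):
        ep1 = torch.export.export(model, (torch.rand((1, 4)),), dynamic_shapes=(ds,))
    print(ep1)
except RuntimeError as e:
    print("ERROR", e)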

_unittests/ut_torch_export_patches/test_patch_transformers.py
50 additions, 1 deletion
@@ -124,7 +124,7 @@ def test_causal_mask_in_scaled_dot_product_attention(self):
         self.assertEqual(attn_causal_bias.min().item(), -float("inf"))
 
     @ignore_warnings(UserWarning)
-    def test_causal_mask_in_scaled_dot_product_attention_export(self):
+    def test_sdpa_attention_forward_export_is_causal(self):
         sdpa_attention_forward = sdpa_attention.sdpa_attention_forward
         patched_sdpa_attention_forward = patch_transformers.patched_sdpa_attention_forward
         kwargs = {
@@ -172,6 +172,55 @@ def forward(self, query, key, value):
         got = epd.module()(query, key, value)
         self.assertEqualArray(expected, got)
 
+    @ignore_warnings(UserWarning)
+    def test_sdpa_attention_forward_export_is_causal_none(self):
+        sdpa_attention_forward = sdpa_attention.sdpa_attention_forward
+        patched_sdpa_attention_forward = patch_transformers.patched_sdpa_attention_forward
+        kwargs = {
+            "module": None,
+            "query": torch.rand((1, 2, 1, 96), dtype=torch.float32),
+            "key": torch.rand((1, 2, 4, 96), dtype=torch.float32),
+            "value": torch.rand((1, 2, 4, 96), dtype=torch.float32),
+            "attention_mask": None,
+            "attention_dropout": 0,
+            "scaling": 0.10206207261596575,
+            "is_causal": None,
+        }
+        expected = sdpa_attention_forward(**torch_deepcopy(kwargs))[0]
+        got = patched_sdpa_attention_forward(**torch_deepcopy(kwargs))[0]
+        self.assertEqualArray(expected, got)
+
+        class Model(torch.nn.Module):
+            def forward(self, query, key, value):
+                kwargs = {
+                    "module": None,
+                    "query": query,
+                    "key": key,
+                    "value": value,
+                    "attention_mask": None,
+                    "attention_dropout": 0,
+                    "scaling": 0.10206207261596575,
+                    "is_causal": None,
+                }
+                return patched_sdpa_attention_forward(**kwargs)[0]
+
+        query, key, value = kwargs["query"], kwargs["key"], kwargs["value"]
+        model = Model()
+        got = model(query, key, value)
+        self.assertEqualArray(expected, got)
+
+        # static export
+        ep = torch.export.export(model, (query, key, value))
+        got = ep.module()(query, key, value)
+        self.assertEqualArray(expected, got)
+
+        # dynamic
+        ds = ({0: "batch", 2: "seq1"}, {0: "batch", 2: "seq2"}, {0: "batch", 2: "seq2"})
+        fake_inputs, _ = make_fake_with_dynamic_dimensions((query, key, value), ds)
+        epd = torch.export.export(model, fake_inputs, dynamic_shapes=use_dyn_not_str(ds))
+        got = epd.module()(query, key, value)
+        self.assertEqualArray(expected, got)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
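The new test checks that patched_sdpa_attention_forward matches the stock implementation when is_causal is None, and that a module wrapping it still exports both statically and with dynamic shapes. Below is a rough standalone sketch of that export pattern using only public torch APIs rather than the repository's helpers; the wrapper name and the non-degenerate shapes are illustrative assumptions, not code from this commit.

import torch


class SDPAWrapper(torch.nn.Module):  # illustrative stand-in for the test's Model
    def forward(self, query, key, value):
        return torch.nn.functional.scaled_dot_product_attention(
            query, key, value, scale=0.10206207261596575
        )


# Sizes greater than 1 keep the dynamic export straightforward.
query = torch.rand((2, 2, 3, 96), dtype=torch.float32)
key = torch.rand((2, 2, 4, 96), dtype=torch.float32)
value = torch.rand((2, 2, 4, 96), dtype=torch.float32)

model = SDPAWrapper()
expected = model(query, key, value)

# static export
ep = torch.export.export(model, (query, key, value))
torch.testing.assert_close(expected, ep.module()(query, key, value))

# dynamic export: batch and both sequence lengths stay symbolic
DYN = torch.export.Dim.DYNAMIC
ds = ({0: DYN, 2: DYN}, {0: DYN, 2: DYN}, {0: DYN, 2: DYN})
epd = torch.export.export(model, (query, key, value), dynamic_shapes=ds)
torch.testing.assert_close(expected, epd.module()(query, key, value))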

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
1 addition, 1 deletion
@@ -1394,7 +1394,7 @@ def patched_sdpa_attention_forward(
             f"value: {value.shape}"
         ),
     )
-    if not is_causal:
+    if not is_causal or not patch_is_causal:
         return (
             torch.nn.functional.scaled_dot_product_attention(
                 query,
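The one-line change widens the early-exit condition: the patched attention now falls back to the plain scaled_dot_product_attention call not only when is_causal is falsy but also when causal patching is disabled via patch_is_causal. A hedged sketch of that control flow follows; the function below is illustrative and is not the body of patched_sdpa_attention_forward.

import torch


def sdpa_guard_sketch(query, key, value, attention_mask, is_causal, patch_is_causal):
    # Fall back to the stock kernel when the call is not causal, or when the
    # causal-handling patch is turned off.
    if not is_causal or not patch_is_causal:
        # SDPA rejects an explicit mask combined with is_causal=True, so only
        # forward is_causal when no mask is given (an assumption of this sketch).
        return torch.nn.functional.scaled_dot_product_attention(
            query,
            key,
            value,
            attn_mask=attention_mask,
            is_causal=bool(is_causal) if attention_mask is None else False,
        )
    # Otherwise take the patched, export-friendly causal path (not shown here).
    raise NotImplementedError("patched causal path omitted in this sketch")


# Minimal usage of the sketch: is_causal=None routes through the fallback branch.
q = torch.rand((2, 2, 3, 8))
k = v = torch.rand((2, 2, 4, 8))
out = sdpa_guard_sketch(q, k, v, attention_mask=None, is_causal=None, patch_is_causal=True)
print(out.shape)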
