Commit 3fbd1cf

switched to _all_to_all_single helper in _all_to_all_dim_exchange due to contiguity issues
1 parent 5618a7d commit 3fbd1cf
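
The switch is from calling torch.distributed._functional_collectives.all_to_all_single (plus an explicit _wait_tensor) directly to going through the repo's _all_to_all_single helper. As a rough mental model of why such a helper sidesteps the contiguity issues mentioned above, here is a minimal sketch; the signature and body are assumptions for illustration, not the actual diffusers implementation:

# Hypothetical sketch only: the real _all_to_all_single in diffusers may differ.
import torch
import torch.distributed._functional_collectives as funcol

def _all_to_all_single(x: torch.Tensor, output_split_sizes=None, input_split_sizes=None, group=None):
    # Functional collectives are strict about memory layout, so normalize it here
    # instead of relying on every call site to pass a contiguous tensor.
    x = x.contiguous()
    out = funcol.all_to_all_single(x, output_split_sizes, input_split_sizes, group=group)
    # Resolve the async collective before returning, which would explain why the
    # explicit _wait_tensor(...) calls are commented out at the call sites.
    return funcol.wait_tensor(out)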

File tree

2 files changed (+5, -7 lines)


src/diffusers/models/attention_dispatch.py

Lines changed: 4 additions & 4 deletions
@@ -1036,8 +1036,8 @@ def _all_to_all_dim_exchange(x: torch.Tensor, scatter_idx: int = 2, gather_idx:

    if group_world_size >1:
        #maybe here need to use the _all_to_all_single helper to avoid contiguity issues
-        out = funcol.all_to_all_single(x_temp, None, None, group=group)
-        out = _wait_tensor(out)
+        out = _all_to_all_single(x_temp, None, None, group=group)
+        #out = _wait_tensor(out)
    else:
        out = x_temp
    # group_world_size, S_LOCAL, B, H_LOCAL, D -> B, S, H_LOCAL, D

@@ -1055,8 +1055,8 @@ def _all_to_all_dim_exchange(x: torch.Tensor, scatter_idx: int = 2, gather_idx:

    if group_world_size >1:
        #maybe here need to use the _all_to_all_single helper to avoid contiguity issues
-        output = funcol.all_to_all_single(x_temp, None, None, group)
-        output = _wait_tensor(output)
+        output = _all_to_all_single(x_temp, None, None, group)
+        #output = _wait_tensor(output)
    else:
        output = x_temp
    output = output.reshape(H, S_LOCAL, B, D).transpose(0, 2).contiguous()
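
The surrounding reshape/transpose bookkeeping is the usual head-to-sequence all-to-all exchange used for sequence-parallel (Ulysses-style) attention: each rank trades its local sequence shard of all heads for the full sequence of its local head shard, as the group_world_size, S_LOCAL, B, H_LOCAL, D -> B, S, H_LOCAL, D comment indicates. A self-contained sketch of that pattern follows; the helper name, shapes, and layout choices are illustrative assumptions rather than the actual _all_to_all_dim_exchange code:

# Illustrative sketch (assumed shapes and helper name), not the diffusers code.
import torch
import torch.distributed as dist
import torch.distributed._functional_collectives as funcol

def gather_seq_scatter_heads(x: torch.Tensor, group: dist.ProcessGroup) -> torch.Tensor:
    # (B, S_LOCAL, H, D) -> (B, S, H_LOCAL, D) with a single all-to-all.
    world_size = dist.get_world_size(group)
    B, S_LOCAL, H, D = x.shape
    H_LOCAL = H // world_size
    # Split heads into per-rank blocks and move the block dim to the front so the
    # collective exchanges whole (S_LOCAL, B, H_LOCAL, D) chunks along dim 0.
    x_temp = x.reshape(B, S_LOCAL, world_size, H_LOCAL, D).permute(2, 1, 0, 3, 4).contiguous()
    out = funcol.wait_tensor(funcol.all_to_all_single(x_temp, None, None, group=group))
    # group_world_size, S_LOCAL, B, H_LOCAL, D -> B, S(=world_size * S_LOCAL), H_LOCAL, D
    return out.permute(2, 0, 1, 3, 4).reshape(B, world_size * S_LOCAL, H_LOCAL, D)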

tests/others/test_unified_sp_attention.py

Lines changed: 1 addition & 3 deletions
@@ -102,10 +102,8 @@ def dummy_backward_op(ctx, grad_out, *args, **kwargs):
        grad_v,
    )

-    attn = TemplatedUnifiedAttention()

-    out = attn(
-        None,
+    out = TemplatedUnifiedAttention(
        q, k, v, None,
        dropout_p=0.0,
        is_causal=False,
