
Commit da74834

code formatting fix

1 parent 5579cf1 · commit da74834

File tree: 2 files changed, +5 −7 lines changed

src/diffusers/models/attention_dispatch.py

Lines changed: 3 additions & 7 deletions

@@ -1096,14 +1096,10 @@ def _all_to_all_dim_exchange(x: torch.Tensor, scatter_idx: int = 2, gather_idx:
     num_heads = num_heads_local * group_world_size
     seq_len_local = seq_len // group_world_size
 
-    # B, S, H_LOCAL, D -> group_world_size, H_LOCAL, S_LOCAL, B, D
-    x_temp = (
-        x.reshape(batch_size, group_world_size, seq_len_local, num_heads_local, head_dim)
-        .permute(1, 3, 2, 0, 4)
-        .reshape(group_world_size, num_heads_local, seq_len_local, batch_size, head_dim)
-    )
+    #B, S, H_LOCAL, D -> group_world_size, H_LOCAL, S_LOCAL, B, D
+    x_temp = x.reshape(batch_size, group_world_size, seq_len_local, num_heads_local, head_dim).permute(1, 3, 2, 0, 4).reshape(group_world_size, num_heads_local, seq_len_local, batch_size, head_dim)
 
-    if group_world_size > 1:
+    if group_world_size >1:
         output = _all_to_all_single(x_temp, group)
     else:
         output = x_temp
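
For reference, a minimal standalone sketch (not part of this commit) checking that the single-line reshape/permute/reshape added above produces the same (group_world_size, H_LOCAL, S_LOCAL, B, D) layout as the multi-line version it replaces. The tensor sizes below are illustrative assumptions, not values taken from the library.

import torch

# Hypothetical sizes, chosen only for illustration.
batch_size, seq_len, num_heads_local, head_dim = 2, 8, 4, 16
group_world_size = 2
seq_len_local = seq_len // group_world_size

x = torch.randn(batch_size, seq_len, num_heads_local, head_dim)

# Multi-line form removed by this commit.
a = (
    x.reshape(batch_size, group_world_size, seq_len_local, num_heads_local, head_dim)
    .permute(1, 3, 2, 0, 4)
    .reshape(group_world_size, num_heads_local, seq_len_local, batch_size, head_dim)
)

# Single-line form added by this commit.
b = x.reshape(batch_size, group_world_size, seq_len_local, num_heads_local, head_dim).permute(1, 3, 2, 0, 4).reshape(group_world_size, num_heads_local, seq_len_local, batch_size, head_dim)

# Both yield the layout expected by the all-to-all exchange, so the change is cosmetic.
assert torch.equal(a, b)
print(a.shape)  # torch.Size([2, 4, 4, 2, 16])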

tests/others/test_unified_sp_attention.py

Lines changed: 2 additions & 0 deletions

@@ -21,6 +21,8 @@ def run(rank, world_size):
 
     q.requires_grad_(True)
 
+
+
     pc = ParallelConfig(
         context_parallel_config=ContextParallelConfig(
             ring_degree=2,
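
As a side note, the hunk context shows a per-rank entry point run(rank, world_size) that builds a ParallelConfig with ContextParallelConfig(ring_degree=2, ...). Below is a hedged sketch, an assumption rather than code from the test file, of how such an entry point is commonly launched with torch.multiprocessing; the run body here is only a stand-in for the real test.

import torch.multiprocessing as mp

def run(rank, world_size):
    # Stand-in for the test's run(rank, world_size); the real body builds q,
    # ParallelConfig, and ContextParallelConfig(ring_degree=2, ...) as in the diff.
    print(f"rank {rank} of {world_size}")

if __name__ == "__main__":
    world_size = 2  # would line up with ring_degree=2 from the test's config
    # mp.spawn calls run(rank, world_size) once per process, rank in [0, world_size).
    mp.spawn(run, args=(world_size,), nprocs=world_size, join=True)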

0 commit comments