Merged
32 changes (17 additions & 15 deletions) in src/diffusers/models/attention_dispatch.py
@@ -475,23 +475,25 @@ def _flex_attention_causal_mask_mod(batch_idx, head_idx, q_idx, kv_idx):
# Registrations are required for fullgraph tracing compatibility


# TODO: library.custom_op and register_fake probably need version guards?


# Version guard for PyTorch compatibility - custom_op was added in PyTorch 2.4
# TODO: this is only required because the FA3 beta release does not have it. There is a PR adding
# this but it was never merged: https://github.com/Dao-AILab/flash-attention/pull/1590
@torch.library.custom_op("flash_attn_3::_flash_attn_forward", mutates_args=(), device_types="cuda")
def _wrapped_flash_attn_3_original(
    query: torch.Tensor, key: torch.Tensor, value: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    out, lse = flash_attn_3_func(query, key, value)
    lse = lse.permute(0, 2, 1)
    return out, lse


@torch.library.register_fake("flash_attn_3::_flash_attn_forward")
def _(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    batch_size, seq_len, num_heads, head_dim = query.shape
    lse_shape = (batch_size, seq_len, num_heads)
    return torch.empty_like(query), query.new_empty(lse_shape)
if hasattr(torch.library, 'custom_op') and hasattr(torch.library, 'register_fake'):
    @torch.library.custom_op("flash_attn_3::_flash_attn_forward", mutates_args=(), device_types="cuda")
    def _wrapped_flash_attn_3_original(
        query: torch.Tensor, key: torch.Tensor, value: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        out, lse = flash_attn_3_func(query, key, value)
        lse = lse.permute(0, 2, 1)
        return out, lse

    @torch.library.register_fake("flash_attn_3::_flash_attn_forward")
    def _(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        batch_size, seq_len, num_heads, head_dim = query.shape
        lse_shape = (batch_size, seq_len, num_heads)
        return torch.empty_like(query), query.new_empty(lse_shape)


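Editor's aside, a hedged sketch of what the guarded registration above buys (not part of this diff): the register_fake kernel only describes output shapes and dtypes, which is what lets torch.compile(fullgraph=True) trace through a custom op without executing the real CUDA kernel. The sketch uses a made-up toy op, mylib::scaled_copy, instead of the FA3 op; it assumes PyTorch >= 2.4 (where torch.library.custom_op and register_fake exist) and omits device_types so it can run on CPU.

import torch

if hasattr(torch.library, "custom_op") and hasattr(torch.library, "register_fake"):

    @torch.library.custom_op("mylib::scaled_copy", mutates_args=())
    def scaled_copy(x: torch.Tensor, scale: float) -> torch.Tensor:
        # Real (eager) kernel: executed with concrete tensors.
        return (x * scale).contiguous()

    @torch.library.register_fake("mylib::scaled_copy")
    def _(x: torch.Tensor, scale: float) -> torch.Tensor:
        # Fake (meta) kernel: reports only the output shape/dtype so the op can be
        # traced under torch.compile without running the real implementation.
        return torch.empty_like(x)

    @torch.compile(fullgraph=True)
    def double(x: torch.Tensor) -> torch.Tensor:
        return torch.ops.mylib.scaled_copy(x, 2.0)

    print(double(torch.ones(2, 3)))  # tensor of 2.0s with shape (2, 3)
# On an older PyTorch the whole branch is skipped, which is what the hasattr guard in the
# diff achieves: no registration, but also no AttributeError at import time.

Guarding on hasattr rather than parsing torch.__version__ keys the check on the actual API surface, which is the design choice this change takes.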
# ===== Attention backends =====