3 changes: 2 additions & 1 deletion jax/_src/cudnn/fused_attention_stablehlo.py
@@ -380,7 +380,8 @@ def check_is_flash_attention(
   # bf16/fp16 attention conditions
   # Check the head dim.
   is_on_hopper = is_cuda_compute_capability_equal("9.0")
-  H_max = 256 if is_on_hopper else 128
+  is_on_blackwell = is_cuda_compute_capability_equal("10.0")
+  H_max = 256 if (is_on_hopper or is_on_blackwell) else 128
Contributor review comment (severity: medium):

To make this check more future-proof for upcoming GPU architectures, it would be better to check for a minimum compute capability rather than listing specific architectures. Assuming all architectures from Hopper (9.0) onwards will support H_max = 256, you can simplify this logic using check_compute_capability. This also aligns with how other capability checks are performed in this file (e.g., for is_packed and is_mla).

Suggested change:
-  is_on_hopper = is_cuda_compute_capability_equal("9.0")
-  H_max = 256 if is_on_hopper else 128
-  is_on_blackwell = is_cuda_compute_capability_equal("10.0")
-  H_max = 256 if (is_on_hopper or is_on_blackwell) else 128
+  is_hopper_or_later = check_compute_capability("9.0")
+  H_max = 256 if is_hopper_or_later else 128

# check if multi-head latent attention is needed
is_mla = qH != vH
if not (qH <= H_max and qH % 8 == 0):
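
For context on the reviewer's suggestion, here is a minimal sketch of why a minimum-capability check generalizes better than per-architecture equality checks. The min_compute_capability helper below is a hypothetical stand-in for this file's check_compute_capability, not its actual implementation:

import jax

def min_compute_capability(required: str) -> bool:
  # Hypothetical stand-in: True if the local GPU's compute capability is >= `required`.
  dev = jax.local_devices(backend="gpu")[0]
  current = tuple(int(x) for x in dev.compute_capability.split("."))
  target = tuple(int(x) for x in required.split("."))
  return current >= target

# Equality checks must enumerate every architecture that supports head dim 256:
#   256 if (capability == "9.0" or capability == "10.0" or ...) else 128
# A minimum-capability check covers Hopper, Blackwell, and later architectures in one test:
H_max = 256 if min_compute_capability("9.0") else 128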