Skip to content

Commit 38a7521

Browse files
bottler authored and meta-codesync[bot] committed
Fixes to FMHA tests on amd (#238)
Summary: Pull Request resolved: #238 Further fixes to tests Reviewed By: cthi Differential Revision: D97479860 fbshipit-source-id: 3ff44140478550a79346c9f5be2b7d8a0f003920
1 parent 74820e9 commit 38a7521

File tree

3 files changed

+10
-1
lines changed

3 files changed

+10
-1
lines changed

test/attention/fmha/test_fmha_merge_attentions.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
from .utils import (
2424
assert_allclose,
25+
cuda_only,
2526
disable_on_rocm,
2627
sm80_or_better_only,
2728
UNSUPPORTED_OP_PASSES,
@@ -479,6 +480,7 @@ def test_merge_attentions_sharedinput(
479480
)
480481

481482

483+
@cuda_only
482484
@sm80_or_better_only
483485
@pytest.mark.parametrize("bmghk", (False, True))
484486
def test_merge_attentions_against_ref(bmghk: bool):
@@ -685,6 +687,7 @@ def test_merge_training_zilch():
685687

686688

687689
@sm80_or_better_only
690+
@cuda_only
688691
def test_merge_training_undilate():
689692
torch.manual_seed(1)
690693

test/attention/fmha/test_fmha_split_blocks_fairinternal.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def test_split_blocks_for_decoding():
4646
assert (chunked_bias.k_seqinfo.seqstart >= attn_bias.k_seqinfo.seqstart).all()
4747

4848

49+
@cuda_only
4950
def test_split_blocks_for_decoding_with_paged():
5051
torch.manual_seed(0)
5152
max_len_kv = 2048

test/attention/fmha/test_mem_eff_attention.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def test_dropout_ck(q_len, kv_len, batch_size, k_len, p, seed, attn_bias):
313313
def test_dropout_backward_ck(q_len, kv_len, batch_size, k, p):
314314
op = fmha.ck.FwOp
315315
dtype = torch.float16
316-
if not op.is_available():
316+
if not fmha.ck.BwOp.is_available():
317317
if UNSUPPORTED_OP_PASSES:
318318
return
319319
pytest.skip()
@@ -614,6 +614,7 @@ def test_unsupported_stride_alignment(op: Type[fmha.AttentionFwOpBase]):
614614

615615

616616
@sm75_or_better_only
617+
@cuda_only
617618
def test_unsupported_dropout_combine_flash_cutlass() -> None:
618619
q = torch.empty(
619620
[1, 4, 1, 16], device="cuda", dtype=torch.float16, requires_grad=True
@@ -1893,6 +1894,10 @@ def test_memeff_compile(bias_t, create_bias_inside_compiled: bool, op) -> None:
18931894
if UNSUPPORTED_OP_PASSES:
18941895
return
18951896
pytest.skip("Op is not available")
1897+
if (not not torch.version.hip) and not fmha.ck.BwOp.is_available():
1898+
if UNSUPPORTED_OP_PASSES:
1899+
return
1900+
pytest.skip("Op is not available")
18961901
torch._dynamo.reset_code_caches() # avoids hitting recompilation limit
18971902
B, M, H, K = 1, 256, 2, 64
18981903
q, k, v, bias = create_tensors(

0 commit comments

Comments (0)