
Commit aade718

[release/2.5] fix and skip test_causal_variants on Navi4x (#2304)
This PR fixes test_transformers.py and skips one test:

- Add the missing lines in test_transformers.py to fix `NameError: name '_cur_sdpa_kernel_backends' is not defined`.
- Skip `test_transformers.py::TestAttnBias::test_causal_variants_causal_variant_CausalVariant_UPPER_LEFT_shape3_cuda` on Navi4x (tested on gfx1200 and gfx1201). The test fails only for shape (1, 1, 23, 56, 15), only on Navi4x, and only in release/2.5. The test uses `hipBlas`, and switching to `hipBlasLT` doesn't help. release/2.6 has significant changes in the `SDPBackend.MATH` backend, and the test passes there.

Fixes SWDEV-522844
1 parent 71fc73f commit aade718

File tree: 2 files changed, +30 -9 lines


test/test_transformers.py

Lines changed: 4 additions & 0 deletions
@@ -3779,6 +3779,10 @@ def test_causal_variants(self, device, causal_variant: CausalVariant, shape: Lis
         if TEST_WITH_ROCM and causal_variant == CausalVariant.LOWER_RIGHT:
             self.skipTest("No support for LOWER_RIGHT variant for now")
             return
+        if (TEST_WITH_ROCM
+            and "gfx12" in torch.cuda.get_device_properties(0).gcnArchName.split(":")[0]
+            and self._testMethodName == "test_causal_variants_causal_variant_CausalVariant_UPPER_LEFT_shape3_cuda"):
+            self.skipTest(f"Failed on Navi4x in release/2.5 for shape {shape}")
 
         bsz, num_heads, seq_len_q, seq_len_kv, head_dim = shape
         make_q_tensor = partial(make_tensor, SdpaShape(bsz, num_heads, seq_len_q, head_dim))
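
For context on the new skip, Navi4x is detected from the ROCm architecture string exposed via `torch.cuda.get_device_properties(0).gcnArchName`. A minimal standalone probe of that condition might look like the sketch below (illustrative only; `gcnArchName` is present on ROCm builds, and the split on ":" assumes the arch string may carry feature suffixes after the arch name):

```python
import torch

# Illustrative probe of the skip condition from the diff above: Navi4x parts
# (gfx1200 / gfx1201) have an architecture name starting with "gfx12"; the
# split on ":" keeps only the arch name, dropping any feature suffixes.
if torch.cuda.is_available() and torch.version.hip is not None:
    arch = torch.cuda.get_device_properties(0).gcnArchName.split(":")[0]
    print(f"arch prefix: {arch}")
    print("test would be skipped:", "gfx12" in arch)
else:
    print("not a ROCm device; the Navi4x skip does not apply")
```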

torch/nn/attention/__init__.py

Lines changed: 26 additions & 9 deletions
@@ -1,21 +1,14 @@
 # mypy: allow-untyped-defs
 """ This module contains functions and classes that alter the behavior of torch.nn.functional.scaled_dot_product_attention """
 import contextlib
-from typing import List, Union
+from typing import Iterable, List, Union
 from warnings import warn
 
+import torch.backends.cuda
 from torch._C import _SDPBackend as SDPBackend
 from torch.backends.cuda import (
     can_use_efficient_attention,
     can_use_flash_attention,
-    cudnn_sdp_enabled,
-    enable_cudnn_sdp,
-    enable_flash_sdp,
-    enable_math_sdp,
-    enable_mem_efficient_sdp,
-    flash_sdp_enabled,
-    math_sdp_enabled,
-    mem_efficient_sdp_enabled,
     SDPAParams,
 )
 

@@ -66,6 +59,30 @@ def _raise_kernel_warnings(params: SDPAParams) -> None:
             warn("Flash attention can't be used because:")
             can_use_flash_attention(params, True)
 
+_backend_names = {
+    "cudnn": "CUDNN_ATTENTION",
+    "flash": "FLASH_ATTENTION",
+    "mem_efficient": "EFFICIENT_ATTENTION",
+    "math": "MATH",
+}
+
+
+def _backend_from_string(name: str):
+    return getattr(SDPBackend, name)
+
+
+def _cur_sdpa_kernel_backends():
+    backends: List[SDPBackend] = []
+    for name, val in _backend_names.items():
+        if getattr(torch.backends.cuda, f"{name}_sdp_enabled")():
+            backends.append(getattr(SDPBackend, val))
+    return backends
+
+
+def _sdpa_kernel(backends: Iterable[SDPBackend]):
+    for name, val in _backend_names.items():
+        enabled = getattr(SDPBackend, val) in backends
+        getattr(torch.backends.cuda, f"enable_{name}_sdp")(enabled)
 
 @contextlib.contextmanager
 def sdpa_kernel(
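
The `NameError` mentioned in the commit message came from the `sdpa_kernel` context manager referencing `_cur_sdpa_kernel_backends`, which was missing on this branch. As a rough sketch (not the exact body of `sdpa_kernel` in this file, and assumed to live in the same module so that `SDPBackend` and the two helpers are in scope), a context manager built on these helpers saves the currently enabled backends, switches to the requested ones, and restores the old set on exit:

```python
import contextlib
from typing import List, Union


@contextlib.contextmanager
def _sdpa_kernel_sketch(backends: Union[List[SDPBackend], SDPBackend]):
    # Hypothetical helper name; mirrors how sdpa_kernel is expected to use
    # _cur_sdpa_kernel_backends() and _sdpa_kernel() from the diff above.
    if isinstance(backends, SDPBackend):
        backends = [backends]
    previous = _cur_sdpa_kernel_backends()  # remember what is enabled now
    try:
        _sdpa_kernel(backends)  # enable only the requested backends
        yield
    finally:
        _sdpa_kernel(previous)  # restore the previous backend set
```

Callers then use the public API as usual, e.g. `with sdpa_kernel(SDPBackend.MATH): ...`, and the previously enabled backends are restored when the block exits.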
