[ROCm] testing: enable MEFF/FA unittests for gfx1100 (#1951)

xinyazhang · web-flow · commit 5a980aa46e25 · 2025-03-19T11:51:26.000-05:00
Include gfx1100, and optionally enable gfx1201/gfx950 according to env
var TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL

Fixes #SWDEV-515529
diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py
@@ -33,32 +33,30 @@
 
 IS_JETSON = LazyVal(lambda: torch.cuda.is_available() and torch.cuda.get_device_capability() in [(7, 2), (8, 7)])
 
-def CDNA2OrLater():
-    if TEST_WITH_ROCM:
-        gcn_arch_name = torch.cuda.get_device_properties('cuda').gcnArchName
-        return any(arch in gcn_arch_name for arch in {"gfx90a", "gfx940", "gfx941", "gfx942"})
-    return False
-
-def evaluate_gfx_arch_exact(matching_arch):
+def evaluate_gfx_arch_within(arch_list):
     if not torch.cuda.is_available():
         return False
     gcn_arch_name = torch.cuda.get_device_properties('cuda').gcnArchName
-    arch = os.environ.get('PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE', gcn_arch_name)
-    return arch == matching_arch
+    effective_arch = os.environ.get('PYTORCH_DEBUG_FLASH_ATTENTION_GCN_ARCH_OVERRIDE', gcn_arch_name)
+    # gcnArchName can be complicated strings like gfx90a:sramecc+:xnack-
+    # Hence the matching should be done reversely
+    return any(arch in effective_arch for arch in arch_list)
 
-GFX90A_Exact = LazyVal(lambda: evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-'))
-GFX942_Exact = LazyVal(lambda: evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-'))
+def CDNA2OrLater():
+    return evaluate_gfx_arch_within(["gfx90a", "gfx942"])
 
 def evaluate_platform_supports_flash_attention():
     if TEST_WITH_ROCM:
-        return evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-') or evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-')
+        arch_list = ["gfx90a", "gfx942", "gfx1100"]
+        return evaluate_gfx_arch_within(arch_list)
     if TEST_CUDA:
         return not IS_WINDOWS and SM80OrLater
     return False
 
 def evaluate_platform_supports_efficient_attention():
     if TEST_WITH_ROCM:
-        return evaluate_gfx_arch_exact('gfx90a:sramecc+:xnack-') or evaluate_gfx_arch_exact('gfx942:sramecc+:xnack-')
+        arch_list = ["gfx90a", "gfx942", "gfx1100"]
+        return evaluate_gfx_arch_within(arch_list)
     if TEST_CUDA:
         return True
     return False