Revert "Also always return false in fp16_mma_hardware_available when compiled for AMD and GGML_HIP_ROCWMMA_FATTN not enabled."

hjc4869 · hjc4869 · commit fea171f548a0 · 2025-02-25T23:53:31.000+08:00
This reverts commit 5516909.
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -237,12 +237,8 @@ static bool fp16_mma_available(const int cc) {
 
 // To be used for feature selection of external libraries, e.g. cuBLAS.
 static bool fp16_mma_hardware_available(const int cc) {
-#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN)
-    return false;
-#else
     return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA ||
            cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1 || cc >= GGML_CUDA_CC_RDNA3;
-#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN)
 }
 
 // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.