Skip to content

Commit 5516909

Browse files
committed
Also always return false in fp16_mma_hardware_available when compiled for AMD and GGML_HIP_ROCWMMA_FATTN not enabled.
1 parent 5d4ab04 commit 5516909

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,12 @@ static bool fp16_mma_available(const int cc) {
237237

238238
// To be used for feature selection of external libraries, e.g. cuBLAS.
239239
// Whether the hardware has FP16 MMA (tensor-core-style) units, regardless of
// whether this build can actually target them via a library. To be used for
// feature selection of external libraries, e.g. cuBLAS.
static bool fp16_mma_hardware_available(const int cc) {
#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN)
    // Compiled for AMD without rocWMMA support: report no FP16 MMA, since
    // nothing in this build can make use of it.
    return false;
#else
    // Parentheses make the && / || grouping explicit (&& binds tighter, so
    // behavior is unchanged; this also silences -Wparentheses).
    // First term: NVIDIA (cc below the AMD offset) at Volta or newer.
    // Remaining terms: AMD cc ranges — CDNA up to (but excluding) RDNA1,
    // or RDNA3 and newer.
    return (cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA) ||
           (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1) ||
           cc >= GGML_CUDA_CC_RDNA3;
#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN)
}
243247

244248
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.

0 commit comments

Comments
 (0)