File tree Expand file tree Collapse file tree 1 file changed +2
-6
lines changed Expand file tree Collapse file tree 1 file changed +2
-6
lines changed Original file line number Diff line number Diff line change @@ -231,7 +231,7 @@ typedef float2 dfloat2;
231231#define FP16_MMA_AVAILABLE
232232#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4)))
233233
234- #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3) && !defined(GGML_HIP_NO_MMQ_MFMA)
234+ #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
235235#define AMD_MFMA_AVAILABLE
236236#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && defined(CDNA3)
237237
@@ -299,11 +299,7 @@ static bool fp32_mma_hardware_available(const int cc) {
299299
300300// AMD CDNA3 matrix cores. Support for other CDNA generations will be added later.
301301static bool amd_mfma_available (const int cc) {
302- #if !defined(GGML_HIP_NO_MMQ_MFMA)
303- return GGML_CUDA_CC_IS_CDNA3 (cc);
304- #else
305- return false ;
306- #endif // !defined(GGML_HIP_NO_MMQ_MFMA)
302+ return cc >= GGML_CUDA_CC_OFFSET_AMD && GGML_CUDA_CC_IS_CDNA3 (cc);
307303}
308304
309305// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
You can’t perform that action at this time.
0 commit comments