Skip to content

Commit f334b79

Browse files
HIP: fix RDNA3 FP16/BF16 matrix multiplication (ggml-org#17817)
1 parent a28e3c7 commit f334b79

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

ggml/src/ggml-cuda/mmf.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -160,9 +160,9 @@ bool ggml_cuda_should_use_mmf(enum ggml_type type, int cc, int warp_size, const
160 160      case GGML_TYPE_F32:
161 161          return ampere_mma_available(cc);
162 162      case GGML_TYPE_F16:
163     -        return volta_mma_available(cc) || turing_mma_available(cc) || amd_wmma_available(cc);
    163 +        return volta_mma_available(cc) || turing_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc));
164 164      case GGML_TYPE_BF16:
165     -        return ampere_mma_available(cc) || amd_wmma_available(cc);
    165 +        return ampere_mma_available(cc) || (amd_wmma_available(cc) && GGML_CUDA_CC_IS_RDNA4(cc));
166 166      default:
167 167          return false;
168 168  }

0 commit comments

Comments
 (0)