Skip to content

Commit 6a414d9

Browse files
committed
CUDA/HIP: optimize mmv paths taken for HIP/CDNA
1 parent bf2a99e commit 6a414d9

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,11 @@ static bool fp16_mma_hardware_available(const int cc) {
263263
}
264264

265265
// Decides, from the compute capability `cc` alone, whether BF16 MMA is considered available.
// The statement after the "266-" marker is the one removed by this diff (NVIDIA at Ampere or newer only);
// the statement after "266+" is its replacement, which additionally accepts any CDNA device and
// any cc >= GGML_CUDA_CC_RDNA3.
// The NVIDIA/Ampere pair is parenthesized explicitly: && already binds tighter than ||, so the
// result is unchanged, but the grouping is now visible and -Wparentheses stays quiet.
// NOTE(review): the RDNA3 comparison carries no vendor check; presumably it relies on how cc
// values are laid out numerically -- confirm against the GGML_CUDA_CC_* definitions.
static bool bf16_mma_hardware_available(const int cc) {
266-
return GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE;
266+
return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) || GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3;
267+
}
268+
269+
// Returns whatever GGML_CUDA_CC_IS_CDNA(cc) evaluates to, i.e. true only for compute capabilities
// that macro classifies as CDNA. By its name this is meant to signal FP32 MMA support; the macro's
// definition is not visible here, so the exact set of accepted cc values should be checked there.
static bool fp32_mma_hardware_available(const int cc) {
270+
return GGML_CUDA_CC_IS_CDNA(cc);
267271
}
268272

269273
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.

ggml/src/ggml-cuda/mmv.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,8 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
456456
return ne11 <= 4;
457457
}
458458
return ne11 <= 3;
459+
} else if (fp32_mma_hardware_available(cc)) {
460+
return ne11 <= 3;
459461
}
460462
return ne11 <= 8;
461463
case GGML_TYPE_F16:
@@ -468,6 +470,8 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
468470
return src0_small && ne11 <= 3;
469471
}
470472
return ne11 <= 8;
473+
} else if (fp16_mma_hardware_available(cc)) {
474+
return ne11 <= 2;
471475
}
472476
return ne11 <= 8;
473477
case GGML_TYPE_BF16:
@@ -480,6 +484,8 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
480484
return src0_small && ne11 <= 3;
481485
}
482486
return ne11 <= 8;
487+
} else if (bf16_mma_hardware_available(cc)) {
488+
return ne11 <= 3;
483489
}
484490
return ne11 <= 8;
485491
default:

0 commit comments

Comments
 (0)