Skip to content

Commit 1316a4d

Browse files
committed
Revert "CUDA/HIP: optimize mmv paths taken for HIP devices (ggml-org#14324)"
This reverts commit 0142961.
1 parent b884a7f commit 1316a4d

File tree

2 files changed

+1
-23
lines changed

2 files changed

+1
-23
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -267,11 +267,7 @@ static bool fp16_mma_hardware_available(const int cc) {
267267
}
268268

269269
static bool bf16_mma_hardware_available(const int cc) {
270-
return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) || GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3;
271-
}
272-
273-
static bool fp32_mma_hardware_available(const int cc) {
274-
return GGML_CUDA_CC_IS_CDNA(cc);
270+
return GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE;
275271
}
276272

277273
// Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.

ggml/src/ggml-cuda/mmv.cu

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -456,11 +456,6 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
456456
return ne11 <= 4;
457457
}
458458
return ne11 <= 3;
459-
} else if (GGML_CUDA_CC_IS_AMD(cc)) {
460-
if (fp32_mma_hardware_available(cc)) {
461-
return ne11 <= 3;
462-
}
463-
return ne11 <= 8;
464459
}
465460
return ne11 <= 8;
466461
case GGML_TYPE_F16:
@@ -473,14 +468,6 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
473468
return src0_small && ne11 <= 3;
474469
}
475470
return ne11 <= 8;
476-
} else if (GGML_CUDA_CC_IS_AMD(cc)) {
477-
if (fp16_mma_hardware_available(cc)) {
478-
if (GGML_CUDA_CC_IS_RDNA3(cc) || GGML_CUDA_CC_IS_RDNA4(cc)) {
479-
return ne11 <= 5;
480-
}
481-
return ne11 <= 2;
482-
}
483-
return ne11 <= 8;
484471
}
485472
return ne11 <= 8;
486473
case GGML_TYPE_BF16:
@@ -493,11 +480,6 @@ bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_
493480
return src0_small && ne11 <= 3;
494481
}
495482
return ne11 <= 8;
496-
} else if (GGML_CUDA_CC_IS_AMD(cc)) {
497-
if (bf16_mma_hardware_available(cc)) {
498-
return ne11 <= 3;
499-
}
500-
return ne11 <= 8;
501483
}
502484
return ne11 <= 8;
503485
default:

0 commit comments

Comments
 (0)