Commit d7e36b7

add xy_hardware_available
1 parent: d55e584

File tree

3 files changed: 16 additions, 6 deletions
  ggml/src/ggml-cuda/common.cuh
  ggml/src/ggml-cuda/ggml-cuda.cu
  ggml/src/ggml-cuda/mmq.cu


ggml/src/ggml-cuda/common.cuh

Lines changed: 11 additions & 1 deletion
@@ -226,11 +226,21 @@ static bool fast_fp16_available(const int cc) {
     return fp16_available(cc) && cc != 610;
 }
 
-// Any FP16 tensor cores are available.
+// To be used for feature selection of external libraries, e.g. cuBLAS.
+static bool fast_fp16_hardware_available(const int cc) {
+    return cc >= GGML_CUDA_CC_PASCAL && cc != 610;
+}
+
+// Any FP16 tensor core instructions are available for ggml code.
 static bool fp16_mma_available(const int cc) {
     return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA;
 }
 
+// To be used for feature selection of external libraries, e.g. cuBLAS.
+static bool fp16_mma_hardware_available(const int cc) {
+    return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA;
+}
+
 // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later.
 static bool new_mma_available(const int cc) {
     return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_TURING;
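The point of the new variants: fp16_mma_available() depends on ggml_cuda_highest_compiled_arch(cc), i.e. on which architectures the ggml kernels in this particular binary were compiled for, while fp16_mma_hardware_available() asks only what the physical device supports. When selecting a code path in an external library such as cuBLAS, whose kernels ship with the library, the second question is the relevant one. A minimal standalone sketch of the distinction; the constant values and the compiled-arch stub are assumptions for illustration, not the real ggml definitions:

// Standalone sketch, not the real ggml code: GGML_CUDA_CC_* values and the
// ggml_cuda_highest_compiled_arch() stub are assumed; only the two predicate
// bodies mirror the diff above.
#include <cstdio>

static const int GGML_CUDA_CC_VOLTA      = 700;     // assumed value
static const int GGML_CUDA_CC_OFFSET_AMD = 1000000; // assumed AMD offset

// Stub: pretend this binary only contains kernels compiled for Pascal (6.0).
static int ggml_cuda_highest_compiled_arch(const int /*cc*/) {
    return 600;
}

// ggml-side check: needs FP16 MMA kernels actually compiled into the binary.
static bool fp16_mma_available(const int cc) {
    return cc < GGML_CUDA_CC_OFFSET_AMD && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA;
}

// Hardware-side check: only asks what the physical device can do, which is
// the right question when handing the work to cuBLAS.
static bool fp16_mma_hardware_available(const int cc) {
    return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA;
}

int main() {
    const int cc = 750; // e.g. a Turing GPU
    std::printf("fp16_mma_available          = %d\n", fp16_mma_available(cc));          // 0
    std::printf("fp16_mma_hardware_available = %d\n", fp16_mma_hardware_available(cc)); // 1
    return 0;
}

With the split, a binary compiled only for older architectures can still route an FP16 mat-mul through cuBLAS on a tensor-core card, instead of treating it like a device without tensor cores.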

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 4 deletions
@@ -1867,14 +1867,14 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
 
             const int cc = ggml_cuda_info().devices[id].cc;
             use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
-            any_gpus_with_slow_fp16   = any_gpus_with_slow_fp16   || !fast_fp16_available(cc);
-            any_gpus_without_fp16_mma = any_gpus_without_fp16_mma || !fp16_mma_available(cc);
+            any_gpus_with_slow_fp16   = any_gpus_with_slow_fp16   || !fast_fp16_hardware_available(cc);
+            any_gpus_without_fp16_mma = any_gpus_without_fp16_mma || !fp16_mma_hardware_available(cc);
         }
     } else {
         const int cc = ggml_cuda_info().devices[ctx.device].cc;
         use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
-        any_gpus_with_slow_fp16   = any_gpus_with_slow_fp16   || !fast_fp16_available(cc);
-        any_gpus_without_fp16_mma = any_gpus_without_fp16_mma || !fp16_mma_available(cc);
+        any_gpus_with_slow_fp16   = any_gpus_with_slow_fp16   || !fast_fp16_hardware_available(cc);
+        any_gpus_without_fp16_mma = any_gpus_without_fp16_mma || !fp16_mma_hardware_available(cc);
     }
 
     // debug helpers
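In both branches the per-device result is OR-folded into any_gpus_with_slow_fp16 / any_gpus_without_fp16_mma, so in a split (multi-GPU) mat-mul a single device lacking the feature disables the FP16 path for the whole operation. A small sketch of that fold, with a hypothetical device list in place of the real ggml_cuda_info() API and assumed compute-capability values:

// Sketch of the OR-fold above; device list and constants are stand-ins.
#include <cstdio>
#include <vector>

static bool fast_fp16_hardware_available(const int cc) {
    return cc >= 600 && cc != 610; // assumed GGML_CUDA_CC_PASCAL == 600
}

int main() {
    // Hypothetical multi-GPU box: one GP102 (6.1) and one Ampere (8.6) card.
    const std::vector<int> device_ccs = {610, 860};

    bool any_gpus_with_slow_fp16 = false;
    for (const int cc : device_ccs) {
        any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_hardware_available(cc);
    }

    // Prints 1: one slow-FP16 device is enough to disable the FP16 path.
    std::printf("any_gpus_with_slow_fp16 = %d\n", any_gpus_with_slow_fp16);
    return 0;
}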

ggml/src/ggml-cuda/mmq.cu

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
 #endif //GGML_CUDA_FORCE_MMQ
 
     if (cc < GGML_CUDA_CC_OFFSET_AMD) {
-        return cc < GGML_CUDA_CC_VOLTA || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
+        return !fp16_mma_hardware_available(cc) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
     }
 
     return (!GGML_CUDA_CC_IS_RDNA3(cc) && !GGML_CUDA_CC_IS_CDNA(cc) && !GGML_CUDA_CC_IS_GCN(cc)) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
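Inside the NVIDIA branch (cc < GGML_CUDA_CC_OFFSET_AMD), !fp16_mma_hardware_available(cc) reduces to cc < GGML_CUDA_CC_VOLTA, so the decision is unchanged; it now just states the intent directly: use MMQ when the hardware has no FP16 tensor cores, otherwise only for batches small enough that the int8 DP4A path stays competitive. A sketch with assumed constant values (MMQ_DP4A_MAX_BATCH_SIZE and the CC thresholds are stand-ins, not ggml's definitions):

// Sketch of the rewritten NVIDIA branch of ggml_cuda_should_use_mmq().
#include <cstdint>
#include <cstdio>

static const int     GGML_CUDA_CC_VOLTA      = 700;     // assumed
static const int     GGML_CUDA_CC_OFFSET_AMD = 1000000; // assumed
static const int64_t MMQ_DP4A_MAX_BATCH_SIZE = 64;      // assumed

static bool fp16_mma_hardware_available(const int cc) {
    return cc < GGML_CUDA_CC_OFFSET_AMD && cc >= GGML_CUDA_CC_VOLTA;
}

// Use MMQ if the device has no FP16 tensor cores, or if the batch (ne11) is
// small enough that the DP4A path stays competitive with cuBLAS.
static bool should_use_mmq_nvidia(const int cc, const int64_t ne11) {
    return !fp16_mma_hardware_available(cc) || ne11 < MMQ_DP4A_MAX_BATCH_SIZE;
}

int main() {
    std::printf("%d\n", should_use_mmq_nvidia(610, 512)); // Pascal, large batch -> 1 (MMQ)
    std::printf("%d\n", should_use_mmq_nvidia(750, 512)); // Turing, large batch -> 0 (FP16 path)
    std::printf("%d\n", should_use_mmq_nvidia(750, 16));  // Turing, small batch -> 1 (MMQ)
    return 0;
}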
