From 96b344c6435dee2940f33085330823b4c1be1153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Thu, 27 Feb 2025 18:29:24 +0100 Subject: [PATCH] CUDA: fix logic for V100 + GGML_CUDA_FORCE_MMQ --- ggml/src/ggml-cuda/mmq.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index 0451c65f30277..f2aca1f2014e1 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -109,9 +109,9 @@ static constexpr __device__ int get_mmq_x_max_device() { #if __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA #ifdef GGML_CUDA_FORCE_MMQ - return MMQ_DP4A_MAX_BATCH_SIZE; -#else // GGML_CUDA_FORCE_MMQ return 128; +#else // GGML_CUDA_FORCE_MMQ + return MMQ_DP4A_MAX_BATCH_SIZE; #endif // GGML_CUDA_FORCE_MMQ #else // __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA