Skip to content

Commit 651fdf4

Browse files
authored
Add option (again)
1 parent 945e1f1 commit 651fdf4

File tree

4 files changed

+14
-0
lines changed

4 files changed

+14
-0
lines changed

ggml/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ option(GGML_CUDA "ggml: use CUDA"
158158
option(GGML_MUSA "ggml: use MUSA" OFF)
159159
option(GGML_CUDA_FORCE_MMQ "ggml: use mmq kernels instead of cuBLAS" OFF)
160160
option(GGML_CUDA_FORCE_CUBLAS "ggml: always use cuBLAS instead of mmq kernels" OFF)
161+
option(GGML_CUDA_NO_TURING_MMA "ggml: disable the use of mma in mmq kernels" OFF)
162+
161163
set (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
162164
"ggml: max. batch size for using peer access")
163165
option(GGML_CUDA_NO_PEER_COPY "ggml: do not use peer to peer copies" OFF)

ggml/src/ggml-cuda/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ if (CUDAToolkit_FOUND)
7777
add_compile_definitions(GGML_CUDA_FORCE_CUBLAS)
7878
endif()
7979

80+
if (GGML_CUDA_NO_TURING_MMA)
81+
add_compile_definitions(GGML_CUDA_NO_TURING_MMA)
82+
add_compile_definitions(GGML_CUDA_FORCE_MMQ)
83+
endif()
84+
8085
if (GGML_CUDA_NO_VMM)
8186
add_compile_definitions(GGML_CUDA_NO_VMM)
8287
endif()

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@
7171
#include <string>
7272
#include <vector>
7373

74+
#ifdef GGML_CUDA_NO_TURING_MMA
75+
#define CUBLAS_COMPUTE_16F CUBLAS_COMPUTE_16F_PEDANTIC
76+
#endif
77+
7478
static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");
7579

7680
[[noreturn]]

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
using namespace ggml_cuda_mma;
1111

12+
#ifdef GGML_CUDA_NO_TURING_MMA
13+
#undef TURING_MMA_AVAILABLE
14+
#endif
1215
#define MMQ_DP4A_MAX_BATCH_SIZE 64 // Max. batch size to use for dp4a MMQ kernels when FP16 tensor cores are available.
1316
#define MMQ_ITER_K 256
1417
#define MMQ_NWARPS 8

0 commit comments

Comments (0)