File tree: 4 files changed, +14 −0 lines changed.

Diff hunk @@ -158,6 +158,8 @@ — CMake option block (context line: `option(GGML_CUDA "ggml: use CUDA" ...)`).
# CUDA/MUSA backend build options. GGML_CUDA_NO_TURING_MMA is the new switch
# added by this change: it disables the Turing mma path in the MMQ kernels.
option(GGML_MUSA                   "ggml: use MUSA"                                 OFF)
option(GGML_CUDA_FORCE_MMQ         "ggml: use mmq kernels instead of cuBLAS"        OFF)
option(GGML_CUDA_FORCE_CUBLAS      "ggml: always use cuBLAS instead of mmq kernels" OFF)
option(GGML_CUDA_NO_TURING_MMA     "ggml: disable the use of mma in mmq kernels"    OFF)

set(GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
                                   "ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY      "ggml: do not use peer to peer copies"           OFF)
Diff hunk @@ -77,6 +77,11 @@ — inside the `if (CUDAToolkit_FOUND)` block of the CUDA CMake file.
7777 add_compile_definitions (GGML_CUDA_FORCE_CUBLAS)
7878 endif ()
7979
80+ if (GGML_CUDA_NO_TURING_MMA)
81+ add_compile_definitions (GGML_CUDA_NO_TURING_MMA)
82+ add_compile_definitions (GGML_CUDA_FORCE_MMQ)
83+ endif ()
84+
8085 if (GGML_CUDA_NO_VMM)
8186 add_compile_definitions (GGML_CUDA_NO_VMM)
8287 endif ()
Original file line number Diff line number Diff line change 7171#include < string>
7272#include < vector>
7373
74+ #ifdef GGML_CUDA_NO_TURING_MMA
75+ #define CUBLAS_COMPUTE_16F CUBLAS_COMPUTE_16F_PEDANTIC
76+ #endif
77+
7478static_assert (sizeof (half) == sizeof (ggml_fp16_t ), " wrong fp16 size" );
7579
7680[[noreturn]]
Original file line number Diff line number Diff line change 99
1010using namespace ggml_cuda_mma ;
1111
12+ #ifdef GGML_CUDA_NO_TURING_MMA
13+ #undef TURING_MMA_AVAILABLE
14+ #endif
1215#define MMQ_DP4A_MAX_BATCH_SIZE 64 // Max. batch size to use for dp4a MMQ kernels when FP16 tensor cores are available.
1316#define MMQ_ITER_K 256
1417#define MMQ_NWARPS 8
You can’t perform that action at this time.
0 commit comments