@@ -42,11 +42,7 @@ endif()
4242
4343# 3rd party libs
4444option (LLAMA_CUBLAS "llama: use CUDA" ON )
45- set (LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels" )
46- set (LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels" )
47- set (LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels" )
4845option (LLAMA_CUDA_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF )
49- set (LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K" )
5046set (LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
5147 "llama: max. batch size for using peer access" )
5248
@@ -101,13 +97,9 @@ if (LLAMA_CUBLAS)
10197 add_compile_definitions (GGML_USE_CUDA)
10298 add_compile_definitions (SD_USE_CUBLAS)
10399
104- add_compile_definitions (GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
105- add_compile_definitions (GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y} )
106- add_compile_definitions (GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
107100 if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
108101 add_compile_definitions (GGML_CUDA_F16)
109102 endif ()
110- add_compile_definitions (K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
111103 add_compile_definitions (GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE} )
112104
113105 # only build minimal quants required for fattn quant kv
@@ -185,9 +177,6 @@ if (LLAMA_HIPBLAS)
185177 list (APPEND GGML_SOURCES_ROCM ${SRCS} )
186178 add_compile_definitions (GGML_USE_HIP GGML_USE_CUDA SD_USE_CUBLAS)
187179 add_library (ggml-rocm ${GGML_SOURCES_CUDA} )
188- if (LLAMA_CUDA_FORCE_DMMV)
189- target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
190- endif ()
191180
192181 file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu" )
193182 list (APPEND GGML_SOURCES_ROCM ${SRCS} )
@@ -197,39 +186,18 @@ if (LLAMA_HIPBLAS)
197186 list (APPEND GGML_SOURCES_ROCM ${SRCS} )
198187
199188 # only build minimal quants required for fattn quant kv
200- target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
201- target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
202- target_compile_definitions (ggml-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
203189 set_source_files_properties (${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
204190 target_link_libraries (ggml-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
205191
206192 add_library (ggml-v2-rocm ${GGML_V2_CUDA_SOURCES} )
207- if (LLAMA_CUDA_FORCE_DMMV)
208- target_compile_definitions (ggml-v2-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
209- endif ()
210- target_compile_definitions (ggml-v2-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
211- target_compile_definitions (ggml-v2-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
212- target_compile_definitions (ggml-v2-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
213193 set_source_files_properties (otherarch/ggml_v2-cuda.cu PROPERTIES LANGUAGE CXX)
214194 target_link_libraries (ggml-v2-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
215195
216196 add_library (ggml-v3-rocm ${GGML_V3_CUDA_SOURCES} )
217- if (LLAMA_CUDA_FORCE_DMMV)
218- target_compile_definitions (ggml-v3-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
219- endif ()
220- target_compile_definitions (ggml-v3-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
221- target_compile_definitions (ggml-v3-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
222- target_compile_definitions (ggml-v3-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
223197 set_source_files_properties (otherarch/ggml_v3-cuda.cu PROPERTIES LANGUAGE CXX)
224198 target_link_libraries (ggml-v3-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
225199
226200 add_library (ggml-v2-legacy-rocm ${GGML_V2_LEGACY_CUDA_SOURCES} )
227- if (LLAMA_CUDA_FORCE_DMMV)
228- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
229- endif ()
230- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
231- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
232- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
233201 set_source_files_properties (otherarch/ggml_v2-cuda-legacy.cu PROPERTIES LANGUAGE CXX)
234202 target_link_libraries (ggml-v2-legacy-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
235203
0 commit comments