@@ -47,11 +47,7 @@ endif()
4747# 3rd party libs
4848option (LLAMA_CUBLAS "llama: use CUDA" ON )
4949option (LLAMA_CUDA "llama: use CUDA" OFF )
50- set (LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels" )
51- set (LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels" )
52- set (LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels" )
5350option (LLAMA_CUDA_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF )
54- set (LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K" )
5551set (LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING
5652 "llama: max. batch size for using peer access" )
5753
@@ -114,13 +110,9 @@ if (LLAMA_CUBLAS)
114110 add_compile_definitions (GGML_USE_CUDA)
115111 add_compile_definitions (SD_USE_CUBLAS)
116112
117- add_compile_definitions (GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
118- add_compile_definitions (GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y} )
119- add_compile_definitions (GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
120113 if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
121114 add_compile_definitions (GGML_CUDA_F16)
122115 endif ()
123- add_compile_definitions (K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
124116 add_compile_definitions (GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE} )
125117
126118 if (GGML_CUDA_USE_GRAPHS)
@@ -336,9 +328,6 @@ if (LLAMA_HIPBLAS)
336328 list (APPEND GGML_SOURCES_ROCM ${SRCS} )
337329 add_compile_definitions (GGML_USE_HIP GGML_USE_CUDA SD_USE_CUBLAS)
338330 add_library (ggml-rocm ${GGML_SOURCES_CUDA} )
339- if (LLAMA_CUDA_FORCE_DMMV)
340- target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
341- endif ()
342331
343332 if (LLAMA_CUDA_DISABLE_MMQ_IQ1_S_Q4_1)
344333 # all quants necessary for Kobold CPP Frankenstein are compiled
@@ -478,39 +467,18 @@ if (LLAMA_HIPBLAS)
478467 endif ()
479468
480469 # only build minimal quants required for fattn quant kv
481- target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
482- target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
483- target_compile_definitions (ggml-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
484470 set_source_files_properties (${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX)
485471 target_link_libraries (ggml-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
486472
487473 add_library (ggml-v2-rocm ${GGML_V2_CUDA_SOURCES} )
488- if (LLAMA_CUDA_FORCE_DMMV)
489- target_compile_definitions (ggml-v2-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
490- endif ()
491- target_compile_definitions (ggml-v2-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
492- target_compile_definitions (ggml-v2-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
493- target_compile_definitions (ggml-v2-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
494474 set_source_files_properties (otherarch/ggml_v2-cuda.cu PROPERTIES LANGUAGE CXX)
495475 target_link_libraries (ggml-v2-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
496476
497477 add_library (ggml-v3-rocm ${GGML_V3_CUDA_SOURCES} )
498- if (LLAMA_CUDA_FORCE_DMMV)
499- target_compile_definitions (ggml-v3-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
500- endif ()
501- target_compile_definitions (ggml-v3-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
502- target_compile_definitions (ggml-v3-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
503- target_compile_definitions (ggml-v3-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
504478 set_source_files_properties (otherarch/ggml_v3-cuda.cu PROPERTIES LANGUAGE CXX)
505479 target_link_libraries (ggml-v3-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
506480
507481 add_library (ggml-v2-legacy-rocm ${GGML_V2_LEGACY_CUDA_SOURCES} )
508- if (LLAMA_CUDA_FORCE_DMMV)
509- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
510- endif ()
511- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X} )
512- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y} )
513- target_compile_definitions (ggml-v2-legacy-rocm PUBLIC K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER} )
514482 set_source_files_properties (otherarch/ggml_v2-cuda-legacy.cu PROPERTIES LANGUAGE CXX)
515483 target_link_libraries (ggml-v2-legacy-rocm PUBLIC hip::device hip::host roc::rocblas roc::hipblas)
516484
0 commit comments