@@ -64,7 +64,6 @@ option(LLAMA_OPENMP "llama: use OpenMP"
6464
6565# Croco.Cpp Specifics
6666option (LLAMA_CUDA_FA_ALL_QUANTS "llama: compile 18 quants for FlashAttention" OFF )
67- option (LLAMA_CUDA_DISABLE_MMQ_IQ1_S_Q4_1 "llama: compile 18 quants for FlashAttention" OFF )
6867option (GGML_CUDA_USE_GRAPHS "Use Cuda Graphs to increase a bit performancess" OFF )
6968set (GGML_SCHED_MAX_COPIES "1" CACHE STRING "llama: max input copies for pipeline parallelism" )
7069set (LLAMA_SCHED_MAX_COPIES "1" CACHE STRING "llama: max input copies for pipeline parallelism" )
@@ -101,6 +100,8 @@ file(GLOB GGML_SOURCES_CUDA "ggml/src/ggml-cuda/*.cu")
101100list (APPEND GGML_SOURCES_CUDA "ggml/src/ggml-cuda/ggml-cuda.cu" )
102101file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu" )
103102list (APPEND GGML_SOURCES_CUDA ${SRCS} )
103+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
104+ list (APPEND GGML_SOURCES_CUDA ${SRCS} )
104105set (GGML_V3_CUDA_SOURCES otherarch/ggml_v3-cuda.cu otherarch/ggml_v3-cuda.h)
105106set (GGML_V2_CUDA_SOURCES otherarch/ggml_v2-cuda.cu otherarch/ggml_v2-cuda.h)
106107set (GGML_V2_LEGACY_CUDA_SOURCES otherarch/ggml_v2-cuda-legacy.cu otherarch/ggml_v2-cuda-legacy.h)
@@ -160,55 +161,10 @@ if (LLAMA_CUBLAS)
160161 if (GGML_CUDA_USE_GRAPHS)
161162 add_compile_definitions (GGML_CUDA_USE_GRAPHS)
162163 endif ()
163-
164- if (LLAMA_CUDA_DISABLE_MMQ_IQ1_S_Q4_1)
165- # all quants necessary for Kobold CPP Frankenstein are compiled
166- # the other are ignored but not deleted from the ggml_cuda templates directory
167- # file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu")
168- # list(APPEND GGML_SOURCES_CUDA ${SRCS})
169- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_m.cu" )
170- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
171- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu" )
172- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
173- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu" )
174- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
175- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu" )
176- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
177- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu" )
178- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
179- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu" )
180- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
181- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu" )
182- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
183- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu" )
184- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
185- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu" )
186- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
187- # file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu")
188- # list(APPEND GGML_SOURCES_CUDA ${SRCS})
189- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu" )
190- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
191- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu" )
192- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
193- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_0.cu" )
194- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
195- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu" )
196- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
197- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu" )
198- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
199- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu" )
200- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
201- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu" )
202- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
203- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu" )
204- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
205- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu" )
206- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
207- else ()
208- # Build All MMQ Kernels
209- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
210- list (APPEND GGML_SOURCES_CUDA ${SRCS} )
211- endif ()
164+
165+ # Build All MMQ Kernels
166+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
167+ list (APPEND GGML_SOURCES_CUDA ${SRCS} )
212168
213169 if (LLAMA_CUDA_FA_ALL_QUANTS)
214170 # all quants necessary for Kobold CPP Frankenstein are compiled
@@ -374,54 +330,9 @@ if (LLAMA_HIPBLAS)
374330 target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
375331 endif ()
376332
377- if (LLAMA_CUDA_DISABLE_MMQ_IQ1_S_Q4_1)
378- # all quants necessary for Kobold CPP Frankenstein are compiled
379- # the other are ignored but not deleted from the ggml_cuda templates directory
380- # file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu")
381- # list(APPEND GGML_SOURCES_ROCM ${SRCS})
382- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_m.cu" )
383- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
384- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu" )
385- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
386- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu" )
387- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
388- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu" )
389- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
390- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu" )
391- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
392- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu" )
393- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
394- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu" )
395- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
396- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu" )
397- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
398- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu" )
399- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
400- # file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu")
401- # list(APPEND GGML_SOURCES_ROCM ${SRCS})
402- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu" )
403- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
404- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu" )
405- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
406- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_0.cu" )
407- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
408- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu" )
409- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
410- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu" )
411- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
412- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu" )
413- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
414- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu" )
415- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
416- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu" )
417- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
418- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu" )
419- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
420- else ()
421- # Build All MMQ Kernels
422- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
423- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
424- endif ()
333+ # Build All MMQ Kernels
334+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
335+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
425336
426337 if (LLAMA_CUDA_FA_ALL_QUANTS)
427338 # all quants necessary for Kobold CPP Frankenstein are compiled
0 commit comments