@@ -156,6 +156,8 @@ if (LLAMA_CUBLAS)
156156 list (APPEND GGML_SOURCES_CUDA ${SRCS} )
157157 file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu" )
158158 list (APPEND GGML_SOURCES_CUDA ${SRCS} )
159+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_0.cu" )
160+ list (APPEND GGML_SOURCES_CUDA ${SRCS} )
159161 file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu" )
160162 list (APPEND GGML_SOURCES_CUDA ${SRCS} )
161163 file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu" )
@@ -332,13 +334,60 @@ if (LLAMA_HIPBLAS)
332334 list (APPEND GGML_SOURCES_ROCM "ggml/src/ggml-cuda/ggml-cuda.cu" )
333335 file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu" )
334336 list (APPEND GGML_SOURCES_ROCM ${SRCS} )
335- file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
336- list (APPEND GGML_SOURCES_ROCM ${SRCS} )
337337 add_compile_definitions (GGML_USE_HIP GGML_USE_CUDA SD_USE_CUBLAS)
338338 add_library (ggml-rocm ${GGML_SOURCES_CUDA} )
339339 if (LLAMA_CUDA_FORCE_DMMV)
340340 target_compile_definitions (ggml-rocm PUBLIC GGML_CUDA_FORCE_DMMV)
341341 endif ()
342+
343+ if (LLAMA_CUDA_DISABLE_MMQ_IQ1_S_Q4_1)
344+ # all quants necessary for Kobold CPP Frankenstein are compiled;
345+ # the others are skipped, but their files are not deleted from the ggml-cuda/template-instances directory
346+ # file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu")
347+ # list(APPEND GGML_SOURCES_ROCM ${SRCS})
348+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_m.cu" )
349+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
350+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu" )
351+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
352+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu" )
353+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
354+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu" )
355+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
356+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu" )
357+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
358+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu" )
359+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
360+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu" )
361+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
362+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu" )
363+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
364+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu" )
365+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
366+ # file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu")
367+ # list(APPEND GGML_SOURCES_ROCM ${SRCS})
368+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu" )
369+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
370+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu" )
371+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
372+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_0.cu" )
373+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
374+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu" )
375+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
376+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu" )
377+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
378+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu" )
379+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
380+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu" )
381+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
382+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu" )
383+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
384+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu" )
385+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
386+ else ()
387+ # Build All MMQ Kernels
388+ file (GLOB SRCS "ggml/src/ggml-cuda/template-instances/mmq*.cu" )
389+ list (APPEND GGML_SOURCES_ROCM ${SRCS} )
390+ endif ()
342391
343392 if (LLAMA_CUDA_FA_ALL_QUANTS)
344393 # all quants necessary for Kobold CPP Frankenstein are compiled
0 commit comments