Skip to content

Commit f4b5138

Browse files
committed
Q6_0 KVQ for KCPP/Croco -> KV22
For release. Fix a few lazy shortcuts and hiccups left over from the IQ4_NL merge.
1 parent 3e76b60 commit f4b5138

33 files changed

+310
-119
lines changed

CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ if (LLAMA_CUBLAS)
148148
list(APPEND GGML_SOURCES_CUDA ${SRCS})
149149
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q5_1-q5_1.cu")
150150
# list(APPEND GGML_SOURCES_CUDA ${SRCS})
151+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-iq4_nl.cu")
152+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
153+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-q5_0.cu")
154+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
155+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-q6_0.cu")
156+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
151157
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q4_0.cu")
152158
# list(APPEND GGML_SOURCES_CUDA ${SRCS})
153159
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q4_1.cu")
@@ -156,6 +162,8 @@ if (LLAMA_CUBLAS)
156162
list(APPEND GGML_SOURCES_CUDA ${SRCS})
157163
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q5_0.cu")
158164
list(APPEND GGML_SOURCES_CUDA ${SRCS})
165+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q6_0.cu")
166+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
159167
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q5_1.cu")
160168
# list(APPEND GGML_SOURCES_CUDA ${SRCS})
161169
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
@@ -170,6 +178,8 @@ if (LLAMA_CUBLAS)
170178
# list(APPEND GGML_SOURCES_CUDA ${SRCS})
171179
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-q5_1.cu")
172180
# list(APPEND GGML_SOURCES_CUDA ${SRCS})
181+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-q6_0.cu")
182+
list(APPEND GGML_SOURCES_CUDA ${SRCS})
173183
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-q8_0.cu")
174184
list(APPEND GGML_SOURCES_CUDA ${SRCS})
175185
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu")
@@ -293,6 +303,12 @@ if (LLAMA_HIPBLAS)
293303
list(APPEND GGML_SOURCES_ROCM ${SRCS})
294304
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q5_1-q5_1.cu")
295305
# list(APPEND GGML_SOURCES_ROCM ${SRCS})
306+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-iq4_nl.cu")
307+
list(APPEND GGML_SOURCES_ROCM ${SRCS})
308+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-q5_0.cu")
309+
list(APPEND GGML_SOURCES_ROCM ${SRCS})
310+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q6_0-q6_0.cu")
311+
list(APPEND GGML_SOURCES_ROCM ${SRCS})
296312
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q4_0.cu")
297313
# list(APPEND GGML_SOURCES_ROCM ${SRCS})
298314
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q4_1.cu")
@@ -301,6 +317,8 @@ if (LLAMA_HIPBLAS)
301317
list(APPEND GGML_SOURCES_ROCM ${SRCS})
302318
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q5_0.cu")
303319
list(APPEND GGML_SOURCES_ROCM ${SRCS})
320+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q6_0.cu")
321+
list(APPEND GGML_SOURCES_ROCM ${SRCS})
304322
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q5_1.cu")
305323
# list(APPEND GGML_SOURCES_ROCM ${SRCS})
306324
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu")
@@ -315,6 +333,8 @@ if (LLAMA_HIPBLAS)
315333
# list(APPEND GGML_SOURCES_ROCM ${SRCS})
316334
# file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-q5_1.cu")
317335
# list(APPEND GGML_SOURCES_ROCM ${SRCS})
336+
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-q6_0.cu")
337+
list(APPEND GGML_SOURCES_ROCM ${SRCS})
318338
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-q8_0.cu")
319339
list(APPEND GGML_SOURCES_ROCM ${SRCS})
320340
file(GLOB SRCS "ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu")

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,7 @@ extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q4_0);
394394
extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q4_1);
395395
extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q5_0);
396396
extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q5_1);
397+
//extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q6_0);
397398
extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q8_0);
398399
extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_F16);
399400
//extern DECL_FATTN_VEC_F16_CASE( 64, GGML_TYPE_Q8_0, GGML_TYPE_IQ4_NL);
@@ -403,6 +404,7 @@ extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
403404
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);
404405
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);
405406
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);
407+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q4_0);
406408
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);
407409
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
408410
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q4_0);
@@ -411,6 +413,7 @@ extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_IQ4_NL);
411413
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_IQ4_NL);
412414
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_IQ4_NL);
413415
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_IQ4_NL);
416+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_IQ4_NL);
414417
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_IQ4_NL);
415418
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_IQ4_NL);
416419
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_IQ4_NL);
@@ -419,6 +422,7 @@ extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
419422
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);
420423
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);
421424
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);
425+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q4_1);
422426
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);
423427
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
424428
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q4_1);
@@ -427,6 +431,7 @@ extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
427431
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);
428432
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);
429433
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);
434+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q5_0);
430435
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);
431436
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
432437
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q5_0);
@@ -435,14 +440,25 @@ extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
435440
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);
436441
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);
437442
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);
443+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q5_1);
438444
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);
439445
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
440446
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q5_1);
441447

448+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q6_0);
449+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q6_0);
450+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q6_0);
451+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q6_0);
452+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q6_0);
453+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q6_0);
454+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q6_0);
455+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q6_0);
456+
442457
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
443458
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);
444459
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);
445460
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);
461+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q8_0);
446462
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);
447463
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
448464
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q8_0);
@@ -451,6 +467,7 @@ extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
451467
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
452468
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);
453469
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);
470+
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_F16);
454471
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);
455472
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
456473
extern DECL_FATTN_VEC_F16_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_F16);

ggml/src/ggml-cuda/fattn-vec-f32.cuh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q4_0);
372372
extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q4_1);
373373
extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q5_0);
374374
extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q5_1);
375+
//extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q6_0);
375376
extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_Q8_0);
376377
extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_F16);
377378
//extern DECL_FATTN_VEC_F32_CASE( 64, GGML_TYPE_Q8_0, GGML_TYPE_IQ4_NL);
@@ -381,6 +382,7 @@ extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_0);
381382
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_0);
382383
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_0);
383384
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_0);
385+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q4_0);
384386
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0);
385387
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_0);
386388
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q4_0);
@@ -389,6 +391,7 @@ extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_IQ4_NL);
389391
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_IQ4_NL);
390392
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_IQ4_NL);
391393
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_IQ4_NL);
394+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_IQ4_NL);
392395
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_IQ4_NL);
393396
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_IQ4_NL);
394397
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_IQ4_NL);
@@ -397,6 +400,7 @@ extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q4_1);
397400
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q4_1);
398401
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q4_1);
399402
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q4_1);
403+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q4_1);
400404
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q4_1);
401405
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q4_1);
402406
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q4_1);
@@ -405,6 +409,7 @@ extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_0);
405409
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_0);
406410
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_0);
407411
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_0);
412+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q5_0);
408413
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_0);
409414
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_0);
410415
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q5_0);
@@ -413,14 +418,25 @@ extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q5_1);
413418
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q5_1);
414419
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q5_1);
415420
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q5_1);
421+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q5_1);
416422
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q5_1);
417423
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q5_1);
418424
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q5_1);
419425

426+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q6_0);
427+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q6_0);
428+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q6_0);
429+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q6_0);
430+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q6_0);
431+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q6_0);
432+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q6_0);
433+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q6_0);
434+
420435
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_Q8_0);
421436
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_Q8_0);
422437
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_Q8_0);
423438
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_Q8_0);
439+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_Q8_0);
424440
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_Q8_0);
425441
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_Q8_0);
426442
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_Q8_0);
@@ -429,6 +445,7 @@ extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_0, GGML_TYPE_F16);
429445
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q4_1, GGML_TYPE_F16);
430446
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_0, GGML_TYPE_F16);
431447
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q5_1, GGML_TYPE_F16);
448+
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q6_0, GGML_TYPE_F16);
432449
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_Q8_0, GGML_TYPE_F16);
433450
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16);
434451
extern DECL_FATTN_VEC_F32_CASE(128, GGML_TYPE_IQ4_NL, GGML_TYPE_F16);

0 commit comments

Comments
 (0)