@@ -394,9 +394,20 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
394394 case GGML_TYPE_IQ1_M:
395395 case GGML_TYPE_Q2_K:
396396 case GGML_TYPE_Q3_K:
397- case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_Q4_0; break ;
397+ case GGML_TYPE_IQ2_KS:
398+ case GGML_TYPE_IQ2_K:
399+ case GGML_TYPE_IQ3_K:
400+ case GGML_TYPE_IQ2_KT:
401+ case GGML_TYPE_IQ3_KT:
402+ case GGML_TYPE_IQ4_KSS:
403+ case GGML_TYPE_IQ4_KS:
404+ case GGML_TYPE_IQ4_XS: new_type = GGML_TYPE_IQ4_NL; break ;
405+ case GGML_TYPE_IQ4_K:
406+ case GGML_TYPE_IQ4_KT:
398407 case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break ;
399- case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q5_1; break ;
408+ case GGML_TYPE_IQ5_K:
409+ case GGML_TYPE_Q5_K: new_type = GGML_TYPE_Q6_0; break ;
410+ case GGML_TYPE_IQ6_K:
400411 case GGML_TYPE_Q6_K: new_type = GGML_TYPE_Q8_0; break ;
401412 default : throw std::runtime_error (" \n Unsupported tensor size encountered\n " );
402413 }
@@ -473,6 +484,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
473484 case LLAMA_FTYPE_MOSTLY_Q4_1: default_type = GGML_TYPE_Q4_1; break ;
474485 case LLAMA_FTYPE_MOSTLY_Q5_0: default_type = GGML_TYPE_Q5_0; break ;
475486 case LLAMA_FTYPE_MOSTLY_Q5_1: default_type = GGML_TYPE_Q5_1; break ;
487+ case LLAMA_FTYPE_MOSTLY_Q6_0: default_type = GGML_TYPE_Q6_0; break ;
476488 case LLAMA_FTYPE_MOSTLY_Q8_0: default_type = GGML_TYPE_Q8_0; break ;
477489 case LLAMA_FTYPE_MOSTLY_F16: default_type = GGML_TYPE_F16; break ;
478490 case LLAMA_FTYPE_MOSTLY_BF16: default_type = GGML_TYPE_BF16; break ;
@@ -494,13 +506,27 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
494506 case LLAMA_FTYPE_MOSTLY_TQ2_0: default_type = GGML_TYPE_TQ2_0; break ;
495507 case LLAMA_FTYPE_MOSTLY_IQ2_XXS: default_type = GGML_TYPE_IQ2_XXS; break ;
496508 case LLAMA_FTYPE_MOSTLY_IQ2_XS: default_type = GGML_TYPE_IQ2_XS; break ;
509+ case LLAMA_FTYPE_MOSTLY_IQ2_KS: default_type = GGML_TYPE_IQ2_KS; break ;
510+ case LLAMA_FTYPE_MOSTLY_IQ2_KT: default_type = GGML_TYPE_IQ2_KT; break ;
497511 case LLAMA_FTYPE_MOSTLY_IQ2_S: default_type = GGML_TYPE_IQ2_XS; break ;
498512 case LLAMA_FTYPE_MOSTLY_IQ2_M: default_type = GGML_TYPE_IQ2_S; break ;
499513 case LLAMA_FTYPE_MOSTLY_IQ3_XXS: default_type = GGML_TYPE_IQ3_XXS; break ;
514+ case LLAMA_FTYPE_MOSTLY_IQ3_KT: default_type = GGML_TYPE_IQ3_KT; break ;
515+ case LLAMA_FTYPE_MOSTLY_IQ4_KT: default_type = GGML_TYPE_IQ4_KT; break ;
500516 case LLAMA_FTYPE_MOSTLY_IQ1_S: default_type = GGML_TYPE_IQ1_S; break ;
501517 case LLAMA_FTYPE_MOSTLY_IQ1_M: default_type = GGML_TYPE_IQ1_M; break ;
518+ // case LLAMA_FTYPE_MOSTLY_IQ1_BN: default_type = GGML_TYPE_IQ1_BN; break;
519+ // case LLAMA_FTYPE_MOSTLY_IQ2_BN: default_type = GGML_TYPE_IQ2_BN; break;
502520 case LLAMA_FTYPE_MOSTLY_IQ4_NL: default_type = GGML_TYPE_IQ4_NL; break ;
503521 case LLAMA_FTYPE_MOSTLY_IQ4_XS: default_type = GGML_TYPE_IQ4_XS; break ;
522+ case LLAMA_FTYPE_MOSTLY_IQ4_KS: default_type = GGML_TYPE_IQ4_KS; break ;
523+ case LLAMA_FTYPE_MOSTLY_IQ4_KSS: default_type = GGML_TYPE_IQ4_KSS; break ;
524+ case LLAMA_FTYPE_MOSTLY_IQ2_K: default_type = GGML_TYPE_IQ2_K; break ;
525+ case LLAMA_FTYPE_MOSTLY_IQ3_K: default_type = GGML_TYPE_IQ3_K; break ;
526+ case LLAMA_FTYPE_MOSTLY_IQ3_KL: default_type = GGML_TYPE_IQ3_K; break ;
527+ case LLAMA_FTYPE_MOSTLY_IQ4_K: default_type = GGML_TYPE_IQ4_K; break ;
528+ case LLAMA_FTYPE_MOSTLY_IQ5_K: default_type = GGML_TYPE_IQ5_K; break ;
529+ case LLAMA_FTYPE_MOSTLY_IQ6_K: default_type = GGML_TYPE_IQ6_K; break ;
504530 case LLAMA_FTYPE_MOSTLY_IQ3_S: default_type = GGML_TYPE_IQ3_S; break ;
505531 case LLAMA_FTYPE_MOSTLY_IQ3_M: default_type = GGML_TYPE_IQ3_S; break ;
506532
0 commit comments