@@ -16426,23 +16426,35 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
             new_type = GGML_TYPE_Q8_0;
         }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
+            if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
+            else new_type = GGML_TYPE_Q4_K;
+        }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
+            if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
+            else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
+            else new_type = GGML_TYPE_Q6_K;
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
+            else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
+            else new_type = GGML_TYPE_IQ4_XS;
+        }
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
+            if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else new_type = GGML_TYPE_Q4_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
-                 ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ||
-                 ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
             else new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else new_type = GGML_TYPE_Q5_K;
         }
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
             if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
             else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
             else new_type = GGML_TYPE_Q6_K;
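
For readers skimming the hunk, the sketch below restates the new per-ftype selection logic as a standalone function. It is an illustrative reduction only: the trimmed `ftype`/`gtype` enums, the `hparams` struct, and `pick_low_bit_type` are hypothetical stand-ins for the real llama.cpp types (for example, `n_gqa` is a plain field here rather than the `n_gqa()` helper used in the patch), and only the ftypes touched by this hunk are covered.

```cpp
#include <cstdint>

// Hypothetical, trimmed-down stand-ins for the enums used in the hunk.
enum class ftype { Q2_K_S, Q2_K, Q2_K_L, Q3_K_M, IQ1_XS, IQ1_S, IQ1_M, IQ1_XL,
                   IQ2_XXS, IQ2_XS, IQ2_S, IQ2_M, IQ2_XL, IQ3_XXS, IQ3_XS, IQ3_S, IQ3_M };
enum class gtype { Q4_K, Q5_K, Q6_K, IQ4_XS };

struct hparams {
    uint32_t n_expert; // number of MoE experts
    uint32_t n_vocab;  // vocabulary size
    uint32_t n_gqa;    // grouped-query-attention ratio (stand-in for hparams.n_gqa())
};

// Mirrors the branch order of the patched code: MoE models (>= 4 experts) are always
// bumped to Q6_K, large-vocabulary models (>= 127999 tokens) get a mid-range type,
// and everything else falls through to the per-ftype default.
static gtype pick_low_bit_type(ftype ft, const hparams & hp) {
    const bool moe       = hp.n_expert >= 4;
    const bool big_vocab = hp.n_vocab >= 127999;

    switch (ft) {
        case ftype::Q2_K_S: case ftype::Q2_K: case ftype::Q2_K_L:
            return moe ? gtype::Q6_K : gtype::Q4_K;
        case ftype::Q3_K_M:
            return moe ? gtype::Q6_K : (big_vocab ? gtype::Q5_K : gtype::Q6_K);
        case ftype::IQ1_XS: case ftype::IQ1_S: case ftype::IQ1_M:
            if (moe)                               return gtype::Q6_K;
            if (big_vocab)                         return gtype::IQ4_XS;
            if (hp.n_gqa >= 2 || hp.n_expert >= 2) return gtype::Q4_K;
            return gtype::IQ4_XS;
        case ftype::IQ1_XL: case ftype::IQ2_XXS: case ftype::IQ2_XS:
            return moe ? gtype::Q6_K : gtype::Q4_K;
        case ftype::IQ2_S: case ftype::IQ2_M: case ftype::IQ2_XL:
            return moe ? gtype::Q6_K : (big_vocab ? gtype::Q4_K : gtype::Q5_K);
        case ftype::IQ3_XXS: case ftype::IQ3_XS:
            return moe ? gtype::Q6_K : gtype::Q5_K;
        case ftype::IQ3_S: case ftype::IQ3_M:
            return moe ? gtype::Q6_K : (big_vocab ? gtype::Q5_K : gtype::Q6_K);
    }
    return gtype::Q6_K; // unreachable for the ftypes enumerated above
}
```

The net effect of the change is to split the old combined IQ1/IQ2/Q2_K branch into separate branches so each low-bit ftype can get its own fallback type, and to regroup IQ3_XS with IQ3_XXS instead of IQ3_S/IQ3_M.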