@@ -16441,14 +16441,15 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1644116441 else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
1644216442 else new_type = GGML_TYPE_IQ4_XS;
1644316443 }
16444- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
16444+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
16445+ ftype == LLAMA_FTYPE_MOSTLY_IQ2_S) {
1644516446 if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16446- else if (qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ4_XS;
16447+ else if (qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ4_XS;
1644716448 else new_type = GGML_TYPE_Q4_K;
1644816449 }
16449- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
16450+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
1645016451 if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16451- else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20 ) new_type = GGML_TYPE_Q4_K;
16452+ else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
1645216453 else new_type = GGML_TYPE_Q5_K;
1645316454 }
1645416455 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
@@ -16457,7 +16458,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1645716458 }
1645816459 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
1645916460 if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16460- else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_Q5_K;
16461+ else if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_Q5_K;
1646116462 else new_type = GGML_TYPE_Q6_K;
1646216463 }
1646316464 else if (new_type != GGML_TYPE_Q8_0) {
@@ -16488,18 +16489,18 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1648816489 new_type = GGML_TYPE_IQ2_S;
1648916490 }
1649016491 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) {
16491- if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ2_S;
16492+ if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ2_S;
1649216493 else new_type = GGML_TYPE_IQ3_XXS;
1649316494 }
1649416495 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
1649516496 new_type = GGML_TYPE_IQ3_XXS;
1649616497 }
16497- else if ( || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
16498- if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ3_XXS;
16498+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
16499+ if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ3_XXS;
1649916500 else new_type = GGML_TYPE_IQ3_S;
1650016501 }
1650116502 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXL) {
16502- if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head <= 20) new_type = GGML_TYPE_IQ3_S;
16503+ if (qs.model.hparams.n_vocab >= 127999 || qs.model.hparams.n_head() <= 20) new_type = GGML_TYPE_IQ3_S;
1650316504 new_type = GGML_TYPE_IQ4_XS;
1650416505 }
1650516506 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) new_type = GGML_TYPE_IQ3_XXS;
0 commit comments