@@ -15878,15 +15878,23 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1587815878 if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
1587915879 new_type = GGML_TYPE_Q8_0;
1588015880 }
15881- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
15882- ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15883- new_type = GGML_TYPE_Q4_K;
15881+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
15882+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
15883+ else new_type = GGML_TYPE_Q4_K;
15884+ }
15885+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15886+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
15887+ else new_type = GGML_TYPE_Q5_K;
1588415888 }
1588515889 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
1588615890 ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ||
1588715891 ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
1588815892 new_type = GGML_TYPE_Q5_K;
1588915893 }
15894+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
15895+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
15896+ else new_type = GGML_TYPE_Q6_K;
15897+ }
1589015898 else if (new_type != GGML_TYPE_Q8_0) {
1589115899 new_type = GGML_TYPE_Q6_K;
1589215900 }
@@ -15895,10 +15903,25 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1589515903 if (qs.params->token_embedding_type < GGML_TYPE_COUNT) {
1589615904 new_type = qs.params->token_embedding_type;
1589715905 } else {
15898- if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
15899- ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
15906+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
15907+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_XS;
15908+ else new_type = GGML_TYPE_IQ2_S;
15909+ }
15910+ if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
1590015911 new_type = GGML_TYPE_IQ2_S;
1590115912 }
15913+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
15914+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ2_S;
15915+ else new_type = GGML_TYPE_IQ3_XXS;
15916+ }
15917+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
15918+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ3_XXS;
15919+ else new_type = GGML_TYPE_IQ3_S;
15920+ }
15921+ else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) {
15922+ if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ3_S;
15923+ else new_type = GGML_TYPE_IQ4_XS;
15924+ }
1590215925 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) new_type = GGML_TYPE_IQ3_XXS;
1590315926 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
1590415927 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ3_S;
0 commit comments