@@ -15900,11 +15900,12 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 new_type = GGML_TYPE_IQ2_S;
             }
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M) new_type = GGML_TYPE_IQ3_XXS;
+            else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) new_type = GGML_TYPE_IQ3_XXS;
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ3_S;
             else if (new_type == GGML_TYPE_Q4_0_4_4 || new_type == GGML_TYPE_Q4_0_4_8 || new_type == GGML_TYPE_Q4_0_8_8) {
                 new_type = GGML_TYPE_Q4_0;
             }
-            else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_XSR ) new_type = GGML_TYPE_IQ4_XS;
+            else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ) new_type = GGML_TYPE_IQ4_XS;
         }
     } else if (name.find("attn_v.weight") != std::string::npos) {
         if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_v_type < GGML_TYPE_COUNT) {
@@ -16010,9 +16011,9 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && qs.model.hparams.n_gqa() < 2 && qs.model.hparams.n_expert < 2) {
             new_type = GGML_TYPE_IQ3_XXS;
         }
-        else if (( ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) &&
-            (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
-            new_type = GGML_TYPE_IQ4_XS;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) {
+            if (qs.model.hparams.n_gqa() >= 4 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
+            else if (qs.model.hparams.n_gqa() >= 2) new_type = GGML_TYPE_IQ4_XS;
         }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
             new_type = GGML_TYPE_Q4_K;