Skip to content

Commit 6b5cebf

Browse files
committed
Revamp the output weight quantization a bit
for more granularity in low quants.
1 parent f796954 commit 6b5cebf

File tree

1 file changed

+17
-5
lines changed

1 file changed

+17
-5
lines changed

src/llama.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16426,23 +16426,35 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1642616426
if (arch == LLM_ARCH_FALCON || nx % QK_K != 0) {
1642716427
new_type = GGML_TYPE_Q8_0;
1642816428
}
16429+
else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
16430+
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16431+
else new_type = GGML_TYPE_Q4_K;
16432+
}
16433+
else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) {
16434+
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
16435+
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
16436+
else new_type = GGML_TYPE_Q6_K;
16437+
}
1642916438
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
1643016439
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1643116440
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_IQ4_XS;
16441+
else if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q4_K;
16442+
else new_type = GGML_TYPE_IQ4_XS;
16443+
}
16444+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS) {
16445+
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1643216446
else new_type = GGML_TYPE_Q4_K;
1643316447
}
16434-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS ||
16435-
ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL ||
16436-
ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) {
16448+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
1643716449
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1643816450
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q4_K;
1643916451
else new_type = GGML_TYPE_Q5_K;
1644016452
}
16441-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
16453+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1644216454
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1644316455
else new_type = GGML_TYPE_Q5_K;
1644416456
}
16445-
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
16457+
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
1644616458
if (qs.model.hparams.n_expert >= 4) new_type = GGML_TYPE_Q6_K;
1644716459
else if (qs.model.hparams.n_vocab >= 127999) new_type = GGML_TYPE_Q5_K;
1644816460
else new_type = GGML_TYPE_Q6_K;

0 commit comments

Comments
 (0)