
Commit 7f123a6

Shorten conditions for specify quants
1 parent a0ad75f commit 7f123a6
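
This commit collapses the per-tensor override checks for the custom quantization scheme (LLAMA_FTYPE_CQS) from three-line braced blocks into single-line conditions; the logic is unchanged. The same edit is applied to attn_v, attn_k, attn_q, ffn_down, attn_output, attn_qkv, ffn_gate and ffn_up. A sketch of the pattern (attn_v shown; the other tensors only swap the field name):

    // before: braced block
    if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_v_type < GGML_TYPE_COUNT) {
        new_type = qs.params->attn_v_type;
    }
    // after: single-line condition, keeping the following else-if chain intact
    if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_v_type < GGML_TYPE_COUNT) new_type = qs.params->attn_v_type;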

File tree

1 file changed (+8, -24 lines)


src/llama.cpp

Lines changed: 8 additions & 24 deletions
@@ -15908,9 +15908,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) new_type = GGML_TYPE_IQ4_XS;
         }
     } else if (name.find("attn_v.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_v_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->attn_v_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_v_type < GGML_TYPE_COUNT) new_type = qs.params->attn_v_type;
         else if (qs.model.hparams.n_expert >= 4) {
             // for the 8-expert model, bumping this to Q8_0 trades just ~128MB
             // TODO: explore better strategies
@@ -15974,9 +15972,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         ++qs.i_attention_wv;
     } else if (name.find("attn_k.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_k_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->attn_k_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_k_type < GGML_TYPE_COUNT) new_type = qs.params->attn_k_type;
         else if (qs.model.hparams.n_expert >= 4) {
             // for the 8-expert model, bumping this to Q8_0 trades just ~128MB
             // TODO: explore better strategies
@@ -16030,9 +16026,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         ++qs.i_attention_wk;
     } else if (name.find("attn_q.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_q_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->attn_q_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_q_type < GGML_TYPE_COUNT) new_type = qs.params->attn_q_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
@@ -16055,9 +16049,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     } else if (name.find("ffn_down") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
         int i_layer = info.first, n_layer = info.second;
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_down_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->ffn_down_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_down_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_down_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
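
Several of the heuristic fallbacks in these branches key off the layer position, e.g. the (i_layer < n_layer/8) guard above only bumps the first eighth of the layers to a larger type. A minimal illustration of that gate (the 32-layer depth is made up for the example):

    #include <cstdio>

    int main() {
        const int n_layer = 32;                          // hypothetical model depth
        for (int i_layer = 0; i_layer < n_layer; ++i_layer) {
            // same gate as the IQ1_XS/IQ1_S/IQ1_M fallbacks in the diff
            if (i_layer < n_layer/8) {
                printf("layer %2d: use the larger fallback type\n", i_layer);
            }
        }
        return 0;                                        // for n_layer == 32 this prints layers 0..3
    }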
@@ -16119,9 +16111,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         ++qs.i_ffn_down;
     } else if (name.find("attn_output.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_output_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->attn_output_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_output_type < GGML_TYPE_COUNT) new_type = qs.params->attn_output_type;
         else if (arch != LLM_ARCH_FALCON) {
             if (qs.model.hparams.n_expert >= 4) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
@@ -16160,9 +16150,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         ++qs.i_attention_wo;
     }
     else if (name.find("attn_qkv.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_qkv_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->attn_qkv_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_qkv_type < GGML_TYPE_COUNT) new_type = qs.params->attn_qkv_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) {
             new_type = GGML_TYPE_Q4_K;
         }
@@ -16188,9 +16176,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     else if (name.find("ffn_gate") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
         int i_layer = info.first, n_layer = info.second;
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_gate_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->ffn_gate_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_gate_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_gate_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
@@ -16206,9 +16192,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     else if (name.find("ffn_up") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
         int i_layer = info.first, n_layer = info.second;
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_up_type < GGML_TYPE_COUNT) {
-            new_type = qs.params->ffn_up_type;
-        }
+        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_up_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_up_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
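
Taken together, every branch follows the same precedence: an explicitly requested per-tensor type under LLAMA_FTYPE_CQS wins, and only when no type is requested does the ftype-based heuristic chain run. A self-contained sketch of that precedence, assuming (as the < GGML_TYPE_COUNT checks suggest) that a value of GGML_TYPE_COUNT or above means "not specified"; the enums below are hypothetical stand-ins, not the real ggml definitions:

    #include <cstdio>

    // Hypothetical stand-ins for the ggml/llama enums referenced in the diff.
    enum ggml_type   { GGML_TYPE_Q4_K, GGML_TYPE_Q6_K, GGML_TYPE_Q8_0, GGML_TYPE_COUNT };
    enum llama_ftype { LLAMA_FTYPE_MOSTLY_Q4_K_M, LLAMA_FTYPE_CQS };

    // Mirrors the precedence in llama_tensor_get_type: an explicit per-tensor
    // request (any valid type below GGML_TYPE_COUNT) wins over the heuristic default.
    static ggml_type pick_type(llama_ftype ftype, ggml_type requested, ggml_type heuristic_default) {
        if (ftype == LLAMA_FTYPE_CQS && requested < GGML_TYPE_COUNT) return requested;
        return heuristic_default;
    }

    int main() {
        // attn_v.weight with an explicit Q8_0 request under the custom scheme:
        printf("%d\n", pick_type(LLAMA_FTYPE_CQS, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K));   // prints 2 (Q8_0)
        // the same tensor with nothing requested (sentinel GGML_TYPE_COUNT):
        printf("%d\n", pick_type(LLAMA_FTYPE_CQS, GGML_TYPE_COUNT, GGML_TYPE_Q6_K));  // prints 1 (Q6_K)
        return 0;
    }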
