Commit 97c0ae0

Disable necessity of CQS type for specifying tensor quant.
1 parent 7f123a6 commit 97c0ae0

File tree

1 file changed: +10 −10 lines


src/llama.cpp

Lines changed: 10 additions & 10 deletions
@@ -15908,7 +15908,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL) new_type = GGML_TYPE_IQ4_XS;
         }
     } else if (name.find("attn_v.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_v_type < GGML_TYPE_COUNT) new_type = qs.params->attn_v_type;
+        if (qs.params->attn_v_type < GGML_TYPE_COUNT) new_type = qs.params->attn_v_type;
         else if (qs.model.hparams.n_expert >= 4) {
             // for the 8-expert model, bumping this to Q8_0 trades just ~128MB
             // TODO: explore better strategies
@@ -15972,7 +15972,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         ++qs.i_attention_wv;
     } else if (name.find("attn_k.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_k_type < GGML_TYPE_COUNT) new_type = qs.params->attn_k_type;
+        if (qs.params->attn_k_type < GGML_TYPE_COUNT) new_type = qs.params->attn_k_type;
         else if (qs.model.hparams.n_expert >= 4) {
             // for the 8-expert model, bumping this to Q8_0 trades just ~128MB
             // TODO: explore better strategies
@@ -16026,7 +16026,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         ++qs.i_attention_wk;
     } else if (name.find("attn_q.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_q_type < GGML_TYPE_COUNT) new_type = qs.params->attn_q_type;
+        if (qs.params->attn_q_type < GGML_TYPE_COUNT) new_type = qs.params->attn_q_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ2_S;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
@@ -16049,7 +16049,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     } else if (name.find("ffn_down") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_down, qs.n_ffn_down, name.c_str());
         int i_layer = info.first, n_layer = info.second;
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_down_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_down_type;
+        if (qs.params->ffn_down_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_down_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_XS && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
@@ -16111,7 +16111,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         }
         ++qs.i_ffn_down;
     } else if (name.find("attn_output.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_output_type < GGML_TYPE_COUNT) new_type = qs.params->attn_output_type;
+        if (qs.params->attn_output_type < GGML_TYPE_COUNT) new_type = qs.params->attn_output_type;
         else if (arch != LLM_ARCH_FALCON) {
             if (qs.model.hparams.n_expert >= 4) {
                 if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS ||
@@ -16150,7 +16150,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         ++qs.i_attention_wo;
     }
     else if (name.find("attn_qkv.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_qkv_type < GGML_TYPE_COUNT) new_type = qs.params->attn_qkv_type;
+        if (qs.params->attn_qkv_type < GGML_TYPE_COUNT) new_type = qs.params->attn_qkv_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) {
             new_type = GGML_TYPE_Q4_K;
         }
@@ -16176,7 +16176,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     else if (name.find("ffn_gate") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
         int i_layer = info.first, n_layer = info.second;
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_gate_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_gate_type;
+        if (qs.params->ffn_gate_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_gate_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
@@ -16192,7 +16192,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
     else if (name.find("ffn_up") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
         int i_layer = info.first, n_layer = info.second;
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_up_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_up_type;
+        if (qs.params->ffn_up_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_up_type;
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
@@ -16206,10 +16206,10 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         ++qs.i_ffn_up;
     }
     else if (name.find("attn_norm.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->attn_norm_type < GGML_TYPE_COUNT) new_type = qs.params->attn_norm_type;
+        if (qs.params->attn_norm_type < GGML_TYPE_COUNT) new_type = qs.params->attn_norm_type;
     }
     else if (name.find("ffn_norm") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_CQS && qs.params->ffn_norm_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_norm_type;
+        if (qs.params->ffn_norm_type < GGML_TYPE_COUNT) new_type = qs.params->ffn_norm_type;
     }
 
     // if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K) new_type = GGML_TYPE_Q3_K;
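
Net effect of the change: the per-tensor type overrides are now honored with any base ftype, whereas previously they applied only when ftype was LLAMA_FTYPE_CQS; fields left at GGML_TYPE_COUNT still fall through to the type chosen by the base ftype. Below is a minimal caller-side sketch, assuming this fork's llama_model_quantize_params exposes the per-tensor fields referenced in the diff (attn_v_type, ffn_down_type, ...); those field names and their GGML_TYPE_COUNT sentinel are inferred from the qs.params-> checks above and are not part of upstream llama.cpp. File names are placeholders.

// Hypothetical usage sketch: per-tensor overrides combined with a regular ftype.
#include "llama.h"

int main() {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype         = LLAMA_FTYPE_MOSTLY_Q4_K_M; // any base ftype, no longer restricted to LLAMA_FTYPE_CQS
    params.attn_v_type   = GGML_TYPE_Q6_K;            // per-tensor override (fork-specific field)
    params.ffn_down_type = GGML_TYPE_Q5_K;            // per-tensor override (fork-specific field)
    // Fields left at GGML_TYPE_COUNT keep the type selected by the base ftype.
    return (int) llama_model_quantize("model-f16.gguf", "model-custom.gguf", &params);
}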
