@@ -15599,8 +15599,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             new_type = GGML_TYPE_Q5_K;
         }
     } else if (name.find("attn_q.weight") != std::string::npos) {
-        if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) new_type = GGML_TYPE_IQ3_XXS;
-        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ2_S;
+        if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) new_type = GGML_TYPE_IQ2_S;
+        else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) new_type = GGML_TYPE_IQ3_XXS;
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
                  ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M ||
                  ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL) {
@@ -15715,9 +15715,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1571515715 else if (name.find("ffn_gate") != std::string::npos) {
1571615716 auto info = layer_info(qs.i_ffn_gate, qs.n_ffn_gate, name.c_str());
1571715717 int i_layer = info.first, n_layer = info.second;
15718- if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K) && (i_layer < n_layer/8)) new_type = GGML_TYPE_Q3_K;
15719- else if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
15720- else if ((ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) && (i_layer < n_layer/8)) new_type = GGML_TYPE_Q4_K;
15718+ if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
1572115719 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
1572215720 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
1572315721 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XS;
@@ -15731,9 +15729,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1573115729 else if (name.find("ffn_up") != std::string::npos) {
1573215730 auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
1573315731 int i_layer = info.first, n_layer = info.second;
15734- if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K) && (i_layer < n_layer/8)) new_type = GGML_TYPE_Q3_K;
15735- else if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
15736- else if ((ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) && (i_layer < n_layer/8)) new_type = GGML_TYPE_Q4_K;
15732+ if ((ftype == LLAMA_FTYPE_MOSTLY_Q2_K_L) && (use_more_bits(i_layer, n_layer))) new_type = GGML_TYPE_Q3_K;
1573715733 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ1_S) && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
1573815734 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XXS;
1573915735 else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS) && (i_layer < n_layer/8)) new_type = GGML_TYPE_IQ2_XS;
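Note: in both the ffn_gate and ffn_up hunks above, only the Q2_K_L branch gated by use_more_bits(i_layer, n_layer) survives; the plain i_layer < n_layer/8 upgrades for Q2_K and Q3_K_L are dropped. For reference, upstream llama.cpp defines the helper roughly as below; a sketch for orientation only, since this fork may define it differently:

    // Upstream llama.cpp heuristic: spend extra bits on the first and last
    // eighth of the layers, plus every third layer in between, rather than
    // only the first eighth.
    static bool use_more_bits(int i_layer, int n_layers) {
        return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
    }

Compared with the removed i_layer < n_layer/8 test, this gate also upgrades the tail layers and roughly a third of the middle layers to Q3_K.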
@@ -16212,7 +16208,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             else if (new_type == GGML_TYPE_Q4_0_4_4 || new_type == GGML_TYPE_Q4_0_4_8) chunk_size_multiplier = 4;
         }

-        LLAMA_LOG_INFO("converting to %s .. ", ggml_type_name(new_type));
+        LLAMA_LOG_INFO("converts to %s .. ", ggml_type_name(new_type));
         fflush(stdout);

         if (work.size() < (size_t)nelements * 4) {