@@ -15470,7 +15470,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M && (i_layer < n_layer/8 ||
                     (qs.model.hparams.n_expert == 8 && use_more_bits(i_layer, n_layer)))) {
             new_type = GGML_TYPE_Q4_K;
-        }
+        }
         else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) {
             new_type = arch == LLM_ARCH_FALCON ? GGML_TYPE_Q4_K : GGML_TYPE_Q5_K;
         }
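For context: the use_more_bits() call in this hunk is the upstream llama.cpp heuristic that keeps the first and last eighth of the layers, plus every third layer of the middle section, at higher precision; here it only fires for 8-expert (Mixtral-style) models. A sketch of the helper as it appears upstream (reproduced from memory, so treat the exact form as an assumption; the signature matches this call site):

// True for layers that should keep more bits: the first eighth,
// the last eighth, and every third layer in between.
static bool use_more_bits(int i_layer, int n_layers) {
    return i_layer < n_layers/8 || i_layer >= 7*n_layers/8 || (i_layer - n_layers/8)%3 == 2;
}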
@@ -15516,7 +15516,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ) new_type = GGML_TYPE_Q4_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L ) new_type = GGML_TYPE_Q5_K;
                 else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M  ) new_type = GGML_TYPE_Q4_K;
-            }
+            }
         } else {
             if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q4_K;
         }
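The pattern in this hunk recurs throughout llama_tensor_get_type(): the requested file type (the overall quantization mix, a LLAMA_FTYPE_* value) is mapped to a per-tensor storage type (a GGML_TYPE_* value), bumping sensitive tensors one tier above the file's baseline. A minimal table-driven sketch of the same three upgrades, assuming the llama.cpp/ggml headers are in scope (upgrade_for_ftype is a hypothetical helper, not in the tree):

#include <unordered_map>

// Hypothetical: the three ftype -> tensor-type upgrades above as a lookup table.
static ggml_type upgrade_for_ftype(llama_ftype ftype, ggml_type fallback) {
    static const std::unordered_map<int, ggml_type> upgrades = {
        { LLAMA_FTYPE_MOSTLY_Q3_K_M, GGML_TYPE_Q4_K },
        { LLAMA_FTYPE_MOSTLY_Q3_K_L, GGML_TYPE_Q5_K },
        { LLAMA_FTYPE_MOSTLY_IQ3_M,  GGML_TYPE_Q4_K },
    };
    auto it = upgrades.find(ftype);
    return it != upgrades.end() ? it->second : fallback;
}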
@@ -15542,18 +15542,18 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
                 new_type = GGML_TYPE_IQ3_XXS;
             }
-        }
+        }
         ++qs.i_ffn_gate;
     }
     else if (name.find("ffn_up") != std::string::npos) {
         auto info = layer_info(qs.i_ffn_up, qs.n_ffn_up, name.c_str());
         int i_layer = info.first, n_layer = info.second;
         if (qs.params->ffn_up_type < GGML_TYPE_COUNT) {
             new_type = qs.params->ffn_up_type;
-        } else {
+        } else {
             if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS && (i_layer >= n_layer/8 && i_layer < 7*n_layer/8)) {
                 new_type = GGML_TYPE_IQ3_XXS;
             }
-            }
+            }
         }
         ++qs.i_ffn_up;
     }
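Two conventions in this hunk are worth noting. First, the qs.params->ffn_up_type < GGML_TYPE_COUNT check reads as a sentinel test: any value below GGML_TYPE_COUNT is presumably a user-forced type, and GGML_TYPE_COUNT itself means "no override". Second, the IQ3_XS rule demotes only the middle band of layers, i_layer >= n_layer/8 && i_layer < 7*n_layer/8 (the middle three quarters), to IQ3_XXS, leaving the outermost eighths at the larger type. A standalone sketch of the band test (hypothetical, for illustration only):

#include <cstdio>

// Mirrors the band test above: true for the middle three quarters of layers.
static bool in_middle_band(int i_layer, int n_layer) {
    return i_layer >= n_layer/8 && i_layer < 7*n_layer/8;
}

int main() {
    const int n_layer = 32; // e.g. a 7B-class model
    for (int i = 0; i < n_layer; ++i) {
        // With 32 layers, layers 4..27 report "yes" (demoted to IQ3_XXS);
        // layers 0..3 and 28..31 keep the larger type.
        printf("layer %2d -> IQ3_XXS? %s\n", i, in_middle_band(i, n_layer) ? "yes" : "no");
    }
    return 0;
}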