
Commit d97ebef

Update llama.cpp
Restore original GGML_ASSERT
1 parent a3641e6 commit d97ebef


1 file changed (+1, -11 lines)


src/llama.cpp

Lines changed: 1 addition & 11 deletions
@@ -20712,17 +20712,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     // - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
     //
-    //GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
-    // allow any count for GLM4-MoE, but still enforce for all others
-    if (model.arch != LLM_ARCH_GLM4_MOE) {
-        GGML_ASSERT(
-            qs.n_attention_wv == 0
-            || qs.n_attention_wv == (int)model.hparams.n_layer
-            || qs.n_attention_wv == 3 * (int)model.hparams.n_layer
-            || model.arch == LLM_ARCH_DECI
-            && "n_attention_wv is unexpected"
-        );
-    }
+    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
 
     size_t total_size_org = 0;
     size_t total_size_new = 0;
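
An aside on the restored one-line form, not part of the commit itself: the trailing && "n_attention_wv is unexpected" attaches a human-readable message to the assertion. A string literal is always truthy, so it never changes the result of the check; its only job is to appear in the failure output. Parenthesizing the whole condition group, as the restored line does, also makes the grouping explicit instead of relying on && binding tighter than ||. A minimal standalone sketch of the idiom using the standard assert macro (illustrative values, not taken from llama.cpp):

#include <cassert>

int main() {
    // Illustrative values only; a real model would supply these counts.
    int n_attention_wv = 8;
    int n_layer        = 8;

    // `cond && "message"` has the same truth value as `cond`, because the
    // string literal is a non-null pointer; when the assert fails, the full
    // stringified expression, message included, is printed.
    assert((n_attention_wv == 0
            || n_attention_wv == n_layer
            || n_attention_wv == 3 * n_layer)
           && "n_attention_wv is unexpected");

    return 0;
}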
