
Commit 3ae4504

Merge pull request #21 from Thireus/glm-4.5-testing
Revert to original GGML_ASSERT
2 parents: cae058f + 323e7f3

1 file changed: +2 -12 lines

1 file changed

+2
-12
lines changed

src/llama.cpp (2 additions, 12 deletions)
@@ -20712,18 +20712,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     // - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
     //
-    //GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
-    // allow any count for GLM4-MoE, but still enforce for all others
-    if (model.arch != LLM_ARCH_GLM4_MOE) {
-        GGML_ASSERT(
-            qs.n_attention_wv == 0
-            || qs.n_attention_wv == (int)model.hparams.n_layer
-            || qs.n_attention_wv == 3 * (int)model.hparams.n_layer
-            || model.arch == LLM_ARCH_DECI
-            && "n_attention_wv is unexpected"
-        );
-    }
-
+    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
+
     size_t total_size_org = 0;
     size_t total_size_new = 0;
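
A side note on the restored one-liner (an observation, not part of the commit message): GGML_ASSERT(cond && "message") works because a string literal decays to a non-null pointer, so ANDing it onto the condition never changes the truth value while still surfacing the message when the assertion fires. The multi-line variant removed above dropped the outer parentheses around the condition, and since && binds tighter than ||, its message attached only to the last operand; the truth value was the same, but the grouping no longer matched the visual layout, and compilers typically warn about it. A minimal standalone sketch (hypothetical names, plain assert() instead of GGML_ASSERT):

    #include <cassert>

    int main() {
        // Hypothetical stand-ins for qs.n_attention_wv / model.hparams.n_layer.
        const int n_attention_wv = 0;
        const int n_layer        = 32;

        // Restored form: the whole condition is parenthesized before && "msg".
        // The string literal is a non-null pointer (always truthy), so it
        // cannot flip the result; it only appears in the failure output.
        assert((n_attention_wv == 0 || n_attention_wv == n_layer)
               && "n_attention_wv is unexpected");

        // Removed form, schematically: without the outer parentheses, && binds
        // tighter than ||, so this parses as
        //     a || (b && "msg")
        // Same truth value, but the grouping contradicts the visual layout;
        // GCC/Clang flag it (e.g. -Wparentheses: "suggest parentheses around
        // '&&' within '||'").
        assert(n_attention_wv == 0
               || n_attention_wv == n_layer
               && "n_attention_wv is unexpected");

        return 0;
    }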
