
Commit 3ae4504

Merge pull request #21 from Thireus/glm-4.5-testing
Revert to original GGML_ASSERT
2 parents: cae058f + 323e7f3

1 file changed: +2 -12 lines

1 file changed

+2
-12
lines changed

src/llama.cpp (2 additions, 12 deletions)
@@ -20712,18 +20712,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     // - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
     //
-    //GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
-    // allow any count for GLM4-MoE, but still enforce for all others
-    if (model.arch != LLM_ARCH_GLM4_MOE) {
-        GGML_ASSERT(
-            qs.n_attention_wv == 0
-            || qs.n_attention_wv == (int)model.hparams.n_layer
-            || qs.n_attention_wv == 3 * (int)model.hparams.n_layer
-            || model.arch == LLM_ARCH_DECI
-            && "n_attention_wv is unexpected"
-        );
-    }
-
+    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
+
     size_t total_size_org = 0;
     size_t total_size_new = 0;
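
A side note on the restored one-liner (an observation, not part of the commit message): GGML_ASSERT(cond && "message") works because a string literal decays to a non-null pointer, so ANDing it onto the condition never changes the truth value while still surfacing the message when the assertion fires. The multi-line variant removed above dropped the outer parentheses around the condition, and since && binds tighter than ||, its message attached only to the last operand; the truth value was the same, but the grouping no longer matched the visual layout, and compilers typically warn about it. A minimal standalone sketch (hypothetical names, plain assert() instead of GGML_ASSERT):

    #include <cassert>

    int main() {
        // Hypothetical stand-ins for qs.n_attention_wv / model.hparams.n_layer.
        const int n_attention_wv = 0;
        const int n_layer        = 32;

        // Restored form: the whole condition is parenthesized before && "msg".
        // The string literal is a non-null pointer (always truthy), so it
        // cannot flip the result; it only appears in the failure output.
        assert((n_attention_wv == 0 || n_attention_wv == n_layer)
               && "n_attention_wv is unexpected");

        // Removed form, schematically: without the outer parentheses, && binds
        // tighter than ||, so this parses as
        //     a || (b && "msg")
        // Same truth value, but the grouping contradicts the visual layout;
        // GCC/Clang flag it (e.g. -Wparentheses: "suggest parentheses around
        // '&&' within '||'").
        assert(n_attention_wv == 0
               || n_attention_wv == n_layer
               && "n_attention_wv is unexpected");

        return 0;
    }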
