
Commit d97ebef

Update llama.cpp
Restore original GGML_ASSERT
1 parent a3641e6 commit d97ebef


1 file changed (+1, -11 lines)


src/llama.cpp

Lines changed: 1 addition & 11 deletions
@@ -20712,17 +20712,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     // - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
     //
-    //GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
-    // allow any count for GLM4-MoE, but still enforce for all others
-    if (model.arch != LLM_ARCH_GLM4_MOE) {
-        GGML_ASSERT(
-            qs.n_attention_wv == 0
-            || qs.n_attention_wv == (int)model.hparams.n_layer
-            || qs.n_attention_wv == 3 * (int)model.hparams.n_layer
-            || model.arch == LLM_ARCH_DECI
-            && "n_attention_wv is unexpected"
-        );
-    }
+    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer || model.arch == LLM_ARCH_DECI) && "n_attention_wv is unexpected");
 
     size_t total_size_org = 0;
     size_t total_size_new = 0;
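
An aside on the restored one-line form, not part of the commit itself: the trailing && "n_attention_wv is unexpected" attaches a human-readable message to the assertion. A string literal is always truthy, so it never changes the result of the check; its only job is to appear in the failure output. Parenthesizing the whole condition group, as the restored line does, also makes the grouping explicit instead of relying on && binding tighter than ||. A minimal standalone sketch of the idiom using the standard assert macro (illustrative values, not taken from llama.cpp):

#include <cassert>

int main() {
    // Illustrative values only; a real model would supply these counts.
    int n_attention_wv = 8;
    int n_layer        = 8;

    // `cond && "message"` has the same truth value as `cond`, because the
    // string literal is a non-null pointer; when the assert fails, the full
    // stringified expression, message included, is printed.
    assert((n_attention_wv == 0
            || n_attention_wv == n_layer
            || n_attention_wv == 3 * n_layer)
           && "n_attention_wv is unexpected");

    return 0;
}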
