
Commit 999c07a

feat: support GLM 4.5 family of models
1 parent b61fc91 commit 999c07a

2 files changed: +16, -1 lines changed


convert_hf_to_gguf.py

Lines changed: 14 additions & 0 deletions
```diff
@@ -6616,6 +6616,20 @@ def set_vocab(self):
             "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
         )
         special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"])  # 151338
+
+        # Fix chat template syntax error in GLM-4.5 models
+        if special_vocab.chat_template and isinstance(special_vocab.chat_template, str):
+            # Fix multiple syntax issues in GLM-4.5 chat template
+            template = special_vocab.chat_template
+            # Fix nested double quotes issue
+            template = template.replace('endswith("/nothink")', "endswith('/nothink')")
+            # Fix any other potential parentheses/tuple issues
+            template = template.replace(
+                "not visible_text(m.content).endswith('/nothink'))",
+                "not visible_text(m.content).endswith('/nothink')"
+            )
+            special_vocab.chat_template = template
+
         special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):
```
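
The two replace() calls are easier to follow against a concrete input. Below is a minimal standalone sketch using a made-up template fragment (not the actual GLM-4.5 chat template); only the search/replace strings are taken from the hunk above.

```python
# Hypothetical fragment with both problems the converter targets: nested double
# quotes inside a Jinja string literal, and a stray closing parenthesis.
template = (
    '{%- if not visible_text(m.content).endswith("/nothink")) -%}'
    "{{- '/nothink' -}}"
    "{%- endif -%}"
)

# 1) Rewrite the nested double quotes so the string literal parses.
template = template.replace('endswith("/nothink")', "endswith('/nothink')")

# 2) Drop the stray closing parenthesis left in the condition.
template = template.replace(
    "not visible_text(m.content).endswith('/nothink'))",
    "not visible_text(m.content).endswith('/nothink')"
)

print(template)
# {%- if not visible_text(m.content).endswith('/nothink') -%}{{- '/nothink' -}}{%- endif -%}
```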

src/llama-model.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -1430,6 +1430,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared, 0);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead, 0);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+                ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);

                 // Expert gating function (GLM4_MOE uses sigmoid)
                 ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
@@ -13587,7 +13588,7 @@ struct llm_build_glm4_moe : public llm_graph_context {
                         model.layers[il].ffn_down_exps,
                         model.layers[il].ffn_exp_probs_b,
                         n_expert, n_expert_used,
-                        LLM_FFN_SILU, true,
+                        LLM_FFN_SILU, hparams.expert_weights_norm,
                         true, hparams.expert_weights_scale,
                         (llama_expert_gating_func_type) hparams.expert_gating_func,
                         il);
```
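
The second hunk stops hard-coding expert weight normalization to true for GLM4_MOE and instead passes the value read from GGUF metadata (an optional key, defaulting to the existing behavior when absent). As a rough illustration of what that boolean toggles, assuming it controls whether the selected top-k expert gate weights are renormalized to sum to 1, here is a plain-Python routing sketch with made-up values; it is not llama.cpp code.

```python
import math

def route(logits, n_expert_used, norm_weights, weight_scale):
    # Sigmoid gating, per the "GLM4_MOE uses sigmoid" comment in the hunk above.
    probs = [1.0 / (1.0 + math.exp(-x)) for x in logits]
    # Keep the top-k experts by gate probability.
    top = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)[:n_expert_used]
    weights = [probs[i] for i in top]
    # This is the step the new flag switches on or off.
    if norm_weights:
        total = sum(weights)
        weights = [w / total for w in weights]
    weights = [w * weight_scale for w in weights]
    return list(zip(top, weights))

# Same router logits, with and without renormalization of the top-2 weights.
print(route([0.2, 1.5, -0.3, 0.8], n_expert_used=2, norm_weights=True,  weight_scale=1.0))
print(route([0.2, 1.5, -0.3, 0.8], n_expert_used=2, norm_weights=False, weight_scale=1.0))
```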
