
Commit 999c07a

feat: support GLM 4.5 family of models
1 parent b61fc91 commit 999c07a

2 files changed: +16, -1 lines changed


convert_hf_to_gguf.py

Lines changed: 14 additions & 0 deletions
```diff
@@ -6616,6 +6616,20 @@ def set_vocab(self):
             "bos", tokenizer.get_added_vocab()["<|endoftext|>"]
         )
         special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"])  # 151338
+
+        # Fix chat template syntax error in GLM-4.5 models
+        if special_vocab.chat_template and isinstance(special_vocab.chat_template, str):
+            # Fix multiple syntax issues in GLM-4.5 chat template
+            template = special_vocab.chat_template
+            # Fix nested double quotes issue
+            template = template.replace('endswith("/nothink")', "endswith('/nothink')")
+            # Fix any other potential parentheses/tuple issues
+            template = template.replace(
+                "not visible_text(m.content).endswith('/nothink'))",
+                "not visible_text(m.content).endswith('/nothink')"
+            )
+            special_vocab.chat_template = template
+
         special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):
```
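
The two replace() calls are easier to follow against a concrete input. Below is a minimal standalone sketch using a made-up template fragment (not the actual GLM-4.5 chat template); only the search/replace strings are taken from the hunk above.

```python
# Hypothetical fragment with both problems the converter targets: nested double
# quotes inside a Jinja string literal, and a stray closing parenthesis.
template = (
    '{%- if not visible_text(m.content).endswith("/nothink")) -%}'
    "{{- '/nothink' -}}"
    "{%- endif -%}"
)

# 1) Rewrite the nested double quotes so the string literal parses.
template = template.replace('endswith("/nothink")', "endswith('/nothink')")

# 2) Drop the stray closing parenthesis left in the condition.
template = template.replace(
    "not visible_text(m.content).endswith('/nothink'))",
    "not visible_text(m.content).endswith('/nothink')"
)

print(template)
# {%- if not visible_text(m.content).endswith('/nothink') -%}{{- '/nothink' -}}{%- endif -%}
```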

src/llama-model.cpp

Lines changed: 2 additions & 1 deletion
```diff
@@ -1430,6 +1430,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared, 0);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead, 0);
                 ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale);
+                ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false);

                 // Expert gating function (GLM4_MOE uses sigmoid)
                 ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false);
@@ -13587,7 +13588,7 @@ struct llm_build_glm4_moe : public llm_graph_context {
                         model.layers[il].ffn_down_exps,
                         model.layers[il].ffn_exp_probs_b,
                         n_expert, n_expert_used,
-                        LLM_FFN_SILU, true,
+                        LLM_FFN_SILU, hparams.expert_weights_norm,
                         true, hparams.expert_weights_scale,
                         (llama_expert_gating_func_type) hparams.expert_gating_func,
                         il);
```
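
The second hunk stops hard-coding expert weight normalization to true for GLM4_MOE and instead passes the value read from GGUF metadata (an optional key, defaulting to the existing behavior when absent). As a rough illustration of what that boolean toggles, assuming it controls whether the selected top-k expert gate weights are renormalized to sum to 1, here is a plain-Python routing sketch with made-up values; it is not llama.cpp code.

```python
import math

def route(logits, n_expert_used, norm_weights, weight_scale):
    # Sigmoid gating, per the "GLM4_MOE uses sigmoid" comment in the hunk above.
    probs = [1.0 / (1.0 + math.exp(-x)) for x in logits]
    # Keep the top-k experts by gate probability.
    top = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)[:n_expert_used]
    weights = [probs[i] for i in top]
    # This is the step the new flag switches on or off.
    if norm_weights:
        total = sum(weights)
        weights = [w / total for w in weights]
    weights = [w * weight_scale for w in weights]
    return list(zip(top, weights))

# Same router logits, with and without renormalization of the top-2 weights.
print(route([0.2, 1.5, -0.3, 0.8], n_expert_used=2, norm_weights=True,  weight_scale=1.0))
print(route([0.2, 1.5, -0.3, 0.8], n_expert_used=2, norm_weights=False, weight_scale=1.0))
```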
