Commit 0edf732

feat: support GLM 4.5 family of models

1 parent c755038

File tree

1 file changed: +2 -1 lines

src/llama-model.cpp

Lines changed: 2 additions & 1 deletion
@@ -4418,6 +4418,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), { n_embd }, 0);
 
         // Check if this layer uses MoE or dense FFN based on n_layer_dense_lead
+        // GLM 4.5 uses hybrid architecture: layer 0 is dense, layers 1+ are MoE
         const bool use_moe =
             (hparams.n_expert > 0) && (static_cast<uint32_t>(i) >= hparams.n_layer_dense_lead);
 
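
For context, the predicate in this hunk is what produces the hybrid layout the new comment describes: with n_layer_dense_lead = 1, only layer 0 falls below the threshold and keeps a dense FFN. A minimal standalone sketch of the same check (illustrative values and a hypothetical main, not llama.cpp code):

#include <cstdint>
#include <cstdio>

int main() {
    // Illustrative hyperparameters, assuming GLM 4.5's layout: one leading dense layer.
    const uint32_t n_layer            = 4;
    const uint32_t n_expert           = 8;
    const uint32_t n_layer_dense_lead = 1;

    for (uint32_t i = 0; i < n_layer; ++i) {
        // Same predicate as in the diff: MoE only when experts exist and the
        // layer index is at or past the dense lead.
        const bool use_moe = (n_expert > 0) && (i >= n_layer_dense_lead);
        std::printf("layer %u -> %s FFN\n", i, use_moe ? "MoE" : "dense");
    }
    return 0;
}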

@@ -13586,7 +13587,7 @@ struct llm_build_glm4_moe : public llm_graph_context {
                     n_expert, n_expert_used,
                     LLM_FFN_SILU, true,
                     false, 0.0,
-                    LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID,
+                    (llama_expert_gating_func_type) hparams.expert_gating_func,
                     il);
             cb(moe_out, "ffn_moe_out", il);
 
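
The second hunk stops hardcoding sigmoid gating and instead honors whatever gating function the model's hyperparameters declare. A standalone sketch of what that switch controls, i.e. how raw router logits become expert weights (hypothetical enum and function names, not the llama.cpp API):

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Hypothetical names for illustration; the real enum in llama.cpp is
// llama_expert_gating_func_type, read above from hparams.expert_gating_func.
enum gating_func { GATING_SOFTMAX, GATING_SIGMOID };

static std::vector<float> gate(const std::vector<float> & logits, gating_func f) {
    std::vector<float> w(logits.size());
    if (f == GATING_SIGMOID) {
        // Sigmoid: each expert is scored independently in (0, 1). This is the
        // value the old code hardcoded for glm4_moe.
        for (size_t e = 0; e < logits.size(); ++e) {
            w[e] = 1.0f / (1.0f + std::exp(-logits[e]));
        }
    } else {
        // Softmax: expert weights compete and sum to 1.
        const float m = *std::max_element(logits.begin(), logits.end());
        float sum = 0.0f;
        for (size_t e = 0; e < logits.size(); ++e) {
            w[e] = std::exp(logits[e] - m);
            sum += w[e];
        }
        for (float & x : w) x /= sum;
    }
    return w;
}

int main() {
    const std::vector<float> logits = { 0.5f, -1.0f, 2.0f };
    for (float x : gate(logits, GATING_SIGMOID)) std::printf("%.3f ", x);
    std::printf("\n");
    return 0;
}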
