
Commit ab3183e

feat: support GLM 4.5 family of models

1 parent: 58898b5

src/llama-model.cpp

Lines changed: 14 additions & 9 deletions
@@ -13580,6 +13580,15 @@ struct llm_build_glm4_moe : public llm_graph_context {
         const int64_t n_expert = hparams.n_expert;
         const int64_t n_expert_used = hparams.n_expert_used;
 
+        // Compute shared expert output first
+        ggml_tensor * cur_shexp = build_ffn(cur,
+                model.layers[il].ffn_up_shexp, NULL, NULL,
+                model.layers[il].ffn_gate_shexp, NULL, NULL,
+                model.layers[il].ffn_down_shexp, NULL, NULL,
+                NULL,
+                LLM_FFN_SILU, LLM_FFN_PAR, il);
+        cb(cur_shexp, "ffn_shexp_out", il);
+
         ggml_tensor * moe_out =
             build_moe_ffn(cur,
                 model.layers[il].ffn_gate_inp,
@@ -13594,16 +13603,12 @@ struct llm_build_glm4_moe : public llm_graph_context {
                 il);
         cb(moe_out, "ffn_moe_out", il);
 
-        // Add shared expert computation
-        ggml_tensor * cur_shexp = build_ffn(cur,
-                model.layers[il].ffn_up_shexp, NULL, NULL,
-                model.layers[il].ffn_gate_shexp, NULL, NULL,
-                model.layers[il].ffn_down_shexp, NULL, NULL,
-                NULL,
-                LLM_FFN_SILU, LLM_FFN_PAR, il);
-        cb(cur_shexp, "ffn_shexp_out", il);
+        // For GLM4_MOE: Shared expert is always active alongside routed experts
+        // Apply proper scaling to shared expert to match architectural design
+        cur_shexp = ggml_scale(ctx0, cur_shexp, hparams.expert_weights_scale);
+        cb(cur_shexp, "ffn_shexp_scaled", il);
 
-        // Combine MoE output with shared expert output
+        // Combine with proper mathematical balance
         cur = ggml_add(ctx0, moe_out, cur_shexp);
         cb(cur, "ffn_out", il);
     }
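In effect, each GLM4_MOE FFN block now computes ffn_out = moe_out + expert_weights_scale * shexp_out: the routed top-k expert mixture produced by build_moe_ffn(), plus an always-active shared expert FFN evaluated on the same layer input and scaled before the sum. The standalone sketch below illustrates that combination on plain vectors; it is an assumption-laden toy, not llama.cpp code. The expert_ffn() helper, the hard-coded router logits, and all concrete values are hypothetical stand-ins for build_ffn()/build_moe_ffn().

// Standalone sketch (assumptions, not llama.cpp code) of the GLM4_MOE FFN
// structure wired up in this commit: a top-k mixture of routed experts plus
// a shared expert that is always active, scaled by expert_weights_scale
// before the final sum.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

using vec = std::vector<float>;

// Toy stand-in for one expert FFN; the real model uses a gated SiLU MLP
// (build_ffn with LLM_FFN_SILU / LLM_FFN_PAR).
static vec expert_ffn(const vec & x, float bias) {
    vec y(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        const float h = x[i] + bias;
        y[i] = h / (1.0f + std::exp(-h)); // silu(h)
    }
    return y;
}

int main() {
    const int   n_expert             = 4;    // hparams.n_expert
    const int   n_expert_used        = 2;    // hparams.n_expert_used (top-k)
    const float expert_weights_scale = 1.0f; // hparams.expert_weights_scale

    const vec x = {0.5f, -1.0f, 0.25f}; // one token's hidden state

    // Router: logits -> softmax -> routing probabilities per expert.
    const vec logits = {0.2f, 1.5f, -0.3f, 0.9f};
    vec probs(n_expert);
    float denom = 0.0f;
    for (int e = 0; e < n_expert; ++e) { probs[e] = std::exp(logits[e]); denom += probs[e]; }
    for (int e = 0; e < n_expert; ++e) { probs[e] /= denom; }

    // Select the top-k routed experts by probability.
    std::vector<int> order(n_expert);
    for (int e = 0; e < n_expert; ++e) order[e] = e;
    std::sort(order.begin(), order.end(), [&](int a, int b) { return probs[a] > probs[b]; });

    // Weighted sum over the selected experts (what build_moe_ffn produces).
    vec moe_out(x.size(), 0.0f);
    for (int k = 0; k < n_expert_used; ++k) {
        const int e = order[k];
        const vec y = expert_ffn(x, 0.1f * (float) e); // per-expert toy weights
        for (size_t i = 0; i < x.size(); ++i) moe_out[i] += probs[e] * y[i];
    }

    // Shared expert: always runs on the same input, then is scaled and added,
    // mirroring ggml_scale() + ggml_add() in the diff above.
    const vec shexp = expert_ffn(x, -0.05f);
    vec ffn_out(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        ffn_out[i] = moe_out[i] + expert_weights_scale * shexp[i];
    }

    for (size_t i = 0; i < x.size(); ++i) {
        printf("ffn_out[%zu] = %f\n", i, ffn_out[i]);
    }
    return 0;
}

With expert_weights_scale set to 1.0 the scale is a no-op and the result is a plain sum; the hyperparameter only sets the relative weight of the always-on shared path against the routed mixture.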
