Skip to content

Commit c56a513

Browse files
committed
feat: support GLM 4.5 family of models (screenshot artifact in scraped title removed)
1 parent 3a4ac7e commit c56a513

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

src/llama-model.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13557,16 +13557,13 @@ struct llm_build_glm4_moe : public llm_graph_context {
1355713557
inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids);
1355813558
}
1355913559

13560-
// Post-attention norm
13561-
cur = build_norm(cur,
13562-
model.layers[il].attn_post_norm,
13563-
NULL,
13564-
LLM_NORM_RMS, il);
13565-
cb(cur, "post_attn_norm", il);
13566-
1356713560
ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
1356813561
cb(ffn_inp, "ffn_inp", il);
1356913562

13563+
// Post-attention norm
13564+
cur = build_norm(ffn_inp, model.layers[il].attn_post_norm, NULL, LLM_NORM_RMS, il);
13565+
cb(cur, "post_attn_norm", il);
13566+
1357013567
// Check if this is a dense layer (n_layer_dense_lead=1, so layer 0 is dense)
1357113568
if (static_cast<uint32_t>(il) < hparams.n_layer_dense_lead) {
1357213569
// Dense FFN layer
@@ -13582,9 +13579,6 @@ struct llm_build_glm4_moe : public llm_graph_context {
1358213579
const int64_t n_expert = hparams.n_expert;
1358313580
const int64_t n_expert_used = hparams.n_expert_used;
1358413581

13585-
// Save original input for shared expert
13586-
ggml_tensor * residuals = cur;
13587-
1358813582
// Process routed experts using existing MoE infrastructure
1358913583
ggml_tensor * routed_out = build_moe_ffn(cur,
1359013584
model.layers[il].ffn_gate_inp,
@@ -13600,7 +13594,7 @@ struct llm_build_glm4_moe : public llm_graph_context {
1360013594
cb(routed_out, "ffn_moe_out", il);
1360113595

1360213596
// Process shared expert on original input
13603-
ggml_tensor * shared_out = build_ffn(residuals,
13597+
ggml_tensor * shared_out = build_ffn(cur,
1360413598
model.layers[il].ffn_up_shexp, NULL, NULL,
1360513599
model.layers[il].ffn_gate_shexp, NULL, NULL,
1360613600
model.layers[il].ffn_down_shexp, NULL, NULL,

0 commit comments

Comments
 (0)