
Commit 9763c9a

fix: Fix the input to the shared experts
I had misread that the shared experts take their input from _before_ the standard MoE layer, and was feeding the output of the MoE to the shared experts instead.

Branch: GraniteMoEShared

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 5a98b48 commit 9763c9a
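For context, below is a minimal standalone C++ sketch of the corrected data flow, using plain vectors in place of ggml tensors. The names routed_moe, shared_experts, and granite_moe_shared_block are hypothetical stand-ins for build_moe_ffn, the shared-expert build_ffn call, and the surrounding graph code; they are not llama.cpp APIs. The point is simply that both branches consume the same post-norm hidden state and their outputs are summed.

#include <cstdio>
#include <vector>

using Tensor = std::vector<float>;   // stand-in for ggml_tensor

// Hypothetical stand-in for the routed mixture-of-experts (build_moe_ffn).
static Tensor routed_moe(const Tensor & x) { return x; }

// Hypothetical stand-in for the shared-expert FFN (build_ffn on the *_shexp weights).
static Tensor shared_experts(const Tensor & x) { return x; }

// Corrected flow: both branches consume the same post-norm input `cur`;
// previously the shared experts were (incorrectly) fed the MoE output.
static Tensor granite_moe_shared_block(const Tensor & cur, bool has_shared_experts) {
    Tensor moe_out = routed_moe(cur);            // routed experts on the input
    if (!has_shared_experts) {
        return moe_out;                          // plain MoE layer: nothing to add
    }
    Tensor shexp_out = shared_experts(cur);      // shared experts on the SAME input
    Tensor out(cur.size());
    for (size_t i = 0; i < cur.size(); ++i) {    // elementwise sum, like ggml_add
        out[i] = moe_out[i] + shexp_out[i];
    }
    return out;
}

int main() {
    Tensor h = {0.1f, 0.2f, 0.3f};
    Tensor y = granite_moe_shared_block(h, /*has_shared_experts=*/true);
    printf("%f %f %f\n", y[0], y[1], y[2]);
    return 0;
}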

File tree

1 file changed: +5 −3 lines

src/llama-model.cpp

Lines changed: 5 additions & 3 deletions
@@ -4674,7 +4674,7 @@ struct llm_build_llama : public llm_graph_context {
                         LLM_NORM_RMS, il);
                 cb(cur, "ffn_norm", il);
 
-                cur = build_moe_ffn(cur,
+                ggml_tensor * moe_out = build_moe_ffn(cur,
                         model.layers[il].ffn_gate_inp,
                         model.layers[il].ffn_up_exps,
                         model.layers[il].ffn_gate_exps,
@@ -4685,7 +4685,7 @@ struct llm_build_llama : public llm_graph_context {
                         false, 0.0,
                         LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX,
                         il);
-                cb(cur, "ffn_moe_out", il);
+                cb(moe_out, "ffn_moe_out", il);
 
                 // For Granite MoE Shared
                 if (model.arch == LLM_ARCH_GRANITE_MOE_SHARED) {
@@ -4697,8 +4697,10 @@ struct llm_build_llama : public llm_graph_context {
                             LLM_FFN_SILU, LLM_FFN_PAR, il);
                     cb(ffn_shexp, "ffn_shexp", il);
 
-                    cur = ggml_add(ctx0, cur, ffn_shexp);
+                    cur = ggml_add(ctx0, moe_out, ffn_shexp);
                     cb(cur, "ffn_out", il);
+                } else {
+                    cur = moe_out;
                 }
             }
 