@@ -17103,12 +17103,9 @@ struct llm_build_smallthinker : public llm_graph_context{
1710317103 for (int il = 0; il < n_layer; ++il) {
1710417104 ggml_tensor * inpSA = inpL;
1710517105 ggml_tensor * probs = nullptr;
17106- bool is_moe = hparams.n_ff_exp == hparams.n_ff_arr[il];
1710717106
17108- if (is_moe) {
17109- probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
17110- cb(probs, "ffn_moe_logits", il);
17111- }
17107+ probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
17108+ cb(probs, "ffn_moe_logits", il);
1711217109
1711317110 // norm
1711417111 cur = build_norm(inpL,model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
@@ -17165,16 +17162,10 @@ struct llm_build_smallthinker : public llm_graph_context{
1716517162 cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
1716617163 cb(cur, "ffn_norm", il);
1716717164
17168- ggml_tensor * ffn_out = nullptr;
17169- if (is_moe) {
17170- ffn_out = build_moe_ffn_from_probs(cur, probs, model.layers[il].ffn_up_exps,
17165+ ggml_tensor * ffn_out = build_moe_ffn_from_probs(cur, probs, model.layers[il].ffn_up_exps,
1717117166 model.layers[il].ffn_gate_exps, model.layers[il].ffn_down_exps,
1717217167 nullptr, n_expert, n_expert_used,
1717317168 static_cast<llama_expert_gating_func_type>(hparams.expert_gating_func), il);
17174- } else {
17175- ffn_out = build_ffn(cur, model.layers[il].ffn_up, NULL, NULL, model.layers[il].ffn_gate, NULL, NULL,
17176- model.layers[il].ffn_down, NULL, NULL, NULL, LLM_FFN_RELU, LLM_FFN_PAR, il);
17177- }
1717817169
1717917170 cb(ffn_out, "ffn_out", il);
1718017171 cur = ffn_out;
0 commit comments