
Commit 6c0715b

fix: update callback for ffn_moe_weighted and add callback for attn_out in deepseek2 model
1 parent 1e08157 commit 6c0715b

2 files changed (+2 lines, −1 line)


src/llama-graph.cpp

Lines changed: 1 addition & 1 deletion
@@ -1106,7 +1106,7 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
     if (!weight_before_ffn) {
         experts = ggml_mul(ctx0, experts, weights);
-        cb(cur, "ffn_moe_weighted", il);
+        cb(experts, "ffn_moe_weighted", il);
     }

     ggml_tensor * cur_experts[LLAMA_MAX_EXPERTS] = { nullptr };
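This change corrects which tensor the debug callback is applied to: after experts = ggml_mul(ctx0, experts, weights), the "ffn_moe_weighted" tag now lands on experts, the tensor actually produced by the multiplication, rather than on the earlier cur node. As a rough illustration of what such a per-layer naming callback typically does (a hypothetical simplification for context, not the exact llm_graph_context::cb implementation):

    #include "ggml.h"

    // Hypothetical simplification of a per-layer naming callback in the
    // spirit of llm_graph_context::cb: tag a graph node as "<name>-<layer>"
    // so that later passes and debug tooling can find it by name.
    static void cb(struct ggml_tensor * t, const char * name, int il) {
        if (il >= 0) {
            ggml_format_name(t, "%s-%d", name, il);   // e.g. "ffn_moe_weighted-12"
        } else {
            ggml_set_name(t, name);
        }
    }

Under that reading, the original call left the weighted expert output without its intended tag, which is what this swap fixes.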

src/models/deepseek2.cpp

Lines changed: 1 addition & 0 deletions
@@ -74,6 +74,7 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
             cur = build_attn(inp_attn,
                     model.layers[il].wo, NULL,
                     Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
+            cb(cur, "attn_out", il);
         }
         else {
             ggml_tensor * q = NULL;
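The second change tags the attention output of this branch of the DeepSeek2 graph. One practical benefit, assuming the graph eval callback mechanism (e.g. llama_context_params::cb_eval) and the usual "<name>-<layer>" naming convention, is that the tensor can now be picked out by name at evaluation time. A minimal sketch of such a callback; the function name and the prefix match are illustrative assumptions:

    #include <cstdio>
    #include <cstring>
    #include "ggml.h"

    // Sketch of a graph eval callback that watches for tensors tagged
    // "attn_out". When ask == true we declare interest in the node;
    // when ask == false the node has been computed and can be inspected.
    static bool watch_attn_out(struct ggml_tensor * t, bool ask, void * user_data) {
        (void) user_data;
        const bool is_attn_out = strncmp(t->name, "attn_out", 8) == 0;
        if (ask) {
            return is_attn_out;      // only fetch data for the tensors we care about
        }
        if (is_attn_out) {
            printf("%s: ne = [%lld, %lld]\n", t->name,
                   (long long) t->ne[0], (long long) t->ne[1]);
        }
        return true;                 // keep graph evaluation going
    }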
