1 parent f82f050 commit e5c32f2
src/llama-graph.cpp
@@ -929,7 +929,7 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
     }
 
     // select top n_group_used expert groups
-    if (arch == LLM_ARCH_BAILINGMOE2) {
+    if (arch == LLM_ARCH_BAILINGMOE2 && n_tokens > 0) {
         const int64_t n_exp_per_group = n_expert / hparams.n_expert_groups;
 
         // organize experts into n_expert_groups
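For context, here is a minimal standalone sketch of what group-limited expert routing looks like and why guarding on n_tokens > 0 matters: with an empty batch there are no router scores to reshape into expert groups, so the group-selection step can simply be skipped. This is an illustration only, not the ggml-graph code in build_moe_ffn; the function name select_expert_groups, the sum-based group score, and the -INFINITY masking are assumptions made for the sketch.

// Illustrative sketch (not the actual ggml-based implementation) of
// group-limited expert routing: experts are organized into n_expert_groups,
// each group is scored, only the top n_group_used groups keep their experts,
// and the rest are masked out before the final per-token top-k.
// The n_tokens > 0 guard mirrors the change in the commit above.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>
#include <vector>

void select_expert_groups(std::vector<float> & scores,   // [n_tokens * n_expert], router scores per token
                          int64_t n_tokens,
                          int64_t n_expert,
                          int64_t n_expert_groups,
                          int64_t n_group_used) {
    if (n_tokens <= 0) {
        return; // nothing to route; skip the grouping step entirely
    }

    const int64_t n_exp_per_group = n_expert / n_expert_groups;

    for (int64_t t = 0; t < n_tokens; ++t) {
        float * row = scores.data() + t * n_expert;

        // score each group; summing its expert scores is one possible choice
        std::vector<std::pair<float, int64_t>> group_scores(n_expert_groups);
        for (int64_t g = 0; g < n_expert_groups; ++g) {
            float s = 0.0f;
            for (int64_t e = 0; e < n_exp_per_group; ++e) {
                s += row[g * n_exp_per_group + e];
            }
            group_scores[g] = { s, g };
        }

        // keep the top n_group_used groups by score
        std::partial_sort(group_scores.begin(),
                          group_scores.begin() + n_group_used,
                          group_scores.end(),
                          [](const auto & a, const auto & b) { return a.first > b.first; });

        std::vector<bool> keep(n_expert_groups, false);
        for (int64_t g = 0; g < n_group_used; ++g) {
            keep[group_scores[g].second] = true;
        }

        // mask out experts belonging to the discarded groups
        for (int64_t g = 0; g < n_expert_groups; ++g) {
            if (!keep[g]) {
                std::fill(row + g * n_exp_per_group,
                          row + (g + 1) * n_exp_per_group,
                          -INFINITY);
            }
        }
    }
}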