bugfix: fix moe parameter settings for qwen3 moe vl model on mlu device. (#1102)

phantomlei3 · xuetinggogo · web-flow · commit 23f31d47374b · 2026-03-24T21:37:24.000+08:00
Co-authored-by: xuetinggogo <xuetinggo@outlook.com>
diff --git a/xllm/models/vlm/qwen3_vl_moe.h b/xllm/models/vlm/qwen3_vl_moe.h
@@ -196,6 +196,7 @@ TORCH_MODULE(Qwen3_VLMoeForConditionalGeneration);
 REGISTER_INPUT_PROCESSOR(qwen3_vl_moe, Qwen2_5_VLInputProcessor);
 REGISTER_CAUSAL_VLM_MODEL(qwen3_vl_moe, Qwen3_VLMoeForConditionalGeneration);
 REGISTER_IMAGE_PROCESSOR(qwen3_vl_moe, Qwen2VLImageProcessor);
+
 // register the model args
 REGISTER_MODEL_ARGS(qwen3_vl_moe, [&] {
   // text config
@@ -257,5 +258,14 @@ REGISTER_MODEL_ARGS(qwen3_vl_moe, [&] {
   LOAD_ARG_OR(video_token_id, "video_token_id", 151656);
   LOAD_ARG_OR(vision_end_token_id, "vision_end_token_id", 151653);
   LOAD_ARG_OR(vision_start_token_id, "vision_start_token_id", 151652);
+
+  // arguments to be compatible with other fused moe models
+  LOAD_ARG_OR(n_routed_experts, "num_experts", 128);
+  SET_ARG(n_shared_experts, 0);
+  SET_ARG(scoring_func, "softmax");
+  SET_ARG(topk_method, "");
+  SET_ARG(n_group, -1);
+  SET_ARG(topk_group, 0);
+  SET_ARG(routed_scaling_factor, 1.0);
 });
 }  // namespace xllm