We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent f2ebb6f commit ad971af
vllm/model_executor/layers/fused_moe/layer.py
@@ -437,7 +437,7 @@ def __init__(
437
# Use expert parallelism instead of tensor parallelism?
438
vllm_config = get_current_vllm_config()
439
use_ep = (vllm_config.parallel_config.enable_expert_parallel
440
- and self.tp_size > 1)
+ and self.tp_size * self.dp_size > 1)
441
442
# For smuggling this layer into the fused moe custom op
443
self.use_direct_call = self.dp_size == 1
0 commit comments