@@ -539,7 +539,12 @@ def fused_experts_with_allgather(hidden_states: torch.Tensor,
                                 topk_weights: torch.Tensor,
                                 topk_ids: torch.Tensor,
                                 top_k: int,
-                                expert_map: torch.Tensor = None):
+                                expert_map: torch.Tensor = None,
+                                log2phy: torch.Tensor = None,
+                                global_redundant_expert_num: int = 0,
+                                ) -> torch.Tensor:
+    if log2phy is not None:
+        topk_ids = log2phy[topk_ids]
     original_shape = hidden_states.shape
     if len(original_shape) == 3:
         hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
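For orientation, here is a minimal runnable sketch of the logical-to-physical remapping this hunk introduces. The table contents and expert counts below are invented for illustration; a real EPLB placement table may be per-rank or 2D, but the indexing step is the same one the patch performs:

import torch

# Hypothetical setup: 4 logical experts, logical expert 1 has one
# redundant physical copy, so there are 5 physical expert slots.
log2phy = torch.tensor([0, 1, 2, 3])       # copy-free placement
log2phy_alt = torch.tensor([0, 4, 2, 3])   # routes expert 1 to its copy in slot 4

# topk_ids from the router are logical expert ids; indexing with them
# remaps to physical slots, as in the patched fused_experts_with_allgather.
topk_ids = torch.tensor([[1, 3], [0, 1]])
print(log2phy[topk_ids])      # tensor([[1, 3], [0, 1]])
print(log2phy_alt[topk_ids])  # tensor([[4, 3], [0, 4]])

Remapping at topk_ids keeps the router itself unchanged: it still scores logical experts, and the table alone decides which physical copy serves each token.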
@@ -551,7 +556,7 @@ def fused_experts_with_allgather(hidden_states: torch.Tensor,
     ep_rank = torch.distributed.get_rank(group=ep_group)
     ep_size = torch.distributed.get_world_size(ep_group)
 
-    global_num_experts = len(expert_map)
+    global_num_experts = len(expert_map) + global_redundant_expert_num
     local_num_experts = global_num_experts // ep_size
 
     hidden_states, pertoken_scale = torch_npu.npu_dynamic_quant(hidden_states)
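A quick worked example of the count arithmetic in this hunk, with made-up numbers:

# Illustrative numbers only: 64 logical experts, 8 redundant physical
# copies, expert parallelism across 8 ranks.
len_expert_map = 64
global_redundant_expert_num = 8
ep_size = 8

global_num_experts = len_expert_map + global_redundant_expert_num  # 72
local_num_experts = global_num_experts // ep_size                  # 9 per rank
assert global_num_experts % ep_size == 0, "physical experts must split evenly"

Without the fix, the redundant copies would be omitted from global_num_experts and each rank would compute too few local experts.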
@@ -961,7 +966,9 @@ def apply(
                topk_weights=topk_weights,
                topk_ids=topk_ids,
                top_k=top_k,
-               expert_map=expert_map)
+               expert_map=expert_map,
+               log2phy=log2phy,
+               global_redundant_expert_num=global_redundant_expert_num)
         elif fused_moe_state == FusedMoEState.MC2:
             return fused_experts_with_mc2(
                 hidden_states=x,
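Finally, a hedged sketch of how a log2phy table could be derived from a physical slot placement; build_log2phy and the placement list are hypothetical helpers for illustration, not part of this patch:

import torch

def build_log2phy(placement: list[int]) -> torch.Tensor:
    """placement[slot] = logical expert hosted in that physical slot.

    When a logical expert has several physical copies, this naive policy
    keeps the first slot found; a real balancer would pick per rank/load.
    """
    num_logical = max(placement) + 1
    log2phy = torch.full((num_logical,), -1, dtype=torch.long)
    for slot, logical in enumerate(placement):
        if log2phy[logical] == -1:
            log2phy[logical] = slot
    return log2phy

# 4 logical experts spread over 6 physical slots; experts 1 and 2
# each have a redundant copy (slots 4 and 5).
print(build_log2phy([0, 1, 2, 3, 1, 2]))  # tensor([0, 1, 2, 3])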