
Commit 918e62b

tweak bound_m
Signed-off-by: Bill Nell <[email protected]>
1 parent 800dde1 commit 918e62b

File tree: 1 file changed (+3 additions, -4 deletions)

vllm/model_executor/layers/fused_moe/pplx_dispatch_combine.py (3 additions, 4 deletions)

@@ -123,10 +123,9 @@ def combine(
         apply_router_weight_on_input: bool,
     ) -> None:
         # This argument is optional
-        #num_tokens = output.shape[0] # M
-        #bound_m = torch.tensor([num_tokens], dtype=torch.uint32,
-        #                       device=fused_expert_output.device)
-        bound_m = None
+        num_tokens = output.shape[0] # M
+        bound_m = torch.tensor([num_tokens], dtype=torch.uint32,
+                               device=fused_expert_output.device)

         assert output.shape[0] <= self.max_num_tokens
         assert output.shape[1] == fused_expert_output.shape[-1]
