commit 800dde1 (1 parent: 3433b73)
vllm/model_executor/layers/fused_moe/pplx_dispatch_combine.py
@@ -123,9 +123,10 @@ def combine(
         apply_router_weight_on_input: bool,
     ) -> None:
         # This argument is optional
-        num_tokens = output.shape[0]  # M
-        bound_m = torch.tensor([num_tokens], dtype=torch.uint32,
-                               device=fused_expert_output.device)
+        #num_tokens = output.shape[0]  # M
+        #bound_m = torch.tensor([num_tokens], dtype=torch.uint32,
+        #                       device=fused_expert_output.device)
+        bound_m = None

         assert output.shape[0] <= self.max_num_tokens
         assert output.shape[1] == fused_expert_output.shape[-1]
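For context, the sketch below shows roughly what the combine() preamble looks like after this change: bound_m is left as None (the argument is optional, per the comment in the diff) rather than being built as a uint32 tensor from output.shape[0]. Everything outside the diffed lines is a hypothetical stand-in (the _FakeCombine wrapper, its constructor, and the demo call) added only so the snippet runs on its own; it is not vLLM's actual PplxDispatchCombine code.

import torch


class _FakeCombine:
    # Hypothetical wrapper; the real class in
    # vllm/model_executor/layers/fused_moe/pplx_dispatch_combine.py differs.
    def __init__(self, max_num_tokens: int):
        self.max_num_tokens = max_num_tokens

    def combine(
        self,
        output: torch.Tensor,
        fused_expert_output: torch.Tensor,
        apply_router_weight_on_input: bool,
    ) -> None:
        # After this commit, bound_m is simply None instead of
        # torch.tensor([output.shape[0]], dtype=torch.uint32, ...).
        bound_m = None

        assert output.shape[0] <= self.max_num_tokens
        assert output.shape[1] == fused_expert_output.shape[-1]
        # ... the real method would pass bound_m on to the pplx combine
        # kernel here; that call is omitted in this sketch ...


if __name__ == "__main__":
    out = torch.zeros(4, 8)
    expert_out = torch.zeros(4, 2, 8)
    _FakeCombine(max_num_tokens=16).combine(
        out, expert_out, apply_router_weight_on_input=False)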