commit 800dde1 (1 parent: 3433b73)
vllm/model_executor/layers/fused_moe/pplx_dispatch_combine.py
@@ -123,9 +123,10 @@ def combine(
         apply_router_weight_on_input: bool,
     ) -> None:
         # This argument is optional
-        num_tokens = output.shape[0]  # M
-        bound_m = torch.tensor([num_tokens], dtype=torch.uint32,
-                               device=fused_expert_output.device)
+        #num_tokens = output.shape[0]  # M
+        #bound_m = torch.tensor([num_tokens], dtype=torch.uint32,
+        #                       device=fused_expert_output.device)
+        bound_m = None

         assert output.shape[0] <= self.max_num_tokens
         assert output.shape[1] == fused_expert_output.shape[-1]
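For context, the sketch below shows roughly what the combine() preamble looks like after this change: bound_m is left as None (the argument is optional, per the comment in the diff) rather than being built as a uint32 tensor from output.shape[0]. Everything outside the diffed lines is a hypothetical stand-in (the _FakeCombine wrapper, its constructor, and the demo call) added only so the snippet runs on its own; it is not vLLM's actual PplxDispatchCombine code.

import torch


class _FakeCombine:
    # Hypothetical wrapper; the real class in
    # vllm/model_executor/layers/fused_moe/pplx_dispatch_combine.py differs.
    def __init__(self, max_num_tokens: int):
        self.max_num_tokens = max_num_tokens

    def combine(
        self,
        output: torch.Tensor,
        fused_expert_output: torch.Tensor,
        apply_router_weight_on_input: bool,
    ) -> None:
        # After this commit, bound_m is simply None instead of
        # torch.tensor([output.shape[0]], dtype=torch.uint32, ...).
        bound_m = None

        assert output.shape[0] <= self.max_num_tokens
        assert output.shape[1] == fused_expert_output.shape[-1]
        # ... the real method would pass bound_m on to the pplx combine
        # kernel here; that call is omitted in this sketch ...


if __name__ == "__main__":
    out = torch.zeros(4, 8)
    expert_out = torch.zeros(4, 2, 8)
    _FakeCombine(max_num_tokens=16).combine(
        out, expert_out, apply_router_weight_on_input=False)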