We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent f5b86ad · commit 68375ed · Copy full SHA for 68375ed
tensorrt_llm/_torch/modules/fused_moe/fused_moe_cutlass.py
@@ -532,7 +532,7 @@ def forward_chunk(
532
533
# Optionally provide an output tensor to fused_moe so it writes directly to our buffer
534
moe_output: Optional[torch.Tensor] = None
535
- if self.enable_alltoall and self.moe_alltoall_backend == "NVLINK_ONE_SIDEDz":
+ if self.enable_alltoall and self.moe_alltoall_backend == "NVLINK_ONE_SIDED":
536
# Retrieve a workspace-backed output tensor sized by runtime tokens
537
runtime_max_tokens_per_rank = max(
538
all_rank_num_tokens) if all_rank_num_tokens else x.shape[0]
0 commit comments