Skip to content

Commit 0788ff0

Browse files
authored
[Bugfix] Gracefully disable AllReduceFusionPass on GPUs without multicast support (vllm-project#35085)
Signed-off-by: haosdent <haosdent@gmail.com>
1 parent d72b0be commit 0788ff0

File tree

1 file changed

+20
-8
lines changed

1 file changed

+20
-8
lines changed

vllm/compilation/passes/fusion/allreduce_rms_fusion.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -729,14 +729,26 @@ def __init__(self, config: VllmConfig) -> None:
                 scope="global",
             )

-        self.workspace = flashinfer_comm.create_allreduce_fusion_workspace(
-            backend="trtllm",
-            world_size=self.tp_size,
-            rank=rank,
-            max_token_num=self.max_token_num,
-            hidden_dim=self.hidden_dim,
-            dtype=self.model_dtype,
-        )
+        try:
+            self.workspace = flashinfer_comm.create_allreduce_fusion_workspace(
+                backend="trtllm",
+                world_size=self.tp_size,
+                rank=rank,
+                max_token_num=self.max_token_num,
+                hidden_dim=self.hidden_dim,
+                dtype=self.model_dtype,
+            )
+        except RuntimeError as e:
+            if "multicast" not in str(e).lower():
+                raise
+            logger.warning_once(
+                "AllReduce fusion pass is disabled: flashinfer workspace "
+                "creation failed: %s. This is expected on GPUs without "
+                "NVSwitch (e.g., NVLink bridge-only or PCIe topologies). "
+                "Falling back to non-fused allreduce.",
+                str(e),
+            )
+            return

         global _FI_WORKSPACE
         _FI_WORKSPACE = self.workspace

0 commit comments

Comments
 (0)