File tree (expand / collapse): 1 file changed, +20 −8 lines changed
vllm/compilation/passes/fusion (expand / collapse file tree): 1 file changed, +20 −8 lines changed
Original file line number | Diff line number | Diff line change
@@ -729,14 +729,26 @@ def __init__(self, config: VllmConfig) -> None:
729729 scope = "global" ,
730730 )
731731
732- self .workspace = flashinfer_comm .create_allreduce_fusion_workspace (
733- backend = "trtllm" ,
734- world_size = self .tp_size ,
735- rank = rank ,
736- max_token_num = self .max_token_num ,
737- hidden_dim = self .hidden_dim ,
738- dtype = self .model_dtype ,
739- )
732+ try :
733+ self .workspace = flashinfer_comm .create_allreduce_fusion_workspace (
734+ backend = "trtllm" ,
735+ world_size = self .tp_size ,
736+ rank = rank ,
737+ max_token_num = self .max_token_num ,
738+ hidden_dim = self .hidden_dim ,
739+ dtype = self .model_dtype ,
740+ )
741+ except RuntimeError as e :
742+ if "multicast" not in str (e ).lower ():
743+ raise
744+ logger .warning_once (
745+ "AllReduce fusion pass is disabled: flashinfer workspace "
746+ "creation failed: %s. This is expected on GPUs without "
747+ "NVSwitch (e.g., NVLink bridge-only or PCIe topologies). "
748+ "Falling back to non-fused allreduce." ,
749+ str (e ),
750+ )
751+ return
740752
741753 global _FI_WORKSPACE
742754 _FI_WORKSPACE = self .workspace
You can’t perform that action at this time.
0 commit comments