1 parent fe315a5 commit 86ea597
lightllm/server/router/model_infer/mode_backend/base_backend.py
@@ -84,8 +84,10 @@ def init_model(self, kvargs):
             init_method=f'tcp://127.0.0.1:{kvargs["nccl_port"]}',
             rank=self.tp_rank,
             world_size=self.world_size,
-            device_id=torch.device(f"cuda:{self.tp_rank}"),
         )
+        # warmup nccl communicator
+        a = torch.zeros([1]).to(f"cuda:{self.tp_rank}")
+        dist.all_reduce(a)
 
         from lightllm.distributed import set_custom_reduce
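For context, the change drops the device_id argument to init_process_group (an argument only newer PyTorch releases accept) and instead warms up the NCCL communicator explicitly by issuing a throwaway all_reduce on each rank's GPU, so the communicator is built at startup rather than on the first real collective. Below is a minimal standalone sketch of that warmup pattern under stated assumptions: the function name init_distributed, the NCCL backend choice, the hard-coded port, and the torchrun-style RANK/WORLD_SIZE environment variables are illustrative and not code from the repository.

```python
import os

import torch
import torch.distributed as dist


def init_distributed(tp_rank: int, world_size: int, nccl_port: int) -> None:
    # Hypothetical helper mirroring the diff: initialize the default process
    # group over TCP on localhost using the NCCL backend (backend assumed).
    dist.init_process_group(
        backend="nccl",
        init_method=f"tcp://127.0.0.1:{nccl_port}",
        rank=tp_rank,
        world_size=world_size,
    )
    # Warm up the NCCL communicator: the first collective on a device is what
    # actually constructs the communicator, so pay that cost here on a dummy
    # tensor instead of during the first real all_reduce.
    a = torch.zeros([1]).to(f"cuda:{tp_rank}")
    dist.all_reduce(a)


if __name__ == "__main__":
    # Assumed launch: one process per GPU, e.g. via torchrun, which sets
    # RANK and WORLD_SIZE in the environment; the port is arbitrary.
    rank = int(os.environ.get("RANK", "0"))
    world_size = int(os.environ.get("WORLD_SIZE", "1"))
    init_distributed(rank, world_size, nccl_port=28765)
```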