Commit bc605a9

fix
1 parent 7e45d78 commit bc605a9

2 files changed: +4 −5 lines

lightllm/distributed/communication_op.py

Lines changed: 0 additions & 2 deletions
@@ -30,8 +30,6 @@
 
 try:
     HAS_VLLM = True
-    # import vllm.distributed.device_communicators.custom_all_reduce_utils as tgt
-    # setattr(tgt, "gpu_p2p_access_check", lambda *arg, **kwargs: True)
     from .custom_all_reduce import CustomAllreduce
 except:
     HAS_VLLM = False
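
The two deleted lines were a commented-out monkey-patch that forced vllm's
gpu_p2p_access_check to always return True, bypassing the peer-to-peer access
probe; the commit simply drops the dead code. The surrounding try/except is a
common optional-dependency guard: if the vllm-backed CustomAllreduce cannot be
imported, HAS_VLLM flips to False and callers fall back to the default
all-reduce path. A minimal standalone sketch of the pattern, probing vllm
itself rather than the relative module, and using except ImportError as the
narrower form of the bare except: seen in the diff:

    # Probe an optional dependency at import time; record the result in a
    # module-level flag that callers check before taking the fast path.
    try:
        import vllm  # noqa: F401  -- presence alone enables the feature
        HAS_VLLM = True
    except ImportError:
        HAS_VLLM = False

Note that the bare except: in the original also swallows unrelated errors
(including SystemExit and KeyboardInterrupt), which is why except ImportError
is usually preferred for this kind of guard.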

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 4 additions & 3 deletions
@@ -39,9 +39,7 @@
 from lightllm.server.router.model_infer.infer_batch import InferBatch, InferReq, InferSamplingParams, requests_mapping
 from lightllm.server.router.token_load import TokenLoad
 from lightllm.common.basemodel.infer_lock import g_infer_state_lock, InferStateLock
-from lightllm.distributed import (
-    set_custom_reduce,
-)
+
 import torch.distributed as dist
 
 
@@ -87,6 +85,9 @@ def init_model(self, kvargs):
             rank=self.tp_rank,
             world_size=self.world_size,
         )
+
+        from lightllm.distributed import set_custom_reduce
+
         set_custom_reduce()
 
         # Global lock management added for the p/d disaggregation mode, used for some synchronization operations. It must be
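
The second hunk moves the set_custom_reduce import from module scope into
init_model, after the process group has been initialized by the preceding
rank/world_size call. The commit does not say why, but a function-local import
like this is the standard way to defer import-time work and side effects (for
example, circular imports, or whatever lightllm.distributed pulls in, such as
the vllm-backed custom all-reduce above) until the model is actually being set
up. A minimal sketch of the deferred-import pattern, with the surrounding
method reduced to a hypothetical free function and placeholder setup code:

    import torch.distributed as dist

    def init_model(rank: int, world_size: int) -> None:
        # 1) Initialize the process group first (placeholder arguments; the
        #    real code passes self.tp_rank and self.world_size).
        dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)

        # 2) Import lazily: lightllm.distributed (and anything it imports in
        #    turn) is only loaded once execution reaches this point.
        from lightllm.distributed import set_custom_reduce

        set_custom_reduce()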
