Skip to content

Commit 92ef542

Browse files
committed
fix multinode tp.
1 parent 26aead8 commit 92ef542

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed

lightllm/server/httpserver/manager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def __init__(
5757
self.node_rank = args.node_rank
5858
self.transfer_lock = asyncio.Lock() # the lock for transfer to next module in multi node mode.
5959
self.disable_abort = args.nnodes > 1 and args.dp == 1 # multinode dp=1 mode, disable abort
60-
if args.nnodes > 1:
60+
self.is_multinode_tp = args.dp == 1 and args.nnodes > 1
61+
if self.is_multinode_tp:
6162
if args.node_rank == 0:
6263
self.multinode_req_manager = []
6364
for child_ip in args.child_ips:

lightllm/utils/multinode_utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66

77

88
def send_and_receive_node_ip(args):
9-
# 传输子node的ip
10-
if args.nnodes > 1:
11-
9+
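# In a multi-node TP deployment, node 0 acts as the master node and the other nodes act as
10+
# worker nodes. Node 0 needs to know the IP addresses of all worker nodes so that it can build
11+
# some communication components that forward request information to the worker nodes.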
12+
is_multinode_tp = args.dp == 1 and args.nnodes > 1
13+
if is_multinode_tp:
1214
if args.node_rank == 0:
1315
args.child_ips = None
1416
args.child_ips = []

0 commit comments

Comments
 (0)