File tree Expand file tree Collapse file tree 2 files changed +7
-4
lines changed
Expand file tree Collapse file tree 2 files changed +7
-4
lines changed Original file line number Diff line number Diff line change @@ -57,7 +57,8 @@ def __init__(
5757 self .node_rank = args .node_rank
5858 self .transfer_lock = asyncio .Lock () # the lock for transfer to next module in multi node mode.
5959 self .disable_abort = args .nnodes > 1 and args .dp == 1 # multinode dp=1 mode, disable abort
60- if args .nnodes > 1 :
60+ self .is_multinode_tp = args .dp == 1 and args .nnodes > 1
61+ if self .is_multinode_tp :
6162 if args .node_rank == 0 :
6263 self .multinode_req_manager = []
6364 for child_ip in args .child_ips :
Original file line number Diff line number Diff line change 66
77
88def send_and_receive_node_ip (args ):
9- # 传输子node的ip
10- if args .nnodes > 1 :
11-
9+ # 在多节点tp的部署形式中,0 号节点作为主节点,其他节点作为
10+ # 从节点,0 号节点需要知道所有从节点的ip信息,这样才能构建
11+ # 一些通信组件转发请求信息给从节点。
12+ is_multinode_tp = args .dp == 1 and args .nnodes > 1
13+ if is_multinode_tp :
1214 if args .node_rank == 0 :
1315 args .child_ips = None
1416 args .child_ips = []
You can’t perform that action at this time.
0 commit comments