File tree Expand file tree Collapse file tree 1 file changed +4
-2
lines changed
lightllm/server/httpserver Expand file tree Collapse file tree 1 file changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -57,7 +57,8 @@ def __init__(
5757 self .child_node_lock = asyncio .Lock ()
5858 self .nnodes = args .nnodes
5959 self .node_rank = args .node_rank
60- self .transfer_lock = asyncio .Lock ()
60+ self .transfer_lock = asyncio .Lock () # the lock for transfer to next module in multi node mode.
61+ self .disable_abort = args .nnodes > 1 and args .dp == 1 # multinode dp=1 mode, disable abort
6162 if args .nnodes > 1 :
6263 if args .node_rank == 0 :
6364 self .multinode_req_manager = []
@@ -341,6 +342,7 @@ async def transfer_to_next_module_or_node(
341342 multimodal_params : MultimodalParams ,
342343 group_req_objs : Optional [GroupReqObjs ] = None ,
343344 ):
345+ # In multi-node pure-TP mode, ensure requests are forwarded to the other nodes and to this node's next module in the same order.
344346 if self .nnodes > 1 and self .node_rank == 0 and self .args .dp == 1 :
345347 async with self .transfer_lock :
346348 for sender in self .multinode_req_manager :
@@ -431,7 +433,7 @@ async def _wait_to_token_package(
431433 except asyncio .TimeoutError :
432434 pass
433435
434- if request is not None and await request .is_disconnected () and self . nnodes == 1 :
436+ if not self . disable_abort and request is not None and await request .is_disconnected ():
435437 await self .abort (group_request_id )
436438 raise Exception (f"req_id { group_request_id } disconnected" )
437439
You can’t perform that action at this time.
0 commit comments