Skip to content

Commit 4377c20

Browse files
committed
update
1 parent 429f9c3 commit 4377c20

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

lightllm/server/httpserver/manager.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def __init__(
5757
self.child_node_lock = asyncio.Lock()
5858
self.nnodes = args.nnodes
5959
self.node_rank = args.node_rank
60-
self.transfer_lock = asyncio.Lock()
60+
self.transfer_lock = asyncio.Lock() # the lock for transfer to next module in multi node mode.
61+
self.disable_abort = args.nnodes > 1 and args.dp == 1 # multinode dp=1 mode, disable abort
6162
if args.nnodes > 1:
6263
if args.node_rank == 0:
6364
self.multinode_req_manager = []
@@ -341,6 +342,7 @@ async def transfer_to_next_module_or_node(
341342
multimodal_params: MultimodalParams,
342343
group_req_objs: Optional[GroupReqObjs] = None,
343344
):
345+
# In multi-node pure-TP mode, ensure requests are forwarded in the same order to the other nodes and to the current node's next module.
344346
if self.nnodes > 1 and self.node_rank == 0 and self.args.dp == 1:
345347
async with self.transfer_lock:
346348
for sender in self.multinode_req_manager:
@@ -431,7 +433,7 @@ async def _wait_to_token_package(
431433
except asyncio.TimeoutError:
432434
pass
433435

434-
if request is not None and await request.is_disconnected() and self.nnodes == 1:
436+
if not self.disable_abort and request is not None and await request.is_disconnected():
435437
await self.abort(group_request_id)
436438
raise Exception(f"req_id {group_request_id} disconnected")
437439

0 commit comments

Comments
 (0)