Skip to content

Commit 384ff32

Browse files
authored
Fix deadlock when alloc resource (#901)
1 parent dd29e11 commit 384ff32

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

lightllm/server/httpserver/manager.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ def __init__(
5252

5353
self.multinode_req_manager = None
5454
self.nnodes = args.nnodes
55+
self._resource_lock = asyncio.Lock()
5556
self.node_rank = args.node_rank
5657
self.transfer_lock = asyncio.Lock() # the lock for transfer to next module in multi node mode.
5758
self.disable_abort = args.nnodes > 1 and args.dp == 1 # multinode dp=1 mode, disable abort
@@ -141,19 +142,23 @@ async def _alloc_resource(self, item: Union[ImageItem, AudioItem]):
141142
async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams, sampling_params: SamplingParams):
142143
# 只有 P 和 NORMAL 节点需要真的管理多模态资源
143144
if self.pd_mode.is_P_or_NORMAL():
144-
for img in multimodal_params.images:
145-
self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
146-
record = await self._alloc_resource(img)
147-
img.uuid = record["id"]
148-
img.token_id = record["token_id"]
149-
img.token_num = record["token_num"]
150-
for audio in multimodal_params.audios:
151-
self.tokenizer.init_audioitem_extral_params(audio, multimodal_params, sampling_params)
152-
record = await self._alloc_resource(audio)
153-
audio.uuid = record["id"]
154-
audio.token_id = record["token_id"]
155-
audio.token_num = record["token_num"]
156-
return
145+
# 这里的锁是为了 防止多个含有多张图片的请求 同时申请的record数量 大于cache_capacity,从而造成死锁的问题。
146+
# 如果不加任何锁,假如请求1和请求2都有6张图片,而cache_capacity为10,
147+
# 那么如果某一时刻shm中存在请求1的5张图和请求2的5张图,将会资源竞争产生死锁。
148+
async with self._resource_lock:
149+
for img in multimodal_params.images:
150+
self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
151+
record = await self._alloc_resource(img)
152+
img.uuid = record["id"]
153+
img.token_id = record["token_id"]
154+
img.token_num = record["token_num"]
155+
for audio in multimodal_params.audios:
156+
self.tokenizer.init_audioitem_extral_params(audio, multimodal_params, sampling_params)
157+
record = await self._alloc_resource(audio)
158+
audio.uuid = record["id"]
159+
audio.token_id = record["token_id"]
160+
audio.token_num = record["token_num"]
161+
return
157162

158163
async def _release_multimodal_resources(self, multimodal_params: MultimodalParams):
159164
# 只有 P 和 NORMAL 节点需要真的管理多模态资源
@@ -594,7 +599,6 @@ async def recycle_resource_loop(self):
594599
for req_status in self.req_id_to_out_inf.values():
595600
if req_status.can_release():
596601
release_req_status.append(req_status)
597-
598602
for req_status in release_req_status:
599603
self.req_id_to_out_inf.pop(req_status.group_req_objs.group_req_id, None)
600604
for req in req_status.group_req_objs.shm_req_objs:

0 commit comments

Comments
 (0)