@@ -52,6 +52,7 @@ def __init__(
5252
5353 self .multinode_req_manager = None
5454 self .nnodes = args .nnodes
55+ self ._resource_lock = asyncio .Lock ()
5556 self .node_rank = args .node_rank
5657 self .transfer_lock = asyncio .Lock () # the lock for transfer to next module in multi node mode.
5758 self .disable_abort = args .nnodes > 1 and args .dp == 1 # mulitnode dp=1 mode, disable abort
@@ -141,19 +142,23 @@ async def _alloc_resource(self, item: Union[ImageItem, AudioItem]):
141142 async def _alloc_multimodal_resources (self , multimodal_params : MultimodalParams , sampling_params : SamplingParams ):
142143 # Only P and NORMAL nodes actually need to manage multimodal resources.
143144 if self .pd_mode .is_P_or_NORMAL ():
144- for img in multimodal_params .images :
145- self .tokenizer .init_imageitem_extral_params (img , multimodal_params , sampling_params )
146- record = await self ._alloc_resource (img )
147- img .uuid = record ["id" ]
148- img .token_id = record ["token_id" ]
149- img .token_num = record ["token_num" ]
150- for audio in multimodal_params .audios :
151- self .tokenizer .init_audioitem_extral_params (audio , multimodal_params , sampling_params )
152- record = await self ._alloc_resource (audio )
153- audio .uuid = record ["id" ]
154- audio .token_id = record ["token_id" ]
155- audio .token_num = record ["token_num" ]
156- return
145+ # This lock prevents several multi-image requests from concurrently requesting more records in total than cache_capacity, which would cause a deadlock.
146+ # Without any lock: suppose request 1 and request 2 each carry 6 images while cache_capacity is 10;
147+ # if at some moment shm holds 5 images of request 1 and 5 images of request 2, the two requests contend for resources and deadlock.
148+ async with self ._resource_lock :
149+ for img in multimodal_params .images :
150+ self .tokenizer .init_imageitem_extral_params (img , multimodal_params , sampling_params )
151+ record = await self ._alloc_resource (img )
152+ img .uuid = record ["id" ]
153+ img .token_id = record ["token_id" ]
154+ img .token_num = record ["token_num" ]
155+ for audio in multimodal_params .audios :
156+ self .tokenizer .init_audioitem_extral_params (audio , multimodal_params , sampling_params )
157+ record = await self ._alloc_resource (audio )
158+ audio .uuid = record ["id" ]
159+ audio .token_id = record ["token_id" ]
160+ audio .token_num = record ["token_num" ]
161+ return
157162
158163 async def _release_multimodal_resources (self , multimodal_params : MultimodalParams ):
159164 # 只有 P 和 NORMAL 节点需要真的管理多模态资源
@@ -594,7 +599,6 @@ async def recycle_resource_loop(self):
594599 for req_status in self .req_id_to_out_inf .values ():
595600 if req_status .can_release ():
596601 release_req_status .append (req_status )
597-
598602 for req_status in release_req_status :
599603 self .req_id_to_out_inf .pop (req_status .group_req_objs .group_req_id , None )
600604 for req in req_status .group_req_objs .shm_req_objs :
0 commit comments