Skip to content

Commit 803e095

Browse files
committed
[fix] fix rpyc usage in the multimodal process
1 parent 780a57f commit 803e095

File tree

8 files changed

+51
-89
lines changed

8 files changed

+51
-89
lines changed

lightllm/models/whisper/whisper_audio.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,14 @@ def encode(self, audio_items: List[AudioItem]):
190190
audio_lens_after_cnn = np.array(audio_lens_after_cnn, dtype=np.int32)
191191
audio_token_num = (audio_lens_after_cnn - 2) // 2 + 1
192192

193-
for i in range(len(uuids)):
194-
if not self.cache_client.root.get_item_embed(uuids[i]):
195-
cur_embed_bytes = tensor2bytes(audios[i][: audio_token_num[i]])
196-
create_shm(get_shm_name_embed(uuids[i]), cur_embed_bytes)
197-
self.cache_client.root.set_item_embed(uuids[i])
193+
ready_audio = self.cache_client.root.get_items_data(uuids)
194+
ids_to_set = []
195+
for i, ready in enumerate(ready_audio):
196+
if ready:
197+
continue
198+
uid = uuids[i]
199+
cur_embed_bytes = tensor2bytes(audios[i][: audio_token_num[i]])
200+
create_shm(get_shm_name_data(uid), cur_embed_bytes)
201+
ids_to_set.append(uid)
202+
if ids_to_set:
203+
self.cache_client.root.set_items_data(ids=ids_to_set)

lightllm/server/audioserver/manager.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,11 @@ async def loop_for_fwd(self):
9494

9595
multimodal_params = group_req_indexes.multimodal_params
9696

97-
for audio in multimodal_params.audios:
98-
if not self.cache_client.root.get_item_embed(audio.uuid):
97+
audio_uuids = [audio.uuid for audio in multimodal_params.audios]
98+
ready_audio = self.cache_client.root.get_items_embed(audio_uuids)
99+
100+
for audio, ready in zip(multimodal_params.audios, ready_audio):
101+
if not ready:
99102
audios_need_infer.append(audio)
100103

101104
if len(audios_need_infer) == self.infer_batch_size:

lightllm/server/embed_cache/impl/naive_memory_cache.py

Lines changed: 10 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -89,52 +89,12 @@ def _clear(self):
8989
if deleted >= max_delete:
9090
break
9191

92-
# def alloc(self, md5sum: str, token_num: int) -> dict:
93-
# with self.lock:
94-
# t = time.time()
95-
# # add new record
96-
# if md5sum not in self._md5_to_record:
97-
98-
# # full, need to clear some unused items
99-
# if self.occupied >= self.capacity:
100-
# self._clear()
101-
# if self.occupied >= self.capacity:
102-
# return None
103-
104-
# id = uuid.uuid1()
105-
# id = id.int
106-
# self._check_and_set_new_id_range(token_num)
107-
# record = Record(
108-
# id=id,
109-
# md5sum=md5sum,
110-
# ref=1,
111-
# data=False,
112-
# embed=False,
113-
# createtime=t,
114-
# visittime=t,
115-
# token_id=self.token_id_range_start,
116-
# token_num=token_num,
117-
# )
118-
# self.token_id_range_start += token_num
119-
# self._records[id] = record
120-
# self._md5_to_record[md5sum] = record
121-
# self.occupied += 1
122-
123-
# # cache hit
124-
# else:
125-
# record = self._md5_to_record[md5sum]
126-
# record.visittime = t
127-
# record.ref += 1
128-
129-
# return {"id": record.id, "token_id": record.token_id, "token_num": record.token_num}
130-
13192
def alloc_batch(self, md5_list: list[str], token_num_list: list[int]) -> list[dict]:
13293
results = []
13394
with self.lock:
13495
for md5, tnum in zip(md5_list, token_num_list):
13596
t = time.time()
13697
if md5 not in self._md5_to_record:
137-
# 若不存在则分配新记录(与alloc逻辑相同)
13898
if self.occupied >= self.capacity:
13999
self._clear()
140100
if self.occupied >= self.capacity:
@@ -158,7 +118,6 @@ def alloc_batch(self, md5_list: list[str], token_num_list: list[int]) -> list[di
158118
self._md5_to_record[md5] = record
159119
self.occupied += 1
160120
else:
161-
# 缓存命中,更新引用计数和访问时间
162121
record = self._md5_to_record[md5]
163122
record.visittime = t
164123
record.ref += 1
@@ -169,23 +128,20 @@ def release(self, id: int) -> None:
169128
with self.lock:
170129
self._records[id].ref -= 1
171130

172-
# def set_item_data(self, id: int) -> None:
173-
# self._records[id].data = True
174-
175-
# def get_item_data(self, id: int) -> bool:
176-
# return self._records[id].data
131+
def set_items_data(self, ids: list[int]) -> None:
132+
with self.lock:
133+
for id in ids:
134+
self._records[id].data = True
177135

178136
def get_items_data(self, ids: list[int]) -> list[bool]:
179137
with self.lock:
180138
return [self._records.get(i).data if i in self._records else False for i in ids]
181139

182-
def set_items_data(self, ids: list[int]) -> None:
140+
def set_items_embed(self, ids: list[int]) -> None:
183141
with self.lock:
184-
for i in ids:
185-
self._records[i].data = True
142+
for id in ids:
143+
self._records[id].embed = True
186144

187-
def set_item_embed(self, id: int) -> None:
188-
self._records[id].embed = True
189-
190-
def get_item_embed(self, id: int) -> bool:
191-
return self._records[id].embed
145+
def get_items_embed(self, ids: list[int]) -> list[bool]:
146+
with self.lock:
147+
return [self._records.get(i).embed if i in self._records else False for i in ids]

lightllm/server/embed_cache/interface.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ def set_items_data(self, ids: list[int]) -> None:
1919
def get_items_data(self, ids: list[int]) -> list[bool]:
2020
pass
2121

22-
def set_item_embed(self, id: int) -> None:
22+
def set_items_embed(self, ids: list[int]) -> None:
2323
pass
2424

25-
def get_item_embed(self, id: int) -> bool:
25+
def get_items_embed(self, ids: list[int]) -> list[bool]:
2626
pass
2727

2828

lightllm/server/embed_cache/manager.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def on_disconnect(self, conn):
2525
def exposed_alloc_batch(self, md5sum_list: list[str], token_num_list: list[int]) -> dict:
2626
md5sum_list = obtain(md5sum_list)
2727
token_num_list = obtain(token_num_list)
28-
record = self._impl.alloc(md5sum_list, token_num_list)
28+
record = self._impl.alloc_batch(md5sum_list, token_num_list)
2929
return record
3030

3131
def exposed_release(self, id: int) -> None:
@@ -40,13 +40,13 @@ def exposed_get_items_data(self, ids: list[int]) -> list[bool]:
4040
ids = obtain(ids)
4141
return self._impl.get_items_data(ids=ids)
4242

43-
def exposed_set_item_embed(self, id: int) -> None:
44-
id = obtain(id)
45-
return self._impl.set_item_embed(id=id)
43+
def exposed_set_items_embed(self, ids: list[int]) -> None:
44+
ids = obtain(ids)
45+
return self._impl.set_items_embed(ids=ids)
4646

47-
def exposed_get_item_embed(self, id: int) -> bool:
48-
id = obtain(id)
49-
return self._impl.get_item_embed(id=id)
47+
def exposed_get_items_embed(self, ids: list[int]) -> list[bool]:
48+
ids = obtain(ids)
49+
return self._impl.get_items_embed(ids=ids)
5050

5151

5252
def start_cache_manager(port: int, args, pipe_writer):

lightllm/server/httpserver/manager.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def __init__(
8282

8383
self.enable_multimodal = enable_multimodal
8484
if self.enable_multimodal:
85-
self.cache_client = rpyc.connect("localhost", cache_port, onfig={"allow_pickle": True})
85+
self.cache_client = rpyc.connect("localhost", cache_port, config={"allow_pickle": True})
8686
self.send_to_visual = context.socket(zmq.PUSH)
8787
self.send_to_visual.connect(f"{args.zmq_mode}127.0.0.1:{visual_port}")
8888

@@ -160,9 +160,6 @@ async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams,
160160
token_nums.append(num_tokens)
161161
datas.append(data)
162162
items.append(img)
163-
# img.uuid = record["id"]
164-
# img.token_id = record["token_id"]
165-
# img.token_num = record["token_num"]
166163
for audio in multimodal_params.audios:
167164
self.tokenizer.init_audioitem_extral_params(audio, multimodal_params, sampling_params)
168165
data = audio.read()
@@ -172,9 +169,6 @@ async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams,
172169
token_nums.append(num_tokens)
173170
datas.append(data)
174171
items.append(audio)
175-
# audio.uuid = record["id"]
176-
# audio.token_id = record["token_id"]
177-
# audio.token_num = record["token_num"]
178172
wait_time = 1
179173
while True:
180174
records = self.cache_client.root.alloc_batch(md5s, token_nums)
@@ -194,7 +188,6 @@ async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams,
194188
item.token_num = record["token_num"]
195189
if not ready:
196190
create_shm(get_shm_name_data(item.uuid), data)
197-
self.cache_client.root.set_items_data(item.uuid)
198191
uids_to_write.append(item.uuid)
199192
if uids_to_write:
200193
self.cache_client.root.set_items_data(uids_to_write)

lightllm/server/visualserver/manager.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def __init__(
3535

3636
self.recv_from_httpserver = context.socket(zmq.PULL)
3737
self.recv_from_httpserver.bind(f"{args.zmq_mode}127.0.0.1:{visual_port}")
38-
self.cache_client = rpyc.connect("localhost", cache_port)
38+
self.cache_client = rpyc.connect("localhost", cache_port, config={"allow_pickle": True})
3939
self.cache_port = cache_port
4040
self.waiting_reqs: List[GroupReqIndexes] = []
4141
self.model_weightdir = args.model_dir
@@ -121,11 +121,9 @@ async def loop_for_fwd(self):
121121
multimodal_params = group_req_indexes.multimodal_params
122122

123123
img_uuids = [img.uuid for img in multimodal_params.images]
124-
ready_flags = []
125-
for uuid in img_uuids:
126-
ready_flags.append(self.cache_client.root.get_items_embed(uuid))
124+
ready_image = self.cache_client.root.get_items_embed(img_uuids)
127125

128-
for img, ready in zip(multimodal_params.images, ready_flags):
126+
for img, ready in zip(multimodal_params.images, ready_image):
129127
if not ready:
130128
images_need_infer.append(img)
131129

lightllm/server/visualserver/model_infer/model_rpc.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,20 @@ def exposed_encode(self, images: List[ImageItem]):
9494
images = obtain(images)
9595
all_img_embeds, uuids, valid_ids = self.forward(images)
9696
all_img_embeds = all_img_embeds.to(torch.device("cpu"))
97+
9798
if self.tp_rank_id == 0:
98-
for i in range(len(uuids)):
99+
ready_flags = self.cache_client.root.get_items_embed(uuids)
100+
ids_to_set = []
101+
for i, ready in enumerate(ready_flags):
102+
if ready:
103+
continue
99104
uid = uuids[i]
100-
if not self.cache_client.root.get_item_embed(uid):
101-
start, end = valid_ids[i]
102-
cur_embed_bytes = tensor2bytes(all_img_embeds[start:end])
103-
create_shm(get_shm_name_embed(uuids[i]), cur_embed_bytes)
104-
self.cache_client.root.set_item_embed(uuids[i])
105+
start, end = valid_ids[i]
106+
cur_embed_bytes = tensor2bytes(all_img_embeds[start:end])
107+
create_shm(get_shm_name_embed(uid), cur_embed_bytes)
108+
ids_to_set.append(uid)
109+
if ids_to_set:
110+
self.cache_client.root.set_items_embed(ids_to_set)
105111
return
106112

107113

0 commit comments

Comments (0)