Add a wait for visual embeds to be ready for the LLM

shihaobai · shihaobai · commit 676215ed515c · 2025-08-28T22:15:18.000+08:00
diff --git a/lightllm/server/embed_cache/impl/memory_cache_with_redis.py b/lightllm/server/embed_cache/impl/memory_cache_with_redis.py
@@ -18,7 +18,6 @@ class MemoryCacheWithRedis(InMemoryCache):
     def __init__(self, args) -> None:
         super().__init__(args)
         redis_url = f"redis://{args.config_server_host}:{args.redis_port}/0"
-        print(redis_url, flush=True)
         self.redis_cache = EmbedRefCountRedis(
             redis_url=redis_url,
             capacity=args.cache_capacity,
@@ -29,24 +28,25 @@ def __init__(self, args) -> None:
         # 便于 dynamic prompt cache 的使用。所以要把cache_capacity * 2，保障其保留的图片cache > redis 服务维护的
         # 硬盘里的图片image embed 数量。
         self.cache_capacity = args.cache_capacity * 2
-        print(self.redis_cache.stats(), flush=True)
 
     def release(self, ids: list[int]) -> None:
         with self.lock:
             for id_ in ids:
                 self._records[id_].ref -= 1
                 self.redis_cache.decr(id_)
-
-    def set_items_data(self, ids: list[int]) -> None:
-        for id_ in ids:
-            self._records[id_].data = True
-
-    def get_items_data(self, ids: list[int]) -> list[Optional[bool]]:
-        return [self._records.get(id_).data if id_ in self._records else False for id_ in ids]
+                print(self.redis_cache.stats(), flush=True)
 
     def set_items_embed(self, ids: list[int]) -> None:
         for id in ids:
             self.redis_cache.insert(str(id))
 
     def get_items_embed(self, ids: list[int]) -> list[Optional[bool]]:
-        return [self.redis_cache.query_and_incre(str(id)) for id in ids]
+        ret = []
+        for id in ids:
+            # 避免重复的引用计数增加
+            if self._records[id].embed:
+                ret.append(True)
+                continue
+            self._records[id].embed = self.redis_cache.query_and_incre(str(id))
+            ret.append(self._records[id].embed)
+        return ret
diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
@@ -87,7 +87,7 @@ def __init__(
             # 初始化VIT连接管理器
             from lightllm.server.visualserver.vit_connect import VITConnectionManager
 
-            self.vit_manager = VITConnectionManager(args, context, visual_port)
+            self.vit_manager = VITConnectionManager(args, context, visual_port, self.cache_client)
 
         self.shm_req_manager = ShmReqManager()
 
diff --git a/lightllm/server/multimodal_params.py b/lightllm/server/multimodal_params.py
@@ -148,6 +148,9 @@ def free(self):
         for audio in self.audios:
             audio.free()
 
+    def get_all_uuids(self):
+        return [image.uuid for image in self.images] + [audio.uuid for audio in self.audios]
+
     async def verify_and_preload(self, request: Request):
         for image in self.images:
             await image.preload(request)
diff --git a/lightllm/server/visualserver/- b/lightllm/server/visualserver/-
@@ -15,3 +15,13 @@
 533706:M 28 Aug 2025 13:13:21.724 # Server initialized
 533706:M 28 Aug 2025 13:13:21.724 # WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.
 533706:M 28 Aug 2025 13:13:21.727 * Ready to accept connections
+533706:signal-handler (1756390331) Received SIGINT scheduling shutdown...
+533706:M 28 Aug 2025 14:12:11.921 # User requested shutdown...
+533706:M 28 Aug 2025 14:12:11.922 # Redis is now ready to exit, bye bye...
+546119:C 28 Aug 2025 14:12:19.084 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
+546119:C 28 Aug 2025 14:12:19.086 # Redis version=6.0.16, bits=64, commit=00000000, modified=0, pid=546119, just started
+546119:C 28 Aug 2025 14:12:19.087 # Configuration loaded
+546119:M 28 Aug 2025 14:12:19.089 * Running mode=standalone, port=6379.
+546119:M 28 Aug 2025 14:12:19.090 # Server initialized
+546119:M 28 Aug 2025 14:12:19.091 # WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.
+546119:M 28 Aug 2025 14:12:19.093 * Ready to accept connections
diff --git a/lightllm/server/visualserver/vit_connect.py b/lightllm/server/visualserver/vit_connect.py
@@ -8,6 +8,7 @@
 import httpx
 import base64
 from dataclasses import dataclass
+import rpyc
 
 logger = init_logger(__name__)
 
@@ -24,7 +25,7 @@ def to_log_str(self):
 class VITConnectionManager:
     """VIT连接管理器"""
 
-    def __init__(self, args, context, local_visual_port: int):
+    def __init__(self, args, context, local_visual_port: int, cache_client: rpyc.Connection):
         self.args = args
         self.context = context
         self.local_visual_port = local_visual_port
@@ -34,6 +35,7 @@ def __init__(self, args, context, local_visual_port: int):
         self.current_vit_index = 0
         self.remote_vit = args.enable_remote_vit
         self.remote_vit_port = args.remote_vit_port
+        self.cache_client = cache_client
 
         self._setup_vit_connections()
 
@@ -159,16 +161,21 @@ async def send_to_vit(self, data, protocol=pickle.HIGHEST_PROTOCOL):
         发送数据到VIT实例，支持本地和远程模式
         """
         instance = self._get_vit_instance()
+        # 本地模式下，提前释放图片资源，降低传输开销
+        if not self.remote_vit:
+            data.multimodal_params.free()
+
         try:
             print(instance, flush=True)
             instance.send_pyobj(data, protocol=protocol)
         except Exception as e:
             logger.error(f"Failed to send to VIT instance: {e}")
             raise Exception(f"Failed to send to VIT instance: {e}")
-        finally:
-            # 释放图片资源
+
+        # Remote mode: free the image resources only after sending and after the embed wait has returned
+        await self._wait_visual_embed_ready(data)
+        if self.remote_vit:
             data.multimodal_params.free()
-        await self._wait_visual_embed_ready()
 
     async def vit_handle_loop(self):
         """
@@ -179,7 +186,6 @@ async def vit_handle_loop(self):
             try:
                 id_to_vit_obj = await self._async_get_vit_objs()
                 if id_to_vit_obj:
-                    logger.debug(f"Retrieved {len(id_to_vit_obj)} VIT instances")
                     self._update_vit_connections(id_to_vit_obj)
                 await asyncio.sleep(30)
             except Exception as e:
@@ -205,8 +211,20 @@ async def _async_get_vit_objs(self) -> Optional[Dict[int, VIT_Obj]]:
             logger.exception(f"Error getting VIT instances: {e}")
             return None
 
-    async def _wait_visual_embed_ready(self):
-        """
-        等待VIT实例的embed准备好
-        """
-        await asyncio.sleep(10)
+    async def _wait_visual_embed_ready(self, data, timeout_seconds: int = 20):
+        # 本地模式不需要等待
+        if not self.remote_vit:
+            return
+
+        uuids = data.multimodal_params.get_all_uuids()
+
+        async def wait_for_embeds():
+            while not all(self.cache_client.root.get_items_embed(uuids)):
+                await asyncio.sleep(0.05)
+
+        try:
+            await asyncio.wait_for(wait_for_embeds(), timeout=timeout_seconds)
+        except asyncio.TimeoutError:
+            logger.error(
+                f"Req {data.group_req_id}: timeout waiting for visual embed ready after {timeout_seconds} seconds"
+            )