merge

shihaobai · shihaobai · commit 81cbc03512cf · 2025-08-28T19:22:30.000+08:00
diff --git a/lightllm/models/internvl/model.py b/lightllm/models/internvl/model.py
@@ -64,6 +64,7 @@ def init_imageitem_extral_params(
                 img.extra_params["image_patch_max_num"] = 6
             elif num_images > 6:
                 img.extra_params["image_patch_max_num"] = 0
+        img.patch_num = self.get_image_patch(img)
         return
 
     def init_audioitem_extral_params(
diff --git a/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py b/lightllm/models/qwen_vl/layer_infer/pre_layer_infer.py
@@ -53,11 +53,11 @@ def context_forward(self, input_ids, infer_state: LlamaInferStateInfo, layer_wei
                 if img["token_id"] in img_start_token_ids or img["_prefill_"] is False:
                     continue
                 # pull the img_embeds by uid from shm or afs
-                if self.args.run_mode == "llm_only":
-                    data = read_afs(get_shm_name_embed(img["uuid"]))
+                if self.args.enable_remote_vit:
+                    embed = read_afs(get_shm_name_embed(img["uuid"]))
                 else:
-                    data = read_shm(get_shm_name_embed(img["uuid"]))
-                img_weight.append(bytes2tensor(data).cuda().reshape(img["token_num"], -1))
+                    embed = read_shm(get_shm_name_embed(img["uuid"]))
+                img_weight.append(bytes2tensor(embed).cuda().reshape(img["token_num"], -1))
                 img_start_token_ids.append(img["token_id"])
                 img_token_lens.append(img["token_num"])
                 img_start_locs.append(img_start_loc)
diff --git a/lightllm/server/embed_cache/impl/memory_cache_with_redis.py b/lightllm/server/embed_cache/impl/memory_cache_with_redis.py
@@ -36,13 +36,15 @@ def release(self, ids: list[int]) -> None:
                 self.redis_cache.decr(id_)
 
     def set_items_data(self, ids: list[int]) -> None:
-        pass
+        for id_ in ids:
+            self._records[id_].data = True
 
     def get_items_data(self, ids: list[int]) -> list[Optional[bool]]:
         return [self._records.get(id_).data if id_ in self._records else False for id_ in ids]
 
     def set_items_embed(self, ids: list[int]) -> None:
-        pass
+        for id in ids:
+            self.redis_cache.insert(id)
 
     def get_items_embed(self, ids: list[int]) -> list[Optional[bool]]:
-        pass
+        return [self.redis_cache.query_and_incre(id) for id in ids]
diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
@@ -133,9 +133,9 @@ async def _alloc_resource(self, items, md5sums, token_nums, datas):
                 item.token_num = rec["token_num"]
                 uid_list.append(rec["id"])
 
-            # If enable the vit/audio-llm disaggregation, no need to cache the data in the memory of the server
-            if self.enable_remote_vit:
-                return
+            # # With vit/audio-llm disaggregation enabled, the server does not need to cache the data in memory
+            # if self.enable_remote_vit:
+            #     return
 
             ready_flags = obtain(self.cache_client.root.get_items_data(uid_list))
             update_data_ids = []
@@ -159,11 +159,10 @@ async def _alloc_multimodal_resources(self, multimodal_params: MultimodalParams,
                 items, md5sums, tokens_nums, datas = [], [], [], []
                 for img in multimodal_params.images:
                     self.tokenizer.init_imageitem_extral_params(img, multimodal_params, sampling_params)
-                    patch_num = self.tokenizer.get_image_patch(img)
                     data = img.read()
                     # must after init_imageitem_extral_params
                     token_num = self.tokenizer.get_image_token_length(img)
-                    md5sum = "{}_{}".format(hashlib.md5(data).hexdigest(), patch_num)
+                    md5sum = "{}_{}".format(hashlib.md5(data).hexdigest(), img.patch_num)
                     md5sums.append(md5sum)
                     tokens_nums.append(token_num)
                     datas.append(data)
diff --git a/lightllm/server/multimodal_params.py b/lightllm/server/multimodal_params.py
@@ -78,8 +78,7 @@ def __init__(self, **kwargs):
         self.token_num = None
         self.image_w = 0
         self.image_h = 0
-        self.afs_embed = False
-        self.is_abort = False
+        self.patch_num = 0
 
         self._preload_data = None
         self.extra_params = {}
@@ -114,8 +113,8 @@ async def preload(self, request: Request):
     def read(self):
         assert self._preload_data is not None
         ans = self._preload_data
-        # self._preload_data = None
-        # self._data = None
+        self._preload_data = None
+        self._data = None
         return ans
 
     def to_dict(self):
diff --git a/lightllm/server/visualserver/manager.py b/lightllm/server/visualserver/manager.py
@@ -43,6 +43,8 @@ def __init__(
         self.args = args
         self.remote_vit = args.enable_remote_vit or args.run_mode == "visual"
         self.cache_port = cache_port
+        self.visual_port = visual_port
+        self.next_module_port = next_module_port
         self.waiting_reqs: List[GroupReqIndexes] = []
         self.infer_batch_size = args.visual_infer_batch_size
         self.trust_remote_code = args.trust_remote_code
@@ -151,20 +153,16 @@ async def loop_for_fwd(self):
                     processing_group_reqs = []
                     images_need_infer = []
 
-    def _recv_reqs(self):
-        if self.remote_vit:
-            recv_req: GroupReqIndexes = self.vit_receiver.recv_pyobj(zmq.NOBLOCK)
-            for img in recv_req.multimodal_params.images:
-                image_patch = self.tokenizer.get_image_patch_func(img)
-                data = img._preload_data
-                # img._preload_data = None
-                md5sum = "{}_{}".format(hashlib.md5(data).hexdigest(), image_patch)
-                md5 = int(md5sum, 16)
-                # create_shm(get_shm_name_data(uid), data)
-                self.cache_client.root.set_items_data([md5])
-            return recv_req
-        else:
-            return self.vit_receiver.recv_pyobj(zmq.NOBLOCK)
+    # def _recv_reqs(self):
+    #     if self.remote_vit:
+    #         recv_req: GroupReqIndexes = self.recv_from_httpserver.recv_pyobj(zmq.NOBLOCK)
+    #         recv_req.multimodal_params.images[:]= [
+    #             img for img in recv_req.multimodal_params.images
+    #             if not self.cache_client.root.get_item_embed(img.uuid)  # images whose embed already exists are dropped, ref +1
+    #         ]
+    #         return recv_req
+    #     else:
+    #         return self.recv_from_httpserver.recv_pyobj(zmq.NOBLOCK)
 
     async def loop_for_netio_req(self):
         if not hasattr(self, "visual_recv_max_count"):
@@ -199,8 +197,6 @@ async def loop_for_fwd_visual_only(self):
                     visual_req = self.waiting_reqs.pop(0)
 
                     for img in visual_req.multimodal_params.images:
-                        if img.is_abort:
-                            continue
                         images_need_infer.append(img)
 
                         if len(images_need_infer) == self.infer_batch_size: