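fix: rename the embed-cache record key to `start_index_in_embed_cache`, propagate it to image/audio items, and raise when embed-cache allocation returns an error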

hiworldwzj · hiworldwzj · commit 60fc7f5a826b · 2025-12-18T21:45:11.000+08:00
diff --git a/lightllm/server/embed_cache/impl/naive_memory_cache.py b/lightllm/server/embed_cache/impl/naive_memory_cache.py
@@ -144,7 +144,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
         now = time.time()
         with self.lock:
             if not self._judge_enough_token_cache(md5sum_list=md5sum_list, token_num_list=token_num_list):
-                return "error not enough cache"
+                return "error not enough embed cache"
 
             add_ref_m_list = []
             new_md5_dict = {}
@@ -197,7 +197,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
                         {
                             "id": rec.id,
                             "token_id": rec.token_id,
-                            "embed_cache_start_index": rec.mem_block.start,
+                            "start_index_in_embed_cache": rec.mem_block.start,
                             "token_num": rec.token_num,
                         }
                     )
diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py
@@ -125,11 +125,17 @@ async def _alloc_resource(self, items, md5sums, token_nums, datas):
                 await asyncio.sleep(0.1)
                 continue
 
+            if "error" in records:
+                raise Exception(records)
+
             uid_list = []
             for item, rec in zip(items, records):
+                item: Union[ImageItem, AudioItem] = item
                 item.uuid = rec["id"]
                 item.token_id = rec["token_id"]
                 item.token_num = rec["token_num"]
+                item.start_index_in_embed_cache = rec["start_index_in_embed_cache"]
+
                 uid_list.append(rec["id"])
 
             ready_flags = obtain(self.cache_client.root.get_items_data(uid_list))
@@ -187,13 +193,15 @@ async def _release_multimodal_resources(self, multimodal_params: MultimodalParam
                         img.uuid = None
                         img.token_id = None
                         img.token_num = None
+                        img.start_index_in_embed_cache = None
                 for audio in multimodal_params.audios:
                     if audio.uuid is not None:
                         ids_to_release.append(audio.uuid)
                         # 将 uuid 等 赋值为 None, 防止因为abort等异常情况造成重复释放异常
                         audio.uuid = None
                         audio.token_id = None
                         audio.token_num = None
+                        audio.start_index_in_embed_cache = None
                 if ids_to_release:
                     self.cache_client.root.release(ids_to_release)
         return
diff --git a/lightllm/server/multimodal_params.py b/lightllm/server/multimodal_params.py
@@ -20,6 +20,8 @@ def __init__(self, **kwargs):
         self.uuid = None
         # the start audio token id
         self.token_id = None
+        # the start index in embed cache
+        self.start_index_in_embed_cache = None
         # the audio token num
         self.token_num = None
         # the audio length
@@ -62,6 +64,7 @@ def to_dict(self):
         ret["uuid"] = self.uuid
         ret["token_id"] = self.token_id
         ret["token_num"] = self.token_num
+        ret["start_index_in_embed_cache"] = self.start_index_in_embed_cache
         return ret
 
 
@@ -73,6 +76,8 @@ def __init__(self, **kwargs):
         self.uuid = None
         # the start image token id
         self.token_id = None
+        # the start index in embed cache
+        self.start_index_in_embed_cache = None
         # the image token num
         self.token_num = None
         # the start index of the image in the input_ids
@@ -123,6 +128,7 @@ def to_dict(self):
         ret = {}
         ret["uuid"] = self.uuid
         ret["token_id"] = self.token_id
+        ret["start_index_in_embed_cache"] = self.start_index_in_embed_cache
         ret["token_num"] = self.token_num
         ret["grid_thwd"] = self.grid_thwd
         ret["start_idx"] = self.start_idx

Original file line number	Diff line number	Diff line change
`@@ -144,7 +144,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l`
`144`	`144`	`now = time.time()`
`145`	`145`	`with self.lock:`
`146`	`146`	`if not self._judge_enough_token_cache(md5sum_list=md5sum_list, token_num_list=token_num_list):`
`147`		`- return "error not enough cache"`
	`147`	`+ return "error not enough embed cache"`
`148`	`148`
`149`	`149`	`add_ref_m_list = []`
`150`	`150`	`new_md5_dict = {}`
`@@ -197,7 +197,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l`
`197`	`197`	`{`
`198`	`198`	`"id": rec.id,`
`199`	`199`	`"token_id": rec.token_id,`
`200`		`- "embed_cache_start_index": rec.mem_block.start,`
	`200`	`+ "start_index_in_embed_cache": rec.mem_block.start,`
`201`	`201`	`"token_num": rec.token_num,`
`202`	`202`	`}`
`203`	`203`	`)`