Skip to content

Commit 60fc7f5

Browse files
committed
fix
1 parent 042a26b commit 60fc7f5

File tree

3 files changed

+16
-2
lines changed

3 files changed

+16
-2
lines changed

lightllm/server/embed_cache/impl/naive_memory_cache.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
144 144
now = time.time()
145 145
with self.lock:
146 146
if not self._judge_enough_token_cache(md5sum_list=md5sum_list, token_num_list=token_num_list):
147-
return "error not enough cache"
147+
return "error not enough embed cache"
148 148

149 149
add_ref_m_list = []
150 150
new_md5_dict = {}
@@ -197,7 +197,7 @@ def alloc(self, md5sum_list: list[str], token_num_list: list[int]) -> Optional[l
197 197
{
198 198
"id": rec.id,
199 199
"token_id": rec.token_id,
200-
"embed_cache_start_index": rec.mem_block.start,
200+
"start_index_in_embed_cache": rec.mem_block.start,
201 201
"token_num": rec.token_num,
202 202
}
203 203
)

lightllm/server/httpserver/manager.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,17 @@ async def _alloc_resource(self, items, md5sums, token_nums, datas):
125 125
await asyncio.sleep(0.1)
126 126
continue
127 127

128+
if "error" in records:
129+
raise Exception(records)
130+
128 131
uid_list = []
129 132
for item, rec in zip(items, records):
133+
item: Union[ImageItem, AudioItem] = item
130 134
item.uuid = rec["id"]
131 135
item.token_id = rec["token_id"]
132 136
item.token_num = rec["token_num"]
137+
item.start_index_in_embed_cache = rec["start_index_in_embed_cache"]
138+
133 139
uid_list.append(rec["id"])
134 140

135 141
ready_flags = obtain(self.cache_client.root.get_items_data(uid_list))
@@ -187,13 +193,15 @@ async def _release_multimodal_resources(self, multimodal_params: MultimodalParam
187 193
img.uuid = None
188 194
img.token_id = None
189 195
img.token_num = None
196+
img.start_index_in_embed_cache = None
190 197
for audio in multimodal_params.audios:
191 198
if audio.uuid is not None:
192 199
ids_to_release.append(audio.uuid)
193 200
# 将 uuid 等 赋值为 None, 防止因为abort等异常情况造成重复释放异常
194 201
audio.uuid = None
195 202
audio.token_id = None
196 203
audio.token_num = None
204+
audio.start_index_in_embed_cache = None
197 205
if ids_to_release:
198 206
self.cache_client.root.release(ids_to_release)
199 207
return

lightllm/server/multimodal_params.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def __init__(self, **kwargs):
20 20
self.uuid = None
21 21
# the start audio token id
22 22
self.token_id = None
23+
# the start index in embed cache
24+
self.start_index_in_embed_cache = None
23 25
# the audio token num
24 26
self.token_num = None
25 27
# the audio length
@@ -62,6 +64,7 @@ def to_dict(self):
62 64
ret["uuid"] = self.uuid
63 65
ret["token_id"] = self.token_id
64 66
ret["token_num"] = self.token_num
67+
ret["start_index_in_embed_cache"] = self.start_index_in_embed_cache
65 68
return ret
66 69

67 70

@@ -73,6 +76,8 @@ def __init__(self, **kwargs):
73 76
self.uuid = None
74 77
# the start image token id
75 78
self.token_id = None
79+
# the start index in embed cache
80+
self.start_index_in_embed_cache = None
76 81
# the image token num
77 82
self.token_num = None
78 83
# the start index of the image in the input_ids
@@ -123,6 +128,7 @@ def to_dict(self):
123 128
ret = {}
124 129
ret["uuid"] = self.uuid
125 130
ret["token_id"] = self.token_id
131+
ret["start_index_in_embed_cache"] = self.start_index_in_embed_cache
126 132
ret["token_num"] = self.token_num
127 133
ret["grid_thwd"] = self.grid_thwd
128 134
ret["start_idx"] = self.start_idx

0 commit comments

Comments
 (0)