Skip to content

Commit bdae2ef

Browse files
author
wangzaijun
committed
fix offload ref
1 parent 239b583 commit bdae2ef

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

lightllm/server/multi_level_kv_cache/cpu_cache_client.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,10 @@ def update_pages_status_to_ready(
125125
assert cur_page.ref_count > 0
126126
cur_page.ref_count -= 1
127127

128+
# 进入卸载队列的请求,引用计数加一,等卸载完成后再释放。
129+
if disk_offload_enable:
130+
cur_page.ref_count += 1
131+
128132
# 控制prompt长度,较短的prompt不进行disk offload
129133
limit_length = get_disk_cache_prompt_limit_length()
130134
if (
@@ -214,7 +218,6 @@ def get_pages_to_offloading(self) -> List[List[int]]:
214218
groups.append(page_list[index + 1 : index + 1 + group_size])
215219
for page_index in groups[-1]:
216220
page_item: _CpuPageStatus = page_items[page_index]
217-
page_item.ref_count += 1
218221
# TODO 这个状态是否存在问题
219222
page_item.status = _CpuPageStatus.OFFLOADING
220223

0 commit comments

Comments
 (0)