Skip to content

Commit 8da73ae

Browse files
authored
fix
1 parent 8245286 commit 8da73ae

File tree

4 files changed

+3
-10
lines changed

4 files changed

+3
-10
lines changed

lightllm/server/multi_level_kv_cache/cpu_cache_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def update_pages_status_to_ready(
123 123
assert cur_page.ref_count > 0
124 124
cur_page.ref_count -= 1
125 125
if cur_page.ref_count == 0:
126-
# 放回 LRU 列表头部
126+
# 放回 LRU 列表尾部
127 127
self.page_items.add_item_to_tail(cur_page.self_index)
128 128

129 129
# 全部落盘,已落盘前缀部分会在落盘中自动剔除

lightllm/server/multi_level_kv_cache/manager.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -164,16 +164,11 @@ def _handle_group_req_multi_cache_match(self, group_req_indexes: GroupReqIndexes
164 164
continue
165 165

166 166
finded_page_indexes: List[int] = []
167-
disk_service = (
168-
self.disk_cache_worker.service
169-
if (self.disk_cache_worker is not None and self.disk_cache_worker.service is not None)
170-
else None
171-
)
172 167
req.disk_prompt_cache_len = 0
173 168

174 169
# 匹配 CPU cache
175 170
all_pages = self._cpu_cache_match(token_hash_list)
176-
if len(all_pages) == len(token_hash_list) or disk_service is None:
171+
if len(all_pages) == len(token_hash_list) or self.only_cpu_cache_enable:
177 172
finded_page_indexes = all_pages
178 173
else:
179 174
# 匹配 disk cache并load到cpu cache

lightllm/server/router/model_infer/mode_backend/multi_level_kv_cache.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,8 +254,6 @@ def update_cpu_cache_task_states(self):
254 254
self.cpu_cache_client.lock.acquire_sleep1ms()
255 255
# 分组update,避免不同请求的page交叉,导致disk cache hash不一致
256 256
for pages in page_array_list:
257-
if not pages:
258-
continue
259 257
self.cpu_cache_client.update_pages_status_to_ready(
260 258
page_list=pages, deref=True, disk_offload_enable=self.args.enable_disk_cache
261 259
)

lightllm/utils/envs_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def enable_diverse_mode_gqa_decode_fast_kernel() -> bool:
208 208

209 209
@lru_cache(maxsize=None)
210 210
def get_disk_cache_prompt_limit_length():
211-
return int(os.getenv("LIGHTLLM_DISK_CACHE_PROMPT_LIMIT_LENGTH", 10000))
211+
return int(os.getenv("LIGHTLLM_DISK_CACHE_PROMPT_LIMIT_LENGTH", 2048))
212 212

213 213

214 214
@lru_cache(maxsize=None)

0 commit comments

Comments
 (0)