Commit 8b3a1b3

niushengxiaowangzaijun authored and committed

remove compute_sequence_hash

1 parent dd42001

File tree

1 file changed: +2 -9 lines changed


lightllm/server/router/model_infer/mode_backend/multi_level_kv_cache.py

Lines changed: 2 additions & 9 deletions
@@ -35,14 +35,6 @@ def wait_for_init(self):
         if attach_shm_handle is not None:
             attach_shm_handle.wait()
 
-    def _compute_sequence_hash(self, req: InferReq):
-        # After weighing the options, only the prompt is cache-managed, not the decode output; this differs from the radix cache.
-        if not req.shm_req.token_hash_list.is_empty():
-            return req.shm_req.token_hash_list.get_all()
-
-        input_tokens = req.shm_req.get_prompt_ids()
-        return compute_token_list_hash(input_tokens, self.args.cpu_cache_token_page_size)
-
     def handle_finished_reqs(self, finished_reqs: List[InferReq]) -> List[InferReq]:
         """
         Process requests that meet the CPU KV cache offload conditions, and return the list of requests that actually need to exit.
@@ -97,7 +89,8 @@ def _start_kv_cache_offload_task(
     ) -> Optional["TransTask"]:
         with torch.cuda.stream(cpu_kv_cache_stream):
             if self.backend.is_master_in_dp:
-                token_hash_list = self._compute_sequence_hash(req)
+                # After weighing the options, only the prompt is cache-managed, not the decode output; this differs from the radix cache.
+                token_hash_list = req.shm_req.token_hash_list.get_all()
                 block_size = req.cur_kv_len // self.args.cpu_cache_token_page_size
                 move_block_size = min(block_size, len(token_hash_list))
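
For context, the deleted helper fell back to hashing the prompt ids in pages of cpu_cache_token_page_size whenever no precomputed token_hash_list was attached to the shared-memory request. Below is a minimal sketch of such paged, prefix-chained token hashing; the function name matches the call site, but the chaining scheme and hash choice are assumptions for illustration, not lightllm's actual implementation.

import hashlib
from typing import List


def compute_token_list_hash(tokens: List[int], page_size: int) -> List[int]:
    # Hash the token list page by page, feeding each page's digest into the
    # next so that hashes[i] identifies the whole prefix tokens[:(i+1)*page_size].
    # Trailing tokens that do not fill a page are skipped, mirroring the
    # block_size = cur_kv_len // page_size arithmetic in the diff above.
    hashes: List[int] = []
    prev = b"\x00" * 32  # seed digest for the first page
    n_full_pages = len(tokens) // page_size
    for i in range(n_full_pages):
        page = tokens[i * page_size : (i + 1) * page_size]
        payload = prev + b"".join(t.to_bytes(8, "little") for t in page)
        digest = hashlib.sha256(payload).digest()
        hashes.append(int.from_bytes(digest[:8], "little"))
        prev = digest
    return hashes


# Example: with page_size=4, a 10-token prompt yields 2 page hashes;
# the trailing 2 tokens are not covered by any hash.
print(compute_token_list_hash(list(range(10)), 4))

Because the prefix is chained into every page hash, a page hash can serve directly as a CPU cache key for the whole prompt prefix up to that page, which is why move_block_size is capped at len(token_hash_list).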
