lightllm/server/router/model_infer/mode_backend — 1 file changed: +2 -9 lines changed
@@ -35,14 +35,6 @@ def wait_for_init(self):
3535 if attach_shm_handle is not None :
3636 attach_shm_handle .wait ()
3737
38- def _compute_sequence_hash (self , req : InferReq ):
39- # 综合考虑后只对prompt做缓存管理,不包含decode内容,这里与radix cache不一致
40- if not req .shm_req .token_hash_list .is_empty ():
41- return req .shm_req .token_hash_list .get_all ()
42-
43- input_tokens = req .shm_req .get_prompt_ids ()
44- return compute_token_list_hash (input_tokens , self .args .cpu_cache_token_page_size )
45-
4638 def handle_finished_reqs (self , finished_reqs : List [InferReq ]) -> List [InferReq ]:
4739 """
4840 将满足cpu kv cache 卸载条件的请求进行处理,并返回需要真正退出的请求列表。
@@ -97,7 +89,8 @@ def _start_kv_cache_offload_task(
9789 ) -> Optional ["TransTask" ]:
9890 with torch .cuda .stream (cpu_kv_cache_stream ):
9991 if self .backend .is_master_in_dp :
100- token_hash_list = self ._compute_sequence_hash (req )
92+ # 综合考虑后只对prompt做缓存管理,不包含decode内容,这里与radix cache不一致
93+ token_hash_list = req .shm_req .token_hash_list .get_all ()
10194 block_size = req .cur_kv_len // self .args .cpu_cache_token_page_size
10295 move_block_size = min (block_size , len (token_hash_list ))
10396
You can’t perform that action at this time.
0 commit comments