lightllm/server/router/model_infer (1 file changed, +5 -5)

@@ -253,7 +253,7 @@ def init_all(self):
         self.cur_output_len = 0
         self.finish_status = FinishStatus()
 
-        if self.paused or not self.initialized:
+        if self.paused or not self.initialized or self.is_chunked:
             # If the request can use the prompt cache, pre-fill and restore the cached state ahead of time.
             input_token_ids = self.get_input_token_ids()
             if g_infer_context.radix_cache is not None and len(input_token_ids) > 1:
@@ -263,10 +263,10 @@ def init_all(self):
                 if share_node is not None:
                     self.shared_kv_node = share_node
                     ready_cache_len = share_node.node_prefix_total_len
-                    g_infer_context.req_manager.req_to_token_indexs[
-                        self.req_idx, self.cur_kv_len : self.cur_kv_len + ready_cache_len
-                    ] = value_tensor
-                    self.cur_kv_len += int(ready_cache_len)  # serialization issue: the value may be numpy.int64, so convert it with int(*)
+                    g_infer_context.req_manager.req_to_token_indexs[self.req_idx, 0:ready_cache_len] = value_tensor
+                    self.cur_kv_len = max(
+                        self.cur_kv_len, int(ready_cache_len)
+                    )  # dynamic prompt cache for chunked prefill
                     self.shm_req.prompt_cache_len = self.cur_kv_len  # record the prompt cache hit length
271271
 
         self.shm_req.shm_cur_kv_len = self.cur_kv_len
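
Why max() instead of +=: with chunked prefill, init_all() can apparently run more than once for the same request (hence the added `or self.is_chunked` condition), and the radix-cache match is always an absolute prefix starting at token 0. The old additive update would then double-count tokens that were already prefilled, while writing the cached indices at `[self.req_idx, 0:ready_cache_len]` and taking the max keeps the update idempotent. Below is a minimal standalone sketch of that difference; the helper names and lengths are made up for illustration and are not part of the patch:

def old_update(cur_kv_len: int, ready_cache_len: int) -> int:
    # Old behavior: accumulate. Only safe if this code path runs exactly once.
    return cur_kv_len + int(ready_cache_len)

def new_update(cur_kv_len: int, ready_cache_len: int) -> int:
    # New behavior: the matched prefix always starts at token 0, so take the
    # larger of what is already prefilled and what the cache provides.
    return max(cur_kv_len, int(ready_cache_len))

# A chunked request re-enters init_all() after prefilling 128 tokens, and the
# radix cache again matches the same 128-token prefix:
cur_kv_len, ready_cache_len = 128, 128
assert old_update(cur_kv_len, ready_cache_len) == 256  # double-counted
assert new_update(cur_kv_len, ready_cache_len) == 128  # idempotent

The int() conversion is kept from the old code because ready_cache_len may arrive as numpy.int64, which the original comment notes can break serialization of the request state.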