Skip to content

Commit 0e03ecb

Browse files
committed
Fix dynamic_prompt_cache for chunked prefill
1 parent 81b9ecb commit 0e03ecb

File tree

3 files changed: +1 -6 lines changed

lightllm/common/basemodel/basemodel.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -251,7 +251,6 @@ def _create_inferstate(self, model_input: ModelInput, microbatch_index: int = 0)
251 | 251 |   infer_state.is_prefill = model_input.is_prefill
252 | 252 |   infer_state.is_token_healing = self.is_token_healing
253 | 253 |   infer_state.return_all_prompt_logics = self.return_all_prompt_logics
254 |     | - infer_state.use_dynamic_prompt_cache = self.use_dynamic_prompt_cache
255 | 254 |   infer_state.batch_size = model_input.batch_size
256 | 255 |   infer_state.total_token_num = model_input.total_token_num
257 | 256 |   infer_state.max_len_in_batch = model_input.max_len_in_batch

lightllm/models/deepseek2/layer_infer/transformer_layer_infer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,7 @@ def _decompress_kv(
255 | 255 |   b_kv_start_loc,
256 | 256 |   skip_sample=False,
257 | 257 |   ):
258 |     | - if infer_state.use_dynamic_prompt_cache and not skip_sample:
    | 258 | + if not skip_sample:
259 | 259 |   if is_fp8:
260 | 260 |   kv = infer_state.mem_manager.kv_buffer[self.layer_num_][:, :, :-2].view(torch.float8_e4m3fn)
261 | 261 |   kv_scale = infer_state.mem_manager.kv_buffer[self.layer_num_][:, :, -2:].view(torch.bfloat16)

lightllm/server/api_start.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -95,10 +95,6 @@ def normal_or_p_d_start(args):
 95 |  95 |   if args.graph_max_len_in_batch == 0:
 96 |  96 |   args.graph_max_len_in_batch = args.max_req_total_len
 97 |  97 |
 98 |     | - # mode setting check.
 99 |     | - if not args.disable_chunked_prefill:
100 |     | - assert args.disable_dynamic_prompt_cache is False
101 |     | - assert args.disable_chunked_prefill is False
102 |  98 |   if args.output_constraint_mode != "none":
103 |  99 |   assert args.disable_dynamic_prompt_cache is False
104 | 100 |   assert args.disable_chunked_prefill is False

0 commit comments

Comments
 (0)