Commit ff04aed

fix
1 parent 0e03ecb commit ff04aed

File tree

4 files changed, +0 -6 lines changed:

lightllm/common/basemodel/basemodel.py
lightllm/common/basemodel/infer_struct.py
lightllm/server/router/model_infer/mode_backend/base_backend.py
test/benchmark/static_inference/model_infer_mtp.py

lightllm/common/basemodel/basemodel.py

Lines changed: 0 additions & 1 deletion
@@ -62,7 +62,6 @@ def __init__(self, kvargs):
         self.is_token_healing = kvargs.get("is_token_healing", False)
         self.return_all_prompt_logics = kvargs.get("return_all_prompt_logics", False)
         assert not (self.is_token_healing and self.return_all_prompt_logics), "can not be true in same time"
-        self.use_dynamic_prompt_cache = kvargs.get("use_dynamic_prompt_cache", False)
         self.data_type = kvargs.get("data_type", "float16")
         self.graph_max_batch_size = kvargs.get("graph_max_batch_size", 16)
         self.graph_max_batch_size = (

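For context: the constructor above reads its settings from a plain kvargs dict via dict.get with defaults, so after this deletion a caller that still passes use_dynamic_prompt_cache is silently ignored, while any later read of self.use_dynamic_prompt_cache would raise AttributeError. A minimal sketch of that pattern (a hypothetical standalone class, not the actual lightllm model; only the kvargs keys mirror the diff):

```python
# Minimal sketch of the kvargs pattern above. _ModelConfigSketch is hypothetical,
# not the real lightllm class; only the kvargs keys mirror the diff.
class _ModelConfigSketch:
    def __init__(self, kvargs: dict):
        self.is_token_healing = kvargs.get("is_token_healing", False)
        self.return_all_prompt_logics = kvargs.get("return_all_prompt_logics", False)
        self.data_type = kvargs.get("data_type", "float16")
        self.graph_max_batch_size = kvargs.get("graph_max_batch_size", 16)
        # After this commit there is no self.use_dynamic_prompt_cache attribute here.


cfg = _ModelConfigSketch({"data_type": "float16", "use_dynamic_prompt_cache": True})
print(cfg.data_type)                             # float16
print(hasattr(cfg, "use_dynamic_prompt_cache"))  # False -- the stray key is ignored
```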
lightllm/common/basemodel/infer_struct.py

Lines changed: 0 additions & 1 deletion
@@ -35,7 +35,6 @@ def __init__(self):
 
         self.is_token_healing: bool = False
         self.return_all_prompt_logics: bool = False
-        self.use_dynamic_prompt_cache: bool = False
         self.multimodal_params: dict = None
         self.is_cuda_graph: bool = False  # marks whether this is a CUDA graph capture inference
         self.dist_group: CustomProcessGroup = None

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 0 additions & 2 deletions
@@ -124,7 +124,6 @@ def init_model(self, kvargs):
             "max_seq_length": kvargs.get("max_seq_length", 1024 * 5),
             "is_token_healing": kvargs.get("is_token_healing", False),
             "return_all_prompt_logics": self.return_all_prompt_logprobs,
-            "use_dynamic_prompt_cache": self.use_dynamic_prompt_cache,
             "disable_chunked_prefill": self.disable_chunked_prefill,
             "data_type": kvargs.get("data_type", "float16"),
             "graph_max_batch_size": kvargs.get("graph_max_batch_size", 16),
@@ -231,7 +230,6 @@ def init_mtp_draft_model(self, main_kvargs: dict):
             "max_seq_length": main_kvargs.get("max_seq_length", 1024 * 5),
             "is_token_healing": False,
             "return_all_prompt_logics": False,
-            "use_dynamic_prompt_cache": self.use_dynamic_prompt_cache,
             "disable_chunked_prefill": self.disable_chunked_prefill,
             "data_type": main_kvargs.get("data_type", "float16"),
             "graph_max_batch_size": main_kvargs.get("graph_max_batch_size", 16),

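Both hunks drop the same key from the kvargs dict that the backend hands to the model. A rough sketch of what that dict looks like after this commit; the literal values are placeholders standing in for the backend's real attributes, not the actual defaults used at runtime:

```python
# Placeholder values only -- a sketch of the backend's model kvargs after this commit.
model_kvargs = {
    "max_seq_length": 1024 * 5,
    "is_token_healing": False,
    "return_all_prompt_logics": False,   # self.return_all_prompt_logprobs in the backend
    "disable_chunked_prefill": False,    # self.disable_chunked_prefill in the backend
    "data_type": "float16",
    "graph_max_batch_size": 16,
}
assert "use_dynamic_prompt_cache" not in model_kvargs
```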
test/benchmark/static_inference/model_infer_mtp.py

Lines changed: 0 additions & 2 deletions
@@ -27,7 +27,6 @@ def init_mtp_model(args: StartArgs, kvargs, main_model):
         {
             "weight_dir": args.mtp_draft_model_dir,
             "max_total_token_num": main_model.mem_manager.size,
-            "use_dynamic_prompt_cache": False,
             "disable_chunked_prefill": True,
             "mtp_mode": args.mtp_mode,
             "main_model": main_model,
@@ -39,7 +38,6 @@ def init_mtp_model(args: StartArgs, kvargs, main_model):
         {
             "weight_dir": args.spec_model_dir,
             "max_total_token_num": main_model.mem_manager.size,
-            "use_dynamic_prompt_cache": False,
             "disable_chunked_prefill": True,
             "mtp_mode": args.mtp_mode,
             "main_model": main_model,
