
Commit 0e459ea

refactor log
1 parent 18a940e commit 0e459ea

File tree

2 files changed: +3 -6 lines changed

lightllm/server/core/objs/start_args_type.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ class StartArgs:
     router_max_new_token_len: int = field(default=1024)
     router_max_wait_tokens: int = field(default=6)
     use_dynamic_prompt_cache: bool = field(default=False)
-    chunked_prefill_size: int = field(default=256)
+    chunked_prefill_size: int = field(default=8192)
     enable_chunked_prefill: bool = field(default=False)
     diverse_mode: bool = field(default=False)
     token_healing_mode: bool = field(default=False)
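
For context, the effect of the new default can be seen with a trimmed-down sketch; StartArgsSketch below is a hypothetical stand-in that keeps only the fields touched in this hunk, not the real StartArgs:

from dataclasses import dataclass, field

# Hypothetical, minimal stand-in for lightllm's StartArgs (real class has many more fields).
@dataclass
class StartArgsSketch:
    use_dynamic_prompt_cache: bool = field(default=False)
    chunked_prefill_size: int = field(default=8192)  # was 256 before this commit
    enable_chunked_prefill: bool = field(default=False)

args = StartArgsSketch()
print(args.chunked_prefill_size)  # -> 8192 unless overridden
print(StartArgsSketch(chunked_prefill_size=256).chunked_prefill_size)  # -> 256, an explicit value still wins

Only the default changes; deployments that pass chunked_prefill_size explicitly are unaffected.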

lightllm/server/router/req_queue/chunked_prefill/impl.py

Lines changed: 2 additions & 5 deletions
@@ -42,10 +42,9 @@ def _can_add_new_req(self, req: Req, is_busy, new_batch_first_router_need_tokens
         ok_req_num = len(self.cache_len_list) + len(self.pause_req_dict) - 1 <= self.running_max_req_size

         new_batch_first_router_need_tokens += req.get_first_router_need_tokens()
-        # splitfuse decode ok
-        ok_splitfuse_decode = new_batch_first_router_need_tokens <= self.batch_max_tokens
+        ok_prefill = new_batch_first_router_need_tokens <= self.batch_max_tokens

-        if ok_token_num and ok_req_num and ok_splitfuse_decode:
+        if ok_token_num and ok_req_num and ok_prefill:
             self.router.shared_token_load.set_estimated_peak_token_count(need_max_token_num, self.dp_index)
             self.router.shared_token_load.set_dynamic_max_load(
                 (need_max_token_num + self.router.shared_token_load.get_frozened_token_count(self.dp_index))
@@ -67,8 +66,6 @@ def generate_new_batch(self, current_batch: Batch):

         is_busy = self.is_busy()

-        # Get the number of tokens the current batch needs to decode one step forward; only useful in splitfuse
-        # mode, because splitfuse runs prefill and decode together, so the current batch's history must be considered as well
         new_batch_first_router_need_tokens = (
             0 if current_batch is None else current_batch.get_batch_decode_need_tokens()[self.dp_index]
         )
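
To make the renamed condition easier to read in isolation, here is a hedged sketch of the admission check; the flag names mirror the diff, but the function signature, the precomputed ok_token_num, and the stripped-down inputs are assumptions rather than the real _can_add_new_req:

# Hedged sketch only: mirrors the condition after the rename, not the actual lightllm method.
def can_add_new_req_sketch(
    ok_token_num: bool,                       # assumed precomputed token-capacity check
    num_running_reqs: int,                    # stands in for len(self.cache_len_list)
    num_paused_reqs: int,                     # stands in for len(self.pause_req_dict)
    running_max_req_size: int,
    new_batch_first_router_need_tokens: int,  # tokens the incoming batch needs for its first router step
    batch_max_tokens: int,
) -> bool:
    # Request-count budget: running plus paused requests must stay within the router limit.
    ok_req_num = num_running_reqs + num_paused_reqs - 1 <= running_max_req_size
    # Renamed check: the prefill work of the incoming batch must fit the per-batch token budget.
    ok_prefill = new_batch_first_router_need_tokens <= batch_max_tokens
    return ok_token_num and ok_req_num and ok_prefill

The rename presumably reflects that this queue lives under req_queue/chunked_prefill, so the budget being checked is plain prefill work rather than a splitfuse-specific fused decode step; the deleted comment in generate_new_batch described that splitfuse behavior and is dropped for the same reason.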
