Skip to content

Commit e5af904

Browse files
committed
fix
1 parent 7fe6038 commit e5af904

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

lightllm/server/router/model_infer/mode_backend/dp_backend/control_state.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def __init__(self, backend: ModeBackend):
1212
self.is_aggressive_schedule = not get_env_start_args().disable_aggressive_schedule
1313

1414
# Parameters for non-aggressive scheduling
15-
self.decode_max_step = max(1, get_env_start_args().router_max_wait_tokens)
15+
self.decode_max_step = max(0, get_env_start_args().router_max_wait_tokens)
1616
self.left_decode_num = self.decode_max_step
1717

1818
self.step_count = 0
@@ -84,10 +84,17 @@ def _normal_way(
8484
use_ratio = np.count_nonzero(dp_prefill_req_nums) / dp_prefill_req_nums.shape[0]
8585
max_decode_num = np.max(dp_decode_req_nums)
8686
max_prefill_num = np.max(dp_prefill_req_nums)
87+
88+
if self.left_decode_num > 0 and max_decode_num > 0:
89+
self.left_decode_num -= 1
90+
return RunWay.DECODE
91+
8792
if use_ratio < 0.6:
8893
if max_prefill_num > 0:
8994
self.dp_prefill_wait_step += 1
9095
if self.dp_prefill_wait_step > self.dp_prefill_wait_max_step:
96+
# Each prefill grants a budget of up to decode_max_step decode steps.
97+
self.left_decode_num = self.decode_max_step
9198
return RunWay.PREFILL
9299

93100
if max_decode_num > 0:
@@ -97,6 +104,8 @@ def _normal_way(
97104
else:
98105
if max_prefill_num > 0:
99106
self.dp_prefill_wait_step = 0
107+
# Each prefill grants a budget of up to decode_max_step decode steps.
108+
self.left_decode_num = self.decode_max_step
100109
return RunWay.PREFILL
101110
else:
102111
if max_decode_num > 0:

0 commit comments

Comments
 (0)