File tree (Expand file tree / Collapse file tree): 1 file changed, +10 -1 lines changed
lightllm/server/router/model_infer/mode_backend/dp_backend Expand file tree Collapse file tree 1 file changed +10
-1
lines changed Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ def __init__(self, backend: ModeBackend):
1212 self .is_aggressive_schedule = not get_env_start_args ().disable_aggressive_schedule
1313
1414 # Parameters for non-aggressive scheduling
15- self .decode_max_step = max (1 , get_env_start_args ().router_max_wait_tokens )
15+ self .decode_max_step = max (0 , get_env_start_args ().router_max_wait_tokens )
1616 self .left_decode_num = self .decode_max_step
1717
1818 self .step_count = 0
@@ -84,10 +84,17 @@ def _normal_way(
8484 use_ratio = np .count_nonzero (dp_prefill_req_nums ) / dp_prefill_req_nums .shape [0 ]
8585 max_decode_num = np .max (dp_decode_req_nums )
8686 max_prefill_num = np .max (dp_prefill_req_nums )
87+
88+ if self .left_decode_num > 0 and max_decode_num > 0 :
89+ self .left_decode_num -= 1
90+ return RunWay .DECODE
91+
8792 if use_ratio < 0.6 :
8893 if max_prefill_num > 0 :
8994 self .dp_prefill_wait_step += 1
9095 if self .dp_prefill_wait_step > self .dp_prefill_wait_max_step :
96+ # Each prefill grants a budget of decode_max_step decode steps.
97+ self .left_decode_num = self .decode_max_step
9198 return RunWay .PREFILL
9299
93100 if max_decode_num > 0 :
@@ -97,6 +104,8 @@ def _normal_way(
97104 else :
98105 if max_prefill_num > 0 :
99106 self .dp_prefill_wait_step = 0
107+ # Each prefill grants a budget of decode_max_step decode steps.
108+ self .left_decode_num = self .decode_max_step
100109 return RunWay .PREFILL
101110 else :
102111 if max_decode_num > 0 :
You can’t perform that action at this time.
0 commit comments