Skip to content

Commit ae7165c

Browse files
authored
Fix the pause issue under extremely aggressive scheduling. (#1057)
1 parent 16c8c79 commit ae7165c

File tree

2 files changed

+8
-2
lines changed

2 files changed

+8
-2
lines changed

lightllm/server/router/model_infer/infer_batch.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -199,16 +199,20 @@ def pause_reqs(self, pause_reqs: List["InferReq"], is_master_in_dp: bool):
199199
g_infer_state_lock.release()
200200
return self
201201

202-
def recover_paused_reqs(self, paused_reqs: List["InferReq"], is_master_in_dp: bool):
202+
def recover_paused_reqs(self, paused_reqs: List["InferReq"], is_master_in_dp: bool, can_alloc_token_num: int):
203203
if paused_reqs:
204204
g_infer_state_lock.acquire()
205205

206206
for req in paused_reqs:
207+
prefill_need_token_num = req.get_cur_total_len()
208+
if prefill_need_token_num > can_alloc_token_num:
209+
break
207210
req._match_radix_cache()
208211
assert req.paused is True
209212
req.paused = False
210213
if is_master_in_dp:
211214
req.shm_req.is_paused = False
215+
can_alloc_token_num -= prefill_need_token_num
212216

213217
g_infer_state_lock.release()
214218
return

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -464,7 +464,9 @@ def _get_classed_reqs(
464464
g_infer_context.pause_reqs(wait_pause_reqs, is_master_in_dp=self.is_master_in_dp)
465465

466466
if recover_paused:
467-
g_infer_context.recover_paused_reqs(paused_reqs=paused_reqs, is_master_in_dp=self.is_master_in_dp)
467+
g_infer_context.recover_paused_reqs(
468+
paused_reqs=paused_reqs, is_master_in_dp=self.is_master_in_dp, can_alloc_token_num=can_alloc_token_num
469+
)
468470

469471
return prefill_reqs, decode_reqs
470472

0 commit comments

Comments
 (0)