Skip to content

Commit ecd495c

Browse files
committed
fix chunked prefill
1 parent fffb99e commit ecd495c

File tree

1 file changed

+5
-6
lines changed
  • lightllm/server/router/req_queue/chunked_prefill

1 file changed

+5
-6
lines changed

lightllm/server/router/req_queue/chunked_prefill/impl.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def _can_add_new_req(self, req: Req, is_busy, new_batch_first_router_need_tokens
5656
return False, new_batch_first_router_need_tokens
5757

5858
# @calculate_time(show=True, min_cost_ms=10)
59-
def generate_new_batch(self, current_batch: Batch, from_req_list):
59+
def generate_new_batch(self, current_batch: Batch, current_waiting_num):
6060

6161
# 如果当前已经被调度的请求数量超过了上限,直接不调度新的请求了。
6262
exist_req_num = self.get_batch_dp_req_size(current_batch) + len(self.pause_req_dict)
@@ -74,7 +74,7 @@ def generate_new_batch(self, current_batch: Batch, from_req_list):
7474
can_run_list = []
7575
abort_req_list = []
7676
aborted_count = 0
77-
for req in from_req_list:
77+
for req in self.waiting_req_list[:current_waiting_num]:
7878
if req.is_aborted and not req.is_paused:
7979
# 由于管理的复杂性,只有没有被调度运行过的请求可以因为abort直接在队列中忽略掉.
8080
# 暂停的请求需要恢复后,由 router manager 部分来过滤。暂时保持这种处理方法, 否则会导致管理token的泄漏
@@ -96,11 +96,10 @@ def generate_new_batch(self, current_batch: Batch, from_req_list):
9696
new_batch = Batch(uuid.uuid4().int, can_run_list, dp_size=self.dp_size)
9797
for req in abort_req_list:
9898
self.router.shm_req_manager.put_back_req_obj(req)
99-
poped_req_list = from_req_list[: len(can_run_list) + aborted_count]
100-
remain_req_list = from_req_list[len(can_run_list) + aborted_count :]
101-
return new_batch, poped_req_list, remain_req_list
99+
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
100+
return new_batch
102101
else:
103-
return None, [], from_req_list
102+
return None
104103

105104
def _calcu_batch_token_load_batch_not_none(self, current_batch: Batch):
106105
is_busy = self.is_busy()

0 commit comments

Comments
 (0)