Skip to content

Commit c0b1146

Browse files
committed
support chunked prefill
1 parent 3fd6e48 commit c0b1146

File tree

1 file changed

+6
-4
lines changed
  • lightllm/server/router/req_queue/chunked_prefill

1 file changed

+6
-4
lines changed

lightllm/server/router/req_queue/chunked_prefill/impl.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def _can_add_new_req(self, req: Req, is_busy, new_batch_first_router_need_tokens
5656
return False, new_batch_first_router_need_tokens
5757

5858
# @calculate_time(show=True, min_cost_ms=10)
59-
def generate_new_batch(self, current_batch: Batch):
59+
def generate_new_batch(self, current_batch: Batch, from_req_list):
6060

6161
# 如果当前已经被调度的请求数量超过了上限,直接不调度新的请求了。
6262
exist_req_num = self.get_batch_dp_req_size(current_batch) + len(self.pause_req_dict)
@@ -74,7 +74,7 @@ def generate_new_batch(self, current_batch: Batch):
7474
can_run_list = []
7575
abort_req_list = []
7676
aborted_count = 0
77-
for req in self.waiting_req_list:
77+
for req in from_req_list:
7878
if req.is_aborted and not req.is_paused:
7979
# 由于管理的复杂性,只有没有被调度运行过的请求可以因为abort直接在队列中忽略掉.
8080
# 暂停的请求需要恢复后,由 router manager 部分来过滤。暂时保持这种处理方法, 否则会导致管理token的泄漏
@@ -97,9 +97,11 @@ def generate_new_batch(self, current_batch: Batch):
9797
for req in abort_req_list:
9898
self.router.shm_req_manager.put_back_req_obj(req)
9999
self.waiting_req_list = self.waiting_req_list[len(can_run_list) + aborted_count :]
100-
return new_batch
100+
poped_req_list = from_req_list[: len(can_run_list) + aborted_count]
101+
remain_req_list = from_req_list[len(can_run_list) + aborted_count :]
102+
return new_batch, poped_req_list, remain_req_list
101103
else:
102-
return None
104+
return None, [], from_req_list
103105

104106
def _calcu_batch_token_load_batch_not_none(self, current_batch: Batch):
105107
is_busy = self.is_busy()

0 commit comments

Comments
 (0)