@@ -56,7 +56,7 @@ def _can_add_new_req(self, req: Req, is_busy, new_batch_first_router_need_tokens
5656 return False , new_batch_first_router_need_tokens
5757
5858 # @calculate_time(show=True, min_cost_ms=10)
59- def generate_new_batch (self , current_batch : Batch ):
59+ def generate_new_batch (self , current_batch : Batch , from_req_list ):
6060
6161 # 如果当前已经被调度的请求数量超过了上限,直接不调度新的请求了。
6262 exist_req_num = self .get_batch_dp_req_size (current_batch ) + len (self .pause_req_dict )
@@ -74,7 +74,7 @@ def generate_new_batch(self, current_batch: Batch):
7474 can_run_list = []
7575 abort_req_list = []
7676 aborted_count = 0
77- for req in self . waiting_req_list :
77+ for req in from_req_list :
7878 if req .is_aborted and not req .is_paused :
7979 # 由于管理的复杂性,只有没有被调度运行过的请求可以因为abort直接在队列中忽略掉.
8080 # 暂停的请求需要恢复后,由 router manager 部分来过滤。暂时保持这种处理方法, 否则会导致管理token的泄漏
@@ -97,9 +97,11 @@ def generate_new_batch(self, current_batch: Batch):
9797 for req in abort_req_list :
9898 self .router .shm_req_manager .put_back_req_obj (req )
9999 self .waiting_req_list = self .waiting_req_list [len (can_run_list ) + aborted_count :]
100- return new_batch
100+ poped_req_list = from_req_list [: len (can_run_list ) + aborted_count ]
101+ remain_req_list = from_req_list [len (can_run_list ) + aborted_count :]
102+ return new_batch , poped_req_list , remain_req_list
101103 else :
102- return None
104+ return None , [], from_req_list
103105
104106 def _calcu_batch_token_load_batch_not_none (self , current_batch : Batch ):
105107 is_busy = self .is_busy ()
0 commit comments