@@ -56,7 +56,7 @@ def _can_add_new_req(self, req: Req, is_busy, new_batch_first_router_need_tokens
5656 return False , new_batch_first_router_need_tokens
5757
5858 # @calculate_time(show=True, min_cost_ms=10)
59- def generate_new_batch (self , current_batch : Batch , from_req_list ):
59+ def generate_new_batch (self , current_batch : Batch , current_waiting_num ):
6060
6161 # 如果当前已经被调度的请求数量超过了上限,直接不调度新的请求了。
6262 exist_req_num = self .get_batch_dp_req_size (current_batch ) + len (self .pause_req_dict )
@@ -74,7 +74,7 @@ def generate_new_batch(self, current_batch: Batch, from_req_list):
7474 can_run_list = []
7575 abort_req_list = []
7676 aborted_count = 0
77- for req in from_req_list :
77+ for req in self . waiting_req_list [: current_waiting_num ] :
7878 if req .is_aborted and not req .is_paused :
7979 # 由于管理的复杂性,只有没有被调度运行过的请求可以因为abort直接在队列中忽略掉.
8080 # 暂停的请求需要恢复后,由 router manager 部分来过滤。暂时保持这种处理方法, 否则会导致管理token的泄漏
@@ -96,11 +96,10 @@ def generate_new_batch(self, current_batch: Batch, from_req_list):
9696 new_batch = Batch (uuid .uuid4 ().int , can_run_list , dp_size = self .dp_size )
9797 for req in abort_req_list :
9898 self .router .shm_req_manager .put_back_req_obj (req )
99- poped_req_list = from_req_list [: len (can_run_list ) + aborted_count ]
100- remain_req_list = from_req_list [len (can_run_list ) + aborted_count :]
101- return new_batch , poped_req_list , remain_req_list
99+ self .waiting_req_list = self .waiting_req_list [len (can_run_list ) + aborted_count :]
100+ return new_batch
102101 else :
103- return None , [], from_req_list
102+ return None
104103
105104 def _calcu_batch_token_load_batch_not_none (self , current_batch : Batch ):
106105 is_busy = self .is_busy ()
0 commit comments