@@ -90,38 +90,37 @@ def _filter_not_ready_reqs(self, req_ids: List[int]) -> List[InferReq]:
9090 ans_list : List [InferReq ] = []
9191 for request_id in req_ids :
9292 req_obj : InferReq = g_infer_context .requests_mapping [request_id ]
93- if req_obj .infer_aborted :
94- if req_obj .nixl_pd_task_num == (req_obj .nixl_pd_task_failed_num + req_obj .nixl_pd_task_sunccess_num ):
95- ans_list .append (req_obj )
93+ if req_obj .nixl_pd_task_num != (req_obj .nixl_pd_task_failed_num + req_obj .nixl_pd_task_sunccess_num ):
9694 continue
97-
98- if req_obj .nixl_pd_task_num == ( req_obj . nixl_pd_task_failed_num + req_obj . nixl_pd_task_sunccess_num ) :
99- if req_obj . nixl_pd_task_failed_num > 0 :
100- if not req_obj .finish_status .is_finished ():
101- # 强制停止
102- req_obj .cur_output_len += 1
103- req_obj .set_next_gen_token_id ( 0 , 0.0 , 1 )
104- req_obj . finish_status . set_status ( FinishStatus . FINISHED_STOP )
105-
106- if self . is_master_in_dp :
107- req_obj .shm_req .shm_cur_output_len = req_obj .cur_output_len
108- req_obj .shm_req .finish_token_index = req_obj . get_cur_total_len () - 1
109- req_obj .shm_req .finish_status . set_status ( FinishStatus . FINISHED_STOP )
110- req_obj . shm_req . candetoken_out_len = req_obj . cur_output_len
111-
112- logger . error ( f"req_id: { req_obj . req_id } forced to finished, it exits kv transfer error" )
113-
114- # 提前释放有问题的 mem_index
115- old_prefix_len = 0 if req_obj .shared_kv_node is None else req_obj . shared_kv_node . node_prefix_total_len
116- error_mem_len = req_obj . cur_kv_len - old_prefix_len
95+
96+ if req_obj .nixl_pd_task_failed_num > 0 :
97+ # 强制停止
98+ if not req_obj .finish_status .is_finished ():
99+ req_obj . cur_output_len += 1
100+ req_obj .set_next_gen_token_id ( 0 , 0.0 , 1 )
101+ req_obj .finish_status . set_status ( FinishStatus . FINISHED_STOP )
102+
103+ if self . is_master_in_dp :
104+ req_obj . shm_req . shm_cur_output_len = req_obj . cur_output_len
105+ req_obj .shm_req .finish_token_index = req_obj .get_cur_total_len () - 1
106+ req_obj .shm_req .finish_status . set_status ( FinishStatus . FINISHED_STOP )
107+ req_obj .shm_req .candetoken_out_len = req_obj . cur_output_len
108+
109+ logger . error ( f"req_id: { req_obj . req_id } forced to finished, it exits kv transfer error" )
110+
111+ # 提前释放有问题的 mem_index
112+ old_prefix_len = 0 if req_obj . shared_kv_node is None else req_obj . shared_kv_node . node_prefix_total_len
113+ error_mem_len = req_obj .cur_kv_len - old_prefix_len
114+ if error_mem_len > 0 :
117115 req_obj .cur_kv_len -= error_mem_len
118116
119117 mem_indexes = self .model .req_manager .req_to_token_indexs [req_obj .req_idx , req_obj .cur_kv_len :(req_obj .cur_kv_len + error_mem_len )].detach ().cpu ()
120118 self .model .mem_manager .free (mem_indexes )
121119 if self .is_master_in_dp :
122120 req_obj .shm_req .shm_cur_kv_len = req_obj .cur_kv_len
123- else :
124- ans_list .append (req_obj )
121+
122+ ans_list .append (req_obj )
123+
125124 g_infer_state_lock .release ()
126125 return ans_list
127126
0 commit comments