@@ -2236,9 +2236,14 @@ def _handle_canceled_requests(self):
22362236 # Remove cancel request in the waiting queue
22372237 self .executor_request_queue .update_waiting_queue ()
22382238
2239+ # Build a set from the list of canceled request ids to speed up the membership test in the loop below
2240+ canceled_req_ids = set (
2241+ self .executor_request_queue .get_canceled_req_ids ())
2242+
2243+ still_pending_canceled_ids = []
22392244 for request in self .active_requests :
22402245 req_id = request .py_request_id if not request .is_child else request .parent_request_id
2241- if req_id not in self . executor_request_queue . get_canceled_req_ids () :
2246+ if req_id not in canceled_req_ids :
22422247 continue
22432248
22442249 is_cancelled = self ._try_cancel_request (request )
@@ -2247,13 +2252,13 @@ def _handle_canceled_requests(self):
22472252 # to clean up the KV cache resources.
22482253 request .finish_by_reason (FinishReason .CANCELLED )
22492254 request .decoding_iter = request .py_decoding_iter
2250- self .executor_request_queue .canceled_req_ids .remove (req_id )
2255+ else :
2256+ still_pending_canceled_ids .append (req_id )
22512257
2252- if self .enable_attention_dp :
2253- # TODO: revisit the cancel logic of attention dp
2254- # When enable attention dp, each rank does not have full copy of requests
2255- # so we need to remove the cancel requests not in the local rank
2256- self .executor_request_queue .clear_canceled_req_ids ()
2258+ # Clear list of requests marked for cancellation and add back those that failed to cancel.
2259+ self .executor_request_queue .canceled_req_ids .clear ()
2260+ self .executor_request_queue .canceled_req_ids .extend (
2261+ still_pending_canceled_ids )
22572262
22582263 @nvtx_range ("_enqueue_responses" )
22592264 def _enqueue_responses (self , responses : Iterable [Tuple [int , LlmResponse ]]):
0 commit comments