Skip to content

Commit f938058

Browse files
authored
[https://nvbugs/5508267][fix] Proper handling of inactive canceled requests (#9280)
Signed-off-by: thorjohnsen <41591019+thorjohnsen@users.noreply.github.com>
1 parent faabc1a commit f938058

File tree

1 file changed

+12
-7
lines changed

1 file changed

+12
-7
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2236,9 +2236,14 @@ def _handle_canceled_requests(self):
22362236
# Remove cancel request in the waiting queue
22372237
self.executor_request_queue.update_waiting_queue()
22382238

2239+
# Create set from list of canceled request ids to speed up canceled test
2240+
canceled_req_ids = set(
2241+
self.executor_request_queue.get_canceled_req_ids())
2242+
2243+
still_pending_canceled_ids = []
22392244
for request in self.active_requests:
22402245
req_id = request.py_request_id if not request.is_child else request.parent_request_id
2241-
if req_id not in self.executor_request_queue.get_canceled_req_ids():
2246+
if req_id not in canceled_req_ids:
22422247
continue
22432248

22442249
is_cancelled = self._try_cancel_request(request)
@@ -2247,13 +2252,13 @@ def _handle_canceled_requests(self):
22472252
# to clean up the KV cache resources.
22482253
request.finish_by_reason(FinishReason.CANCELLED)
22492254
request.decoding_iter = request.py_decoding_iter
2250-
self.executor_request_queue.canceled_req_ids.remove(req_id)
2255+
else:
2256+
still_pending_canceled_ids.append(req_id)
22512257

2252-
if self.enable_attention_dp:
2253-
# TODO: revisit the cancel logic of attention dp
2254-
# When enable attention dp, each rank does not have full copy of requests
2255-
# so we need to remove the cancel requests not in the local rank
2256-
self.executor_request_queue.clear_canceled_req_ids()
2258+
# Clear list of requests marked for cancellation and add back those that failed to cancel.
2259+
self.executor_request_queue.canceled_req_ids.clear()
2260+
self.executor_request_queue.canceled_req_ids.extend(
2261+
still_pending_canceled_ids)
22572262

22582263
@nvtx_range("_enqueue_responses")
22592264
def _enqueue_responses(self, responses: Iterable[Tuple[int, LlmResponse]]):

0 commit comments

Comments
 (0)