Skip to content

Commit 108a1c0

Browse files
committed
[https://nvbugs/5689235][fix] Fix cancellation+chunked prefill+disagg
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent 9ba1426 commit 108a1c0

File tree

4 files changed

+18
-5
lines changed

4 files changed

+18
-5
lines changed

cpp/include/tensorrt_llm/batch_manager/llmRequest.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1667,6 +1667,12 @@ class GenericLlmRequest
16671667
[](auto reason) { return reason == executor::FinishReason::kLENGTH; });
16681668
}
16691669

1670+
[[nodiscard]] bool isFinishedDueToCancellation() const noexcept
1671+
{
1672+
return std::all_of(mFinishReasons.begin(), mFinishReasons.end(),
1673+
[](auto reason) { return reason == executor::FinishReason::kCANCELLED; });
1674+
}
1675+
16701676
[[nodiscard]] bool isTimedOut() const
16711677
{
16721678
if (!mAllottedTimeMs.has_value())

cpp/tensorrt_llm/nanobind/batch_manager/bindings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ void initBindings(nb::module_& m)
161161
.def("set_finished_reason", &GenLlmReq::setFinishedReason, nb::arg("finish_reason"), nb::arg("beam"))
162162
.def_prop_ro("is_finished", &GenLlmReq::isFinished)
163163
.def_prop_ro("is_finished_due_to_length", &GenLlmReq::isFinishedDueToLength)
164+
.def_prop_ro("is_finished_due_to_cancellation", &GenLlmReq::isFinishedDueToCancellation)
164165
.def_prop_rw(
165166
"context_current_position", &GenLlmReq::getContextCurrentPosition, &GenLlmReq::setContextCurrentPosition)
166167
.def_prop_ro("prepopulated_prompt_len", &GenLlmReq::getPrepopulatedPromptLen)

cpp/tensorrt_llm/pybind/batch_manager/bindings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ void initBindings(pybind11::module_& m)
165165
.def("set_finished_reason", &GenLlmReq::setFinishedReason, py::arg("finish_reason"), py::arg("beam"))
166166
.def_property_readonly("is_finished", &GenLlmReq::isFinished)
167167
.def_property_readonly("is_finished_due_to_length", &GenLlmReq::isFinishedDueToLength)
168+
.def_property_readonly("is_finished_due_to_cancellation", &GenLlmReq::isFinishedDueToCancellation)
168169
.def_property(
169170
"context_current_position", &GenLlmReq::getContextCurrentPosition, &GenLlmReq::setContextCurrentPosition)
170171
.def_property_readonly("prepopulated_prompt_len", &GenLlmReq::getPrepopulatedPromptLen)

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,7 +1071,8 @@ def _executor_loop_pp(self):
10711071
for req in previous_batch.scheduled_ctx_reqs:
10721072
if req.is_context_only_request and (
10731073
req.is_context_finished
1074-
or req.is_finished_due_to_length):
1074+
or req.is_finished_due_to_length
1075+
) and not req.is_finished_due_to_cancellation:
10751076
block_id = self.kv_cache_manager.store_blocks_for_reuse(
10761077
req, True)
10771078
self.ctx_in_transmission_requests[
@@ -1340,7 +1341,8 @@ def _executor_loop(self):
13401341
for req in scheduled_batch.context_requests:
13411342
if req.is_context_only_request and (
13421343
req.is_context_finished
1343-
or req.is_finished_due_to_length):
1344+
or req.is_finished_due_to_length
1345+
) and not req.is_finished_due_to_cancellation:
13441346
block_id = self.kv_cache_manager.store_blocks_for_reuse(
13451347
req, True)
13461348
self.ctx_in_transmission_requests[
@@ -1567,7 +1569,8 @@ def _executor_loop_overlap(self):
15671569
for req in self.previous_batch.sample_state.scheduled_requests.context_requests:
15681570
if req.is_context_only_request and (
15691571
req.is_context_finished
1570-
or req.is_finished_due_to_length):
1572+
or req.is_finished_due_to_length
1573+
) and not req.is_finished_due_to_cancellation:
15711574
block_id = self.kv_cache_manager.store_blocks_for_reuse(
15721575
req, True)
15731576
self.ctx_in_transmission_requests[
@@ -2076,8 +2079,9 @@ def _send_disagg_ctx_cache(self, scheduled_ctx_requests):
20762079
if (scheduled_ctx_requests is None or len(scheduled_ctx_requests) == 0):
20772080
return []
20782081
for req in scheduled_ctx_requests:
2079-
if req.is_context_only_request and (req.is_context_finished or
2080-
req.is_finished_due_to_length):
2082+
if req.is_context_only_request and (
2083+
req.is_context_finished or req.is_finished_due_to_length
2084+
) and not req.is_finished_due_to_cancellation:
20812085
self.kv_cache_transceiver.respond_and_send_async(req)
20822086
for resource_mgr_type in (
20832087
ResourceManagerType.SEQ_SLOT_MANAGER,
@@ -2377,6 +2381,7 @@ def _handle_canceled_requests(self):
23772381
# to clean up the KV cache resources.
23782382
request.finish_by_reason(FinishReason.CANCELLED)
23792383
request.decoding_iter = request.py_decoding_iter
2384+
self.ctx_in_transmission_requests.pop(request.py_request_id)
23802385
else:
23812386
still_pending_canceled_ids.append(req_id)
23822387

0 commit comments

Comments
 (0)