Skip to content

Commit 02ed233

Browse files
committed
Move the transfer-remaining-logits logic to _handle_responses
Signed-off-by: Yibin Li <[email protected]>
1 parent 0998a7b commit 02ed233

File tree

2 files changed: 5 additions and 5 deletions

tensorrt_llm/_torch/pyexecutor/handle_logits.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -83,8 +83,3 @@ def __call__(
83 | 83 |   logits_view = logits[logits_begin:logits_end].reshape(
84 | 84 |   1, beam_width, -1)
85 | 85 |   llm_req.py_result.append_generation_logits(logits_view)
86 |    | -
87 |    | - # Finalize any remaining logits transfers for all requests in chunked mode
88 |    | - for llm_req in chain(context_requests, generation_requests):
89 |    | - if llm_req.py_use_chunked_generation_logits and llm_req.py_return_generation_logits:
90 |    | - llm_req.py_result.transfer_remaining_device_logits()

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2639,6 +2639,11 @@ def _handle_responses(self):
2639 | 2639 |   if request.return_perf_metrics and request.py_decoding_iter >= 1:
2640 | 2640 |   request.update_perf_metrics(self.iter_counter)
2641 | 2641 |
     | 2642 | + if request.is_finished:
     | 2643 | + # Finalize any remaining logits transfers for the finished request in chunked mode
     | 2644 | + if request.py_use_chunked_generation_logits and request.py_return_generation_logits:
     | 2645 | + request.py_result.transfer_remaining_device_logits()
     | 2646 | +
2642 | 2647 |   request_done = False
2643 | 2648 |   if request.py_decoding_iter == 1 or request.is_finished or \
2644 | 2649 |   request.py_decoding_iter % self.stream_interval == 0:

0 commit comments

Comments
 (0)