File tree (expand / collapse): 2 files changed, +5 -5 lines changed
tensorrt_llm/_torch/pyexecutor (expand / collapse file tree): 2 files changed, +5 -5 lines changed
Columns: original file line number | diff line number | diff line change
@@ -83,8 +83,3 @@ def __call__(
8383 logits_view = logits [logits_begin :logits_end ].reshape (
8484 1 , beam_width , - 1 )
8585 llm_req .py_result .append_generation_logits (logits_view )
86-
87- # Finalize any remaining logits transfers for all requests in chunked mode
88- for llm_req in chain (context_requests , generation_requests ):
89- if llm_req .py_use_chunked_generation_logits and llm_req .py_return_generation_logits :
90- llm_req .py_result .transfer_remaining_device_logits ()
Columns: original file line number | diff line number | diff line change
@@ -2639,6 +2639,11 @@ def _handle_responses(self):
26392639 if request .return_perf_metrics and request .py_decoding_iter >= 1 :
26402640 request .update_perf_metrics (self .iter_counter )
26412641
2642+ if request .is_finished :
2643+ # Finalize any remaining logits transfers for the finished request in chunked mode
2644+ if request .py_use_chunked_generation_logits and request .py_return_generation_logits :
2645+ request .py_result .transfer_remaining_device_logits ()
2646+
26422647 request_done = False
26432648 if request .py_decoding_iter == 1 or request .is_finished or \
26442649 request .py_decoding_iter % self .stream_interval == 0 :
You can’t perform that action at this time.
0 commit comments