We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7c8faa0, commit 11ca288 (Copy full SHA for 11ca288)
tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -2593,7 +2593,8 @@ def _handle_responses(self):
2593
if request.is_finished:
2594
# Finalize any remaining logits transfers for the finished request in chunked mode
2595
if request.py_use_chunked_generation_logits and request.py_return_generation_logits:
2596
- request.py_result.transfer_remaining_device_logits()
+ with torch.inference_mode():
2597
+ request.py_result.transfer_remaining_device_logits()
2598
2599
request_done = False
2600
if request.py_decoding_iter == 1 or request.is_finished or \
0 commit comments