Fix test: run final chunked generation-logits transfer under torch.inference_mode()

yibinl-nvidia · yibinl-nvidia · commit 11ca288eb4e8 · 2026-01-07T21:57:23.000Z
Signed-off-by: Yibin Li <109242046+yibinl-nvidia@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor.py b/tensorrt_llm/_torch/pyexecutor/py_executor.py
@@ -2593,7 +2593,8 @@ def _handle_responses(self):
             if request.is_finished:
                 # Finalize any remaining logits transfers for the finished request in chunked mode
                 if request.py_use_chunked_generation_logits and request.py_return_generation_logits:
-                    request.py_result.transfer_remaining_device_logits()
+                    with torch.inference_mode():
+                        request.py_result.transfer_remaining_device_logits()
 
             request_done = False
             if request.py_decoding_iter == 1 or request.is_finished or \