Skip to content

Commit df67e11

Browse files
committed
Add missing sync before the forward call.
Signed-off-by: SimengLiu-nv <simengl@nvidia.com>
1 parent 7b33a89 commit df67e11

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,7 @@ def __init__(self,
261 261   # KVCacheTransferManager's onboard/offload operations.
262 262   self.is_warmup = True
263 263
    264 + self.execution_stream.wait_stream(torch.cuda.current_stream())
264 265   with torch.cuda.stream(self.execution_stream):
265 266   self.model_engine.warmup(self.resource_manager)
266 267   if self.draft_model_engine is not None:
@@ -2236,6 +2237,7 @@ def forward(scheduled_requests, resource_manager, new_tensors_device,
2236 2237
2237 2238   # Run model forward on the execution stream for proper synchronization
2238 2239   # with KVCacheTransferManager's onboard/offload operations.
     2240 + self.execution_stream.wait_stream(torch.cuda.current_stream())
2239 2241   with torch.cuda.stream(self.execution_stream):
2240 2242   outputs = forward(scheduled_requests, self.resource_manager,
2241 2243   new_tensors_device, gather_context_logits,

0 commit comments

Comments
 (0)