Skip to content

Commit d307836

Browse files
Freeze GC before CUDAGraph
Signed-off-by: Diego-Castan <[email protected]>
1 parent 1f7091e commit d307836

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

vllm/v1/worker/gpu_worker.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -369,9 +369,11 @@ def execute_model(
369369
if scheduler_output.total_num_scheduled_tokens not in self._token_compiled_cudagraphs and scheduler_output.total_num_scheduled_tokens != 0:
370370
logger.info("DIEGO: CUDAgraph in execution time for %d input tokens", scheduler_output.total_num_scheduled_tokens)
371371
self._token_compiled_cudagraphs.add(scheduler_output.total_num_scheduled_tokens)
372+
gc.freeze()
372373
start_time = time.perf_counter()
373374
self.model_runner._dummy_run(scheduler_output.total_num_scheduled_tokens, capture_attn_cudagraph=False, skip_eplb=True)
374375
end_time = time.perf_counter()
376+
gc.unfreeze()
375377
elapsed_time = end_time - start_time
376378
logger.info("Graph capturing finished in %.3f secs", elapsed_time)
377379

0 commit comments

Comments
 (0)