We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 17d2398, commit a3b31e7 (Copy full SHA for a3b31e7)
vllm/v1/worker/gpu_worker.py
@@ -310,8 +310,8 @@ def compile_or_warm_up_model(self) -> None:
310
for size in sorted(warmup_sizes, reverse=True):
311
logger.info("Compile and warming up model for size %d", size)
312
self.model_runner._dummy_run(size, skip_eplb=True)
313
- # if not self.model_config.enforce_eager:
314
- # self.model_runner.capture_model()
+ if not self.model_config.enforce_eager:
+ self.model_runner.capture_model()
315
316
# Warm up sampler and preallocate memory buffer for logits and other
317
# sampling related tensors of max possible shape to avoid memory
0 commit comments