1 parent 3ee4333 commit 04dc491
vllm/v1/worker/gpu_worker.py
@@ -383,6 +383,7 @@ def compile_cuda_graph(input_size: int):
         # but users still want to compile for better performance,
         # e.g. for the max-num-batched token size in chunked prefill.
         warmup_sizes = self.vllm_config.compilation_config.compile_sizes.copy()
+        logger.info("Warm up sizes %s", str(warmup_sizes))
         if not self.model_config.enforce_eager:
             warmup_sizes = [
                 x for x in warmup_sizes if x not in
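
For readers skimming the hunk: the added line logs the requested compile sizes before the eager-mode check filters them, and the hunk is cut off mid-comprehension, so the target of the "not in" is not shown here. Below is a minimal standalone sketch of the pattern, with compile_sizes, capture_sizes, and enforce_eager as hypothetical stand-ins for the worker's config fields, not code from this commit.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("vllm.v1.worker.gpu_worker")

# Hypothetical inputs standing in for the worker's config fields.
compile_sizes = [1, 2, 4, 8, 512]   # sizes requested for compilation
capture_sizes = [1, 2, 4, 8]        # assumption: sizes already covered elsewhere
enforce_eager = False

warmup_sizes = compile_sizes.copy()
# The line this commit adds: surface the sizes before any filtering.
logger.info("Warm up sizes %s", str(warmup_sizes))
if not enforce_eager:
    # Mirrors the truncated comprehension: keep only sizes not already covered.
    warmup_sizes = [x for x in warmup_sizes if x not in capture_sizes]

print(warmup_sizes)  # -> [512]

Running this prints the "Warm up sizes [1, 2, 4, 8, 512]" log line before filtering, which is the observable effect of the one-line change.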