We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4e55e9d, commit a3fdc0f (Copy full SHA for a3fdc0f)
vllm/v1/worker/gpu_model_runner.py
@@ -2558,6 +2558,8 @@ def freeze_gc():
2558
desc="Capturing CUDA graph shapes")
2559
for num_tokens in compilation_cases:
2560
# We skip EPLB here since we don't want to record dummy metrics
2561
+ logger.info("DIEGO: compilation for number of tokens %d",
2562
+ num_tokens)
2563
for _ in range(
2564
self.compilation_config.cudagraph_num_of_warmups):
2565
self._dummy_run(num_tokens,
0 commit comments