We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b3ac3db, commit 3ee4333 (Copy full SHA for 3ee4333)
vllm/v1/worker/gpu_worker.py
@@ -402,8 +402,8 @@ def compile_cuda_graph(input_size: int):
402
next_comp_set = warmup_sizes_set.difference(self._token_compiled_cudagraphs)
403
if len(next_comp_set) != 0:
404
next_comp = list(next_comp_set)
405
- self._token_compiled_cudagraphs.add(next_comp[0])
406
- compile_cuda_graph(next_comp[0])
+ self._token_compiled_cudagraphs.add(next_comp[0])
+ compile_cuda_graph(next_comp[0])
407
408
output = self.model_runner.execute_model(scheduler_output,
409
intermediate_tensors)
0 commit comments