1 parent 19cf84f commit 0664d85
vllm/v1/worker/gpu_worker.py
@@ -399,9 +399,9 @@ def compile_cuda_graph(input_size: int):
             scheduler_output.total_num_scheduled_tokens)
         compile_cuda_graph(scheduler_output.total_num_scheduled_tokens)
     else:
-        next_comp = list(
-            warmup_sizes_set.difference(
-                self._token_compiled_cudagraphs))[0]
+        next_comp_set = warmup_sizes_set.difference(self._token_compiled_cudagraphs)
+        if len(next_comp_set) != 0:
+            next_comp = list(next_comp_set)[0]
         self._token_compiled_cudagraphs.add(next_comp)
         compile_cuda_graph(next_comp)
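The change guards against the case where every warmup size has already been captured: instead of unconditionally taking the first element of the set difference, which fails when the difference is empty, the new code first checks that at least one uncompiled size remains. A minimal standalone sketch of that guarded pick, reusing the set names from the diff; compile_cuda_graph is stubbed out here, and the add/compile calls are placed inside the guard so an exhausted warmup set becomes a no-op (an assumption for the sake of a self-contained example, not a statement about the surrounding worker code):

def maybe_compile_next(warmup_sizes_set, token_compiled_cudagraphs, compile_cuda_graph):
    # Warmup sizes that have not been captured as CUDA graphs yet.
    next_comp_set = warmup_sizes_set.difference(token_compiled_cudagraphs)
    if len(next_comp_set) != 0:
        # Pick an arbitrary remaining size (set iteration order is unspecified),
        # record it as compiled, and capture its graph.
        next_comp = list(next_comp_set)[0]
        token_compiled_cudagraphs.add(next_comp)
        compile_cuda_graph(next_comp)

# Hypothetical usage: after three calls all sizes are compiled and further
# calls do nothing instead of raising an IndexError on the empty difference.
compiled = set()
for _ in range(5):
    maybe_compile_next({1, 8, 16}, compiled,
                       lambda size: print(f"capturing graph for {size} tokens"))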