1 parent a332b84 · commit 2518230
vllm/config/vllm.py
@@ -580,9 +580,12 @@ def _set_cudagraph_sizes(self):
                 not self.model_config.enforce_eager:
             cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
             if len(cuda_graph_sizes) == 1:
-                batch_size_capture_list = [1, 2, 4] + [
-                    i for i in range(8, cuda_graph_sizes[0] + 1, 8)
-                ]
+                max_graph_size = cuda_graph_sizes[0]
+                assert max_graph_size >= 1, "Maximum cudagraph size should be" \
+                    " greater than or equal to 1."
+                batch_size_capture_list = [
+                    i for i in [1, 2, 4] if i <= max_graph_size
+                ] + list(range(8, max_graph_size + 1, 8))
             elif len(cuda_graph_sizes) > 1:
                 batch_size_capture_list = sorted(cuda_graph_sizes)
             else:
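For context, a minimal standalone sketch of the new capture-list logic (the helper name compute_capture_sizes is hypothetical, not part of vLLM; only the list expression mirrors the patch):

    def compute_capture_sizes(max_graph_size: int) -> list[int]:
        # Keep only the small warmup sizes 1, 2, 4 that do not exceed the
        # configured maximum, then extend with multiples of 8 up to and
        # including max_graph_size.
        assert max_graph_size >= 1, (
            "Maximum cudagraph size should be greater than or equal to 1."
        )
        return [i for i in [1, 2, 4] if i <= max_graph_size] + list(
            range(8, max_graph_size + 1, 8)
        )

    # Before this fix, a maximum below 4 (e.g. 2) would still capture
    # sizes 1, 2, 4; now the list is clamped to the configured maximum:
    assert compute_capture_sizes(2) == [1, 2]
    assert compute_capture_sizes(16) == [1, 2, 4, 8, 16]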