1 parent a332b84 · commit 2518230
vllm/config/vllm.py
@@ -580,9 +580,12 @@ def _set_cudagraph_sizes(self):
                 not self.model_config.enforce_eager:
             cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes
             if len(cuda_graph_sizes) == 1:
-                batch_size_capture_list = [1, 2, 4] + [
-                    i for i in range(8, cuda_graph_sizes[0] + 1, 8)
-                ]
+                max_graph_size = cuda_graph_sizes[0]
+                assert max_graph_size >= 1, "Maximum cudagraph size should be" \
+                    " greater than or equal to 1."
+                batch_size_capture_list = [
+                    i for i in [1, 2, 4] if i <= max_graph_size
+                ] + list(range(8, max_graph_size + 1, 8))
             elif len(cuda_graph_sizes) > 1:
                 batch_size_capture_list = sorted(cuda_graph_sizes)
             else:
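For context, a minimal standalone sketch of the new capture-list logic (the helper name compute_capture_sizes is hypothetical, not part of vLLM; only the list expression mirrors the patch):

    def compute_capture_sizes(max_graph_size: int) -> list[int]:
        # Keep only the small warmup sizes 1, 2, 4 that do not exceed the
        # configured maximum, then extend with multiples of 8 up to and
        # including max_graph_size.
        assert max_graph_size >= 1, (
            "Maximum cudagraph size should be greater than or equal to 1."
        )
        return [i for i in [1, 2, 4] if i <= max_graph_size] + list(
            range(8, max_graph_size + 1, 8)
        )

    # Before this fix, a maximum below 4 (e.g. 2) would still capture
    # sizes 1, 2, 4; now the list is clamped to the configured maximum:
    assert compute_capture_sizes(2) == [1, 2]
    assert compute_capture_sizes(16) == [1, 2, 4, 8, 16]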