[https://nvbugs/5531963][fix] cherry pick #7725 (#7907)

QiJune · web-flow · commit 68b7900a1d35 · 2025-09-22T06:55:05.000-07:00
Signed-off-by: junq <22017000+QiJune@users.noreply.github.com>
diff --git a/tensorrt_llm/bench/dataclasses/configuration.py b/tensorrt_llm/bench/dataclasses/configuration.py
@@ -90,15 +90,16 @@ def get_llm_args(self) -> Dict:
         if self.backend == "pytorch":
             cuda_graph_config = updated_llm_args.pop(
                 "cuda_graph_config", llm_args["cuda_graph_config"])
-            # Use runtime max_batch_size as cuda_graph_config.max_batch_size
-            # if both max_batch_size and batch_sizes are not set.
-            batch_sizes_set = cuda_graph_config.get("batch_sizes",
-                                                    None) is not None
-            max_batch_size_set = cuda_graph_config.get("max_batch_size",
-                                                       None) is not None
-            if not batch_sizes_set and not max_batch_size_set:
-                cuda_graph_config[
-                    "max_batch_size"] = self.settings_config.max_batch_size
+            if cuda_graph_config:
+                # Use runtime max_batch_size as cuda_graph_config.max_batch_size
+                # if both max_batch_size and batch_sizes are not set.
+                batch_sizes_set = cuda_graph_config.get("batch_sizes",
+                                                        None) is not None
+                max_batch_size_set = cuda_graph_config.get(
+                    "max_batch_size", None) is not None
+                if not batch_sizes_set and not max_batch_size_set:
+                    cuda_graph_config[
+                        "max_batch_size"] = self.settings_config.max_batch_size
             updated_llm_args["cuda_graph_config"] = cuda_graph_config
 
         return updated_llm_args