[Doc] Improve help examples for --compilation-config (vllm-project#16729)

DarkLight1337 · web-flow · commit c16fb5dae889 · 2025-04-17T21:22:34.000-07:00
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
diff --git a/docs/source/design/v1/torch_compile.md b/docs/source/design/v1/torch_compile.md
@@ -134,6 +134,6 @@ The cudagraphs are captured and managed by the compiler backend, and replayed wh
 
 By default, vLLM will try to determine a set of sizes to capture cudagraph. You can also override it using the config `cudagraph_capture_sizes`:
 
-`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation_config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
+`VLLM_USE_V1=1 vllm serve meta-llama/Llama-3.2-1B --compilation-config "{'cudagraph_capture_sizes': [1, 2, 4, 8]}"`
 
 Then it will only capture cudagraph for the specified sizes. It can be useful to have fine-grained control over the cudagraph capture.
diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py
@@ -53,12 +53,20 @@ def test_compilation_config():
     assert args.compilation_config.level == 3
 
     # set to string form of a dict
-    args = parser.parse_args(["--compilation-config", "{'level': 3}"])
-    assert args.compilation_config.level == 3
+    args = parser.parse_args([
+        "--compilation-config",
+        "{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
+    ])
+    assert (args.compilation_config.level == 3 and
+            args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
 
     # set to string form of a dict
-    args = parser.parse_args(["--compilation-config={'level': 3}"])
-    assert args.compilation_config.level == 3
+    args = parser.parse_args([
+        "--compilation-config="
+        "{'level': 3, 'cudagraph_capture_sizes': [1, 2, 4, 8]}",
+    ])
+    assert (args.compilation_config.level == 3 and
+            args.compilation_config.cudagraph_capture_sizes == [1, 2, 4, 8])
 
 
 def test_prefix_cache_default():
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -939,10 +939,11 @@ def get_kwargs(cls: type[Config]) -> dict[str, Any]:
                             'testing only. level 3 is the recommended level '
                             'for production.\n'
                             'To specify the full compilation config, '
-                            'use a JSON string.\n'
+                            'use a JSON string, e.g. ``{"level": 3, '
+                            '"cudagraph_capture_sizes": [1, 2, 4, 8]}``\n'
                             'Following the convention of traditional '
-                            'compilers, using -O without space is also '
-                            'supported. -O3 is equivalent to -O 3.')
+                            'compilers, using ``-O`` without space is also '
+                            'supported. ``-O3`` is equivalent to ``-O 3``.')
 
         parser.add_argument('--kv-transfer-config',
                             type=KVTransferConfig.from_cli,