@@ -272,10 +272,10 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
272
272
parser .add_argument (
273
273
'--allowed-local-media-path' ,
274
274
type = str ,
275
- help = "Allowing API requests to read local images or videos"
276
- "from directories specified by the server file system."
277
- "This is a security risk."
278
- "Should only be enabled in trusted environments" )
275
+ help = "Allowing API requests to read local images or videos "
276
+ "from directories specified by the server file system. "
277
+ "This is a security risk. "
278
+ "Should only be enabled in trusted environments. " )
279
279
parser .add_argument ('--download-dir' ,
280
280
type = nullable_str ,
281
281
default = EngineArgs .download_dir ,
@@ -340,7 +340,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
340
340
'scaling factors. This should generally be supplied, when '
341
341
'KV cache dtype is FP8. Otherwise, KV cache scaling factors '
342
342
'default to 1.0, which may cause accuracy issues. '
343
- 'FP8_E5M2 (without scaling) is only supported on cuda version'
343
+ 'FP8_E5M2 (without scaling) is only supported on cuda version '
344
344
'greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is instead '
345
345
'supported for common inference criteria.' )
346
346
parser .add_argument ('--max-model-len' ,
@@ -446,9 +446,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
446
446
'this argument can be seen as a virtual way to increase '
447
447
'the GPU memory size. For example, if you have one 24 GB '
448
448
'GPU and set this to 10, virtually you can think of it as '
449
- 'a 34 GB GPU. Then you can load a 13B model with BF16 weight,'
449
+ 'a 34 GB GPU. Then you can load a 13B model with BF16 weight, '
450
450
'which requires at least 26GB GPU memory. Note that this '
451
- 'requires fast CPU-GPU interconnect, as part of the model is'
451
+ 'requires fast CPU-GPU interconnect, as part of the model is '
452
452
'loaded from CPU memory to GPU memory on the fly in each '
453
453
'model forward pass.' )
454
454
parser .add_argument (
@@ -468,7 +468,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
468
468
type = int ,
469
469
default = None ,
470
470
help = 'If specified, ignore GPU profiling result and use this number'
471
- 'of GPU blocks. Used for testing preemption.' )
471
+ ' of GPU blocks. Used for testing preemption.' )
472
472
parser .add_argument ('--max-num-batched-tokens' ,
473
473
type = int ,
474
474
default = EngineArgs .max_num_batched_tokens ,
@@ -514,7 +514,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
514
514
parser .add_argument ('--hf-overrides' ,
515
515
type = json .loads ,
516
516
default = EngineArgs .hf_overrides ,
517
- help = 'Extra arguments for the HuggingFace config.'
517
+ help = 'Extra arguments for the HuggingFace config. '
518
518
'This should be a JSON string that will be '
519
519
'parsed into a dictionary.' )
520
520
parser .add_argument ('--enforce-eager' ,
@@ -572,7 +572,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
572
572
'--mm-processor-kwargs' ,
573
573
default = None ,
574
574
type = json .loads ,
575
- help = ('Overrides for the multimodal input mapping/processing,'
575
+ help = ('Overrides for the multimodal input mapping/processing, '
576
576
'e.g., image processor. For example: {"num_crops": 4}.' ))
577
577
578
578
# LoRA related configs
@@ -822,9 +822,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
822
822
"of the provided names. The model name in the model "
823
823
"field of a response will be the first name in this "
824
824
"list. If not specified, the model name will be the "
825
- "same as the `--model` argument. Noted that this name(s)"
825
+ "same as the `--model` argument. Noted that this name(s) "
826
826
"will also be used in `model_name` tag content of "
827
- "prometheus metrics, if multiple names provided, metrics"
827
+ "prometheus metrics, if multiple names provided, metrics "
828
828
"tag will take the first one." )
829
829
parser .add_argument ('--qlora-adapter-name-or-path' ,
830
830
type = str ,
0 commit comments