11
11
from vllm .utils import str_to_int_tuple
12
12
13
13
14
def nullable_str(val: str):
    """Convert a command-line string into ``None`` when it denotes "no value".

    Used as an ``argparse`` ``type=`` converter for optional string options,
    so that a user can explicitly pass the literal text ``None`` (or an empty
    string) on the command line and have the option parsed as Python ``None``.

    Args:
        val: The raw argument text.

    Returns:
        ``None`` if ``val`` is falsy (e.g. the empty string) or is exactly the
        string ``"None"``; otherwise ``val`` unchanged.
    """
    # The comparison is case-sensitive on purpose: only the exact spelling
    # "None" is treated as the null marker, anything else is passed through.
    if not val or val == "None":
        return None
    return val
18
+
19
+
14
20
@dataclass
15
21
class EngineArgs :
16
22
"""Arguments for vLLM engine."""
@@ -96,7 +102,7 @@ def add_cli_args(
96
102
help = 'Name or path of the huggingface model to use.' )
97
103
parser .add_argument (
98
104
'--tokenizer' ,
99
- type = str ,
105
+ type = nullable_str ,
100
106
default = EngineArgs .tokenizer ,
101
107
help = 'Name or path of the huggingface tokenizer to use.' )
102
108
parser .add_argument (
@@ -105,21 +111,21 @@ def add_cli_args(
105
111
help = 'Skip initialization of tokenizer and detokenizer' )
106
112
parser .add_argument (
107
113
'--revision' ,
108
- type = str ,
114
+ type = nullable_str ,
109
115
default = None ,
110
116
help = 'The specific model version to use. It can be a branch '
111
117
'name, a tag name, or a commit id. If unspecified, will use '
112
118
'the default version.' )
113
119
parser .add_argument (
114
120
'--code-revision' ,
115
- type = str ,
121
+ type = nullable_str ,
116
122
default = None ,
117
123
help = 'The specific revision to use for the model code on '
118
124
'Hugging Face Hub. It can be a branch name, a tag name, or a '
119
125
'commit id. If unspecified, will use the default version.' )
120
126
parser .add_argument (
121
127
'--tokenizer-revision' ,
122
- type = str ,
128
+ type = nullable_str ,
123
129
default = None ,
124
130
help = 'The specific tokenizer version to use. It can be a branch '
125
131
'name, a tag name, or a commit id. If unspecified, will use '
@@ -136,7 +142,7 @@ def add_cli_args(
136
142
action = 'store_true' ,
137
143
help = 'Trust remote code from huggingface.' )
138
144
parser .add_argument ('--download-dir' ,
139
- type = str ,
145
+ type = nullable_str ,
140
146
default = EngineArgs .download_dir ,
141
147
help = 'Directory to download and load the weights, '
142
148
'default to the default cache dir of '
@@ -187,7 +193,7 @@ def add_cli_args(
187
193
'supported for common inference criteria.' )
188
194
parser .add_argument (
189
195
'--quantization-param-path' ,
190
- type = str ,
196
+ type = nullable_str ,
191
197
default = None ,
192
198
help = 'Path to the JSON file containing the KV cache '
193
199
'scaling factors. This should generally be supplied, when '
@@ -304,7 +310,7 @@ def add_cli_args(
304
310
# Quantization settings.
305
311
parser .add_argument ('--quantization' ,
306
312
'-q' ,
307
- type = str ,
313
+ type = nullable_str ,
308
314
choices = [* QUANTIZATION_METHODS , None ],
309
315
default = EngineArgs .quantization ,
310
316
help = 'Method used to quantize the weights. If '
@@ -349,7 +355,7 @@ def add_cli_args(
349
355
'asynchronous tokenization. Ignored '
350
356
'if tokenizer_pool_size is 0.' )
351
357
parser .add_argument ('--tokenizer-pool-extra-config' ,
352
- type = str ,
358
+ type = nullable_str ,
353
359
default = EngineArgs .tokenizer_pool_extra_config ,
354
360
help = 'Extra config for tokenizer pool. '
355
361
'This should be a JSON string that will be '
@@ -404,7 +410,7 @@ def add_cli_args(
404
410
# Related to Vision-language models such as llava
405
411
parser .add_argument (
406
412
'--image-input-type' ,
407
- type = str ,
413
+ type = nullable_str ,
408
414
default = None ,
409
415
choices = [
410
416
t .name .lower () for t in VisionLanguageConfig .ImageInputType
@@ -417,7 +423,7 @@ def add_cli_args(
417
423
help = ('Input id for image token.' ))
418
424
parser .add_argument (
419
425
'--image-input-shape' ,
420
- type = str ,
426
+ type = nullable_str ,
421
427
default = None ,
422
428
help = ('The biggest image input shape (worst for memory footprint) '
423
429
'given an input type. Only used for vLLM\' s profile_run.' ))
@@ -440,7 +446,7 @@ def add_cli_args(
440
446
441
447
parser .add_argument (
442
448
'--speculative-model' ,
443
- type = str ,
449
+ type = nullable_str ,
444
450
default = EngineArgs .speculative_model ,
445
451
help =
446
452
'The name of the draft model to be used in speculative decoding.' )
@@ -454,7 +460,7 @@ def add_cli_args(
454
460
455
461
parser .add_argument (
456
462
'--speculative-max-model-len' ,
457
- type = str ,
463
+ type = int ,
458
464
default = EngineArgs .speculative_max_model_len ,
459
465
help = 'The maximum sequence length supported by the '
460
466
'draft model. Sequences over this length will skip '
@@ -475,7 +481,7 @@ def add_cli_args(
475
481
'decoding.' )
476
482
477
483
parser .add_argument ('--model-loader-extra-config' ,
478
- type = str ,
484
+ type = nullable_str ,
479
485
default = EngineArgs .model_loader_extra_config ,
480
486
help = 'Extra config for model loader. '
481
487
'This will be passed to the model loader '
0 commit comments