
Commit 15310b5

[Bugfix] Use LoadFormat values for vllm serve --load-format (vllm-project#7784)
1 parent 57792ed commit 15310b5

File tree

1 file changed (+3, -6 lines)

vllm/engine/arg_utils.py

Lines changed: 3 additions & 6 deletions
@@ -9,8 +9,8 @@
 
 import vllm.envs as envs
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
-                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig,
+                         EngineConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ObservabilityConfig, ParallelConfig,
                          PromptAdapterConfig, SchedulerConfig,
                          SpeculativeConfig, TokenizerPoolConfig)
 from vllm.executor.executor_base import ExecutorBase
@@ -214,10 +214,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             '--load-format',
             type=str,
             default=EngineArgs.load_format,
-            choices=[
-                'auto', 'pt', 'safetensors', 'npcache', 'dummy', 'tensorizer',
-                'bitsandbytes'
-            ],
+            choices=[f.value for f in LoadFormat],
             help='The format of the model weights to load.\n\n'
             '* "auto" will try to load the weights in the safetensors format '
             'and fall back to the pytorch bin format if safetensors format '