@@ -5,8 +5,9 @@
 from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
 
 import torch
-from pydantic import ConfigDict
+from pydantic import ConfigDict, Field, model_validator
 from pydantic.dataclasses import dataclass
+from typing_extensions import Self
 
 import vllm.envs as envs
 from vllm.config.utils import config
@@ -23,16 +24,18 @@
 logger = init_logger(__name__)
 
 LoRADType = Literal["auto", "float16", "bfloat16"]
+MaxLoRARanks = Literal[1, 8, 16, 32, 64, 128, 256, 320, 512]
+LoRAExtraVocabSize = Literal[256, 512]
 
 
 @config
 @dataclass(config=ConfigDict(arbitrary_types_allowed=True))
 class LoRAConfig:
     """Configuration for LoRA."""
 
-    max_lora_rank: int = 16
+    max_lora_rank: MaxLoRARanks = 16
     """Max LoRA rank."""
-    max_loras: int = 1
+    max_loras: int = Field(default=1, ge=1)
     """Max number of LoRAs in a single batch."""
     fully_sharded_loras: bool = False
     """By default, only half of the LoRA computation is sharded with tensor
@@ -44,7 +47,14 @@ class LoRAConfig:
     `max_loras`."""
     lora_dtype: Union[torch.dtype, LoRADType] = "auto"
     """Data type for LoRA. If auto, will default to base model dtype."""
-    lora_extra_vocab_size: int = 256
+    lora_extra_vocab_size: LoRAExtraVocabSize = Field(
+        default=256,
+        deprecated=(
+            "`lora_extra_vocab_size` is deprecated and will be removed "
+            "in v0.12.0. Additional vocabulary support for "
+            "LoRA adapters is being phased out."
+        ),
+    )
     """(Deprecated) Maximum size of extra vocabulary that can be present in a
     LoRA adapter. Will be removed in v0.12.0."""
     lora_vocab_padding_size: ClassVar[int] = (
@@ -60,7 +70,10 @@ class LoRAConfig:
     per prompt. When run in offline mode, the lora IDs for n modalities
     will be automatically assigned to 1-n with the names of the modalities
     in alphabetic order."""
-    bias_enabled: bool = False
+    bias_enabled: bool = Field(
+        default=False,
+        deprecated="`bias_enabled` is deprecated and will be removed in v0.12.0.",
+    )
     """[DEPRECATED] Enable bias for LoRA adapters. This option will be
     removed in v0.12.0."""
 
@@ -87,36 +100,8 @@ def compute_hash(self) -> str:
         hash_str = hashlib.md5(str(factors).encode(), usedforsecurity=False).hexdigest()
         return hash_str
 
-    def __post_init__(self):
-        # Deprecation warning for lora_extra_vocab_size
-        logger.warning(
-            "`lora_extra_vocab_size` is deprecated and will be removed "
-            "in v0.12.0. Additional vocabulary support for "
-            "LoRA adapters is being phased out."
-        )
-
-        # Deprecation warning for enable_lora_bias
-        if self.bias_enabled:
-            logger.warning(
-                "`enable_lora_bias` is deprecated and will be removed in v0.12.0."
-            )
-
-        # Setting the maximum rank to 512 should be able to satisfy the vast
-        # majority of applications.
-        possible_max_ranks = (1, 8, 16, 32, 64, 128, 256, 320, 512)
-        possible_lora_extra_vocab_size = (256, 512)
-        if self.max_lora_rank not in possible_max_ranks:
-            raise ValueError(
-                f"max_lora_rank ({self.max_lora_rank}) must be one of "
-                f"{possible_max_ranks}."
-            )
-        if self.lora_extra_vocab_size not in possible_lora_extra_vocab_size:
-            raise ValueError(
-                f"lora_extra_vocab_size ({self.lora_extra_vocab_size}) "
-                f"must be one of {possible_lora_extra_vocab_size}."
-            )
-        if self.max_loras < 1:
-            raise ValueError(f"max_loras ({self.max_loras}) must be >= 1.")
+    @model_validator(mode="after")
+    def _validate_lora_config(self) -> Self:
         if self.max_cpu_loras is None:
             self.max_cpu_loras = self.max_loras
         elif self.max_cpu_loras < self.max_loras:
@@ -125,6 +110,8 @@ def __post_init__(self):
                 f"max_loras ({self.max_loras})"
             )
 
+        return self
+
     def verify_with_cache_config(self, cache_config: CacheConfig):
         if cache_config.cpu_offload_gb > 0 and not envs.VLLM_USE_V1:
             raise ValueError("V0 LoRA does not support CPU offload, please use V1.")
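A minimal, self-contained sketch (not part of this commit; the class and field names below are illustrative only) of the validation pattern the diff moves to: Literal-typed fields and Field constraints on a pydantic dataclass take over the per-field range checks that __post_init__ used to perform, while the cross-field max_cpu_loras check stays in a model_validator(mode="after"). Invalid values then surface as a pydantic ValidationError at construction time rather than a hand-raised ValueError.

from typing import Literal, Optional

from pydantic import ConfigDict, Field, ValidationError, model_validator
from pydantic.dataclasses import dataclass
from typing_extensions import Self


@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class ExampleLoRAConfig:  # illustrative stand-in, not vllm's LoRAConfig
    # The Literal type restricts the rank to the allowed set; pydantic
    # rejects any other value when the object is constructed.
    max_lora_rank: Literal[1, 8, 16, 32, 64, 128, 256, 320, 512] = 16
    # Field(ge=1) replaces the manual "must be >= 1" check.
    max_loras: int = Field(default=1, ge=1)
    max_cpu_loras: Optional[int] = None

    @model_validator(mode="after")
    def _validate(self) -> Self:
        # Cross-field logic still needs a validator, mirroring the diff.
        if self.max_cpu_loras is None:
            self.max_cpu_loras = self.max_loras
        elif self.max_cpu_loras < self.max_loras:
            raise ValueError(
                f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
                f"max_loras ({self.max_loras})"
            )
        return self


# An out-of-range rank is reported by pydantic itself.
try:
    ExampleLoRAConfig(max_lora_rank=7)
except ValidationError as err:
    print(err)

cfg = ExampleLoRAConfig(max_loras=4)
print(cfg.max_cpu_loras)  # defaults to max_loras (4) via the after-validator

Encoding the allowed values in the Literal annotations also documents them on the field itself, so pydantic's error message lists the permitted inputs without any hand-written message.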