Commit 65552b4

[Misc] Use config definitions from Transformers library (#21913)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 7ad7adb · commit 65552b4

File tree

11 files changed, +54 -51 lines changed

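The pattern is the same in every file: annotations that previously used the generic PretrainedConfig are narrowed to the model-specific config classes that Transformers already defines (plus vLLM's own AIMv2Config for Ovis). Below is a minimal sketch of the idea, assuming a Transformers version that ships Cohere2Config; the layer class and attribute access are illustrative, not code from this commit.

# Sketch only: narrowing the type hint from PretrainedConfig to the concrete
# config classes documents which models the layer supports and lets static
# checkers verify attribute access such as config.hidden_size.
from typing import Union

import torch.nn as nn
from transformers import Cohere2Config, CohereConfig


class ExampleDecoderLayer(nn.Module):

    def __init__(self, config: Union[CohereConfig, Cohere2Config]) -> None:
        super().__init__()
        # Both config classes define hidden_size, so this access type-checks.
        self.hidden_size = config.hidden_size

Nothing changes at runtime, since the same config objects are passed in; the benefit is documentation and static type checking.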

vllm/model_executor/models/aimv2.py

Lines changed: 11 additions & 11 deletions

@@ -8,7 +8,6 @@
 
 import torch
 import torch.nn as nn
-from transformers import PretrainedConfig
 
 from vllm.attention.layer import MultiHeadAttention
 from vllm.distributed import get_tensor_model_parallel_world_size
@@ -21,12 +20,13 @@
 from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig)
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
+from vllm.transformers_utils.configs.ovis import AIMv2Config
 
 
 class AIMv2SwiGLUFFN(nn.Module):
 
-    def __init__(self, config: PretrainedConfig,
-                 quant_config: QuantizationConfig, prefix: str):
+    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
+                 prefix: str):
         super().__init__()
         hidden_features = config.intermediate_size
         in_features = config.hidden_size
@@ -57,7 +57,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 class AIMv2PatchEmbed(nn.Module):
 
-    def __init__(self, config: PretrainedConfig):
+    def __init__(self, config: AIMv2Config):
         super().__init__()
         self.proj = nn.Conv2d(
             config.num_channels,
@@ -75,7 +75,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 class AIMv2ViTPreprocessor(nn.Module):
 
-    def __init__(self, config: PretrainedConfig):
+    def __init__(self, config: AIMv2Config):
         super().__init__()
         num_patches = (config.image_size // config.patch_size)**2
 
@@ -93,8 +93,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 class AIMv2Attention(nn.Module):
 
-    def __init__(self, config: PretrainedConfig,
-                 quant_config: QuantizationConfig, prefix: str):
+    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
+                 prefix: str):
         super().__init__()
         self.config = config
         self.embed_dim = config.hidden_size
@@ -141,8 +141,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
 class AIMv2Block(nn.Module):
 
-    def __init__(self, config: PretrainedConfig,
-                 quant_config: QuantizationConfig, prefix: str):
+    def __init__(self, config: AIMv2Config, quant_config: QuantizationConfig,
+                 prefix: str):
         super().__init__()
         self.attn = AIMv2Attention(config,
                                    quant_config=quant_config,
@@ -163,7 +163,7 @@ class AIMv2Transformer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: AIMv2Config,
         quant_config: QuantizationConfig,
         *,
         require_post_norm: Optional[bool] = None,
@@ -193,7 +193,7 @@ def forward(self, tokens: torch.Tensor) -> torch.Tensor:
 class AIMv2Model(torch.nn.Module):
 
     def __init__(self,
-                 config: PretrainedConfig,
+                 config: AIMv2Config,
                  quant_config: QuantizationConfig,
                  *,
                  require_post_norm: Optional[bool] = None,

vllm/model_executor/models/commandr.py

Lines changed: 4 additions & 4 deletions

@@ -27,7 +27,7 @@
 
 import torch
 from torch import nn
-from transformers import CohereConfig
+from transformers import Cohere2Config, CohereConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -89,7 +89,7 @@ class CohereMLP(nn.Module):
 
     def __init__(
         self,
-        config: CohereConfig,
+        config: Union[CohereConfig, Cohere2Config],
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -124,7 +124,7 @@ class CohereAttention(nn.Module):
 
     def __init__(
         self,
-        config: CohereConfig,
+        config: Union[CohereConfig, Cohere2Config],
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -242,7 +242,7 @@ def forward(
 class CohereDecoderLayer(nn.Module):
 
     def __init__(self,
-                 config: CohereConfig,
+                 config: Union[CohereConfig, Cohere2Config],
                  cache_config: Optional[CacheConfig] = None,
                  quant_config: Optional[QuantizationConfig] = None,
                  prefix: str = ""):

vllm/model_executor/models/dbrx.py

Lines changed: 7 additions & 7 deletions

@@ -6,7 +6,7 @@
 
 import torch
 import torch.nn as nn
-from transformers import PretrainedConfig
+from transformers import DbrxConfig
 
 from vllm.attention import Attention
 from vllm.config import CacheConfig, VllmConfig
@@ -39,7 +39,7 @@ class DbrxRouter(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: DbrxConfig,
         params_dtype: Optional[torch.dtype] = None,
     ):
         super().__init__()
@@ -63,7 +63,7 @@ class DbrxExperts(FusedMoE):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: DbrxConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -138,7 +138,7 @@ class DbrxMoE(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: DbrxConfig,
         quant_config: Optional[QuantizationConfig] = None,
         params_dtype: Optional[torch.dtype] = None,
         prefix: str = "",
@@ -169,7 +169,7 @@ class DbrxAttention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: DbrxConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -249,7 +249,7 @@ class DbrxFusedNormAttention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: DbrxConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -284,7 +284,7 @@ class DbrxBlock(nn.Module):
 
     def __init__(
        self,
-        config: PretrainedConfig,
+        config: DbrxConfig,
        cache_config: Optional[CacheConfig] = None,
        quant_config: Optional[QuantizationConfig] = None,
        prefix: str = "",

vllm/model_executor/models/deepseek_v2.py

Lines changed: 9 additions & 6 deletions

@@ -29,7 +29,7 @@
 
 import torch
 from torch import nn
-from transformers import PretrainedConfig
+from transformers import DeepseekV2Config, DeepseekV3Config
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -100,7 +100,7 @@ class DeepseekV2MoE(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Union[DeepseekV2Config, DeepseekV3Config],
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
         enable_eplb: bool = False,
@@ -221,7 +221,7 @@ class DeepseekV2Attention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Union[DeepseekV2Config, DeepseekV3Config],
         hidden_size: int,
         num_heads: int,
         qk_nope_head_dim: int,
@@ -373,7 +373,7 @@ class DeepseekV2MLAAttention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Union[DeepseekV2Config, DeepseekV3Config],
         hidden_size: int,
         num_heads: int,
         qk_nope_head_dim: int,
@@ -538,7 +538,7 @@ class DeepseekV2DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Union[DeepseekV2Config, DeepseekV3Config],
         prefix: str,
         model_config: ModelConfig,
         cache_config: Optional[CacheConfig] = None,
@@ -973,7 +973,10 @@ class DeepseekV3ForCausalLM(DeepseekV2ForCausalLM):
     pass
 
 
-def get_spec_layer_idx_from_weight_name(config: PretrainedConfig,
+# Compatibility with
+# https://huggingface.co/deepseek-ai/DeepSeek-V3-Base/blob/main/configuration_deepseek.py
+def get_spec_layer_idx_from_weight_name(config: Union[DeepseekV2Config,
+                                                      DeepseekV3Config],
                                         weight_name: str) -> Optional[int]:
     if (hasattr(config, "num_nextn_predict_layers")
             and config.num_nextn_predict_layers > 0):
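The Union[DeepseekV2Config, DeepseekV3Config] hint on get_spec_layer_idx_from_weight_name only documents intent; at runtime the helper still guards with hasattr, because num_nextn_predict_layers is a DeepSeek-V3 style field that a V2 config may not define. A rough, self-contained sketch of that behaviour, using hypothetical stub configs rather than the Transformers classes:

# Hypothetical stand-ins for DeepseekV2Config / DeepseekV3Config, used only to
# show how the hasattr guard keeps one helper working for both config types.
from typing import Optional, Union


class FakeV2Config:  # defines no num_nextn_predict_layers attribute
    num_hidden_layers = 27


class FakeV3Config:
    num_hidden_layers = 61
    num_nextn_predict_layers = 1  # MTP layers appended after the main stack


def spec_layer_idx_from_weight_name(
        config: Union[FakeV2Config, FakeV3Config],
        weight_name: str) -> Optional[int]:
    # Mirrors the guard in the diff: only configs that define the attribute
    # (and set it > 0) can map a weight name onto a speculative layer index.
    if (hasattr(config, "num_nextn_predict_layers")
            and config.num_nextn_predict_layers > 0):
        layer_idx = config.num_hidden_layers
        for i in range(config.num_nextn_predict_layers):
            if weight_name.startswith(f"model.layers.{layer_idx + i}."):
                return layer_idx + i
    return None


print(spec_layer_idx_from_weight_name(FakeV3Config(), "model.layers.61.mlp.gate.weight"))  # 61
print(spec_layer_idx_from_weight_name(FakeV2Config(), "model.layers.61.mlp.gate.weight"))  # None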

vllm/model_executor/models/dots1.py

Lines changed: 4 additions & 4 deletions

@@ -29,7 +29,7 @@
 
 import torch
 from torch import nn
-from transformers import PretrainedConfig
+from transformers import Dots1Config
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -99,7 +99,7 @@ class Dots1MoE(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Dots1Config,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -174,7 +174,7 @@ def __init__(
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
-        config: PretrainedConfig,
+        config: Dots1Config,
         rope_theta: float = 10000,
         rope_scaling: Optional[dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
@@ -260,7 +260,7 @@ class Dots1DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Dots1Config,
         prefix: str,
         model_config: ModelConfig,
         cache_config: Optional[CacheConfig] = None,

vllm/model_executor/models/exaone4.py

Lines changed: 3 additions & 3 deletions

@@ -26,7 +26,7 @@
 
 import torch
 from torch import nn
-from transformers import PretrainedConfig
+from transformers import Exaone4Config
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -96,7 +96,7 @@ class Exaone4Attention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Exaone4Config,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -224,7 +224,7 @@ class Exaone4DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Exaone4Config,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",

vllm/model_executor/models/glm4_moe.py

Lines changed: 5 additions & 5 deletions

@@ -28,7 +28,7 @@
 
 import torch
 from torch import nn
-from transformers import PretrainedConfig
+from transformers.models.glm4_moe import Glm4MoeConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -100,7 +100,7 @@ class Glm4MoE(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Glm4MoeConfig,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
         enable_eplb: bool = False,
@@ -198,7 +198,7 @@ class Glm4MoeAttention(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Glm4MoeConfig,
         hidden_size: int,
         num_heads: int,
         num_kv_heads: int,
@@ -297,7 +297,7 @@ class Glm4MoeDecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: Glm4MoeConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
@@ -681,7 +681,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
         return self.model.get_expert_mapping()
 
 
-def get_spec_layer_idx_from_weight_name(config: PretrainedConfig,
+def get_spec_layer_idx_from_weight_name(config: Glm4MoeConfig,
                                         weight_name: str) -> Optional[int]:
     if hasattr(config,
                "num_nextn_predict_layers") and (config.num_nextn_predict_layers

vllm/model_executor/models/minimax_text_01.py

Lines changed: 3 additions & 3 deletions

@@ -12,7 +12,7 @@
 import torch.nn.functional as F
 from einops import rearrange
 from torch import nn
-from transformers.configuration_utils import PretrainedConfig
+from transformers import MiniMaxConfig
 
 from vllm import envs
 from vllm.attention import Attention, AttentionMetadata
@@ -656,7 +656,7 @@ class MiniMaxText01DecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: MiniMaxConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         expert_num: int = 1,
@@ -860,7 +860,7 @@ class MiniMaxText01Model(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: MiniMaxConfig,
         quant_config: Optional[QuantizationConfig] = None,
         cache_config: Optional[CacheConfig] = None,
         scheduler_config=None,

vllm/model_executor/models/olmoe.py

Lines changed: 2 additions & 2 deletions

@@ -19,7 +19,7 @@
 
 import torch
 from torch import nn
-from transformers import PretrainedConfig
+from transformers import OlmoeConfig
 
 from vllm.attention import Attention
 from vllm.compilation.decorators import support_torch_compile
@@ -205,7 +205,7 @@ class OlmoeDecoderLayer(nn.Module):
 
     def __init__(
         self,
-        config: PretrainedConfig,
+        config: OlmoeConfig,
         cache_config: Optional[CacheConfig] = None,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",

0 commit comments

Comments
 (0)