32
32
from vllm .transformers_utils .configs import (ChatGLMConfig , DeepseekVLV2Config ,
33
33
EAGLEConfig , JAISConfig ,
34
34
KimiVLConfig , MedusaConfig ,
35
- MllamaConfig , MLPSpeculatorConfig ,
35
+ MLPSpeculatorConfig ,
36
36
Nemotron_Nano_VL_Config ,
37
- NemotronConfig , NVLM_D_Config ,
38
- OvisConfig , RWConfig ,
39
- SpeculatorsConfig ,
37
+ NemotronConfig , OvisConfig ,
38
+ RWConfig , SpeculatorsConfig ,
40
39
Step3TextConfig , Step3VLConfig ,
41
40
UltravoxConfig )
42
41
# yapf: enable
@@ -68,10 +67,6 @@ def _get_hf_token() -> Optional[str]:
68
67
return None
69
68
70
69
71
- _CONFIG_REGISTRY_OVERRIDE_HF : dict [str , type [PretrainedConfig ]] = {
72
- "mllama" : MllamaConfig
73
- }
74
-
75
70
_CONFIG_REGISTRY : dict [str , type [PretrainedConfig ]] = {
76
71
"chatglm" : ChatGLMConfig ,
77
72
"deepseek_vl_v2" : DeepseekVLV2Config ,
@@ -85,18 +80,30 @@ def _get_hf_token() -> Optional[str]:
85
80
"eagle" : EAGLEConfig ,
86
81
"speculators" : SpeculatorsConfig ,
87
82
"nemotron" : NemotronConfig ,
88
- "NVLM_D" : NVLM_D_Config ,
89
83
"ovis" : OvisConfig ,
90
84
"ultravox" : UltravoxConfig ,
91
85
"step3_vl" : Step3VLConfig ,
92
86
"step3_text" : Step3TextConfig ,
93
- ** _CONFIG_REGISTRY_OVERRIDE_HF
94
87
}
95
88
96
89
# Maps a config attribute name (key) to the attribute name it should be
# exposed under (value).
_CONFIG_ATTRS_MAPPING: dict[str, str] = {
    "llm_config": "text_config",
}
99
92
93
# Extra keyword arguments to apply when initializing a config through
# AutoConfig, keyed by model type.
_AUTO_CONFIG_KWARGS_OVERRIDES: dict[str, dict[str, Any]] = {
    "internvl_chat": {
        "has_no_defaults_at_init": True,
    },
    # transformers regards mllama as is_encoder_decoder=False
    # vllm needs is_encoder_decoder=True to enable cross-attention
    "mllama": {
        "is_encoder_decoder": True,
    },
    "NVLM_D": {
        "has_no_defaults_at_init": True,
    },
}
106
+
100
107
101
108
class ConfigFormat (str , enum .Enum ):
102
109
AUTO = "auto"
@@ -273,11 +280,12 @@ def thinker_uses_mrope(config: PretrainedConfig) -> bool:
273
280
274
281
def is_encoder_decoder(config: PretrainedConfig) -> bool:
    """Detect if the model with this config is used as an encoder/decoder.

    The ``is_encoder_decoder`` attribute is looked up on ``config`` itself
    first; a missing attribute counts as ``False``. Only when that lookup is
    falsy is the object returned by ``config.get_text_config()`` consulted
    in the same way.

    Args:
        config: The model configuration to inspect.

    Returns:
        A truthy value if either the config or its text config sets
        ``is_encoder_decoder``; otherwise a falsy value.
    """
    # `or` short-circuits, so a truthy top-level flag wins without
    # touching the text config.
    return (getattr(config, "is_encoder_decoder", False)
            or getattr(config.get_text_config(), "is_encoder_decoder", False))
281
289
282
290
283
291
def is_interleaved (config : PretrainedConfig ) -> bool :
@@ -291,13 +299,21 @@ def is_interleaved(config: PretrainedConfig) -> bool:
291
299
return False
292
300
293
301
302
def _maybe_update_auto_config_kwargs(kwargs: dict[str, Any],
                                     model_type: str) -> dict[str, Any]:
    """Update kwargs for AutoConfig initialization based on model_type.

    If ``model_type`` has an entry in ``_AUTO_CONFIG_KWARGS_OVERRIDES``, that
    entry is merged into ``kwargs``. Override values replace any values
    already present under the same keys.

    Args:
        kwargs: Keyword arguments destined for AutoConfig. This dict is
            mutated in place.
        model_type: The model type used to look up overrides.

    Returns:
        The same ``kwargs`` object that was passed in, after any update.
    """
    overrides = _AUTO_CONFIG_KWARGS_OVERRIDES.get(model_type)
    if overrides is not None:
        kwargs.update(overrides)
    return kwargs
309
+
310
+
294
311
def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
    """Remap config attributes to match the expected names.

    For every ``old -> new`` pair in ``_CONFIG_ATTRS_MAPPING``: when the
    config has the old attribute and does not already have the new one, the
    old attribute's value is copied to the new name via ``config.update``.
    The old attribute is left in place.

    Args:
        config: The configuration object to adjust in place.

    Returns:
        The same ``config`` object.
    """
    for old_attr, new_attr in _CONFIG_ATTRS_MAPPING.items():
        if not hasattr(config, old_attr):
            continue
        # Never overwrite a value already stored under the new name.
        if not hasattr(config, new_attr):
            config.update({new_attr: getattr(config, old_attr)})
        logger.debug("Remapped config attribute '%s' to '%s'", old_attr,
                     new_attr)
    return config
@@ -408,15 +424,14 @@ def get_config(
408
424
)
409
425
else :
410
426
try :
427
+ kwargs = _maybe_update_auto_config_kwargs (
428
+ kwargs , model_type = model_type )
411
429
config = AutoConfig .from_pretrained (
412
430
model ,
413
431
trust_remote_code = trust_remote_code ,
414
432
revision = revision ,
415
433
code_revision = code_revision ,
416
434
token = _get_hf_token (),
417
- # some old custom model's config needs
418
- # `has_no_defaults_at_init=True` to work.
419
- has_no_defaults_at_init = trust_remote_code ,
420
435
** kwargs ,
421
436
)
422
437
except ValueError as e :
0 commit comments