vllm/model_executor/model_loader (1 file changed: +8, -7 lines)

@@ -19,12 +19,7 @@
     QuantizationConfig,
    QuantizeMethodBase,
 )
-from vllm.model_executor.models.adapters import (
-    as_embedding_model,
-    as_reward_model,
-    as_seq_cls_model,
-    try_create_mm_pooling_model_cls,
-)
+
 from vllm.model_executor.models.interfaces import SupportsQuant, supports_multimodal
 from vllm.utils.platform_utils import is_pin_memory_available
 
@@ -171,7 +166,13 @@ def device_loading_context(module: torch.nn.Module, target_device: torch.device)
171166"""Caches the outputs of `_get_model_architecture`."""
172167
173168
174- def _get_model_architecture (model_config : ModelConfig ) -> tuple [type [nn .Module ], str ]:
169+ def _get_model_architecture (model_config : ModelConfig ) -> tuple [type [nn .Module ], str , bool ]:
170+ from vllm .model_executor .models .adapters import (
171+ as_embedding_model ,
172+ as_reward_model ,
173+ as_seq_cls_model ,
174+ try_create_mm_pooling_model_cls ,
175+ )
175176 architectures = getattr (model_config .hf_config , "architectures" , [])
176177
177178 model_cls , arch = model_config .registry .resolve_model_cls (
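In short, this diff moves the `vllm.model_executor.models.adapters` imports from module scope into the body of `_get_model_architecture`, and widens the function's return type from a 2-tuple to a 3-tuple ending in a `bool`. Deferring an import into the function that uses it is the standard way to break a circular dependency between two modules (it can also trim import-time cost); the diff itself doesn't state the motivation. Below is a minimal two-file sketch of the pattern, assuming a cycle is the reason. All names in it (`adapters.py`, `loader.py`, `resolve_base_cls`, `as_embedding_model`) are hypothetical stand-ins, not vLLM's actual modules.

```python
# adapters.py -- hypothetical stand-in for vllm.model_executor.models.adapters
from loader import resolve_base_cls  # module-scope import back into loader


def as_embedding_model(model_cls):
    """Wrap a model class for embedding use (body elided in this sketch)."""
    return model_cls
```

```python
# loader.py -- hypothetical stand-in for the patched model_loader module
def resolve_base_cls(config):
    """Resolve the base model class for a config (body elided)."""
    raise NotImplementedError


def get_model_architecture(config):
    # Deferred import: by the time this function is first called, loader.py
    # has finished executing, so adapters.py's own module-scope
    # "from loader import resolve_base_cls" succeeds. A module-scope import
    # of adapters here would instead run while loader.py was still
    # half-initialized and raise an ImportError.
    from adapters import as_embedding_model

    model_cls = resolve_base_cls(config)
    if getattr(config, "task", None) == "embed":
        model_cls = as_embedding_model(model_cls)
    return model_cls
```

Note also the signature change: callers that previously unpacked `model_cls, arch = _get_model_architecture(...)` now receive a third `bool` element. Its meaning is not visible in this hunk, but any call site still unpacking two values would raise `ValueError: too many values to unpack` until updated.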