vllm-project · nandan2003 · Nov 21, 2025 · Nov 21, 2025
@@ -19,12 +19,7 @@
     QuantizationConfig,
     QuantizeMethodBase,
 )
-from vllm.model_executor.models.adapters import (
-    as_embedding_model,
-    as_reward_model,
-    as_seq_cls_model,
-    try_create_mm_pooling_model_cls,
-)
+
 from vllm.model_executor.models.interfaces import SupportsQuant, supports_multimodal
 from vllm.utils.platform_utils import is_pin_memory_available
 
@@ -172,6 +167,12 @@ def device_loading_context(module: torch.nn.Module, target_device: torch.device)
 
 
 def _get_model_architecture(model_config: ModelConfig) -> tuple[type[nn.Module], str]:
+    from vllm.model_executor.models.adapters import (
+        as_embedding_model,
+        as_reward_model,
+        as_seq_cls_model,
+        try_create_mm_pooling_model_cls,
+    )
     architectures = getattr(model_config.hf_config, "architectures", [])
 
     model_cls, arch = model_config.registry.resolve_model_cls(