Commit e73621f

Enable model validation on downloaded models

Implement a 'validate_model' method in BaseModelLoader that first checks whether any plugin requests validation of the given model and, if so, downloads all of the model's files, including the signature. To do this, query the BaseModelLoader subclass for its download type. Support validation of local models and of models downloaded from the Hugging Face Hub. Add a test case.

Signed-off-by: Stefan Berger <[email protected]>
1 parent 6d3aae5 commit e73621f
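
For orientation, the sketch below shows what a validation plugin can look like from the user side. It is a rough sketch that relies only on the ModelValidationPlugin interface and the ModelValidationPluginRegistry.register_plugin() call exercised by the new test further down; the signature-file check and the "model.sig" file name are hypothetical examples, not something this commit defines.

# Hedged sketch of a validation plugin; the interface mirrors MyModelValidator
# in the new test below. The "model.sig" file name is a hypothetical placeholder.
import os
from typing import Optional

from vllm.validation.plugins import (
    ModelType,
    ModelValidationPlugin,
    ModelValidationPluginRegistry,
)


class SignatureFileValidator(ModelValidationPlugin):
    def model_validation_needed(self, model_type: ModelType, model_path: str) -> bool:
        # Request validation for AI models; returning False skips validation.
        return model_type == ModelType.MODEL_TYPE_AI_MODEL

    def validate_model(
        self, model_type: ModelType, model_path: str, model: Optional[str] = None
    ) -> None:
        # model_path is the fully downloaded folder (or local path) passed in by
        # BaseModelLoader.validate_model(); raising here aborts model loading.
        if not os.path.exists(os.path.join(model_path, "model.sig")):
            raise RuntimeError(f"No signature found for {model or model_path}")


ModelValidationPluginRegistry.register_plugin("signature-check", SignatureFileValidator())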

File tree

7 files changed: +167 -13 lines changed

New test file

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from typing import Optional

import pytest
from torch import nn

from vllm.config import DeviceConfig, ModelConfig, VllmConfig
from vllm.config.load import LoadConfig
from vllm.model_executor.model_loader import get_model_loader, register_model_loader
from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType
from vllm.validation.plugins import (
    ModelType,
    ModelValidationPlugin,
    ModelValidationPluginRegistry,
)


@register_model_loader("custom_load_format")
class CustomModelLoader(BaseModelLoader):
    def __init__(self, load_config: LoadConfig) -> None:
        super().__init__(load_config)
        self.download_type = None

    def download_model(self, model_config: ModelConfig) -> None:
        pass

    def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
        pass

    def set_download_type(self, download_type: DownloadType) -> None:
        """Allow changing download_type"""
        self.download_type = download_type

    def get_download_type(self, model_name_or_path: str) -> Optional[DownloadType]:
        return self.download_type


class MyModelValidator(ModelValidationPlugin):
    def model_validation_needed(self, model_type: ModelType, model_path: str) -> bool:
        return True

    def validate_model(
        self, model_type: ModelType, model_path: str, model: Optional[str] = None
    ) -> None:
        raise BaseException("Model did not validate")


def test_register_model_loader(dist_init):
    load_config = LoadConfig(load_format="custom_load_format")
    custom_model_loader = get_model_loader(load_config)
    assert isinstance(custom_model_loader, CustomModelLoader)

    my_model_validator = MyModelValidator()
    ModelValidationPluginRegistry.register_plugin("test", my_model_validator)

    vllm_config = VllmConfig(
        model_config=ModelConfig(),
        device_config=DeviceConfig("auto"),
        load_config=LoadConfig(),
    )
    with pytest.raises(RuntimeError):
        custom_model_loader.load_model(vllm_config, vllm_config.model_config)

    # have validate_model() called
    custom_model_loader.set_download_type(DownloadType.LOCAL_FILE)

    vllm_config = VllmConfig(
        model_config=ModelConfig(),
        device_config=DeviceConfig("cpu"),
        load_config=LoadConfig(),
    )
    with pytest.raises(BaseException, match="Model did not validate"):
        custom_model_loader.load_model(vllm_config, vllm_config.model_config)

vllm/model_executor/model_loader/base_loader.py

Lines changed: 51 additions & 0 deletions
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import enum
 from abc import ABC, abstractmethod
+from typing import Optional

+import huggingface_hub
 import torch
 import torch.nn as nn

@@ -13,10 +16,18 @@
     process_weights_after_loading,
     set_default_torch_dtype,
 )
+from vllm.validation.plugins import ModelType, ModelValidationPluginRegistry

 logger = init_logger(__name__)


+class DownloadType(int, enum.Enum):
+    HUGGINGFACE_HUB = 1
+    LOCAL_FILE = 2
+    S3 = 3  # not currently supported
+    UNKNOWN = 4
+
+
 class BaseModelLoader(ABC):
     """Base class for model loaders."""

@@ -34,6 +45,45 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
         inplace weights loading for an already-initialized model"""
         raise NotImplementedError

+    def get_download_type(self, model_name_or_path: str) -> Optional[DownloadType]:
+        """Subclass must override this and return the download type it needs"""
+        return None
+
+    def download_all_files(
+        self, model: nn.Module, model_config: ModelConfig, load_config: LoadConfig
+    ) -> Optional[str]:
+        """Download all files. Ask the subclass for what type of download
+        it does; Huggingface is used so often, so download all files here."""
+        dt = self.get_download_type(model_config.model)
+        if dt == DownloadType.HUGGINGFACE_HUB:
+            return huggingface_hub.snapshot_download(
+                model_config.model,
+                allow_patterns=["*"],
+                cache_dir=self.load_config.download_dir,
+                revision=model_config.revision,
+                local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
+            )
+        elif dt == DownloadType.LOCAL_FILE:
+            return model_config.model
+        return None
+
+    def validate_model(
+        self, model: nn.Module, model_config: ModelConfig, load_config: LoadConfig
+    ) -> None:
+        """If needed, validate the model after downloading _all_ its files."""
+        if ModelValidationPluginRegistry.model_validation_needed(
+            ModelType.MODEL_TYPE_AI_MODEL, model_config.model
+        ):
+            folder = self.download_all_files(model, model_config, load_config)
+            if folder is None:
+                raise RuntimeError(
+                    "Model validation could not be done due to "
+                    "an unsupported download method."
+                )
+            ModelValidationPluginRegistry.validate_model(
+                ModelType.MODEL_TYPE_AI_MODEL, folder, model_config.model
+            )
+
     def load_model(
         self, vllm_config: VllmConfig, model_config: ModelConfig
     ) -> nn.Module:
@@ -51,6 +101,7 @@ def load_model(
                 )

             logger.debug("Loading weights on %s ...", load_device)
+            self.validate_model(model, model_config, vllm_config.load_config)
             # Quantization does not happen in `load_weights` but after it
             self.load_weights(model, model_config)
             process_weights_after_loading(model, model_config, target_device)
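
Taken together, these hooks mean a loader only has to report its download type for validation to run right before load_weights(). A minimal out-of-tree sketch, mirroring the CustomModelLoader used in the new test (the "my_format" load format name is made up):

# Minimal sketch of a loader opting into validation; mirrors the test's
# CustomModelLoader. "my_format" is a hypothetical load format name.
from typing import Optional

from torch import nn

from vllm.config import ModelConfig
from vllm.config.load import LoadConfig
from vllm.model_executor.model_loader import register_model_loader
from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType


@register_model_loader("my_format")
class MyLocalLoader(BaseModelLoader):
    def download_model(self, model_config: ModelConfig) -> None:
        pass  # nothing to download for local paths

    def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
        pass  # weight loading elided in this sketch

    def get_download_type(self, model_name_or_path: str) -> Optional[DownloadType]:
        # Reporting LOCAL_FILE makes download_all_files() return the path as-is,
        # so registered validation plugins receive it directly.
        return DownloadType.LOCAL_FILE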

vllm/model_executor/model_loader/bitsandbytes_loader.py

Lines changed: 6 additions & 1 deletion
@@ -31,7 +31,7 @@
     ReplicatedLinear,
     RowParallelLinear,
 )
-from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType
 from vllm.model_executor.model_loader.utils import ParamMapping, set_default_torch_dtype
 from vllm.model_executor.model_loader.weight_utils import (
     download_safetensors_index_file_from_hf,
@@ -820,3 +820,8 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:

     def download_model(self, model_config: ModelConfig) -> None:
         self._prepare_weights(model_config.model, model_config.revision)
+
+    def get_download_type(self, model_name_or_path: str) -> DownloadType:
+        if os.path.isdir(model_name_or_path):
+            return DownloadType.LOCAL_FILE
+        return DownloadType.HUGGINGFACE_HUB

vllm/model_executor/model_loader/default_loader.py

Lines changed: 6 additions & 1 deletion
@@ -14,7 +14,7 @@
 from vllm.config import ModelConfig
 from vllm.config.load import LoadConfig
 from vllm.logger import init_logger
-from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType
 from vllm.model_executor.model_loader.weight_utils import (
     download_safetensors_index_file_from_hf,
     download_weights_from_hf,
@@ -319,3 +319,8 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
                     "Following weights were not initialized from "
                     f"checkpoint: {weights_not_loaded}"
                 )
+
+    def get_download_type(self, model_name_or_path: str) -> DownloadType:
+        if os.path.isdir(model_name_or_path):
+            return DownloadType.LOCAL_FILE
+        return DownloadType.HUGGINGFACE_HUB

vllm/model_executor/model_loader/gguf_loader.py

Lines changed: 17 additions & 9 deletions
@@ -11,7 +11,7 @@

 from vllm.config import ModelConfig, VllmConfig
 from vllm.config.load import LoadConfig
-from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType
 from vllm.model_executor.model_loader.utils import (
     initialize_model,
     process_weights_after_loading,
@@ -40,15 +40,13 @@ def __init__(self, load_config: LoadConfig):
             )

     def _prepare_weights(self, model_name_or_path: str):
-        if os.path.isfile(model_name_or_path):
+        download_type = self.get_download_type(model_name_or_path)
+
+        if download_type == DownloadType.LOCAL_FILE:
             return model_name_or_path
-        # for raw HTTPS link
-        if model_name_or_path.startswith(
-            ("http://", "https://")
-        ) and model_name_or_path.endswith(".gguf"):
-            return hf_hub_download(url=model_name_or_path)
-        # repo id/filename.gguf
-        if "/" in model_name_or_path and model_name_or_path.endswith(".gguf"):
+        elif download_type == DownloadType.HUGGINGFACE_HUB:
+            if model_name_or_path.startswith(("http://", "https://")):
+                return hf_hub_download(url=model_name_or_path)
             repo_id, filename = model_name_or_path.rsplit("/", 1)
             return hf_hub_download(repo_id=repo_id, filename=filename)
         else:
@@ -170,3 +168,13 @@ def load_model(

         process_weights_after_loading(model, model_config, target_device)
         return model
+
+    def get_download_type(self, model_name_or_path: str) -> DownloadType:
+        if os.path.isfile(model_name_or_path):
+            return DownloadType.LOCAL_FILE
+        if model_name_or_path.endswith(".gguf") and (
+            model_name_or_path.startswith(("http://", "https://"))
+            or "/" in model_name_or_path
+        ):
+            return DownloadType.HUGGINGFACE_HUB
+        return DownloadType.UNKNOWN
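
As a quick illustration of the new classification (GGUFModelLoader is assumed to be the loader class defined in this module, and the paths are made-up examples; the expected results follow the get_download_type() logic above):

# Illustrative sketch only; the loader class name and LoadConfig usage are
# assumptions, and the expected values follow the diff above.
from vllm.config.load import LoadConfig
from vllm.model_executor.model_loader.base_loader import DownloadType
from vllm.model_executor.model_loader.gguf_loader import GGUFModelLoader

loader = GGUFModelLoader(LoadConfig(load_format="gguf"))

loader.get_download_type("weights/model.gguf")  # LOCAL_FILE if the file exists,
                                                # otherwise HUGGINGFACE_HUB (repo/filename.gguf)
loader.get_download_type("https://example.com/m.gguf")  # HUGGINGFACE_HUB (raw .gguf link)
loader.get_download_type("somename")  # UNKNOWN; validation, if requested,
                                      # then fails with RuntimeError in validate_model()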

vllm/model_executor/model_loader/runai_streamer_loader.py

Lines changed: 6 additions & 1 deletion
@@ -11,7 +11,7 @@

 from vllm.config import ModelConfig
 from vllm.config.load import LoadConfig
-from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType
 from vllm.model_executor.model_loader.weight_utils import (
     download_safetensors_index_file_from_hf,
     download_weights_from_hf,
@@ -109,3 +109,8 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None:
         model.load_weights(
             self._get_weights_iterator(model_weights, model_config.revision)
         )
+
+    def get_download_type(self, model_name_or_path: str) -> DownloadType:
+        if os.path.isdir(model_name_or_path):
+            return DownloadType.LOCAL_FILE
+        return DownloadType.HUGGINGFACE_HUB

vllm/model_executor/model_loader/sharded_state_loader.py

Lines changed: 6 additions & 1 deletion
@@ -13,7 +13,7 @@
 from vllm.config import ModelConfig
 from vllm.config.load import LoadConfig
 from vllm.logger import init_logger
-from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+from vllm.model_executor.model_loader.base_loader import BaseModelLoader, DownloadType
 from vllm.model_executor.model_loader.weight_utils import (
     download_weights_from_hf,
     runai_safetensors_weights_iterator,
@@ -204,3 +204,8 @@ def save_model(
                     state_dict_part,
                     os.path.join(path, filename),
                 )
+
+    def get_download_type(self, model_name_or_path: str) -> DownloadType:
+        if is_s3(model_name_or_path) or os.path.isdir(model_name_or_path):
+            return DownloadType.LOCAL_FILE
+        return DownloadType.HUGGINGFACE_HUB
