Add model validation plugin and registry and validate local models

stefanberger · stefanberger · commit 5a9007be168d · 2025-10-06T16:48:36.000Z
Add a model validation plugin registry where classes implementing the
ModelValidationPlugin interface can be registered.

Enable validation of local models that have already been downloaded
by the user.

Add a test case with an already downloaded model whose config.json is
unmodified so that a ModelConfig can be created from it.

Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1062,6 +1062,7 @@ def num_gpus_available():
 
 temp_dir = tempfile.gettempdir()
 _dummy_opt_path = os.path.join(temp_dir, "dummy_opt")
+# Download target of the dummy_opt_unmodified_path fixture, which stores a
+# facebook/opt-125m snapshot here.
+_dummy_opt_unmodified_path = os.path.join(temp_dir, "dummy_opt_unmodified")
 _dummy_llava_path = os.path.join(temp_dir, "dummy_llava")
 _dummy_gemma2_embedding_path = os.path.join(temp_dir, "dummy_gemma2_embedding")
 
@@ -1084,6 +1085,19 @@ def dummy_opt_path():
     return _dummy_opt_path
 
 
+@pytest.fixture
+def dummy_opt_unmodified_path():
+    json_path = os.path.join(_dummy_opt_unmodified_path, "config.json")
+    if not os.path.exists(_dummy_opt_unmodified_path):
+        snapshot_download(
+            repo_id="facebook/opt-125m",
+            local_dir=_dummy_opt_unmodified_path,
+            ignore_patterns=["*.bin", "*.bin.index.json", "*.pt", "*.h5", "*.msgpack"],
+        )
+        assert os.path.exists(json_path)
+    return _dummy_opt_unmodified_path
+
+
 @pytest.fixture
 def dummy_llava_path():
     json_path = os.path.join(_dummy_llava_path, "config.json")
diff --git a/tests/v1/engine/test_engine_core_model_validation.py b/tests/v1/engine/test_engine_core_model_validation.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from typing import Optional
+
+import pytest
+
+from vllm.engine.arg_utils import EngineArgs
+from vllm.utils import set_default_torch_num_threads
+from vllm.v1.engine.core import EngineCore
+from vllm.v1.executor.abstract import Executor
+from vllm.validation.plugins import (
+    ModelType,
+    ModelValidationPlugin,
+    ModelValidationPluginRegistry,
+)
+
+
+class MyModelValidator(ModelValidationPlugin):
+    def model_validation_needed(self, model_type: ModelType, model_path: str) -> bool:
+        return True
+
+    def validate_model(
+        self, model_type: ModelType, model_path: str, model: Optional[str] = None
+    ) -> None:
+        raise BaseException("Model did not validate")
+
+
+def test_engine_core_model_validation(
+    monkeypatch: pytest.MonkeyPatch, dummy_opt_unmodified_path
+):
+    my_model_validator = MyModelValidator()
+    ModelValidationPluginRegistry.register_plugin("test", my_model_validator)
+
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_V1", "1")
+
+        engine_args = EngineArgs(model=dummy_opt_unmodified_path)
+        vllm_config = engine_args.create_engine_config()
+        executor_class = Executor.get_class(vllm_config)
+
+        with set_default_torch_num_threads(1) and pytest.raises(
+            BaseException, match="Model did not validate"
+        ):
+            EngineCore(
+                vllm_config=vllm_config,
+                executor_class=executor_class,
+                log_stats=False,
+            )
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
@@ -65,6 +65,7 @@
 from vllm.v1.request import Request, RequestStatus
 from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
 from vllm.v1.structured_output import StructuredOutputManager
+from vllm.validation.plugins import ModelType, ModelValidationPluginRegistry
 from vllm.version import __version__ as VLLM_VERSION
 
 logger = init_logger(__name__)
@@ -97,6 +98,11 @@ def __init__(
             vllm_config,
         )
 
+        # Run the registered validation plugins on a model that lives in a
+        # local directory. A plugin signals rejection by raising, which
+        # aborts construction before the "Setup Model" step below.
+        if os.path.isdir(vllm_config.model_config.model):
+            ModelValidationPluginRegistry.validate_model(
+                ModelType.MODEL_TYPE_AI_MODEL, vllm_config.model_config.model
+            )
+
         self.log_stats = log_stats
 
         # Setup Model.
@@ -115,6 +121,16 @@ def __init__(
         vllm_config.cache_config.num_cpu_blocks = num_cpu_blocks
         self.collective_rpc("initialize_cache", args=(num_gpu_blocks, num_cpu_blocks))
 
+        # Fail-closed check: if any plugin still reports validation as
+        # outstanding here (e.g. the model was not a local directory, so the
+        # isdir-gated validate_model call earlier in __init__ was skipped),
+        # refuse to continue instead of running a model nobody validated.
+        if ModelValidationPluginRegistry.model_validation_needed(
+            ModelType.MODEL_TYPE_AI_MODEL, vllm_config.model_config.model
+        ):
+            raise Exception(
+                "Model validation was requested for "
+                f"{vllm_config.model_config.model} but was not "
+                "done since a code path was taken that is not yet "
+                "instrumented for model validation."
+            )
+
         self.structured_output_manager = StructuredOutputManager(vllm_config)
 
         # Setup scheduler.
diff --git a/vllm/validation/__init__.py b/vllm/validation/__init__.py
diff --git a/vllm/validation/plugins.py b/vllm/validation/plugins.py
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import enum
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Optional
+
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+class ModelType(int, enum.Enum):
+    """Kind of model artifact handed to a validation plugin."""
+
+    # Passed by EngineCore for the model in vllm_config.model_config.model.
+    MODEL_TYPE_AI_MODEL = 1
+    # Presumably a LoRA adapter; no caller is visible here -- TODO confirm.
+    MODEL_TYPE_LORA = 2
+
+
+class ModelValidationPlugin(ABC):
+    """Base class for all model validation plugins"""
+
+    @abstractmethod
+    def model_validation_needed(self, model_type: ModelType, model_path: str) -> bool:
+        """Have the plugin check whether it already validated the model
+        at the given model_path."""
+        return False
+
+    @abstractmethod
+    def validate_model(
+        self, model_type: ModelType, model_path: str, model: Optional[str] = None
+    ) -> None:
+        """Validate the model at the given model_path."""
+        pass
+
+
+@dataclass
+class _ModelValidationPluginRegistry:
+    plugins: dict[str, ModelValidationPlugin] = field(default_factory=dict)
+
+    def register_plugin(self, plugin_name: str, plugin: ModelValidationPlugin):
+        """Register a security plugin."""
+        if plugin_name in self.plugins:
+            logger.warning(
+                "Model validation plugin %s is already registered, and will be "
+                "overwritten by the new plugin %s.",
+                plugin_name,
+                plugin,
+            )
+
+        self.plugins[plugin_name] = plugin
+
+    def model_validation_needed(self, model_type: ModelType, model_path: str) -> bool:
+        """Check whether model validation was requested but was not done, yet.
+        Returns False in case no model validation was requested or it is already
+        done. Returns True if model validation was request but not done yet."""
+        for plugin in self.plugins.values():
+            if plugin.model_validation_needed(model_type, model_path):
+                return True
+        return False
+
+    def validate_model(
+        self, model_type: ModelType, model_path: str, model: Optional[str] = None
+    ) -> None:
+        """Have all plugins validate the model at the given path. Any plugin
+        that cannot validate it will throw an exception."""
+        plugins = self.plugins.values()
+        if plugins:
+            for plugin in plugins:
+                plugin.validate_model(model_type, model_path, model)
+            logger.info("Successfully validated %s", model_path)
+
+
+# Single module-level registry instance; callers import this object rather
+# than instantiating _ModelValidationPluginRegistry themselves.
+ModelValidationPluginRegistry = _ModelValidationPluginRegistry()