Commit fa00c5d

[Misc] Clean up duplicated hf overrides (#22311)
Signed-off-by: Isotr0py <[email protected]>
1 parent 134a8ee commit fa00c5d

3 files changed: 71 additions & 103 deletions

tests/models/multimodal/test_tensor_schema.py

Lines changed: 3 additions & 48 deletions
@@ -1,11 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from functools import partial
-from typing import Any
 from unittest.mock import patch

 import pytest
-from transformers import PretrainedConfig

 from vllm.config import ModelConfig
 from vllm.engine.llm_engine import LLMEngine as V0LLMEngine
@@ -19,6 +17,7 @@

 from ...conftest import VllmRunner
 from ..registry import _MULTIMODAL_EXAMPLE_MODELS, HF_EXAMPLE_MODELS
+from ..utils import dummy_hf_overrides

 ARCH_TO_SKIP = {
     "MolmoForCausalLM": "incompatible requirements",
@@ -51,51 +50,6 @@ def create_batched_mm_kwargs(
     return mm_kwargs


-# Avoid OOM and reduce initialization time by only using 1 layer
-def hf_overrides(hf_config: PretrainedConfig,
-                 exist_overrides: dict[str, Any]) -> PretrainedConfig:
-    hf_config.update(exist_overrides)
-    text_config = hf_config.get_text_config()
-    # Ensure at least 2 expert per group
-    # Since `grouped_topk` assumes top-2
-    n_group = getattr(text_config, 'n_group', None)
-    num_experts = n_group * 2 if n_group is not None else 2
-    # we use three layers for Gemma-3n to check
-    # both normal layer and kv_shared_layer
-    text_config.update({
-        "num_layers": 1,
-        "num_hidden_layers": 1,
-        "num_experts": num_experts,
-        "num_experts_per_tok": 2,
-        "num_local_experts": num_experts,
-        # Otherwise there will not be any expert layers
-        "first_k_dense_replace": 0,
-        # To avoid OOM on DeepSeek-V3
-        "n_routed_experts": num_experts,
-        # For Gemma-3n
-        "num_kv_shared_layers": 1,
-    })
-    if hasattr(hf_config, "vision_config"):
-        hf_config.vision_config.update({
-            "num_layers": 1,
-            "num_hidden_layers": 1,
-        })
-    # e.g.: ibm-granite/granite-speech-3.3-2b
-    if hasattr(hf_config, "encoder_config"):
-        hf_config.encoder_config.update({
-            "num_layers": 1,
-            "num_hidden_layers": 1,
-        })
-    # e.g.: Qwen/Qwen2-Audio-7B-Instruct
-    if hasattr(hf_config, "audio_config"):
-        hf_config.audio_config.update({
-            "num_layers": 1,
-            "num_hidden_layers": 1,
-            "encoder_layers": 1,
-        })
-    return hf_config
-
-
 @pytest.mark.core_model
 @pytest.mark.parametrize("model_arch", list(_MULTIMODAL_EXAMPLE_MODELS.keys()))
 def test_model_tensor_schema(model_arch: str, vllm_runner: type[VllmRunner],
@@ -110,7 +64,8 @@ def test_model_tensor_schema(model_arch: str, vllm_runner: type[VllmRunner],

     model_id = model_info.default

-    hf_overrides_fn = partial(hf_overrides,
+    hf_overrides_fn = partial(dummy_hf_overrides,
+                              model_arch=model_arch,
                               exist_overrides=model_info.hf_overrides)

     model_config = ModelConfig(

tests/models/test_initialization.py

Lines changed: 7 additions & 55 deletions
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+from functools import partial
 from unittest.mock import patch

 import pytest
-from transformers import PretrainedConfig

 from vllm import LLM
 from vllm.config import ModelImpl
@@ -16,6 +16,7 @@
 from ..utils import create_new_process_for_each_test
 from .registry import (_TRANSFORMERS_BACKEND_MODELS, AUTO_EXAMPLE_MODELS,
                        HF_EXAMPLE_MODELS, HfExampleModels)
+from .utils import dummy_hf_overrides


 @create_new_process_for_each_test()
@@ -33,64 +34,15 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch,
     model_info.check_available_online(on_fail="skip")
     model_info.check_transformers_version(on_fail="skip")

+    hf_overrides_fn = partial(dummy_hf_overrides,
+                              model_arch=model_arch,
+                              exist_overrides=model_info.hf_overrides)
+
     if model_arch in ("Llama4ForCausalLM", "EagleLlama4ForCausalLM"):
         from vllm.model_executor.models.llama4 import Llama4ForCausalLM
         from vllm.model_executor.models.registry import ModelRegistry
         ModelRegistry.register_model("Llama4ForCausalLM", Llama4ForCausalLM)

-    # Avoid OOM and reduce initialization time by only using 1 layer
-    def hf_overrides(hf_config: PretrainedConfig) -> PretrainedConfig:
-        hf_config.update(model_info.hf_overrides)
-
-        text_config = hf_config.get_text_config()
-
-        # Ensure at least 2 expert per group
-        # Since `grouped_topk` assumes top-2
-        n_group = getattr(text_config, 'n_group', None)
-        num_experts = n_group * 2 if n_group is not None else 2
-
-        # we use three layers for Gemma-3n to check
-        # both normal layer and kv_shared_layer
-        num_hidden_layers = (3 if model_arch
-                             == "Gemma3nForConditionalGeneration" else 1)
-
-        text_config.update({
-            "num_layers": 1,
-            "num_hidden_layers": num_hidden_layers,
-            "num_experts": num_experts,
-            "num_experts_per_tok": 2,
-            "num_local_experts": num_experts,
-            # Otherwise there will not be any expert layers
-            "first_k_dense_replace": 0,
-            # To avoid OOM on DeepSeek-V3
-            "n_routed_experts": num_experts,
-            # For Gemma-3n
-            "num_kv_shared_layers": 1,
-        })
-
-        if hasattr(hf_config, "vision_config"):
-            hf_config.vision_config.update({
-                "num_layers": 1,
-                "num_hidden_layers": 1,
-            })
-
-        # e.g.: ibm-granite/granite-speech-3.3-2b
-        if hasattr(hf_config, "encoder_config"):
-            hf_config.encoder_config.update({
-                "num_layers": 1,
-                "num_hidden_layers": 1,
-            })
-
-        # e.g.: Qwen/Qwen2-Audio-7B-Instruct
-        if hasattr(hf_config, "audio_config"):
-            hf_config.audio_config.update({
-                "num_layers": 1,
-                "num_hidden_layers": 1,
-                "encoder_layers": 1,
-            })
-
-        return hf_config
-
     # Avoid calling model.forward()
     def _initialize_kv_caches_v0(self) -> None:
         self.cache_config.num_gpu_blocks = 0
@@ -132,7 +84,7 @@ def _initialize_kv_caches_v1(self, vllm_config)
         load_format="dummy",
         model_impl=ModelImpl.TRANSFORMERS
         if model_arch in _TRANSFORMERS_BACKEND_MODELS else ModelImpl.VLLM,
-        hf_overrides=hf_overrides,
+        hf_overrides=hf_overrides_fn,
     )

tests/models/utils.py

Lines changed: 61 additions & 0 deletions
@@ -7,6 +7,7 @@

 import torch
 import torch.nn.functional as F
+from transformers import PretrainedConfig

 from vllm.config import ModelConfig, RunnerOption
 from vllm.inputs import InputContext
@@ -351,3 +352,63 @@ class RerankModelInfo(NamedTuple):
     architecture: str = ""
     dtype: str = "auto"
     enable_test: bool = True
+
+
+def dummy_hf_overrides(
+    hf_config: PretrainedConfig,
+    model_arch: str,
+    exist_overrides: Optional[dict[str, Any]] = None,
+) -> PretrainedConfig:
+    """
+    Dummy HF overrides function used to create a dummy model
+    with only the minimum number of layers.
+    """
+    hf_config.update(exist_overrides or {})
+
+    text_config = hf_config.get_text_config()
+
+    # Ensure at least 2 experts per group,
+    # since `grouped_topk` assumes top-2
+    n_group = getattr(text_config, 'n_group', None)
+    num_experts = n_group * 2 if n_group is not None else 2
+
+    # Use three layers for Gemma-3n to check
+    # both the normal layer and kv_shared_layer
+    num_hidden_layers = (3 if model_arch == "Gemma3nForConditionalGeneration"
+                         else 1)
+    text_config.update({
+        "num_layers": 1,
+        "num_hidden_layers": num_hidden_layers,
+        "num_experts": num_experts,
+        "num_experts_per_tok": 2,
+        "num_local_experts": num_experts,
+        # Otherwise there will not be any expert layers
+        "first_k_dense_replace": 0,
+        # To avoid OOM on DeepSeek-V3
+        "n_routed_experts": num_experts,
+        # For Gemma-3n
+        "num_kv_shared_layers": 1,
+    })
+
+    if hasattr(hf_config, "vision_config"):
+        hf_config.vision_config.update({
+            "num_layers": 1,
+            "num_hidden_layers": 1,
+        })
+
+    # e.g.: ibm-granite/granite-speech-3.3-2b
+    if hasattr(hf_config, "encoder_config"):
+        hf_config.encoder_config.update({
+            "num_layers": 1,
+            "num_hidden_layers": 1,
+        })
+
+    # e.g.: Qwen/Qwen2-Audio-7B-Instruct
+    if hasattr(hf_config, "audio_config"):
+        hf_config.audio_config.update({
+            "num_layers": 1,
+            "num_hidden_layers": 1,
+            "encoder_layers": 1,
+        })
+
+    return hf_config
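
For reference, here is a minimal sketch of how the consolidated helper is used at both call sites: bind the per-test arguments with functools.partial, then pass the resulting one-argument callable to vLLM as hf_overrides. The toy PretrainedConfig below (with an illustrative n_group) is an assumption for demonstration only, not a real checkpoint config, and the tests.models.utils import path assumes the vLLM repo root is on PYTHONPATH.

# A minimal usage sketch mirroring the two call sites in this commit.
# The config values below are illustrative, not from a real checkpoint.
from functools import partial

from transformers import PretrainedConfig

from tests.models.utils import dummy_hf_overrides  # assumes repo root on path

# Bind the per-test arguments once; vLLM's `hf_overrides` expects a
# callable that takes and returns a single PretrainedConfig.
hf_overrides_fn = partial(dummy_hf_overrides,
                          model_arch="Gemma3nForConditionalGeneration",
                          exist_overrides={})

# Applying it shrinks the dummy model: three hidden layers for Gemma-3n
# (to cover both normal and kv-shared layers), one for everything else.
config = hf_overrides_fn(PretrainedConfig(n_group=4))
assert config.num_hidden_layers == 3
assert config.num_experts == 8  # n_group * 2, so each group keeps 2 experts

Because the helper is now an ordinary module-level function rather than a closure defined inside each test, the two test files share one implementation and differences (such as the Gemma-3n layer count) no longer drift apart.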
