
Commit 01dc9a7

[CI/Build][Bugfix] Ensure compatibility with transformers 4.52 (vllm-project#18678)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent 35cf32d commit 01dc9a7

File tree

13 files changed: +82 −47 lines

requirements/test.in

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ opencv-python-headless >= 4.11.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]==0.4.8 # required for model evaluation test
 mteb>=1.38.11, <2 # required for mteb test
-transformers==4.51.3
+transformers==4.52.4
 tokenizers==0.21.1
 huggingface-hub[hf_xet]>=0.30.0 # Required for Xet downloads.
 schemathesis>=3.39.15 # Required for openai schema test.

requirements/test.txt

Lines changed: 1 addition & 1 deletion
@@ -794,7 +794,7 @@ tqdm==4.66.6
     # transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.51.3
+transformers==4.52.4
     # via
     #   -r requirements/test.in
     #   genai-perf

tests/models/multimodal/generation/test_common.py

Lines changed: 7 additions & 2 deletions
@@ -226,6 +226,8 @@
         img_idx_to_prompt=lambda idx: "",
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
+        # FIXME: https://github.com/huggingface/transformers/pull/38510
+        marks=[pytest.mark.skip("Model is broken")],
     ),
     "chameleon": VLMTestInfo(
         models=["facebook/chameleon-7b"],
@@ -281,10 +283,10 @@
         multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.", # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
-        dtype="bfloat16",
         auto_cls=AutoModelForImageTextToText,
         vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
         patch_hf_runner=model_utils.gemma3_patch_hf_runner,
+        num_logprobs=10,
     ),
     "glm4v": VLMTestInfo(
         models=["THUDM/glm-4v-9b"],
@@ -337,7 +339,8 @@
         models=[
             "OpenGVLab/InternVL2-1B",
             "OpenGVLab/InternVL2-2B",
-            "OpenGVLab/Mono-InternVL-2B",
+            # FIXME: Config cannot be loaded in transformers 4.52
+            # "OpenGVLab/Mono-InternVL-2B",
         ],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
@@ -568,6 +571,8 @@
         max_num_seqs=2,
         vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
         prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
+        # FIXME: https://github.com/huggingface/transformers/issues/38358
+        marks=[pytest.mark.skip("Model initialization fails")],
     ),
     "qwen2_vl": VLMTestInfo(
         models=["Qwen/Qwen2-VL-2B-Instruct"],
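Note that the skips above attach to individual entries of the VLMTestInfo table rather than to whole test functions. A minimal standalone sketch of the same pattern using plain `pytest.param` (the model names and test body here are illustrative, not from the diff):

import pytest

MODEL_CASES = [
    pytest.param("working-model"),
    # Attach the skip to one parametrized case, not the whole test
    pytest.param("broken-model",
                 marks=[pytest.mark.skip("Model is broken upstream")]),
]

@pytest.mark.parametrize("model", MODEL_CASES)
def test_generation(model):
    assert model  # placeholder assertion for the sketch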

tests/models/multimodal/generation/test_florence2.py

Lines changed: 2 additions & 0 deletions
@@ -100,6 +100,8 @@ def run_test(
     )


+# FIXME: https://github.com/huggingface/transformers/issues/38358
+@pytest.mark.skip("Model initialization fails")
 @pytest.mark.core_model
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize(

tests/models/multimodal/generation/test_granite_speech.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ def vllm_to_hf_output(
     return output_ids, hf_output_str, out_logprobs


-MODEL_NAME = "ibm-granite/granite-speech-3.3-8b"
+MODEL_NAME = "ibm-granite/granite-speech-3.3-2b"
 # Audio lora co-exists directly in the model directory, but
 # currently still needs to be passed directly to vLLM.
 audio_lora_path = MODEL_NAME

tests/models/multimodal/generation/test_phi4mm.py

Lines changed: 4 additions & 0 deletions
@@ -122,6 +122,10 @@ def run_test(
         for prompts, images, audios in inputs
     ]

+    # This error occurs inside `get_peft_model`
+    # FIXME: https://huggingface.co/microsoft/Phi-4-multimodal-instruct/discussions/75
+    pytest.skip("HF impl is not compatible with current transformers")
+
     hf_model_kwargs = {"_attn_implementation": "sdpa"}
     with hf_runner(model, dtype=dtype,
                    model_kwargs=hf_model_kwargs) as hf_model:
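Unlike the mark-based skips in test_common.py, this skip is issued imperatively partway through the test body. A standalone sketch of runtime skipping (the `peft` probe is illustrative, chosen only because the FIXME above points at `get_peft_model`):

import importlib.util

import pytest

def test_requires_optional_dependency():
    # pytest.skip() inside the body aborts the test and reports it as skipped
    if importlib.util.find_spec("peft") is None:
        pytest.skip("peft is not installed")
    import peft  # noqa: F401  # safe to import past the guard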

tests/models/multimodal/generation/vlm_utils/model_utils.py

Lines changed: 17 additions & 1 deletion
@@ -10,11 +10,12 @@

 import numpy as np
 import numpy.typing as npt
+import pytest
 import regex as re
 import torch
 from PIL.Image import Image
 from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
-                          GenerationConfig)
+                          GenerationConfig, GenerationMixin)

 from vllm.sequence import SampleLogprobs
 from vllm.transformers_utils.tokenizer import patch_padding_side
@@ -324,6 +325,16 @@ def processor(*args, **kwargs):

     hf_model.processor = processor

+    orig_generate = hf_model.model.generate
+
+    def _generate(self, *args, **kwargs):
+        # FIXME: https://github.com/huggingface/transformers/issues/38333
+        kwargs["disable_compile"] = True
+
+        return orig_generate(*args, **kwargs)
+
+    hf_model.model.generate = types.MethodType(_generate, hf_model.model)
+
     return hf_model


@@ -610,6 +621,11 @@ def _internvl_generate(
     if getattr(self, "use_visual_token_mask", False):
         visual_token_mask = selected.reshape(B, N, 1).to(input_embeds.dtype)
         forward_kwargs["visual_token_mask"] = visual_token_mask
+
+    # e.g. InternVL2-2B
+    if not isinstance(self.language_model, GenerationMixin):
+        pytest.skip("HF impl is not compatible with current transformers")
+
     outputs = self.language_model.generate(
         **forward_kwargs,
         **generate_kwargs,
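The first hunk rebinds `generate` on a live model instance so every call forces eager mode. A self-contained sketch of that monkeypatch pattern (the class is a stand-in; in the diff `disable_compile` is a real transformers generation flag, here it is just a demo kwarg):

import types

class FakeModel:
    def generate(self, *args, **kwargs):
        return kwargs

model = FakeModel()
orig_generate = model.generate  # already bound, so it remembers `model`

def _generate(self, *args, **kwargs):
    # Inject the extra kwarg on every call; `self` only satisfies the
    # method protocol, since orig_generate carries its own binding
    kwargs["disable_compile"] = True
    return orig_generate(*args, **kwargs)

# Rebind so model.generate(...) now routes through the wrapper
model.generate = types.MethodType(_generate, model)
assert model.generate(max_new_tokens=8)["disable_compile"] is True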

tests/models/multimodal/processing/test_common.py

Lines changed: 1 addition & 1 deletion
@@ -245,7 +245,7 @@ def _test_processing_correctness_one(
     "adept/fuyu-8b",
     "google/gemma-3-4b-it",
     "THUDM/glm-4v-9b",
-    "ibm-granite/granite-speech-3.3-8b",
+    "ibm-granite/granite-speech-3.3-2b",
     "h2oai/h2ovl-mississippi-800m",
     "OpenGVLab/InternVL2-1B",
     "OpenGVLab/InternVL3-1B",

tests/models/registry.py

Lines changed: 15 additions & 32 deletions
@@ -160,17 +160,12 @@ def check_available_online(
     "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"), # noqa: E501
     "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
     "FalconH1ForCausalLM":_HfExamplesInfo("tiiuae/Falcon-H1-1.5B-Instruct",
-                                          is_available_online=False,
-                                          min_transformers_version="4.52.2"),
+                                          min_transformers_version="4.53"),
     "GemmaForCausalLM": _HfExamplesInfo("google/gemma-1.1-2b-it"),
     "Gemma2ForCausalLM": _HfExamplesInfo("google/gemma-2-9b"),
     "Gemma3ForCausalLM": _HfExamplesInfo("google/gemma-3-1b-it"),
     "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"),
-    "Glm4ForCausalLM": _HfExamplesInfo(
-        "THUDM/GLM-4-32B-0414",
-        is_available_online=False,
-        min_transformers_version="4.52.dev0"
-    ),
+    "Glm4ForCausalLM": _HfExamplesInfo("THUDM/GLM-4-9B-0414"),
     "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2",
                                        {"alias": "gpt2"}),
     "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder",
@@ -181,8 +176,7 @@ def check_available_online(
                                          {"1b": "EleutherAI/pythia-1.4b"}),
     "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
     "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
-    "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview", # noqa: E501
-                                                   min_transformers_version="4.52.0"), # noqa: E501
+    "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"), # noqa: E501
     "GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts"), # noqa: E501
     "Grok1ModelForCausalLM": _HfExamplesInfo("hpcai-tech/grok-1",
                                              trust_remote_code=True),
@@ -203,8 +197,7 @@ def check_available_online(
     "LLaMAForCausalLM": _HfExamplesInfo("decapoda-research/llama-7b-hf",
                                         is_available_online=False),
     "MambaForCausalLM": _HfExamplesInfo("state-spaces/mamba-130m-hf"),
-    "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1",
-                                         is_available_online=False),
+    "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1"),
     "FalconMambaForCausalLM": _HfExamplesInfo("tiiuae/falcon-mamba-7b-instruct"), # noqa: E501
     "MiniCPMForCausalLM": _HfExamplesInfo("openbmb/MiniCPM-2B-sft-bf16",
                                           trust_remote_code=True),
@@ -243,10 +236,9 @@ def check_available_online(
     "Qwen2MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen1.5-MoE-A2.7B-Chat"),
     "Qwen3ForCausalLM": _HfExamplesInfo("Qwen/Qwen3-8B"),
     "Qwen3MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen3-30B-A3B"),
-    "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b",
-                                     is_available_online=False),
+    "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"),
     "StableLMEpochForCausalLM": _HfExamplesInfo("stabilityai/stablelm-zephyr-3b", # noqa: E501
-                                                is_available_online=False),
+                                                v0_only=True),
     "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t",
                                            v0_only=True),
     "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"),
@@ -256,7 +248,7 @@ def check_available_online(
     "TeleFLMForCausalLM": _HfExamplesInfo("CofeAI/FLM-2-52B-Instruct-2407",
                                           trust_remote_code=True),
     "XverseForCausalLM": _HfExamplesInfo("xverse/XVERSE-7B-Chat",
-                                         is_available_online=False,
+                                         tokenizer="meta-llama/Llama-2-7b",
                                          trust_remote_code=True),
     "Zamba2ForCausalLM": _HfExamplesInfo("Zyphra/Zamba2-7B-instruct"),
     "MiMoForCausalLM": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
@@ -275,8 +267,7 @@ def check_available_online(
                                     trust_remote_code=True),
     "GteNewModel": _HfExamplesInfo("Alibaba-NLP/gte-base-en-v1.5",
                                    trust_remote_code=True,
-                                   hf_overrides={"architectures":
-                                                 ["GteNewModel"]}),
+                                   hf_overrides={"architectures": ["GteNewModel"]}), # noqa: E501
     "InternLM2ForRewardModel": _HfExamplesInfo("internlm/internlm2-1_8b-reward",
                                                trust_remote_code=True),
     "JambaForSequenceClassification": _HfExamplesInfo("ai21labs/Jamba-tiny-reward-dev"), # noqa: E501
@@ -298,10 +289,8 @@ def check_available_online(
     "Phi3VForCausalLM": _HfExamplesInfo("TIGER-Lab/VLM2Vec-Full",
                                         trust_remote_code=True),
     "Qwen2VLForConditionalGeneration": _HfExamplesInfo("MrLight/dse-qwen2-2b-mrl-v1"), # noqa: E501
-    # The model on Huggingface is currently being updated,
-    # hence I temporarily mark it as not available online
-    "PrithviGeoSpatialMAE": _HfExamplesInfo("ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", # noqa: E501
-                                            is_available_online=False),
+    "PrithviGeoSpatialMAE": _HfExamplesInfo("ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11", # noqa: E501
+                                            is_available_online=False), # noqa: E501
 }

 _CROSS_ENCODER_EXAMPLE_MODELS = {
@@ -327,8 +316,7 @@ def check_available_online(
                                          hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "Gemma3ForConditionalGeneration": _HfExamplesInfo("google/gemma-3-4b-it"),
-    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-8b", # noqa: E501
-                                                             min_transformers_version="4.52.0"), # noqa: E501
+    "GraniteSpeechForConditionalGeneration": _HfExamplesInfo("ibm-granite/granite-speech-3.3-2b"), # noqa: E501
     "GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
                                         trust_remote_code=True,
                                         hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
@@ -347,7 +335,6 @@ def check_available_online(
                                          trust_remote_code=True,
                                          v0_only=True),
     "Llama4ForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct", # noqa: E501
-                                                      min_transformers_version="4.51",
                                                       max_model_len=10240),
     "LlavaForConditionalGeneration": _HfExamplesInfo("llava-hf/llava-1.5-7b-hf",
                                                      extras={"mistral": "mistral-community/pixtral-12b", # noqa: E501
@@ -360,8 +347,6 @@ def check_available_online(
                                       transformers_version_reason="HF model is not compatible.", # noqa: E501
                                       hf_overrides={"architectures": ["MantisForConditionalGeneration"]}), # noqa: E501
     "MiniCPMO": _HfExamplesInfo("openbmb/MiniCPM-o-2_6",
-                                max_transformers_version="4.48",
-                                transformers_version_reason="Use of deprecated imports which have been removed.", # noqa: E501
                                 trust_remote_code=True),
     "MiniCPMV": _HfExamplesInfo("openbmb/MiniCPM-Llama3-V-2_5",
                                 extras={"2.6": "openbmb/MiniCPM-V-2_6"}, # noqa: E501
@@ -399,10 +384,8 @@ def check_available_online(
     "Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"), # noqa: E501
     "Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"), # noqa: E501
     "Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct"), # noqa: E501
-    "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B",
-                                        min_transformers_version="4.52"),
-    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ", # noqa: E501
-                                                           min_transformers_version="4.52"),
+    "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B"),
+    "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"), # noqa: E501
     "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B"),
     "SmolVLMForConditionalGeneration": _HfExamplesInfo("HuggingFaceTB/SmolVLM2-2.2B-Instruct"), # noqa: E501
     "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b", # noqa: E501
@@ -413,8 +396,8 @@ def check_available_online(
     # Florence-2 uses BartFastTokenizer which can't be loaded from AutoTokenizer
     # Therefore, we borrow the BartTokenizer from the original Bart model
     "Florence2ForConditionalGeneration": _HfExamplesInfo("microsoft/Florence-2-base", # noqa: E501
-                                                         tokenizer="Isotr0py/Florence-2-tokenizer",
-                                                         trust_remote_code=True,), # noqa: E501
+                                                         tokenizer="Isotr0py/Florence-2-tokenizer", # noqa: E501
+                                                         trust_remote_code=True), # noqa: E501
     "MllamaForConditionalGeneration": _HfExamplesInfo("meta-llama/Llama-3.2-11B-Vision-Instruct"), # noqa: E501
     "WhisperForConditionalGeneration": _HfExamplesInfo("openai/whisper-large-v3"), # noqa: E501
 }
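Most of the churn above drops `min_transformers_version` and `is_available_online` guards made obsolete by the 4.52 bump, and adds new floors (e.g. FalconH1 now waits for 4.53). A rough standalone approximation of how such a guard can be evaluated (`ExampleInfo` is a stand-in for vLLM's `_HfExamplesInfo`, whose real logic lives in tests/models/registry.py):

from dataclasses import dataclass
from typing import Optional

import transformers
from packaging.version import Version

@dataclass
class ExampleInfo:
    default: str
    min_transformers_version: Optional[str] = None

    def transformers_version_ok(self) -> bool:
        # Compare the installed version against the declared floor
        if self.min_transformers_version is None:
            return True
        return Version(transformers.__version__) >= Version(
            self.min_transformers_version)

info = ExampleInfo("tiiuae/Falcon-H1-1.5B-Instruct",
                   min_transformers_version="4.53")
print(info.transformers_version_ok())  # False until transformers 4.53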

tests/models/test_initialization.py

Lines changed: 11 additions & 0 deletions
@@ -21,6 +21,10 @@ def test_can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch):
     model_info.check_available_online(on_fail="skip")
     model_info.check_transformers_version(on_fail="skip")

+    # FIXME: Possible memory leak in the previous tests?
+    if model_arch == "GraniteSpeechForConditionalGeneration":
+        pytest.skip("Avoid OOM")
+
     # Avoid OOM and reduce initialization time by only using 1 layer
     def hf_overrides(hf_config: PretrainedConfig) -> PretrainedConfig:
         hf_config.update(model_info.hf_overrides)
@@ -41,6 +45,13 @@ def hf_overrides(hf_config: PretrainedConfig) -> PretrainedConfig:
             "num_hidden_layers": 1,
         })

+        # e.g.: ibm-granite/granite-speech-3.3-2b
+        if hasattr(hf_config, "encoder_config"):
+            hf_config.encoder_config.update({
+                "num_layers": 1,
+                "num_hidden_layers": 1,
+            })
+
         return hf_config

     # Avoid calling model.forward()
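The new `encoder_config` branch extends the existing trick of shrinking every config to a single layer so initialization stays cheap. A minimal sketch of the idea, assuming the sub-config names listed are the ones worth trimming (the real test handles `text_config` and `vision_config` in surrounding code not shown in this diff):

from transformers import GPT2Config, PretrainedConfig

def shrink_config(hf_config: PretrainedConfig) -> PretrainedConfig:
    # Trim any nested sub-configs first, then the top-level config
    for key in ("text_config", "vision_config", "encoder_config"):
        sub = getattr(hf_config, key, None)
        if isinstance(sub, PretrainedConfig):
            sub.update({"num_layers": 1, "num_hidden_layers": 1})
    hf_config.update({"num_layers": 1, "num_hidden_layers": 1})
    return hf_config

config = shrink_config(GPT2Config())
print(config.num_hidden_layers)  # 1 (mapped onto GPT-2's n_layer)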
