@@ -6,6 +6,7 @@
 import pytest
 import torch
 from transformers import AutoConfig, AutoModelForImageTextToText, AutoTokenizer
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm import LLM, SamplingParams
 from vllm.attention.backends.flash_attn import FlashAttentionMetadata
@@ -285,6 +286,10 @@ def clear_cache():
 @pytest.mark.parametrize("max_tokens", [128])
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize("attn_backend", LIST_ENC_DEC_SUPPORTED_BACKENDS)
+@pytest.mark.skipif(
+    TRANSFORMERS_VERSION == "4.55.0",
+    reason="Transformers v4.55.0 has a regression issue on mllama, "
+    "see: https://github.com/huggingface/transformers/pull/40083")
 def test_models_single_leading_image(hf_runner, vllm_runner, image_assets,
                                      model, sizes, dtype, max_tokens,
                                      num_logprobs,
@@ -313,6 +318,10 @@ def test_models_single_leading_image(hf_runner, vllm_runner, image_assets,
 @pytest.mark.parametrize("max_tokens", [128])
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize("attn_backend", LIST_ENC_DEC_SUPPORTED_BACKENDS)
+@pytest.mark.skipif(
+    TRANSFORMERS_VERSION == "4.55.0",
+    reason="Transformers v4.55.0 has a regression issue on mllama, "
+    "see: https://github.com/huggingface/transformers/pull/40083")
 def test_models_multi_leading_images(hf_runner, vllm_runner, image_assets,
                                      model, dtype, max_tokens, num_logprobs,
                                      attn_backend: _Backend) -> None:
@@ -362,6 +371,10 @@ def test_models_multi_leading_images(hf_runner, vllm_runner, image_assets,
 @pytest.mark.parametrize("max_tokens", [128])
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize("attn_backend", LIST_ENC_DEC_SUPPORTED_BACKENDS)
+@pytest.mark.skipif(
+    TRANSFORMERS_VERSION == "4.55.0",
+    reason="Transformers v4.55.0 has a regression issue on mllama, "
+    "see: https://github.com/huggingface/transformers/pull/40083")
 def test_models_interleaved_images(hf_runner, vllm_runner, image_assets, model,
                                    dtype, max_tokens, num_logprobs,
                                    attn_backend: _Backend) -> None:
@@ -402,6 +415,10 @@ def test_models_interleaved_images(hf_runner, vllm_runner, image_assets, model,
 @pytest.mark.parametrize("dtype", ["bfloat16"])
 @pytest.mark.parametrize("max_tokens", [64])
 @pytest.mark.parametrize("num_logprobs", [5])
+@pytest.mark.skipif(
+    TRANSFORMERS_VERSION == "4.55.0",
+    reason="Transformers v4.55.0 has a regression issue on mllama, "
+    "see: https://github.com/huggingface/transformers/pull/40083")
 def test_models_distributed(
     hf_runner,
     vllm_runner,
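
The same four-line skipif guard is repeated verbatim on all four tests. As a minimal sketch (not part of this diff; the marker name skip_transformers_4_55_0 is hypothetical), the guard could instead be built once at module level and applied as a one-line decorator, since pytest.mark.skipif returns a reusable MarkDecorator:

import pytest
from transformers import __version__ as TRANSFORMERS_VERSION

# Sketch only: define the version-gated skip once; each affected test
# then needs just the decorator instead of the full four-line marker.
skip_transformers_4_55_0 = pytest.mark.skipif(
    TRANSFORMERS_VERSION == "4.55.0",
    reason="Transformers v4.55.0 has a regression issue on mllama, "
    "see: https://github.com/huggingface/transformers/pull/40083")


@skip_transformers_4_55_0
def test_example():
    # Placeholder body; the real tests take hf_runner/vllm_runner fixtures.
    assert True

Note that the exact string comparison targets only the 4.55.0 release itself: prerelease or patch builds such as 4.55.0.dev0 or 4.55.1 would still run.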