
Commit b40cf64

[Model] Support Qwen2 embeddings and use tags to select model tests (#10184)
1 parent 2885ba0 commit b40cf64

File tree: 19 files changed, +252 / -178 lines


.buildkite/run-cpu-test-ppc64le.sh

Lines changed: 3 additions & 3 deletions
@@ -27,9 +27,9 @@ function cpu_tests() {
       decord einops librosa peft Pillow sentence-transformers soundfile \
       transformers_stream_generator matplotlib datamodel_code_generator
     pip install torchvision --index-url https://download.pytorch.org/whl/cpu
-    pytest -v -s tests/models/embedding/language
-    pytest -v -s tests/models/encoder_decoder/language
-    pytest -v -s tests/models/decoder_only/language/test_models.py
+    pytest -v -s tests/models/decoder_only/language -m cpu_model
+    pytest -v -s tests/models/embedding/language -m cpu_model
+    pytest -v -s tests/models/encoder_decoder/language -m cpu_model
     pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
     pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"

.buildkite/run-cpu-test.sh

Lines changed: 3 additions & 3 deletions
@@ -38,9 +38,9 @@ function cpu_tests() {
       decord einops librosa peft Pillow sentence-transformers soundfile \
       transformers_stream_generator matplotlib datamodel_code_generator
     pip install torchvision --index-url https://download.pytorch.org/whl/cpu
-    pytest -v -s tests/models/embedding/language
-    pytest -v -s tests/models/encoder_decoder/language
-    pytest -v -s tests/models/decoder_only/language/test_models.py
+    pytest -v -s tests/models/decoder_only/language -m cpu_model
+    pytest -v -s tests/models/embedding/language -m cpu_model
+    pytest -v -s tests/models/encoder_decoder/language -m cpu_model
     pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
     pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"

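Both CPU scripts now select tests by marker instead of hard-coding file lists: -m cpu_model runs only the tests tagged with the cpu_model marker. A minimal sketch of the mechanism (hypothetical test file, not part of this commit; it assumes core_model and cpu_model are registered markers in the project's pytest configuration):

    import pytest

    @pytest.mark.core_model  # picked up by the standard CI jobs (-m core_model)
    @pytest.mark.cpu_model   # also picked up by these CPU scripts (-m cpu_model)
    def test_small_model():
        assert True

    def test_gpu_only_model():  # unmarked: excluded by -m cpu_model
        assert True

Running pytest -m cpu_model against this file collects only test_small_model; tests without the marker are deselected rather than failed.
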
.buildkite/test-pipeline.yaml

Lines changed: 23 additions & 25 deletions
@@ -323,62 +323,60 @@ steps:
   - pytest -v -s models/test_registry.py
   - pytest -v -s models/test_initialization.py

-- label: Decoder-only Language Models Test (Standard) # 18min
+- label: Language Models Test (Standard) # 42min
   #mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
   - tests/models/decoder_only/language
+  - tests/models/embedding/language
+  - tests/models/encoder_decoder/language
   commands:
-  - pytest -v -s models/decoder_only/language -m core_model
-  - pytest -v -s models/decoder_only/language -m quant_model
+  - pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
+  - pytest -v -s models/embedding/language -m core_model
+  - pytest -v -s models/embedding/vision_language -m core_model

-- label: Decoder-only Language Models Test (Extended) # 46min
+- label: Language Models Test (Extended) # 50min
   nightly: true
   source_file_dependencies:
   - vllm/
   - tests/models/decoder_only/language
+  - tests/models/embedding/language
+  - tests/models/encoder_decoder/language
   commands:
   - pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
+  - pytest -v -s models/embedding/language -m 'not core_model'
+  - pytest -v -s models/embedding/vision_language -m 'not core_model'

-- label: Decoder-only Multi-Modal Models Test (Standard) # 22min
+- label: Multi-Modal Models Test (Standard) # 26min
   #mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
   - tests/models/decoder_only/audio_language
   - tests/models/decoder_only/vision_language
+  - tests/models/embedding/vision_language
+  - tests/models/encoder_decoder/vision_language
   commands:
-  - pytest -v -s models/decoder_only/audio_language -m core_model
-  - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m core_model
-  # No tests under this group for now
-  # - pytest -v -s models/decoder_only/audio_language -m quant_model
-  - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m quant_model
+  - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model'
+  - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'core_model or quant_model'
+  - pytest -v -s models/encoder_decoder/language -m core_model
+  - pytest -v -s models/encoder_decoder/vision_language -m core_model

-- label: Decoder-only Multi-Modal Models Test (Extended) # 1h10m
+- label: Multi-Modal Models Test (Extended) # 1h15m
   nightly: true
   source_file_dependencies:
   - vllm/
   - tests/models/decoder_only/audio_language
   - tests/models/decoder_only/vision_language
+  - tests/models/embedding/vision_language
+  - tests/models/encoder_decoder/vision_language
   commands:
   - pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
   # HACK - run phi3v tests separately to sidestep this transformers bug
   # https://github.com/huggingface/transformers/issues/34307
   - pytest -v -s models/decoder_only/vision_language/test_phi3v.py
   - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
-
-- label: Other Models Test # 20min
-  #mirror_hardwares: [amd]
-  source_file_dependencies:
-  - vllm/
-  - tests/models/embedding/language
-  - tests/models/embedding/vision_language
-  - tests/models/encoder_decoder/language
-  - tests/models/encoder_decoder/vision_language
-  commands:
-  - pytest -v -s models/embedding/language
-  - pytest -v -s models/embedding/vision_language
-  - pytest -v -s models/encoder_decoder/language
-  - pytest -v -s models/encoder_decoder/vision_language
+  - pytest -v -s models/encoder_decoder/language -m 'not core_model'
+  - pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'

 # This test is used only in PR development phase to test individual models and should never run on main
 - label: Custom Models Test

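The Standard and Extended jobs now use complementary marker expressions ('core_model or quant_model' versus 'not core_model and not quant_model'), so each decoder-only language test runs in exactly one tier. A quick local sanity check, sketched with pytest's programmatic entry point (standard pytest API; the path is taken from the pipeline above):

    import pytest

    # Collect, without running, what each CI tier would pick up; the two
    # collections should be disjoint and together cover the directory.
    pytest.main(["--collect-only", "-q", "models/decoder_only/language",
                 "-m", "core_model or quant_model"])
    pytest.main(["--collect-only", "-q", "models/decoder_only/language",
                 "-m", "not core_model and not quant_model"])
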
docs/source/models/supported_models.rst

Lines changed: 9 additions & 4 deletions
@@ -330,11 +330,16 @@ Text Embedding
     - :code:`BAAI/bge-multilingual-gemma2`, etc.
     -
     - ✅︎
-  * - :code:`MistralModel`
-    - Mistral-based
+  * - :code:`LlamaModel`, :code:`LlamaForCausalLM`, :code:`MistralModel`, etc.
+    - Llama-based
     - :code:`intfloat/e5-mistral-7b-instruct`, etc.
     - ✅︎
     - ✅︎
+  * - :code:`Qwen2Model`, :code:`Qwen2ForCausalLM`
+    - Qwen2-based
+    - :code:`ssmits/Qwen2-7B-Instruct-embed-base`, :code:`Alibaba-NLP/gte-Qwen2-1.5B-instruct`, etc.
+    - ✅︎
+    - ✅︎

 .. important::
    Some model architectures support both generation and embedding tasks.
@@ -355,7 +360,7 @@ Reward Modeling
   * - :code:`Qwen2ForRewardModel`
     - Qwen2-based
     - :code:`Qwen/Qwen2.5-Math-RM-72B`, etc.
-    -
+    - ✅︎
     - ✅︎

 .. note::
@@ -376,7 +381,7 @@ Classification
   * - :code:`Qwen2ForSequenceClassification`
     - Qwen2-based
     - :code:`jason9693/Qwen2.5-1.5B-apeach`, etc.
-    -
+    - ✅︎
     - ✅︎

 .. note::

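With Qwen2 embedding models now documented above, a minimal offline-inference sketch may be useful. It is not code from this commit; the task="embedding" flag and the encode() output layout are assumptions based on vLLM's embedding documentation:

    from vllm import LLM

    # One of the newly documented Qwen2 embedding checkpoints.
    llm = LLM(model="ssmits/Qwen2-7B-Instruct-embed-base", task="embedding")
    outputs = llm.encode(["Hello, my name is", "The capital of France is"])
    for output in outputs:
        # Each result carries one embedding vector for its prompt.
        print(len(output.outputs.embedding))
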
tests/models/decoder_only/language/test_jamba.py

Lines changed: 4 additions & 14 deletions
@@ -33,6 +33,10 @@ def test_models(

     with vllm_runner(model, dtype=dtype) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # This test is for verifying whether the model's extra_repr
+        # can be printed correctly.
+        print(vllm_model.model.llm_engine.model_executor.driver_worker.
+              model_runner.model)

     for i in range(len(example_prompts)):
         hf_output_ids, hf_output_str = hf_outputs[i]
@@ -293,17 +297,3 @@ def test_jamba_distributed_produces_identical_generation(
         name_0="vllm_tp_1",
         name_1="vllm_tp_2",
     )
-
-
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["float"])
-def test_model_print(
-    vllm_runner,
-    model: str,
-    dtype: str,
-) -> None:
-    with vllm_runner(model, dtype=dtype) as vllm_model:
-        # This test is for verifying whether the model's extra_repr
-        # can be printed correctly.
-        print(vllm_model.model.llm_engine.model_executor.driver_worker.
-              model_runner.model)

tests/models/decoder_only/language/test_mamba.py

Lines changed: 4 additions & 14 deletions
@@ -51,6 +51,10 @@ def test_models(

     with vllm_runner(model, dtype=dtype) as vllm_model:
         vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
+        # This test is for verifying whether the model's extra_repr
+        # can be printed correctly.
+        print(vllm_model.model.llm_engine.model_executor.driver_worker.
+              model_runner.model)

     for i in range(len(example_prompts)):
         hf_output_ids, hf_output_str = hf_outputs[i]
@@ -279,17 +283,3 @@ def test_state_cleanup(
     except ValueError:
         pytest.fail("Mamba inner state wasn't cleaned up between states, "
                     "could be related to finished_requests_ids")
-
-
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", ["float"])
-def test_model_print(
-    vllm_runner,
-    model: str,
-    dtype: str,
-) -> None:
-    with vllm_runner(model, dtype=dtype) as vllm_model:
-        # This test is for verifying whether the model's extra_repr
-        # can be printed correctly.
-        print(vllm_model.model.llm_engine.model_executor.driver_worker.
-              model_runner.model)

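The print folded into test_models above exercises PyTorch's nn.Module.extra_repr hook, which supplies the extra text shown inside a module's printed representation. A standalone sketch (hypothetical module, not vLLM code):

    import torch.nn as nn

    class TinyAttention(nn.Module):
        def __init__(self, num_heads: int, head_dim: int):
            super().__init__()
            self.num_heads = num_heads
            self.head_dim = head_dim

        def extra_repr(self) -> str:
            # Rendered inside the parentheses when the module is printed.
            return f"num_heads={self.num_heads}, head_dim={self.head_dim}"

    print(TinyAttention(8, 64))  # TinyAttention(num_heads=8, head_dim=64)
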
tests/models/decoder_only/language/test_models.py

Lines changed: 43 additions & 28 deletions
@@ -4,37 +4,52 @@
 """
 import pytest

-from vllm.platforms import current_platform
-
 from ...utils import check_logprobs_close

-MODELS = [
-    "facebook/opt-125m",  # opt
-    "openai-community/gpt2",  # gpt2
-    # "Milos/slovak-gpt-j-405M",  # gptj
-    # "bigcode/tiny_starcoder_py",  # gpt_bigcode
-    # "EleutherAI/pythia-70m",  # gpt_neox
-    "bigscience/bloom-560m",  # bloom - testing alibi slopes
-    "microsoft/phi-2",  # phi
-    # "stabilityai/stablelm-3b-4e1t",  # stablelm
-    # "bigcode/starcoder2-3b",  # starcoder2
-    "google/gemma-1.1-2b-it",  # gemma
-    "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
-    "meta-llama/Llama-3.2-1B-Instruct",  # llama
-]
-
-if not current_platform.is_cpu():
-    MODELS += [
-        # fused_moe which not supported on CPU
-        "openbmb/MiniCPM3-4B",
-    ]
-
-target_dtype = "half"

-@pytest.mark.core_model
-@pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("dtype", [target_dtype])
+@pytest.mark.parametrize(
+    "model",
+    [
+        pytest.param(
+            "bigscience/bloom-560m",  # bloom - testing alibi slopes
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        ),
+        pytest.param(
+            "openai-community/gpt2",  # gpt2
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        ),
+        pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
+        pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
+        pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
+        pytest.param(
+            "google/gemma-1.1-2b-it",  # gemma
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        ),
+        pytest.param(
+            "meta-llama/Llama-3.2-1B-Instruct",  # llama
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        ),
+        pytest.param(
+            "openbmb/MiniCPM3-4B",
+            # fused_moe not supported on CPU
+            marks=[pytest.mark.core_model],
+        ),
+        pytest.param(
+            "facebook/opt-125m",  # opt
+            marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+        ),
+        pytest.param(
+            "microsoft/phi-2",  # phi
+            marks=[pytest.mark.core_model],
+        ),
+        pytest.param(
+            "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
+            marks=[pytest.mark.core_model],
+        ),
+        pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
+        pytest.param("bigcode/starcoder2-3b"),  # starcoder2
+    ])
+@pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", [32])
 @pytest.mark.parametrize("num_logprobs", [5])
 def test_models(

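The rewrite above drops the module-level MODELS list and its current_platform.is_cpu() branch in favor of per-case marks, so a single parametrized test can feed several CI tiers. A condensed sketch of the pattern (hypothetical test body; the model names and marks come from the diff):

    import pytest

    @pytest.mark.parametrize(
        "model",
        [
            # Standard CI and the CPU scripts both collect this case.
            pytest.param("facebook/opt-125m",
                         marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
            # fused_moe is GPU-only, so no cpu_model mark here.
            pytest.param("openbmb/MiniCPM3-4B",
                         marks=[pytest.mark.core_model]),
            # Unmarked: collected only by the nightly 'not core_model' runs.
            pytest.param("bigcode/starcoder2-3b"),
        ])
    def test_models(model: str) -> None:
        assert isinstance(model, str)  # stand-in for the real checks
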
tests/models/embedding/language/test_cls_models.py

Lines changed: 11 additions & 19 deletions
@@ -9,10 +9,14 @@
 import torch
 from transformers import AutoModelForSequenceClassification

-CLASSIFICATION_MODELS = ["jason9693/Qwen2.5-1.5B-apeach"]

-
-@pytest.mark.parametrize("model", CLASSIFICATION_MODELS)
+@pytest.mark.parametrize(
+    "model",
+    [
+        pytest.param("jason9693/Qwen2.5-1.5B-apeach",
+                     marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+    ],
+)
 @pytest.mark.parametrize("dtype", ["float"])
 def test_classification_models(
     hf_runner,
@@ -23,31 +27,19 @@ def test_classification_models(
 ) -> None:
     with vllm_runner(model, dtype=dtype) as vllm_model:
         vllm_outputs = vllm_model.classify(example_prompts)
+        # This test is for verifying whether the model's extra_repr
+        # can be printed correctly.
+        print(vllm_model.model.llm_engine.model_executor.driver_worker.
+              model_runner.model)

     with hf_runner(model,
                    dtype=dtype,
                    auto_cls=AutoModelForSequenceClassification) as hf_model:
         hf_outputs = hf_model.classify(example_prompts)

-    print(hf_outputs, vllm_outputs)
-
     # check logits difference
     for hf_output, vllm_output in zip(hf_outputs, vllm_outputs):
         hf_output = torch.tensor(hf_output)
         vllm_output = torch.tensor(vllm_output)

         assert torch.allclose(hf_output, vllm_output, 1e-3)
-
-
-@pytest.mark.parametrize("model", CLASSIFICATION_MODELS)
-@pytest.mark.parametrize("dtype", ["float"])
-def test_classification_model_print(
-    vllm_runner,
-    model: str,
-    dtype: str,
-) -> None:
-    with vllm_runner(model, dtype=dtype) as vllm_model:
-        # This test is for verifying whether the model's extra_repr
-        # can be printed correctly.
-        print(vllm_model.model.llm_engine.model_executor.driver_worker.
-              model_runner.model)

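For reference, the HF baseline in the test above loads the checkpoint with AutoModelForSequenceClassification. A standalone sketch of that reference path (standard transformers usage; not vLLM or test-harness code):

    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    name = "jason9693/Qwen2.5-1.5B-apeach"
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSequenceClassification.from_pretrained(name)

    inputs = tokenizer("This is a friendly comment.", return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits  # class logits, shape [1, num_labels]
    print(logits.softmax(dim=-1))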