Commit d21a36f

[CI] Add ci_envs for convenient local testing (vllm-project#24630)
Signed-off-by: wang.yuqi <[email protected]>
1 parent 561a0ba commit d21a36f

4 files changed (+98 -18 lines)

tests/ci_envs.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+These envs only work for a small part of the tests, fix what you need!
+"""
+
+import os
+from typing import TYPE_CHECKING, Any, Callable, Optional
+
+if TYPE_CHECKING:
+    VLLM_CI_NO_SKIP: bool = False
+    VLLM_CI_DTYPE: Optional[str] = None
+    VLLM_CI_HEAD_DTYPE: Optional[str] = None
+    VLLM_CI_HF_DTYPE: Optional[str] = None
+
+environment_variables: dict[str, Callable[[], Any]] = {
+    # A model family has many models with the same architecture.
+    # By default, a model family tests only one model.
+    # Through this flag, all models can be tested.
+    "VLLM_CI_NO_SKIP": lambda: bool(int(os.getenv("VLLM_CI_NO_SKIP", "0"))),
+    # Allow changing the dtype used by vllm in tests
+    "VLLM_CI_DTYPE": lambda: os.getenv("VLLM_CI_DTYPE", None),
+    # Allow changing the head dtype used by vllm in tests
+    "VLLM_CI_HEAD_DTYPE": lambda: os.getenv("VLLM_CI_HEAD_DTYPE", None),
+    # Allow changing the head dtype used by transformers in tests
+    "VLLM_CI_HF_DTYPE": lambda: os.getenv("VLLM_CI_HF_DTYPE", None),
+}
+
+
+def __getattr__(name: str):
+    # lazy evaluation of environment variables
+    if name in environment_variables:
+        return environment_variables[name]()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+def __dir__():
+    return list(environment_variables.keys())
+
+
+def is_set(name: str):
+    """Check if an environment variable is explicitly set."""
+    if name in environment_variables:
+        return name in os.environ
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
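
Because values are resolved through the module-level __getattr__ above, each attribute access re-reads the environment at call time. A minimal local-usage sketch (the exported values below are illustrative, not part of the commit):

import os

# Illustrative local configuration: test every model in a family in fp32
# instead of the single representative model CI normally runs.
os.environ["VLLM_CI_NO_SKIP"] = "1"
os.environ["VLLM_CI_DTYPE"] = "float32"

import tests.ci_envs as ci_envs

assert ci_envs.VLLM_CI_NO_SKIP is True           # parsed via bool(int(...))
assert ci_envs.VLLM_CI_DTYPE == "float32"        # read lazily on attribute access
assert not ci_envs.is_set("VLLM_CI_HEAD_DTYPE")  # True only when explicitly exported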

tests/models/language/generation_ppl_test/ppl_utils.py

Lines changed: 14 additions & 5 deletions
@@ -7,6 +7,7 @@
 import torch
 from datasets import load_dataset
 
+import tests.ci_envs as ci_envs
 from tests.models.utils import (GenerateModelInfo,
                                 TokensTextLogprobsPromptLogprobs)
 from vllm.logprobs import Logprob
@@ -26,27 +27,34 @@ def wikitext_ppl_test(hf_runner,
 
     # A model family has many models with the same architecture,
     # and we don't need to test each one.
-    if not model_info.enable_test:
+    if not ci_envs.VLLM_CI_NO_SKIP and not model_info.enable_test:
         pytest.skip("Skipping test.")
 
     dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
 
     # Allow vllm to test using the given dtype, such as float32
     vllm_extra_kwargs = vllm_extra_kwargs or {}
-    vllm_extra_kwargs["dtype"] = model_info.dtype
+    vllm_extra_kwargs["dtype"] = ci_envs.VLLM_CI_DTYPE or model_info.dtype
 
     # Allow vllm to test using hf_overrides
     if model_info.hf_overrides is not None:
         vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
 
+    # Allow changing the head dtype used by vllm in tests
+    if ci_envs.VLLM_CI_HEAD_DTYPE is not None:
+        if "hf_overrides" not in vllm_extra_kwargs:
+            vllm_extra_kwargs["hf_overrides"] = {}
+        vllm_extra_kwargs["hf_overrides"][
+            "head_dtype"] = ci_envs.VLLM_CI_HEAD_DTYPE
+
     with vllm_runner(model_info.name,
                      gpu_memory_utilization=0.7,
                      max_model_len=max_length,
                      max_num_seqs=1,
                      enforce_eager=True,
                      **vllm_extra_kwargs) as vllm_model:
         # Use max_num_seqs=1 to avoid OOM,
-        # and batch different requests together.
+        # and avoid batching different requests together.
 
         model_config = vllm_model.llm.llm_engine.model_config
 
@@ -91,12 +99,13 @@ def wikitext_ppl_test(hf_runner,
                 n_tokens += len(token_log_probs)
         vllm_ppl = float(torch.exp(nll_sum / n_tokens))
         vllm_dtype = model_config.dtype
+        head_dtype = model_config.head_dtype
 
     # Accelerate ppl test by setting Transformers ppl score to a constant
     if model_info.hf_ppl is None:
         with hf_runner(
                 model_info.name,
-                dtype=model_info.hf_dtype,
+                dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
         ) as hf_model:
             nll_sum = torch.tensor(0., dtype=torch.float32, device="cpu")
             n_tokens = 0
@@ -121,7 +130,7 @@ def wikitext_ppl_test(hf_runner,
 
     differ = (vllm_ppl - hf_ppl) / hf_ppl
     print("Model:", model_info.name)
-    print("VLLM:", vllm_dtype, vllm_ppl)
+    print("VLLM:", f"dtype:{vllm_dtype}", f"head_dtype:{head_dtype}", vllm_ppl)
     print("Transformers:", hf_dtype, hf_ppl)
     print("Difference (%):", differ * 100)
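
The head-dtype override is merged into hf_overrides only when VLLM_CI_HEAD_DTYPE is set, so any overrides the model entry already carries are preserved; the same inline pattern reappears in the MTEB utilities below. A standalone sketch of that behaviour (the helper name apply_head_dtype_override is hypothetical, not part of the commit):

from typing import Any, Optional


def apply_head_dtype_override(vllm_extra_kwargs: dict[str, Any],
                              head_dtype: Optional[str]) -> dict[str, Any]:
    # Inject head_dtype into hf_overrides only when the CI env var is set,
    # keeping whatever overrides the model entry already defines.
    if head_dtype is not None:
        vllm_extra_kwargs.setdefault("hf_overrides", {})["head_dtype"] = head_dtype
    return vllm_extra_kwargs


# Same observable behaviour as the inline blocks in this commit:
assert apply_head_dtype_override({"dtype": "half"}, None) == {"dtype": "half"}
assert apply_head_dtype_override({}, "float32") == {
    "hf_overrides": {"head_dtype": "float32"}
}

Here dict.setdefault collapses the explicit "is the key present" check used inline in the diff into a single call with the same result.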

tests/models/language/pooling_mteb_test/mteb_utils.py

Lines changed: 30 additions & 9 deletions
@@ -11,6 +11,7 @@
 import requests
 import torch
 
+import tests.ci_envs as ci_envs
 from tests.models.utils import (EmbedModelInfo, RerankModelInfo,
                                 check_embeddings_close)
 
@@ -168,20 +169,27 @@ def mteb_test_embed_models(hf_runner,
                           atol=MTEB_EMBED_TOL):
     # A model family has many models with the same architecture,
     # and we don't need to test each one.
-    if not model_info.enable_test:
+    if not ci_envs.VLLM_CI_NO_SKIP and not model_info.enable_test:
         pytest.skip("Skipping test.")
 
     # Test embed_dims, isnan and whether to use normalize
     example_prompts = ["The chef prepared a delicious meal." * 1000]
 
     # Allow vllm to test using the given dtype, such as float32
     vllm_extra_kwargs = vllm_extra_kwargs or {}
-    vllm_extra_kwargs["dtype"] = model_info.dtype
+    vllm_extra_kwargs["dtype"] = ci_envs.VLLM_CI_DTYPE or model_info.dtype
 
     # Allow vllm to test using hf_overrides
     if model_info.hf_overrides is not None:
         vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
 
+    # Allow changing the head dtype used by vllm in tests
+    if ci_envs.VLLM_CI_HEAD_DTYPE is not None:
+        if "hf_overrides" not in vllm_extra_kwargs:
+            vllm_extra_kwargs["hf_overrides"] = {}
+        vllm_extra_kwargs["hf_overrides"][
+            "head_dtype"] = ci_envs.VLLM_CI_HEAD_DTYPE
+
     with vllm_runner(model_info.name,
                      runner="pooling",
                      max_model_len=None,
@@ -202,6 +210,7 @@ def mteb_test_embed_models(hf_runner,
         vllm_main_score = run_mteb_embed_task(VllmMtebEncoder(vllm_model),
                                               MTEB_EMBED_TASKS)
         vllm_dtype = vllm_model.llm.llm_engine.model_config.dtype
+        head_dtype = model_config.head_dtype
 
         # Test embed_dims, isnan and whether to use normalize
         vllm_outputs = vllm_model.embed(example_prompts,
@@ -211,9 +220,11 @@ def mteb_test_embed_models(hf_runner,
     # Accelerate mteb test by setting
     # SentenceTransformers mteb score to a constant
     if model_info.mteb_score is None:
-        with hf_runner(model_info.name,
-                       is_sentence_transformer=True,
-                       dtype=model_info.hf_dtype) as hf_model:
+        with hf_runner(
+                model_info.name,
+                is_sentence_transformer=True,
+                dtype=ci_envs.VLLM_CI_HF_DTYPE or model_info.hf_dtype,
+        ) as hf_model:
 
             # e.g. setting default parameters for the encode method of hf_runner
             if hf_model_callback is not None:
@@ -236,7 +247,8 @@ def mteb_test_embed_models(hf_runner,
         st_dtype = "Constant"
 
     print("Model:", model_info.name)
-    print("VLLM:", vllm_dtype, vllm_main_score)
+    print("VLLM:", f"dtype:{vllm_dtype}", f"head_dtype:{head_dtype}",
+          vllm_main_score)
     print("SentenceTransformers:", st_dtype, st_main_score)
     print("Difference:", st_main_score - vllm_main_score)
 
@@ -319,17 +331,24 @@ def mteb_test_rerank_models(hf_runner,
                            atol=MTEB_RERANK_TOL):
     # A model family has many models with the same architecture,
     # and we don't need to test each one.
-    if not model_info.enable_test:
+    if not ci_envs.VLLM_CI_NO_SKIP and not model_info.enable_test:
         pytest.skip("Skipping test.")
 
     # Allow vllm to test using the given dtype, such as float32
     vllm_extra_kwargs = vllm_extra_kwargs or {}
-    vllm_extra_kwargs["dtype"] = model_info.dtype
+    vllm_extra_kwargs["dtype"] = ci_envs.VLLM_CI_DTYPE or model_info.dtype
 
     # Allow vllm to test using hf_overrides
     if model_info.hf_overrides is not None:
         vllm_extra_kwargs["hf_overrides"] = model_info.hf_overrides
 
+    # Allow changing the head dtype used by vllm in tests
+    if ci_envs.VLLM_CI_HEAD_DTYPE is not None:
+        if "hf_overrides" not in vllm_extra_kwargs:
+            vllm_extra_kwargs["hf_overrides"] = {}
+        vllm_extra_kwargs["hf_overrides"][
+            "head_dtype"] = ci_envs.VLLM_CI_HEAD_DTYPE
+
     with vllm_runner(model_info.name,
                      runner="pooling",
                      max_model_len=None,
@@ -355,6 +374,7 @@ def mteb_test_rerank_models(hf_runner,
                             tasks=MTEB_RERANK_TASKS,
                             languages=MTEB_RERANK_LANGS)
         vllm_dtype = model_config.dtype
+        head_dtype = model_config.head_dtype
 
     # Accelerate mteb test by setting
     # SentenceTransformers mteb score to a constant
@@ -366,7 +386,8 @@ def mteb_test_rerank_models(hf_runner,
         st_dtype = "Constant"
 
     print("Model:", model_info.name)
-    print("VLLM:", vllm_dtype, vllm_main_score)
+    print("VLLM:", f"dtype:{vllm_dtype}", f"head_dtype:{head_dtype}",
+          vllm_main_score)
    print("SentenceTransformers:", st_dtype, st_main_score)
     print("Difference:", st_main_score - vllm_main_score)
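
The reworked log line surfaces both the body dtype and the head dtype, which makes local dtype experiments easier to compare. Illustrative output only (the values below are made up, not from a real run):

import torch

# A pooling model running with a bf16 body and the default fp32 head.
vllm_dtype, head_dtype, vllm_main_score = torch.bfloat16, torch.float32, 0.7234
print("VLLM:", f"dtype:{vllm_dtype}", f"head_dtype:{head_dtype}",
      vllm_main_score)
# -> VLLM: dtype:torch.bfloat16 head_dtype:torch.float32 0.7234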

vllm/config/__init__.py

Lines changed: 9 additions & 4 deletions
@@ -1775,16 +1775,21 @@ def head_dtype(self) -> torch.dtype:
         such as the lm_head in a generation model,
         or the score or classifier in a classification model.
 
-        The default head_dtype based on runner_type.\n
+        `head_dtype` currently only supports pooling models.\n
         - The pooling model defaults to using fp32 head,
-        you can use --hf-overrides '{"head_dtype": "model"}' to disable it.\n
-        - The generate model defaults to not using fp32 head,
-        you can use --hf-overrides '{"head_dtype": "float32"}' to enable it.
+        you can use --hf-overrides '{"head_dtype": "model"}' to disable it.
         """
+
         head_dtype = _get_head_dtype(config=self.hf_config,
                                      dtype=self.dtype,
                                      runner_type=self.runner_type)
 
+        if self.runner_type != "pooling" and head_dtype != self.dtype:
+            logger.warning_once(
+                "`head_dtype` currently only supports pooling models. "
+                "Falling back to model dtype [%s].", self.dtype)
+            return self.dtype
+
         if head_dtype not in current_platform.supported_dtypes:
             logger.warning_once(
                 "The current platform does not support [%s] head dtype, "
