11
11
import requests
12
12
import torch
13
13
14
+ import tests .ci_envs as ci_envs
14
15
from tests .models .utils import (EmbedModelInfo , RerankModelInfo ,
15
16
check_embeddings_close )
16
17
@@ -168,20 +169,27 @@ def mteb_test_embed_models(hf_runner,
168
169
atol = MTEB_EMBED_TOL ):
169
170
# A model family has many models with the same architecture,
170
171
# and we don't need to test each one.
171
- if not model_info .enable_test :
172
+ if not ci_envs . VLLM_CI_NO_SKIP and not model_info .enable_test :
172
173
pytest .skip ("Skipping test." )
173
174
174
175
# Test embed_dims, isnan and whether to use normalize
175
176
example_prompts = ["The chef prepared a delicious meal." * 1000 ]
176
177
177
178
# Allow vllm to test using the given dtype, such as float32
178
179
vllm_extra_kwargs = vllm_extra_kwargs or {}
179
- vllm_extra_kwargs ["dtype" ] = model_info .dtype
180
+ vllm_extra_kwargs ["dtype" ] = ci_envs . VLLM_CI_DTYPE or model_info .dtype
180
181
181
182
# Allow vllm to test using hf_overrides
182
183
if model_info .hf_overrides is not None :
183
184
vllm_extra_kwargs ["hf_overrides" ] = model_info .hf_overrides
184
185
186
+ # Allow changing the head dtype used by vllm in tests
187
+ if ci_envs .VLLM_CI_HEAD_DTYPE is not None :
188
+ if "hf_overrides" not in vllm_extra_kwargs :
189
+ vllm_extra_kwargs ["hf_overrides" ] = {}
190
+ vllm_extra_kwargs ["hf_overrides" ][
191
+ "head_dtype" ] = ci_envs .VLLM_CI_HEAD_DTYPE
192
+
185
193
with vllm_runner (model_info .name ,
186
194
runner = "pooling" ,
187
195
max_model_len = None ,
@@ -202,6 +210,7 @@ def mteb_test_embed_models(hf_runner,
202
210
vllm_main_score = run_mteb_embed_task (VllmMtebEncoder (vllm_model ),
203
211
MTEB_EMBED_TASKS )
204
212
vllm_dtype = vllm_model .llm .llm_engine .model_config .dtype
213
+ head_dtype = model_config .head_dtype
205
214
206
215
# Test embed_dims, isnan and whether to use normalize
207
216
vllm_outputs = vllm_model .embed (example_prompts ,
@@ -211,9 +220,11 @@ def mteb_test_embed_models(hf_runner,
211
220
# Accelerate mteb test by setting
212
221
# SentenceTransformers mteb score to a constant
213
222
if model_info .mteb_score is None :
214
- with hf_runner (model_info .name ,
215
- is_sentence_transformer = True ,
216
- dtype = model_info .hf_dtype ) as hf_model :
223
+ with hf_runner (
224
+ model_info .name ,
225
+ is_sentence_transformer = True ,
226
+ dtype = ci_envs .VLLM_CI_HF_DTYPE or model_info .hf_dtype ,
227
+ ) as hf_model :
217
228
218
229
# e.g. setting default parameters for the encode method of hf_runner
219
230
if hf_model_callback is not None :
@@ -236,7 +247,8 @@ def mteb_test_embed_models(hf_runner,
236
247
st_dtype = "Constant"
237
248
238
249
print ("Model:" , model_info .name )
239
- print ("VLLM:" , vllm_dtype , vllm_main_score )
250
+ print ("VLLM:" , f"dtype:{ vllm_dtype } " , f"head_dtype:{ head_dtype } " ,
251
+ vllm_main_score )
240
252
print ("SentenceTransformers:" , st_dtype , st_main_score )
241
253
print ("Difference:" , st_main_score - vllm_main_score )
242
254
@@ -319,17 +331,24 @@ def mteb_test_rerank_models(hf_runner,
319
331
atol = MTEB_RERANK_TOL ):
320
332
# A model family has many models with the same architecture,
321
333
# and we don't need to test each one.
322
- if not model_info .enable_test :
334
+ if not ci_envs . VLLM_CI_NO_SKIP and not model_info .enable_test :
323
335
pytest .skip ("Skipping test." )
324
336
325
337
# Allow vllm to test using the given dtype, such as float32
326
338
vllm_extra_kwargs = vllm_extra_kwargs or {}
327
- vllm_extra_kwargs ["dtype" ] = model_info .dtype
339
+ vllm_extra_kwargs ["dtype" ] = ci_envs . VLLM_CI_DTYPE or model_info .dtype
328
340
329
341
# Allow vllm to test using hf_overrides
330
342
if model_info .hf_overrides is not None :
331
343
vllm_extra_kwargs ["hf_overrides" ] = model_info .hf_overrides
332
344
345
+ # Allow changing the head dtype used by vllm in tests
346
+ if ci_envs .VLLM_CI_HEAD_DTYPE is not None :
347
+ if "hf_overrides" not in vllm_extra_kwargs :
348
+ vllm_extra_kwargs ["hf_overrides" ] = {}
349
+ vllm_extra_kwargs ["hf_overrides" ][
350
+ "head_dtype" ] = ci_envs .VLLM_CI_HEAD_DTYPE
351
+
333
352
with vllm_runner (model_info .name ,
334
353
runner = "pooling" ,
335
354
max_model_len = None ,
@@ -355,6 +374,7 @@ def mteb_test_rerank_models(hf_runner,
355
374
tasks = MTEB_RERANK_TASKS ,
356
375
languages = MTEB_RERANK_LANGS )
357
376
vllm_dtype = model_config .dtype
377
+ head_dtype = model_config .head_dtype
358
378
359
379
# Accelerate mteb test by setting
360
380
# SentenceTransformers mteb score to a constant
@@ -366,7 +386,8 @@ def mteb_test_rerank_models(hf_runner,
366
386
st_dtype = "Constant"
367
387
368
388
print ("Model:" , model_info .name )
369
- print ("VLLM:" , vllm_dtype , vllm_main_score )
389
+ print ("VLLM:" , f"dtype:{ vllm_dtype } " , f"head_dtype:{ head_dtype } " ,
390
+ vllm_main_score )
370
391
print ("SentenceTransformers:" , st_dtype , st_main_score )
371
392
print ("Difference:" , st_main_score - vllm_main_score )
372
393
0 commit comments