# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
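"""Tests for Sentence Transformers embedding models with Dense projector
layers, plus regression tests for models without projectors.

Covers projector loading, embedding dimensions, numerical similarity
against the HuggingFace implementation, basic embedding quality checks,
and MTEB scores.
"""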
from typing import Any

import numpy as np
import pytest
from scipy.spatial.distance import cosine

from ...utils import EmbedModelInfo
from .mteb_utils import mteb_test_embed_models


def _get_vllm_embeddings(vllm_runner, model_info: EmbedModelInfo,
                         test_texts: list[str]):
    """Helper function to get vLLM embeddings."""
    vllm_extra_kwargs: dict[str, Any] = {}
    if model_info.architecture == "GteNewModel":
        vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}

    with vllm_runner(model_info.name,
                     runner="pooling",
                     max_model_len=None,
                     trust_remote_code=True,
                     **vllm_extra_kwargs) as vllm_model:
        embeddings = vllm_model.encode(test_texts)

        # Extract embedding data, handling both request outputs
        # (with .outputs.data) and raw tensors.
        data = []
        for emb in embeddings:
            if hasattr(emb, 'outputs'):
                data.append(emb.outputs.data.cpu().numpy())
            else:
                data.append(emb.cpu().numpy() if hasattr(emb, 'cpu') else emb)
        return np.array(data)


def _get_hf_embeddings(hf_runner, model_info: EmbedModelInfo,
                       test_texts: list[str]):
    """Helper function to get HuggingFace embeddings."""
    with hf_runner(model_info.name,
                   is_sentence_transformer=True,
                   dtype="float32") as hf_model:
        embeddings = hf_model.encode(test_texts)
        if hasattr(embeddings, 'cpu'):
            return embeddings.cpu().numpy()
        else:
            return np.array(embeddings)


# Test models with ST projectors (Dense layers)
ST_PROJECTOR_MODELS = [
    EmbedModelInfo("TencentBAC/Conan-embedding-v1",
                   architecture="BertModel",
                   enable_test=True),
    # Add more ST models with projectors as they become available
]

# Test models without ST projectors (for regression testing)
NON_PROJECTOR_MODELS = [
    EmbedModelInfo("thenlper/gte-large",
                   architecture="BertModel",
                   enable_test=True),
    EmbedModelInfo("Alibaba-NLP/gte-base-en-v1.5",
                   architecture="GteNewModel",
                   enable_test=True),
    EmbedModelInfo("Qwen/Qwen3-Embedding-0.6B",
                   architecture="Qwen3ForCausalLM",
                   dtype="float32",
                   enable_test=True),
]


@pytest.mark.parametrize("model_info", ST_PROJECTOR_MODELS)
def test_st_projector_loading(vllm_runner,
                              model_info: EmbedModelInfo) -> None:
    """Test that ST projector models load correctly with their projectors."""
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    test_texts = ["This is a test sentence."]
    embeddings_data = _get_vllm_embeddings(vllm_runner, model_info,
                                           test_texts)

    # Check that dimensions match the expected projector output
    # (Conan-embedding-v1's Dense projector outputs 1792 dims).
    actual_dim = embeddings_data.shape[-1]
    expected_dim = 1792
    assert actual_dim == expected_dim, (
        f"Expected {expected_dim}, got {actual_dim}")


@pytest.mark.parametrize("model_info", ST_PROJECTOR_MODELS)
def test_compare_with_hf_dimensions(hf_runner, vllm_runner,
                                    model_info: EmbedModelInfo) -> None:
    """Compare embedding dimensions between vLLM and HuggingFace."""
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    test_texts = ["This is a test sentence for dimension comparison."]

    # Get embeddings from both implementations
    vllm_data = _get_vllm_embeddings(vllm_runner, model_info, test_texts)
    hf_data = _get_hf_embeddings(hf_runner, model_info, test_texts)

    # Compare dimensions
    vllm_dim = vllm_data.shape[-1]
    hf_dim = hf_data.shape[-1]

    assert vllm_dim == hf_dim, (
        f"Embedding dimension mismatch: vLLM {vllm_dim} vs HF {hf_dim}")
    print(f"✓ Embedding dimensions match: {vllm_dim}")


@pytest.mark.parametrize("model_info", ST_PROJECTOR_MODELS)
def test_embedding_numerical_similarity(hf_runner, vllm_runner,
                                        model_info: EmbedModelInfo) -> None:
    """Test numerical similarity between vLLM and HuggingFace embeddings."""
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    test_texts = [
        "This is a test sentence for numerical comparison.",
        "Another sentence to verify embedding quality.",
        "机器学习是人工智能的一个重要分支。",  # Chinese test
    ]

    # Get embeddings from both implementations
    vllm_data = _get_vllm_embeddings(vllm_runner, model_info, test_texts)
    hf_data = _get_hf_embeddings(hf_runner, model_info, test_texts)

    # Verify shape matching
    assert vllm_data.shape == hf_data.shape, (
        f"Shape mismatch: vLLM {vllm_data.shape} vs HF {hf_data.shape}")

    print(f"Embedding shape: {vllm_data.shape}")
    print(f"Embedding dimension: {vllm_data.shape[-1]}")

    # Compute similarities for each test text
    similarities = []
    for i, text in enumerate(test_texts):
        vllm_emb = vllm_data[i]
        hf_emb = hf_data[i]

        # Compute cosine similarity
        similarity = 1 - cosine(vllm_emb, hf_emb)
        similarities.append(similarity)

        print(f"Text {i + 1}: "
              f"'{text[:50]}{'...' if len(text) > 50 else ''}'")
        print(f"  Cosine similarity: {similarity:.6f}")

        # Verify similarity threshold
        min_similarity = 0.95
        assert similarity > min_similarity, (
            f"Text {i + 1} similarity too low: "
            f"{similarity:.6f} < {min_similarity}\n"
            f"vLLM norm: {np.linalg.norm(vllm_emb):.6f}, "
            f"HF norm: {np.linalg.norm(hf_emb):.6f}")

    # Verify average similarity
    avg_similarity = np.mean(similarities)
    print(f"\nAverage cosine similarity: {avg_similarity:.6f}")

    assert avg_similarity > 0.98, (
        f"Average similarity too low: {avg_similarity:.6f} < 0.98")

    print("✓ All numerical similarity tests passed!")


@pytest.mark.parametrize("model_info", ST_PROJECTOR_MODELS)
def test_embedding_quality_checks(vllm_runner,
                                  model_info: EmbedModelInfo) -> None:
    """Test embedding quality: non-zero, non-constant, and distinct vectors."""
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    test_texts = [
        "First test sentence.",
        "Second different sentence.",
        "Completely different content here.",
    ]

    embeddings_data = _get_vllm_embeddings(vllm_runner, model_info,
                                           test_texts)

    print(f"Embeddings shape: {embeddings_data.shape}")

    for i, emb in enumerate(embeddings_data):
        # Verify non-zero vectors
        norm = np.linalg.norm(emb)
        print(f"Embedding {i + 1} L2 norm: {norm:.6f}")
        assert norm > 1e-6, (
            f"Embedding {i + 1} is too close to zero vector: norm={norm}")

        # Verify non-constant vectors
        std = np.std(emb)
        print(f"Embedding {i + 1} std: {std:.6f}")
        assert std > 1e-6, (
            f"Embedding {i + 1} is too close to constant vector: std={std}")

    # Verify that different texts produce different embeddings
    for i in range(len(embeddings_data)):
        for j in range(i + 1, len(embeddings_data)):
            similarity = 1 - cosine(embeddings_data[i], embeddings_data[j])
            print(f"Similarity between text {i + 1} and {j + 1}: "
                  f"{similarity:.6f}")
            # Different texts should not be near-identical
            assert similarity < 0.99, (
                f"Embeddings {i + 1} and {j + 1} are too similar: "
                f"{similarity:.6f}")

    print("✓ All embedding quality checks passed!")


# MTEB tests are currently skipped for projector models because batch
# processing optimization is still pending.
@pytest.mark.parametrize("model_info", ST_PROJECTOR_MODELS)
@pytest.mark.skip(reason="Projector loading and single-sentence inference "
                  "verified. MTEB batch processing optimization pending.")
def test_st_projector_models_mteb(hf_runner, vllm_runner,
                                  model_info: EmbedModelInfo) -> None:
    """Test ST models with projectors using MTEB."""
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    vllm_extra_kwargs: dict[str, Any] = {}
    if model_info.architecture == "GteNewModel":
        vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}

    mteb_test_embed_models(hf_runner, vllm_runner, model_info,
                           vllm_extra_kwargs)


@pytest.mark.parametrize("model_info", NON_PROJECTOR_MODELS)
def test_non_projector_models_mteb(hf_runner, vllm_runner,
                                   model_info: EmbedModelInfo) -> None:
    """Test models without projectors to ensure no regression."""
    if not model_info.enable_test:
        pytest.skip("Skipping test.")

    vllm_extra_kwargs: dict[str, Any] = {}
    if model_info.architecture == "GteNewModel":
        vllm_extra_kwargs["hf_overrides"] = {"architectures": ["GteNewModel"]}

    mteb_test_embed_models(hf_runner, vllm_runner, model_info,
                           vllm_extra_kwargs)