@@ -34,8 +34,7 @@ def check_implementation(
     with runner_test(model, **kwargs_test, **kwargs) as model_test:
         model_config = model_test.llm.llm_engine.model_config
-        assert model_config.architecture == (
-            model_config._get_transformers_backend_cls())
+        assert model_config.using_transformers_backend()
 
         outputs_test = model_test.generate_greedy_logprobs(*args)
@@ -135,8 +134,7 @@ def test_quantization(
             enforce_eager=True,
             **quantization_kwargs) as vllm_model:  # type: ignore[arg-type]
         model_config = vllm_model.llm.llm_engine.model_config
-        assert model_config.architecture == (
-            model_config._get_transformers_backend_cls())
+        assert model_config.using_transformers_backend()
 
         transformers_outputs = vllm_model.generate_greedy_logprobs(
             example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs)
@@ -149,6 +147,25 @@ def test_quantization(
     )
 
 
+@pytest.mark.parametrize(
+    "model",
+    [
+        # Layers live in `layers`
+        "Qwen/Qwen3-Embedding-0.6B",
+        # Layers live in `model.layers`
+        "meta-llama/Llama-3.2-1B-Instruct"
+    ],
+)
+def test_embed_loading(vllm_runner, model):
+    with vllm_runner(model,
+                     max_model_len=1024,
+                     enforce_eager=True,
+                     runner="pooling",
+                     model_impl="transformers") as model_test:
+        model_config = model_test.llm.llm_engine.model_config
+        assert model_config.using_transformers_backend()
+
+
 @pytest.mark.parametrize(
     "model",
     ["jason9693/Qwen2.5-1.5B-apeach"],
@@ -169,8 +186,7 @@ def test_classify(
                      dtype=dtype,
                      model_impl="transformers") as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
-        assert model_config.architecture == (
-            model_config._get_transformers_backend_cls())
+        assert model_config.using_transformers_backend()
 
         vllm_outputs = vllm_model.classify(example_prompts)
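
The diff collapses the repeated inline check `model_config.architecture == model_config._get_transformers_backend_cls()` into a single helper call. A minimal sketch of how such a `ModelConfig.using_transformers_backend()` method could look, assuming the private `_get_transformers_backend_cls()` helper from the old assertions is still available on `ModelConfig` (hypothetical sketch, not the actual vLLM implementation):

    class ModelConfig:
        ...

        def using_transformers_backend(self) -> bool:
            # Hypothetical sketch: True when the resolved architecture is the
            # Transformers-backend class, i.e. the model was loaded with
            # model_impl="transformers".
            return self.architecture == self._get_transformers_backend_cls()

Wrapping the comparison keeps the tests readable and means any future change to how the Transformers backend class is resolved only has to be made in one place.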