
Commit fecbba3

Fix VLM model ut (#2218)
Signed-off-by: Kaihui-intel <[email protected]>
1 parent 63a4ed8 commit fecbba3

File tree

1 file changed: +9 -3 lines

test/3x/torch/quantization/weight_only/test_transformers.py

Lines changed: 9 additions & 3 deletions
@@ -247,15 +247,21 @@ def test_vlm(self):
             from intel_extension_for_pytorch.nn.utils._quantize_convert import WeightOnlyQuantizedLinear
         else:
             from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear
-        assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
+
+        if Version(transformers.__version__) >= Version("4.52"):
+            assert isinstance(woq_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
+        else:
+            assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
 
         #save
         woq_model.save_pretrained("transformers_vlm_tmp")
 
         #load
         loaded_model = Qwen2VLForConditionalGeneration.from_pretrained("transformers_vlm_tmp")
-        assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loaing model failed."
-
+        if Version(transformers.__version__) >= Version("4.52"):
+            assert isinstance(loaded_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loaing model failed."
+        else:
+            assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loaing model failed."
         # phi-3-vision-128k-instruct, disable as CI consumes too much time
         # woq_config = AutoRoundConfig(
         #     bits=4,
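
For context, the guard added in this hunk reflects the layer path the test must check on different transformers releases: from 4.52 onward the Qwen2-VL text decoder is reached through model.language_model, while older releases expose the decoder layers directly on model.layers. Below is a minimal sketch, not part of the commit, of the same version-gated access pattern; get_decoder_layers is a hypothetical helper name, and Version is assumed to come from packaging.version, as the test's Version(transformers.__version__) calls suggest.

# Hypothetical helper (not from the commit) mirroring the version gate in the hunk above.
import transformers
from packaging.version import Version


def get_decoder_layers(model):
    """Return the Qwen2-VL text-decoder layers across transformers versions."""
    if Version(transformers.__version__) >= Version("4.52"):
        # Newer layout: the text decoder is nested under model.language_model.
        return model.model.language_model.layers
    # Older layout: decoder layers hang directly off model.model.
    return model.model.layers

With a helper like this, each version-gated pair of assertions above could collapse to a single isinstance check on get_decoder_layers(woq_model)[0].self_attn.k_proj (and likewise for loaded_model), though the commit keeps the explicit branches in the test itself.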
