test/3x/torch/quantization/weight_only (1 file changed, +9 −3 lines)

@@ -247,15 +247,21 @@ def test_vlm(self):
             from intel_extension_for_pytorch.nn.utils._quantize_convert import WeightOnlyQuantizedLinear
         else:
             from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear
-        assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
+
+        if Version(transformers.__version__) >= Version("4.52"):
+            assert isinstance(woq_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
+        else:
+            assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."

         #save
         woq_model.save_pretrained("transformers_vlm_tmp")

         #load
         loaded_model = Qwen2VLForConditionalGeneration.from_pretrained("transformers_vlm_tmp")
-        assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
-
+        if Version(transformers.__version__) >= Version("4.52"):
+            assert isinstance(loaded_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
+        else:
+            assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
         # phi-3-vision-128k-instruct, disable as CI consumes too much time
         # woq_config = AutoRoundConfig(
         #     bits=4,
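
For context, the version gate is needed because transformers 4.52 moved the text decoder of multimodal models such as Qwen2-VL under a dedicated `language_model` submodule, so the quantized `k_proj` layer now lives at `model.model.language_model.layers` instead of `model.model.layers`. Below is a minimal sketch of how the two assert branches could be factored into one helper; the helper name `get_decoder_layers` is illustrative and not part of the test file.

```python
from packaging.version import Version

import transformers


def get_decoder_layers(model):
    """Return the decoder layer list for a Qwen2-VL style model.

    transformers >= 4.52 nests the text decoder of multimodal models under
    ``model.model.language_model``; older releases expose ``model.model.layers``
    directly, which is what the two assert branches in the diff account for.
    """
    if Version(transformers.__version__) >= Version("4.52"):
        return model.model.language_model.layers
    return model.model.layers


# A test could then collapse both branches into one assertion, e.g.:
# k_proj = get_decoder_layers(woq_model)[0].self_attn.k_proj
# assert isinstance(k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
```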