
Commit 3130e90

[OpenVINO] Add model inference check to weight-only and pipeline quantization testing (#1470)
* Add model inference check to weight-only and pipeline quantization testing
* Fix tests
* Fix tests 2
* Fix nanollava; add generation config
* Add generation config for seq2seqlm
* Merge seq2seq conditions back
* Apply suggestion
1 parent 828fb1f commit 3130e90
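
In short, the commit replaces the inline per-architecture inference checks in test_ov_model_static_quantization_with_auto_dataset with a shared check_model_inference helper (added at the bottom of the diff) and calls it after each quantization round-trip. A minimal sketch of the resulting test pattern, assuming a causal LM; the tiny model id and the 4-bit config here are illustrative, not taken from this commit:

    import tempfile

    from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

    model_id = "hf-internal-testing/tiny-random-gpt2"  # illustrative test checkpoint
    quantization_config = OVWeightQuantizationConfig(bits=4)  # illustrative weight-only config

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Quantize on load, round-trip through disk, then smoke-test inference,
        # as the updated tests now do with the shared helper.
        ov_model = OVModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
        ov_model.save_pretrained(tmp_dir)
        ov_model = OVModelForCausalLM.from_pretrained(tmp_dir)
        check_model_inference(ov_model, model_id, trust_remote_code=False)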

tests/openvino/test_quantization.py

Lines changed: 73 additions & 43 deletions
@@ -29,12 +29,15 @@
 import pytest
 import numpy as np
 import torch
+from PIL import Image
 from parameterized import parameterized
 import nncf
 from transformers import (
     AutoModelForQuestionAnswering,
     AutoTokenizer,
     AutoProcessor,
+    AutoConfig,
+    GenerationConfig,
 )
 from transformers.testing_utils import slow
 from transformers.utils.quantization_config import QuantizationMethod
@@ -76,6 +79,7 @@
     _DEFAULT_4BIT_WQ_CONFIG,
     _quantization_config_from_dict,
 )
+from optimum.intel.openvino.modeling_visual_language import _OVNanoLlavaForCausalLM
 from optimum.intel.openvino.utils import TemporaryDirectory
 from copy import deepcopy
 
@@ -574,50 +578,11 @@ def test_ov_model_static_quantization_with_auto_dataset(
             ov_model = model_cls.from_pretrained(model_id, quantization_config=quantization_config)
             ov_model.save_pretrained(tmp_dir)
 
-            if model_cls in [OVModelForSpeechSeq2Seq, OVModelForSeq2SeqLM]:
-                if ov_model.decoder_with_past is None:
-                    expected_fake_nodes_per_model.pop("decoder_with_past", None)
-                    expected_num_weight_nodes_per_model.pop("decoder_with_past", None)
-
-                if model_cls == OVModelForSpeechSeq2Seq:
-                    input_features = torch.randn((1, ov_model.config.num_mel_bins, 3000), dtype=torch.float32)
-                    ov_model.generate(input_features)
-                else:
-                    tokenizer = AutoTokenizer.from_pretrained(model_id)
-                    inputs = tokenizer("This is a sample <mask>", return_tensors="pt")
-                    ov_model.generate(**inputs)
-            elif model_cls in (OVModelForCausalLM, OVModelForFeatureExtraction, OVModelForMaskedLM):
-                tokenizer = AutoTokenizer.from_pretrained(model_id)
-                if tokenizer.pad_token is None:
-                    tokenizer.pad_token = tokenizer.eos_token
-                tokens = tokenizer("This is a sample <mask>", return_tensors="pt")
-                ov_model(**tokens)
-            elif model_cls in (
-                OVStableDiffusionPipeline,
-                OVStableDiffusionXLPipeline,
-                OVLatentConsistencyModelPipeline,
-            ):
-                ov_model(prompt="A text-to-image prompt")
-            elif model_cls == OVSentenceTransformer:
-                ov_model.encode(["This is a sample input"])
-            elif model_cls == OVModelForZeroShotImageClassification:
-                processor = AutoProcessor.from_pretrained(model_id)
-                image = np.random.rand(224, 224, 3).astype(np.uint8)
-                inputs = processor(text=["This is a sample text"], images=image, return_tensors="pt")
-                ov_model(**inputs)
-            elif model_cls == OVModelForVisualCausalLM:
-                processor = AutoProcessor.from_pretrained(model_id)
-                image = np.random.rand(224, 224, 3).astype(np.uint8)
-                inputs = ov_model.preprocess_inputs(image=image, text="This is a sample text", processor=processor)
-                ov_model(**inputs)
-            elif model_cls == OVSamModel:
-                processor = AutoProcessor.from_pretrained(model_id)
-                image = np.random.rand(224, 224, 3).astype(np.uint8)
-                inputs = processor(image, input_points=[[[0, 0]]], return_tensors="pt")
-                ov_model(**inputs)
-            else:
-                raise Exception("Unexpected model class.")
+            check_model_inference(ov_model, model_id, trust_remote_code=False)
 
+            if model_cls in [OVModelForSpeechSeq2Seq, OVModelForSeq2SeqLM] and ov_model.decoder_with_past is None:
+                expected_fake_nodes_per_model.pop("decoder_with_past", None)
+                expected_num_weight_nodes_per_model.pop("decoder_with_past", None)
             check_compression_state_per_model(
                 self,
                 ov_model.ov_submodels,
@@ -1311,6 +1276,9 @@ def test_ovmodel_4bit_auto_compression_with_config(
             check_compression_state_per_model(self, submodels, expected_num_weight_nodes_per_model)
 
             model.save_pretrained(tmp_dir)
+            model = model_cls.from_pretrained(tmp_dir, trust_remote_code=trust_remote_code)
+            check_model_inference(model, model_id, trust_remote_code)
+
             # At the moment the first model in the list is the only one we apply data-aware compression to
             wc_rt_info = next(iter(submodels.values())).get_rt_info()["nncf"]["weight_compression"]
             self.assertEqual(quantization_config.quant_method.lower() == "awq", wc_rt_info["awq"].value == "True")
@@ -1728,6 +1696,7 @@ def eval_expression_if_possible(expression):
             model.save_pretrained(tmp_dir)
 
             model = model_cls.from_pretrained(tmp_dir, trust_remote_code=trust_remote_code)
+            check_model_inference(model, model_id, trust_remote_code)
             check_compression_state_per_model(
                 self, model.ov_submodels, expected_num_weight_nodes_per_model, expected_fake_nodes_per_model
             )
@@ -2260,3 +2229,64 @@ def check_optimization_not_applicable_to_optimized_model(model, quantization_config):
         match="Cannot apply optimization to the model because it was already optimized with the following config",
     ):
         quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config))
+
+
+def check_model_inference(ov_model, model_id, trust_remote_code):
+    if isinstance(ov_model, (OVModelForSpeechSeq2Seq, OVModelForSeq2SeqLM)):
+        gen_config = GenerationConfig(
+            max_new_tokens=10,
+            min_new_tokens=10,
+            num_beams=2,
+            do_sample=False,
+            eos_token_id=None,
+        )
+        if isinstance(ov_model, OVModelForSpeechSeq2Seq):
+            input_features = torch.randn((1, ov_model.config.num_mel_bins, 3000), dtype=torch.float32)
+            generate_kwrgs = {}
+            if is_transformers_version(">=", "4.50"):
+                generate_kwrgs = {"use_model_defaults": False}
+            ov_model.generate(input_features, generation_config=gen_config, **generate_kwrgs)
+        else:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+            inputs = tokenizer("This is a sample <mask>", return_tensors="pt")
+            ov_model.generate(**inputs, generation_config=gen_config)
+    elif isinstance(ov_model, (OVModelForCausalLM, OVModelForFeatureExtraction, OVModelForMaskedLM)):
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
+        tokens = tokenizer("This is a sample <mask>", return_tensors="pt")
+        ov_model(**tokens)
+    elif isinstance(
+        ov_model,
+        (
+            OVStableDiffusionPipeline,
+            OVStableDiffusion3Pipeline,
+            OVStableDiffusionXLPipeline,
+            OVLatentConsistencyModelPipeline,
+        ),
+    ):
+        ov_model(prompt="A text-to-image prompt")
+    elif isinstance(ov_model, OVSentenceTransformer):
+        ov_model.encode(["This is a sample input"])
+    elif isinstance(ov_model, OVModelForZeroShotImageClassification):
+        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+        image = np.random.rand(224, 224, 3).astype(np.uint8)
+        inputs = processor(text=["This is a sample text"], images=image, return_tensors="pt")
+        ov_model(**inputs)
+    elif isinstance(ov_model, OVModelForVisualCausalLM):
+        config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+        processor_id = config.mm_vision_tower if isinstance(ov_model, _OVNanoLlavaForCausalLM) else model_id
+        processor = AutoProcessor.from_pretrained(processor_id, trust_remote_code=trust_remote_code)
+        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+        image = Image.fromarray(np.random.rand(224, 224, 3).astype(np.uint8))
+        inputs = ov_model.preprocess_inputs(
+            image=image, text="This is a sample text", processor=processor, tokenizer=tokenizer, config=config
+        )
+        ov_model(**inputs)
+    elif isinstance(ov_model, OVSamModel):
+        processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code)
+        image = np.random.rand(224, 224, 3).astype(np.uint8)
+        inputs = processor(image, input_points=[[[0, 0]]], return_tensors="pt")
+        ov_model(**inputs)
+    else:
+        raise Exception("Unexpected model class.")
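
A note on the generation settings: for the seq2seq branches, check_model_inference pins a fixed-length deterministic decode, so the smoke test can neither hang on a model that never emits EOS nor flake across runs. The sketch below restates those values with editorial comments; use_model_defaults is only accepted by transformers >= 4.50, hence the version guard in the helper:

    from transformers import GenerationConfig

    gen_config = GenerationConfig(
        max_new_tokens=10,   # bound the decode...
        min_new_tokens=10,   # ...and fix its length exactly
        num_beams=2,         # exercise the beam-search path
        do_sample=False,     # deterministic across CI runs
        eos_token_id=None,   # never stop early on EOS
    )
    # On transformers >= 4.50, passing use_model_defaults=False prevents generation
    # defaults stored with the checkpoint from overriding this explicit config:
    # ov_model.generate(input_features, generation_config=gen_config, use_model_defaults=False)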
