|
29 | 29 | import pytest |
30 | 30 | import numpy as np |
31 | 31 | import torch |
| 32 | +from PIL import Image |
32 | 33 | from parameterized import parameterized |
33 | 34 | import nncf |
34 | 35 | from transformers import ( |
35 | 36 | AutoModelForQuestionAnswering, |
36 | 37 | AutoTokenizer, |
37 | 38 | AutoProcessor, |
| 39 | + AutoConfig, |
| 40 | + GenerationConfig, |
38 | 41 | ) |
39 | 42 | from transformers.testing_utils import slow |
40 | 43 | from transformers.utils.quantization_config import QuantizationMethod |
|
76 | 79 | _DEFAULT_4BIT_WQ_CONFIG, |
77 | 80 | _quantization_config_from_dict, |
78 | 81 | ) |
| 82 | +from optimum.intel.openvino.modeling_visual_language import _OVNanoLlavaForCausalLM |
79 | 83 | from optimum.intel.openvino.utils import TemporaryDirectory |
80 | 84 | from copy import deepcopy |
81 | 85 |
|
@@ -574,50 +578,11 @@ def test_ov_model_static_quantization_with_auto_dataset( |
574 | 578 | ov_model = model_cls.from_pretrained(model_id, quantization_config=quantization_config) |
575 | 579 | ov_model.save_pretrained(tmp_dir) |
576 | 580 |
|
577 | | - if model_cls in [OVModelForSpeechSeq2Seq, OVModelForSeq2SeqLM]: |
578 | | - if ov_model.decoder_with_past is None: |
579 | | - expected_fake_nodes_per_model.pop("decoder_with_past", None) |
580 | | - expected_num_weight_nodes_per_model.pop("decoder_with_past", None) |
581 | | - |
582 | | - if model_cls == OVModelForSpeechSeq2Seq: |
583 | | - input_features = torch.randn((1, ov_model.config.num_mel_bins, 3000), dtype=torch.float32) |
584 | | - ov_model.generate(input_features) |
585 | | - else: |
586 | | - tokenizer = AutoTokenizer.from_pretrained(model_id) |
587 | | - inputs = tokenizer("This is a sample <mask>", return_tensors="pt") |
588 | | - ov_model.generate(**inputs) |
589 | | - elif model_cls in (OVModelForCausalLM, OVModelForFeatureExtraction, OVModelForMaskedLM): |
590 | | - tokenizer = AutoTokenizer.from_pretrained(model_id) |
591 | | - if tokenizer.pad_token is None: |
592 | | - tokenizer.pad_token = tokenizer.eos_token |
593 | | - tokens = tokenizer("This is a sample <mask>", return_tensors="pt") |
594 | | - ov_model(**tokens) |
595 | | - elif model_cls in ( |
596 | | - OVStableDiffusionPipeline, |
597 | | - OVStableDiffusionXLPipeline, |
598 | | - OVLatentConsistencyModelPipeline, |
599 | | - ): |
600 | | - ov_model(prompt="A text-to-image prompt") |
601 | | - elif model_cls == OVSentenceTransformer: |
602 | | - ov_model.encode(["This is a sample input"]) |
603 | | - elif model_cls == OVModelForZeroShotImageClassification: |
604 | | - processor = AutoProcessor.from_pretrained(model_id) |
605 | | - image = np.random.rand(224, 224, 3).astype(np.uint8) |
606 | | - inputs = processor(text=["This is a sample text"], images=image, return_tensors="pt") |
607 | | - ov_model(**inputs) |
608 | | - elif model_cls == OVModelForVisualCausalLM: |
609 | | - processor = AutoProcessor.from_pretrained(model_id) |
610 | | - image = np.random.rand(224, 224, 3).astype(np.uint8) |
611 | | - inputs = ov_model.preprocess_inputs(image=image, text="This is a sample text", processor=processor) |
612 | | - ov_model(**inputs) |
613 | | - elif model_cls == OVSamModel: |
614 | | - processor = AutoProcessor.from_pretrained(model_id) |
615 | | - image = np.random.rand(224, 224, 3).astype(np.uint8) |
616 | | - inputs = processor(image, input_points=[[[0, 0]]], return_tensors="pt") |
617 | | - ov_model(**inputs) |
618 | | - else: |
619 | | - raise Exception("Unexpected model class.") |
| 581 | + check_model_inference(ov_model, model_id, trust_remote_code=False) |
620 | 582 |
|
| 583 | + if model_cls in [OVModelForSpeechSeq2Seq, OVModelForSeq2SeqLM] and ov_model.decoder_with_past is None: |
| 584 | + expected_fake_nodes_per_model.pop("decoder_with_past", None) |
| 585 | + expected_num_weight_nodes_per_model.pop("decoder_with_past", None) |
621 | 586 | check_compression_state_per_model( |
622 | 587 | self, |
623 | 588 | ov_model.ov_submodels, |
@@ -1311,6 +1276,9 @@ def test_ovmodel_4bit_auto_compression_with_config( |
1311 | 1276 | check_compression_state_per_model(self, submodels, expected_num_weight_nodes_per_model) |
1312 | 1277 |
|
1313 | 1278 | model.save_pretrained(tmp_dir) |
| 1279 | + model = model_cls.from_pretrained(tmp_dir, trust_remote_code=trust_remote_code) |
| 1280 | + check_model_inference(model, model_id, trust_remote_code) |
| 1281 | + |
1314 | 1282 | # At the moment the first model in the list is the only one we apply data-aware compression to |
1315 | 1283 | wc_rt_info = next(iter(submodels.values())).get_rt_info()["nncf"]["weight_compression"] |
1316 | 1284 | self.assertEqual(quantization_config.quant_method.lower() == "awq", wc_rt_info["awq"].value == "True") |
@@ -1728,6 +1696,7 @@ def eval_expression_if_possible(expression): |
1728 | 1696 | model.save_pretrained(tmp_dir) |
1729 | 1697 |
|
1730 | 1698 | model = model_cls.from_pretrained(tmp_dir, trust_remote_code=trust_remote_code) |
| 1699 | + check_model_inference(model, model_id, trust_remote_code) |
1731 | 1700 | check_compression_state_per_model( |
1732 | 1701 | self, model.ov_submodels, expected_num_weight_nodes_per_model, expected_fake_nodes_per_model |
1733 | 1702 | ) |
@@ -2260,3 +2229,64 @@ def check_optimization_not_applicable_to_optimized_model(model, quantization_con |
2260 | 2229 | match="Cannot apply optimization to the model because it was already optimized with the following config", |
2261 | 2230 | ): |
2262 | 2231 | quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config)) |
| 2232 | + |
| 2233 | + |
| 2234 | +def check_model_inference(ov_model, model_id, trust_remote_code): |
| 2235 | + if isinstance(ov_model, (OVModelForSpeechSeq2Seq, OVModelForSeq2SeqLM)): |
| 2236 | + gen_config = GenerationConfig( |
| 2237 | + max_new_tokens=10, |
| 2238 | + min_new_tokens=10, |
| 2239 | + num_beams=2, |
| 2240 | + do_sample=False, |
| 2241 | + eos_token_id=None, |
| 2242 | + ) |
| 2243 | + if isinstance(ov_model, OVModelForSpeechSeq2Seq): |
| 2244 | + input_features = torch.randn((1, ov_model.config.num_mel_bins, 3000), dtype=torch.float32) |
| 2245 | +            generate_kwargs = {} |
| 2246 | +            if is_transformers_version(">=", "4.50"): |
| 2247 | +                generate_kwargs = {"use_model_defaults": False} |
| 2248 | +            ov_model.generate(input_features, generation_config=gen_config, **generate_kwargs) |
| 2249 | + else: |
| 2250 | + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) |
| 2251 | + inputs = tokenizer("This is a sample <mask>", return_tensors="pt") |
| 2252 | + ov_model.generate(**inputs, generation_config=gen_config) |
| 2253 | + elif isinstance(ov_model, (OVModelForCausalLM, OVModelForFeatureExtraction, OVModelForMaskedLM)): |
| 2254 | + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) |
| 2255 | + if tokenizer.pad_token is None: |
| 2256 | + tokenizer.pad_token = tokenizer.eos_token |
| 2257 | + tokens = tokenizer("This is a sample <mask>", return_tensors="pt") |
| 2258 | + ov_model(**tokens) |
| 2259 | + elif isinstance( |
| 2260 | + ov_model, |
| 2261 | + ( |
| 2262 | + OVStableDiffusionPipeline, |
| 2263 | + OVStableDiffusion3Pipeline, |
| 2264 | + OVStableDiffusionXLPipeline, |
| 2265 | + OVLatentConsistencyModelPipeline, |
| 2266 | + ), |
| 2267 | + ): |
| 2268 | + ov_model(prompt="A text-to-image prompt") |
| 2269 | + elif isinstance(ov_model, OVSentenceTransformer): |
| 2270 | + ov_model.encode(["This is a sample input"]) |
| 2271 | + elif isinstance(ov_model, OVModelForZeroShotImageClassification): |
| 2272 | + processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code) |
| 2273 | + image = np.random.rand(224, 224, 3).astype(np.uint8) |
| 2274 | + inputs = processor(text=["This is a sample text"], images=image, return_tensors="pt") |
| 2275 | + ov_model(**inputs) |
| 2276 | + elif isinstance(ov_model, OVModelForVisualCausalLM): |
| 2277 | + config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code) |
| 2278 | + processor_id = config.mm_vision_tower if isinstance(ov_model, _OVNanoLlavaForCausalLM) else model_id |
| 2279 | + processor = AutoProcessor.from_pretrained(processor_id, trust_remote_code=trust_remote_code) |
| 2280 | + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) |
| 2281 | + image = Image.fromarray(np.random.rand(224, 224, 3).astype(np.uint8)) |
| 2282 | + inputs = ov_model.preprocess_inputs( |
| 2283 | + image=image, text="This is a sample text", processor=processor, tokenizer=tokenizer, config=config |
| 2284 | + ) |
| 2285 | + ov_model(**inputs) |
| 2286 | + elif isinstance(ov_model, OVSamModel): |
| 2287 | + processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=trust_remote_code) |
| 2288 | + image = np.random.rand(224, 224, 3).astype(np.uint8) |
| 2289 | + inputs = processor(image, input_points=[[[0, 0]]], return_tensors="pt") |
| 2290 | + ov_model(**inputs) |
| 2291 | + else: |
| 2292 | + raise Exception("Unexpected model class.") |