From 88b017cd813a2de7d1fa5b30da5b404ba7525058 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 16 May 2025 12:20:26 +0400
Subject: [PATCH 1/3] allow passing ov_config to from_pretrained

---
 optimum/intel/openvino/modeling_base.py       | 12 +++++----
 optimum/intel/openvino/modeling_decoder.py    | 12 +++++----
 optimum/intel/openvino/modeling_diffusion.py  | 15 ++++++-----
 optimum/intel/openvino/modeling_open_clip.py  | 25 +++++++++++--------
 optimum/intel/openvino/modeling_seq2seq.py    | 12 +++++----
 .../openvino/modeling_visual_language.py      | 14 ++++++-----
 6 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 2171b930f5..3f5f3ea548 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -619,11 +619,13 @@ def _from_transformers(
         )
         compile_only = False
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         variant = kwargs.pop("variant", None)
 
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 9ddac824ac..11ff3f8527 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -306,11 +306,13 @@ def _from_transformers(
         if use_cache:
             task = task + "-with-past"
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_export_config = None
-        else:
-            ov_export_config = OVConfig(dtype="auto")
+        ov_export_config = kwargs.get("ov_config")
+        if ov_export_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_export_config = None
+            else:
+                ov_export_config = OVConfig(dtype="auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
 
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index e2984c821f..614995876a 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -605,12 +605,15 @@ def _from_transformers(
         )
         compile_only = False
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set
-        # to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="auto")
+        ov_config = kwargs.get("ov_config")
+
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set
+            # to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="auto")
 
         torch_dtype = kwargs.pop("torch_dtype", None)
 
diff --git a/optimum/intel/openvino/modeling_open_clip.py b/optimum/intel/openvino/modeling_open_clip.py
index ef00c182e5..bb909a705b 100644
--- a/optimum/intel/openvino/modeling_open_clip.py
+++ b/optimum/intel/openvino/modeling_open_clip.py
@@ -243,11 +243,13 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_text": model.text}
@@ -368,11 +370,14 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_vision": model.visual}
diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index 8a11bc1c3c..8813af51e4 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -602,11 +602,13 @@ def _from_transformers(
             "Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
         )
         compile_only = False
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         stateful = kwargs.get("stateful", True)
         variant = kwargs.pop("variant", None)
diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 78bb3916d0..caa1fb98d4 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -660,12 +660,14 @@ def _from_transformers(
         if task is None:
             task = cls.export_feature
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            # Export in fp32 if compression won't be applied later
-            ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                # Export in fp32 if compression won't be applied later
+                ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
         variant = kwargs.pop("variant", None)

From a3bbaffeb58e73f7ad17171bee93714b83d9cca8 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 16 May 2025 12:40:52 +0400
Subject: [PATCH 2/3] refactoring

---
 optimum/intel/openvino/modeling_base.py       | 10 +++------
 optimum/intel/openvino/modeling_decoder.py    | 10 +++------
 optimum/intel/openvino/modeling_diffusion.py  | 12 +++--------
 optimum/intel/openvino/modeling_open_clip.py  | 21 ++++++-------------
 optimum/intel/openvino/modeling_seq2seq.py    | 10 +++------
 .../openvino/modeling_visual_language.py      | 12 ++++-------
 6 files changed, 22 insertions(+), 53 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 3f5f3ea548..fd65a03717 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -619,13 +619,9 @@ def _from_transformers(
         )
         compile_only = False
 
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         variant = kwargs.pop("variant", None)
 
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 11ff3f8527..5b6ed01752 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -306,13 +306,9 @@ def _from_transformers(
         if use_cache:
             task = task + "-with-past"
 
-        ov_export_config = kwargs.get("ov_config")
-        if ov_export_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_export_config = None
-            else:
-                ov_export_config = OVConfig(dtype="auto")
+        ov_export_config = kwargs.get("ov_export_config")
+        if ov_export_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_export_config = OVConfig(dtype="auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
 
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 614995876a..e982ac54e5 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -605,15 +605,9 @@ def _from_transformers(
         )
         compile_only = False
 
-        ov_config = kwargs.get("ov_config")
-
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set
-            # to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="auto")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="auto")
 
         torch_dtype = kwargs.pop("torch_dtype", None)
 
diff --git a/optimum/intel/openvino/modeling_open_clip.py b/optimum/intel/openvino/modeling_open_clip.py
index bb909a705b..4ebab09338 100644
--- a/optimum/intel/openvino/modeling_open_clip.py
+++ b/optimum/intel/openvino/modeling_open_clip.py
@@ -243,13 +243,9 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_text": model.text}
@@ -370,14 +366,9 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        ov_config = kwargs.get("ov_config")
-
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_vision": model.visual}
diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index 8813af51e4..c1db686de4 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -602,13 +602,9 @@ def _from_transformers(
             "Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
         )
         compile_only = False
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         stateful = kwargs.get("stateful", True)
         variant = kwargs.pop("variant", None)
diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index caa1fb98d4..04450f701e 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -660,14 +660,10 @@ def _from_transformers(
         if task is None:
             task = cls.export_feature
 
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                # Export in fp32 if compression won't be applied later
-                ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            # Export in fp32 if compression won't be applied later
+            ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
         variant = kwargs.pop("variant", None)

From 206cbf9ab4a12a8d3e5f3278a798a1f6a9e2c0d5 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 16 May 2025 15:22:30 +0400
Subject: [PATCH 3/3] add tests

---
 tests/openvino/test_modeling.py | 248 ++++++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index e1da1395d0..a503175a7d 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -104,6 +104,7 @@
     OVStableDiffusionPipeline,
 )
 from optimum.intel.openvino import OV_DECODER_NAME, OV_DECODER_WITH_PAST_NAME, OV_ENCODER_NAME, OV_XML_FILE_NAME
+from optimum.intel.openvino.configuration import OVConfig
 from optimum.intel.openvino.modeling_base import OVBaseModel
 from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder
 from optimum.intel.openvino.modeling_timm import TimmImageProcessor
@@ -863,6 +864,17 @@ def test_pipeline(self, model_arch):
         del pipe
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForSequenceClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForQuestionAnsweringIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -946,6 +958,17 @@ def test_metric(self):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForQuestionAnswering.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForTokenClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1018,6 +1041,17 @@ def test_default_token_type_ids(self):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForTokenClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForFeatureExtractionIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1090,6 +1124,17 @@ def test_sentence_transformers_pipeline(self, model_arch):
             OVModelForFeatureExtraction.from_pretrained(save_dir)
         self.assertIn("Please use `OVSentenceTransformer`", str(context.exception))
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForFeatureExtraction.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1728,6 +1773,22 @@ def test_load_with_different_dtype(self):
                 f"values are not close for {dtype if dtype is not None else 'None'}, max diff = {torch.abs(ov_logits - ref_logits).max()}",
             )
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["gpt2"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForCausalLM.from_pretrained(
+                    model_id,
+                    export=True,
+                    ov_config=F32_CONFIG,
+                    compile=False,
+                    ov_export_config=ov_config,
+                    use_cache=False,
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1801,6 +1862,17 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForMaskedLM.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForImageClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1903,6 +1975,17 @@ def test_timm_save_and_infer(self, model_id):
            model(pixel_values=torch.zeros((5, 3, model.config.image_size, model.config.image_size)))
        gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["beit"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForImageClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -2069,6 +2152,17 @@ def test_compare_with_and_without_past_key_values(self):
         del model_without_pkv
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bart"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForSeq2SeqLM.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForAudioClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -2140,6 +2234,17 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForAudioClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForCTCIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = [
@@ -2196,6 +2301,17 @@ def test_compare_to_transformers(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForCTC.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForAudioXVectorIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = [
@@ -2250,6 +2366,17 @@ def test_compare_to_transformers(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForAudioXVector.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForAudioFrameClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = [
@@ -2301,6 +2428,17 @@ def test_compare_to_transformers(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForAudioFrameClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForPix2StructIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["pix2struct"]
@@ -2394,6 +2532,17 @@ def test_compare_with_and_without_past_key_values(self):
         del model_without_pkv
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["pix2struct"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForPix2Struct.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["llava"]
@@ -2792,6 +2941,20 @@ def test_model_can_be_loaded_after_saving(self, model_arch):
             )
         self.assertIsInstance(ov_restored_model, type(ov_model))
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["llava"]
+        ov_config = OVConfig(dtype="fp16")
+        with (
+            unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save,
+            unittest.mock.patch("openvino.Core.read_model"),
+            unittest.mock.patch("optimum.intel.openvino.modeling_visual_language.OVModelWithEmbedForCausalLM"),
+        ):
+            OVModelForVisualCausalLM.from_pretrained(
+                model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+            )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForSpeechSeq2SeqIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("whisper",)
@@ -2882,6 +3045,17 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["whisper"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForSpeechSeq2Seq.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForVision2SeqIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["vision-encoder-decoder", "trocr", "donut"]
@@ -2991,6 +3165,17 @@ def test_pipeline(self, model_arch: str):
 
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["vision-encoder-decoder"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForVision2Seq.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
@@ -3213,6 +3398,17 @@ def test_functions(self):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["open-clip"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelOpenCLIPForZeroShotImageClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForSTFeatureExtractionIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("st-bert", "st-mpnet")
@@ -3261,6 +3457,17 @@ def test_langchain(self, model_arch):
         output = embedding.embed_query("foo bar")
         self.assertTrue(len(output) > 0)
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["st-bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVSentenceTransformer.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVLangchainTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("gpt2",)
@@ -3383,6 +3590,17 @@ def test_reshape(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["sam"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVSamModel.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForTextToSpeechSeq2SeqIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("speecht5",)
@@ -3448,6 +3666,25 @@ def test_compare_to_transformers(self, model_arch):
         del processor
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["speecht5"]
+        ov_config = OVConfig(dtype="fp16")
+        with (
+            unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save,
+            unittest.mock.patch("openvino.Core.read_model"),
+            unittest.mock.patch("optimum.intel.openvino.modeling_text2speech.OVTextToSpeechEncoder"),
+        ):
+            OVModelForTextToSpeechSeq2Seq.from_pretrained(
+                model_id,
+                vocoder="fxmarty/speecht5-hifigan-tiny",
+                export=True,
+                ov_config=F32_CONFIG,
+                compile=False,
+                ov_export_config=ov_config,
+            )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForZeroShotImageClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["clip"]
@@ -3499,3 +3736,14 @@ def test_compare_to_transformers(self, model_arch):
         del transformers_model
         del ov_model
         gc.collect()
+
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["clip"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForZeroShotImageClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
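-- 
Usage sketch for reviewers: with this series applied, export precision can be
pinned explicitly through the new `ov_export_config` keyword, which
`_from_transformers` reads from kwargs, while the existing `ov_config` keyword
keeps its current meaning (runtime/compilation properties). A minimal example;
the model id and dtype choices below are illustrative only:

    from optimum.intel import OVModelForCausalLM
    from optimum.intel.openvino.configuration import OVConfig

    model = OVModelForCausalLM.from_pretrained(
        "gpt2",
        export=True,
        # export-time conversion config, consumed by _from_transformers
        ov_export_config=OVConfig(dtype="fp16"),
        # runtime properties still go through the existing ov_config kwarg
        ov_config={"INFERENCE_PRECISION_HINT": "f32"},
        compile=False,
    )

When `ov_export_config` is omitted, behavior is unchanged: the export config
is derived from `load_in_8bit`/`quantization_config` as before.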