From 88b017cd813a2de7d1fa5b30da5b404ba7525058 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 16 May 2025 12:20:26 +0400
Subject: [PATCH 1/3] allow passing ov_config to from_pretrained

---
 optimum/intel/openvino/modeling_base.py       | 12 +++++----
 optimum/intel/openvino/modeling_decoder.py    | 12 +++++----
 optimum/intel/openvino/modeling_diffusion.py  | 15 ++++++-----
 optimum/intel/openvino/modeling_open_clip.py  | 25 +++++++++++--------
 optimum/intel/openvino/modeling_seq2seq.py    | 12 +++++----
 .../openvino/modeling_visual_language.py      | 14 ++++++-----
 6 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 2171b930f5..3f5f3ea548 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -619,11 +619,13 @@ def _from_transformers(
         )
         compile_only = False
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         variant = kwargs.pop("variant", None)
 
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 9ddac824ac..11ff3f8527 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -306,11 +306,13 @@ def _from_transformers(
         if use_cache:
             task = task + "-with-past"
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_export_config = None
-        else:
-            ov_export_config = OVConfig(dtype="auto")
+        ov_export_config = kwargs.get("ov_config")
+        if ov_export_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_export_config = None
+            else:
+                ov_export_config = OVConfig(dtype="auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
 
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index e2984c821f..614995876a 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -605,12 +605,15 @@ def _from_transformers(
         )
         compile_only = False
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set
-        # to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="auto")
+        ov_config = kwargs.get("ov_config")
+
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set
+            # to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="auto")
 
         torch_dtype = kwargs.pop("torch_dtype", None)
 
diff --git a/optimum/intel/openvino/modeling_open_clip.py b/optimum/intel/openvino/modeling_open_clip.py
index ef00c182e5..bb909a705b 100644
--- a/optimum/intel/openvino/modeling_open_clip.py
+++ b/optimum/intel/openvino/modeling_open_clip.py
@@ -243,11 +243,13 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_text": model.text}
@@ -368,11 +370,14 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_vision": model.visual}
diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index 8a11bc1c3c..8813af51e4 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -602,11 +602,13 @@ def _from_transformers(
             "Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
         )
         compile_only = False
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                ov_config = OVConfig(dtype="fp32")
 
         stateful = kwargs.get("stateful", True)
         variant = kwargs.pop("variant", None)
diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 78bb3916d0..caa1fb98d4 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -660,12 +660,14 @@ def _from_transformers(
         if task is None:
             task = cls.export_feature
 
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
-            # Export in fp32 if compression won't be applied later
-            ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
+        ov_config = kwargs.get("ov_config")
+        if ov_config is None:
+            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
+            if load_in_8bit is None and not quantization_config:
+                ov_config = None
+            else:
+                # Export in fp32 if compression won't be applied later
+                ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
         variant = kwargs.pop("variant", None)

From a3bbaffeb58e73f7ad17171bee93714b83d9cca8 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 16 May 2025 12:40:52 +0400
Subject: [PATCH 2/3] refactoring

---
 optimum/intel/openvino/modeling_base.py       | 10 +++------
 optimum/intel/openvino/modeling_decoder.py    | 10 +++------
 optimum/intel/openvino/modeling_diffusion.py  | 12 +++--------
 optimum/intel/openvino/modeling_open_clip.py  | 21 ++++++-------------
 optimum/intel/openvino/modeling_seq2seq.py    | 10 +++------
 .../openvino/modeling_visual_language.py      | 12 ++++-------
 6 files changed, 22 insertions(+), 53 deletions(-)

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
index 3f5f3ea548..fd65a03717 100644
--- a/optimum/intel/openvino/modeling_base.py
+++ b/optimum/intel/openvino/modeling_base.py
@@ -619,13 +619,9 @@ def _from_transformers(
         )
         compile_only = False
 
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         variant = kwargs.pop("variant", None)
 
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index 11ff3f8527..5b6ed01752 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -306,13 +306,9 @@ def _from_transformers(
         if use_cache:
             task = task + "-with-past"
 
-        ov_export_config = kwargs.get("ov_config")
-        if ov_export_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_export_config = None
-            else:
-                ov_export_config = OVConfig(dtype="auto")
+        ov_export_config = kwargs.get("ov_export_config")
+        if ov_export_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_export_config = OVConfig(dtype="auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
 
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 614995876a..e982ac54e5 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -605,15 +605,9 @@ def _from_transformers(
         )
         compile_only = False
 
-        ov_config = kwargs.get("ov_config")
-
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set
-            # to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="auto")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="auto")
 
         torch_dtype = kwargs.pop("torch_dtype", None)
 
diff --git a/optimum/intel/openvino/modeling_open_clip.py b/optimum/intel/openvino/modeling_open_clip.py
index bb909a705b..4ebab09338 100644
--- a/optimum/intel/openvino/modeling_open_clip.py
+++ b/optimum/intel/openvino/modeling_open_clip.py
@@ -243,13 +243,9 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_text": model.text}
@@ -370,14 +366,9 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir
 
-        ov_config = kwargs.get("ov_config")
-
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         def fn_get_submodels(model):
             return {"model_vision": model.visual}
diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py
index 8813af51e4..c1db686de4 100644
--- a/optimum/intel/openvino/modeling_seq2seq.py
+++ b/optimum/intel/openvino/modeling_seq2seq.py
@@ -602,13 +602,9 @@ def _from_transformers(
             "Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
         )
         compile_only = False
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                ov_config = OVConfig(dtype="fp32")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            ov_config = OVConfig(dtype="fp32")
 
         stateful = kwargs.get("stateful", True)
         variant = kwargs.pop("variant", None)
diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index caa1fb98d4..04450f701e 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -660,14 +660,10 @@ def _from_transformers(
         if task is None:
             task = cls.export_feature
 
-        ov_config = kwargs.get("ov_config")
-        if ov_config is None:
-            # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-            if load_in_8bit is None and not quantization_config:
-                ov_config = None
-            else:
-                # Export in fp32 if compression won't be applied later
-                ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
+            # Export in fp32 if compression won't be applied later
+            ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
 
         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
         variant = kwargs.pop("variant", None)

From 206cbf9ab4a12a8d3e5f3278a798a1f6a9e2c0d5 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Fri, 16 May 2025 15:22:30 +0400
Subject: [PATCH 3/3] add tests

---
 tests/openvino/test_modeling.py | 248 ++++++++++++++++++++++++++++++++
 1 file changed, 248 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index e1da1395d0..a503175a7d 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -104,6 +104,7 @@
     OVStableDiffusionPipeline,
 )
 from optimum.intel.openvino import OV_DECODER_NAME, OV_DECODER_WITH_PAST_NAME, OV_ENCODER_NAME, OV_XML_FILE_NAME
+from optimum.intel.openvino.configuration import OVConfig
 from optimum.intel.openvino.modeling_base import OVBaseModel
 from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder
 from optimum.intel.openvino.modeling_timm import TimmImageProcessor
@@ -863,6 +864,17 @@ def test_pipeline(self, model_arch):
         del pipe
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForSequenceClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForQuestionAnsweringIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -946,6 +958,17 @@ def test_metric(self):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForQuestionAnswering.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForTokenClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1018,6 +1041,17 @@ def test_default_token_type_ids(self):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForTokenClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForFeatureExtractionIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1090,6 +1124,17 @@ def test_sentence_transformers_pipeline(self, model_arch):
             OVModelForFeatureExtraction.from_pretrained(save_dir)
         self.assertIn("Please use `OVSentenceTransformer`", str(context.exception))
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForFeatureExtraction.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1728,6 +1773,22 @@ def test_load_with_different_dtype(self):
                 f"values are not close for {dtype if dtype is not None else 'None'}, max diff = {torch.abs(ov_logits - ref_logits).max()}",
             )
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["gpt2"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForCausalLM.from_pretrained(
+                    model_id,
+                    export=True,
+                    ov_config=F32_CONFIG,
+                    compile=False,
+                    ov_export_config=ov_config,
+                    use_cache=False,
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1801,6 +1862,17 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForMaskedLM.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForImageClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -1903,6 +1975,17 @@ def test_timm_save_and_infer(self, model_id):
            model(pixel_values=torch.zeros((5, 3, model.config.image_size, model.config.image_size)))
        gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["beit"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForImageClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -2069,6 +2152,17 @@ def test_compare_with_and_without_past_key_values(self):
         del model_without_pkv
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["bart"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForSeq2SeqLM.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForAudioClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (
@@ -2140,6 +2234,17 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForAudioClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForCTCIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = [
@@ -2196,6 +2301,17 @@ def test_compare_to_transformers(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForCTC.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForAudioXVectorIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = [
@@ -2250,6 +2366,17 @@ def test_compare_to_transformers(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForAudioXVector.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForAudioFrameClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = [
@@ -2301,6 +2428,17 @@ def test_compare_to_transformers(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["data2vec_audio"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForAudioFrameClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForPix2StructIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["pix2struct"]
@@ -2394,6 +2532,17 @@ def test_compare_with_and_without_past_key_values(self):
         del model_without_pkv
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["pix2struct"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForPix2Struct.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["llava"]
@@ -2792,6 +2941,20 @@ def test_model_can_be_loaded_after_saving(self, model_arch):
             )
         self.assertIsInstance(ov_restored_model, type(ov_model))
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["llava"]
+        ov_config = OVConfig(dtype="fp16")
+        with (
+            unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save,
+            unittest.mock.patch("openvino.Core.read_model"),
+            unittest.mock.patch("optimum.intel.openvino.modeling_visual_language.OVModelWithEmbedForCausalLM"),
+        ):
+            OVModelForVisualCausalLM.from_pretrained(
+                model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+            )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForSpeechSeq2SeqIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("whisper",)
@@ -2882,6 +3045,17 @@ def test_pipeline(self, model_arch):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["whisper"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForSpeechSeq2Seq.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForVision2SeqIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["vision-encoder-decoder", "trocr", "donut"]
@@ -2991,6 +3165,17 @@ def test_pipeline(self, model_arch: str):
 
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["vision-encoder-decoder"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForVision2Seq.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
@@ -3213,6 +3398,17 @@ def test_functions(self):
         del model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["open-clip"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelOpenCLIPForZeroShotImageClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForSTFeatureExtractionIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("st-bert", "st-mpnet")
@@ -3261,6 +3457,17 @@ def test_langchain(self, model_arch):
         output = embedding.embed_query("foo bar")
         self.assertTrue(len(output) > 0)
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["st-bert"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVSentenceTransformer.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVLangchainTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("gpt2",)
@@ -3383,6 +3590,17 @@ def test_reshape(self, model_arch):
         del ov_model
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["sam"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVSamModel.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForTextToSpeechSeq2SeqIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ("speecht5",)
@@ -3448,6 +3666,25 @@ def test_compare_to_transformers(self, model_arch):
         del processor
         gc.collect()
 
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["speecht5"]
+        ov_config = OVConfig(dtype="fp16")
+        with (
+            unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save,
+            unittest.mock.patch("openvino.Core.read_model"),
+            unittest.mock.patch("optimum.intel.openvino.modeling_text2speech.OVTextToSpeechEncoder"),
+        ):
+            OVModelForTextToSpeechSeq2Seq.from_pretrained(
+                model_id,
+                vocoder="fxmarty/speecht5-hifigan-tiny",
+                export=True,
+                ov_config=F32_CONFIG,
+                compile=False,
+                ov_export_config=ov_config,
+            )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
+
 
 class OVModelForZeroShotImageClassificationIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["clip"]
@@ -3499,3 +3736,14 @@ def test_compare_to_transformers(self, model_arch):
         del transformers_model
         del ov_model
         gc.collect()
+
+    def test_model_loading_with_ov_export_config(self):
+        model_id = MODEL_NAMES["clip"]
+        ov_config = OVConfig(dtype="fp16")
+        with unittest.mock.patch("optimum.exporters.openvino.convert._save_model") as mock_save:
+            with unittest.mock.patch("openvino.Core.read_model"):
+                OVModelForZeroShotImageClassification.from_pretrained(
+                    model_id, export=True, ov_config=F32_CONFIG, compile=False, ov_export_config=ov_config
+                )
+        for call in mock_save.call_args_list:
+            self.assertEqual(call.kwargs["ov_config"], ov_config)
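-- 
Usage sketch for reviewers: with this series applied, export precision can be
pinned explicitly through the new `ov_export_config` keyword, which
`_from_transformers` reads from kwargs, while the existing `ov_config` keyword
keeps its current meaning (runtime/compilation properties). A minimal example;
the model id and dtype choices below are illustrative only:

    from optimum.intel import OVModelForCausalLM
    from optimum.intel.openvino.configuration import OVConfig

    model = OVModelForCausalLM.from_pretrained(
        "gpt2",
        export=True,
        # export-time conversion config, consumed by _from_transformers
        ov_export_config=OVConfig(dtype="fp16"),
        # runtime properties still go through the existing ov_config kwarg
        ov_config={"INFERENCE_PRECISION_HINT": "f32"},
        compile=False,
    )

When `ov_export_config` is omitted, behavior is unchanged: the export config
is derived from `load_in_8bit`/`quantization_config` as before.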