Skip to content

Commit 88b017c

Browse files
Commit message: allow passing ov_config to from_pretrained
1 parent 8f9cf89 commit 88b017c

File tree

6 files changed

+53
-37
lines changed

6 files changed

+53
-37
lines changed

optimum/intel/openvino/modeling_base.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -619,11 +619,13 @@ def _from_transformers(
619619
)
620620
compile_only = False
621621

622-
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
623-
if load_in_8bit is None and not quantization_config:
624-
ov_config = None
625-
else:
626-
ov_config = OVConfig(dtype="fp32")
622+
ov_config = kwargs.get("ov_config")
623+
if ov_config is None:
624+
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
625+
if load_in_8bit is None and not quantization_config:
626+
ov_config = None
627+
else:
628+
ov_config = OVConfig(dtype="fp32")
627629

628630
variant = kwargs.pop("variant", None)
629631

optimum/intel/openvino/modeling_decoder.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,13 @@ def _from_transformers(
306306
if use_cache:
307307
task = task + "-with-past"
308308

309-
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
310-
if load_in_8bit is None and not quantization_config:
311-
ov_export_config = None
312-
else:
313-
ov_export_config = OVConfig(dtype="auto")
309+
ov_export_config = kwargs.get("ov_config")
310+
if ov_export_config is None:
311+
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
312+
if load_in_8bit is None and not quantization_config:
313+
ov_export_config = None
314+
else:
315+
ov_export_config = OVConfig(dtype="auto")
314316

315317
stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
316318

optimum/intel/openvino/modeling_diffusion.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -605,12 +605,15 @@ def _from_transformers(
605605
)
606606
compile_only = False
607607

608-
# If load_in_8bit and quantization_config not specified then ov_config is set
609-
# to None and will be set by default in convert depending on the model size
610-
if load_in_8bit is None and not quantization_config:
611-
ov_config = None
612-
else:
613-
ov_config = OVConfig(dtype="auto")
608+
ov_config = kwargs.get("ov_config")
609+
610+
if ov_config is None:
611+
# If load_in_8bit and quantization_config not specified then ov_config is set
612+
# to None and will be set by default in convert depending on the model size
613+
if load_in_8bit is None and not quantization_config:
614+
ov_config = None
615+
else:
616+
ov_config = OVConfig(dtype="auto")
614617

615618
torch_dtype = kwargs.pop("torch_dtype", None)
616619

optimum/intel/openvino/modeling_open_clip.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -243,11 +243,13 @@ def _from_transformers(
243243
# would end-up removing the directory containing the underlying OpenVINO model
244244
cls._model_save_dir_tempdirectory_instance = save_dir
245245

246-
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
247-
if load_in_8bit is None and not quantization_config:
248-
ov_config = None
249-
else:
250-
ov_config = OVConfig(dtype="fp32")
246+
ov_config = kwargs.get("ov_config")
247+
if ov_config is None:
248+
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
249+
if load_in_8bit is None and not quantization_config:
250+
ov_config = None
251+
else:
252+
ov_config = OVConfig(dtype="fp32")
251253

252254
def fn_get_submodels(model):
253255
return {"model_text": model.text}
@@ -368,11 +370,14 @@ def _from_transformers(
368370
# would end-up removing the directory containing the underlying OpenVINO model
369371
cls._model_save_dir_tempdirectory_instance = save_dir
370372

371-
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
372-
if load_in_8bit is None and not quantization_config:
373-
ov_config = None
374-
else:
375-
ov_config = OVConfig(dtype="fp32")
373+
ov_config = kwargs.get("ov_config")
374+
375+
if ov_config is None:
376+
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
377+
if load_in_8bit is None and not quantization_config:
378+
ov_config = None
379+
else:
380+
ov_config = OVConfig(dtype="fp32")
376381

377382
def fn_get_submodels(model):
378383
return {"model_vision": model.visual}

optimum/intel/openvino/modeling_seq2seq.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -602,11 +602,13 @@ def _from_transformers(
602602
"Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
603603
)
604604
compile_only = False
605-
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
606-
if load_in_8bit is None and not quantization_config:
607-
ov_config = None
608-
else:
609-
ov_config = OVConfig(dtype="fp32")
605+
ov_config = kwargs.get("ov_config")
606+
if ov_config is None:
607+
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
608+
if load_in_8bit is None and not quantization_config:
609+
ov_config = None
610+
else:
611+
ov_config = OVConfig(dtype="fp32")
610612
stateful = kwargs.get("stateful", True)
611613
variant = kwargs.pop("variant", None)
612614

optimum/intel/openvino/modeling_visual_language.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -660,12 +660,14 @@ def _from_transformers(
660660
if task is None:
661661
task = cls.export_feature
662662

663-
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
664-
if load_in_8bit is None and not quantization_config:
665-
ov_config = None
666-
else:
667-
# Export in fp32 if compression won't be applied later
668-
ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
663+
ov_config = kwargs.get("ov_config")
664+
if ov_config is None:
665+
# If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
666+
if load_in_8bit is None and not quantization_config:
667+
ov_config = None
668+
else:
669+
# Export in fp32 if compression won't be applied later
670+
ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")
669671

670672
stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
671673
variant = kwargs.pop("variant", None)

0 commit comments

Comments (0)