6 changes: 2 additions & 4 deletions optimum/intel/openvino/modeling_base.py
@@ -619,10 +619,8 @@ def _from_transformers(
             )
             compile_only = False

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
             ov_config = OVConfig(dtype="fp32")

         variant = kwargs.pop("variant", None)
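Note: all six files in this PR apply the same pattern, so here is a minimal standalone sketch of the new resolution order. The helper name resolve_export_config and its simplified signature are illustrative only; in the PR this logic lives inline in each `_from_transformers` classmethod.

from typing import Optional

from optimum.intel import OVConfig


def resolve_export_config(
    load_in_8bit: Optional[bool],
    quantization_config=None,
    **kwargs,
) -> Optional[OVConfig]:
    # An explicitly passed ov_export_config kwarg always takes precedence.
    ov_config = kwargs.get("ov_export_config")
    # If load_in_8bit or quantization_config was given, fall back to an
    # explicit OVConfig instead of leaving the choice to the converter.
    if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
        ov_config = OVConfig(dtype="fp32")
    # Otherwise return None; the converter then picks a default depending
    # on the model size, as the removed comment described.
    return ov_config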
6 changes: 2 additions & 4 deletions optimum/intel/openvino/modeling_decoder.py
@@ -306,10 +306,8 @@ def _from_transformers(
         if use_cache:
             task = task + "-with-past"

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_export_config = None
-        else:
+        ov_export_config = kwargs.get("ov_export_config")
+        if ov_export_config is None and (load_in_8bit is not None or quantization_config is not None):
             ov_export_config = OVConfig(dtype="auto")

         stateful = kwargs.pop("stateful", ensure_stateful_is_available(warn=False) and use_cache)
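A hedged usage sketch: assuming from_pretrained(..., export=True) forwards extra kwargs down to _from_transformers, a caller can now override the export-time config directly. The model ID below is a placeholder.

from optimum.intel import OVConfig, OVModelForCausalLM

# An explicit ov_export_config wins over the fallback logic above.
model = OVModelForCausalLM.from_pretrained(
    "gpt2",  # placeholder model ID
    export=True,
    load_in_8bit=False,
    ov_export_config=OVConfig(dtype="fp32"),
)

Note that the fallback dtype differs per class in this diff: the decoder and diffusion paths use dtype="auto", while the base, open-CLIP, and seq2seq paths use dtype="fp32".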
7 changes: 2 additions & 5 deletions optimum/intel/openvino/modeling_diffusion.py
@@ -605,11 +605,8 @@ def _from_transformers(
             )
             compile_only = False

-        # If load_in_8bit and quantization_config not specified then ov_config is set
-        # to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
             ov_config = OVConfig(dtype="auto")

         torch_dtype = kwargs.pop("torch_dtype", None)
12 changes: 4 additions & 8 deletions optimum/intel/openvino/modeling_open_clip.py
@@ -243,10 +243,8 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
             ov_config = OVConfig(dtype="fp32")

         def fn_get_submodels(model):
@@ -368,10 +366,8 @@ def _from_transformers(
         # would end-up removing the directory containing the underlying OpenVINO model
         cls._model_save_dir_tempdirectory_instance = save_dir

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
             ov_config = OVConfig(dtype="fp32")

         def fn_get_submodels(model):
6 changes: 2 additions & 4 deletions optimum/intel/openvino/modeling_seq2seq.py
@@ -602,10 +602,8 @@ def _from_transformers(
                 "Please provide openvino model obtained using optimum-cli or saved on disk using `save_pretrained`"
             )
             compile_only = False
-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
             ov_config = OVConfig(dtype="fp32")
         stateful = kwargs.get("stateful", True)
         variant = kwargs.pop("variant", None)
6 changes: 2 additions & 4 deletions optimum/intel/openvino/modeling_visual_language.py
@@ -660,10 +660,8 @@ def _from_transformers(
         if task is None:
             task = cls.export_feature

-        # If load_in_8bit and quantization_config not specified then ov_config is set to None and will be set by default in convert depending on the model size
-        if load_in_8bit is None and not quantization_config:
-            ov_config = None
-        else:
+        ov_config = kwargs.get("ov_export_config")
+        if ov_config is None and (load_in_8bit is not None or quantization_config is not None):
             # Export in fp32 if compression won't be applied later
             ov_config = OVConfig(dtype="fp32" if load_in_8bit is False else "auto")

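The visual-language fallback is the only one that picks its dtype from load_in_8bit. A minimal sketch of that choice, with pick_fallback_dtype as a hypothetical helper name, not part of the PR:

from typing import Optional


def pick_fallback_dtype(load_in_8bit: Optional[bool]) -> str:
    # load_in_8bit=False means no compression will be applied later,
    # so export directly in fp32; otherwise defer to "auto".
    return "fp32" if load_in_8bit is False else "auto"


assert pick_fallback_dtype(False) == "fp32"
assert pick_fallback_dtype(True) == "auto"
assert pick_fallback_dtype(None) == "auto"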