6 changes: 6 additions & 0 deletions optimum/exporters/openvino/__init__.py
@@ -12,11 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import pkgutil

from . import _compat_sam2  # noqa: F401  # ensures SAM2 patches are applied before model config registration
import optimum.exporters.openvino.model_configs

from .__main__ import main_export
from .convert import export, export_from_model, export_models, export_pytorch_via_onnx
from .stateful import ensure_stateful_is_available, patch_stateful

__path__ = pkgutil.extend_path(__path__, __name__)

__all__ = ["main_export", "export", "export_models"]
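The `_compat_sam2` import is placed ahead of `optimum.exporters.openvino.model_configs` on purpose, so its patches land before any export configs register. A minimal sanity check of the tokenizer shim it installs — only meaningful on transformers releases that dropped MT5Tokenizer, and hedged accordingly:

import optimum.exporters.openvino  # importing the package applies the compat patches

# The legacy alias resolves via the shim when transformers no longer defines it.
from transformers import MT5Tokenizer

print(MT5Tokenizer.__name__)  # "MT5Tokenizer"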
8 changes: 3 additions & 5 deletions optimum/exporters/openvino/__main__.py
@@ -44,6 +44,7 @@
clear_class_registry,
deduce_diffusers_dtype,
load_preprocessors,
resolve_model_type,
)


@@ -274,7 +275,7 @@ def main_export(
do_gptq_patching = quant_method == "gptq"
do_bitnet_patching = quant_method == "bitnet"

model_type = config.model_type
model_type = resolve_model_type(config, task)
if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
custom_architecture = True
if custom_export_configs is None:
@@ -446,10 +447,7 @@ def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
)
model.config.pad_token_id = pad_token_id

if hasattr(model.config, "export_model_type"):
model_type = model.config.export_model_type
else:
model_type = model.config.model_type
model_type = resolve_model_type(model.config, task)

if (
not custom_architecture
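`resolve_model_type` is imported from the exporter utils and its body is not shown in this diff. Inferred from the two branches it replaces and from the `export_model_type_map` installed by `_compat_sam2`, a plausible sketch of its contract might look like the following — the mapping attribute name and the fallback order are assumptions, not the helper's verified implementation:

def resolve_model_type(config, task=None):
    """Hypothetical sketch; the real helper lives in the exporter utils."""
    # Task-specific override, e.g. sam2_video maps feature-extraction and
    # image-segmentation to dedicated export model types.
    mapping = getattr(config, "export_model_type_map", None) or {}
    if task and task in mapping:
        return mapping[task]
    # Explicit export override, mirroring the hasattr() branch this PR removes.
    export_type = getattr(config, "export_model_type", None)
    if export_type is not None:
        return export_type
    # Fall back to the plain Transformers model type.
    return getattr(config, "model_type", None) or ""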
122 changes: 122 additions & 0 deletions optimum/exporters/openvino/_compat_sam2.py
@@ -0,0 +1,122 @@
"""SAM2 compatibility hooks for Optimum OpenVINO exporters."""

from __future__ import annotations

import transformers

try:  # older Transformers releases still expose MT5Tokenizer
    from transformers import MT5Tokenizer  # type: ignore[attr-defined]
except ImportError:  # newer Transformers releases dropped MT5Tokenizer; alias T5Tokenizer instead
    from transformers import T5Tokenizer

    class MT5Tokenizer(T5Tokenizer):  # type: ignore[misc]
        pass

    setattr(transformers, "MT5Tokenizer", MT5Tokenizer)

_SAM2_ERROR_TOKEN = "positional_embedding"


def _patch_sam2_config():
try:
from transformers.models.sam2.configuration_sam2 import Sam2Config # type: ignore
except Exception:
Sam2Config = None

try:
from transformers.models.sam2_video.configuration_sam2_video import Sam2VideoConfig # type: ignore
except Exception:
Sam2VideoConfig = None

def _guard(cfg_cls):
if cfg_cls is None or getattr(cfg_cls, "_optimum_config_patched", False):
return
original_init = cfg_cls.__init__

def patched_init(self, *args, **kwargs):
original_init(self, *args, **kwargs)
try:
if getattr(self, "tie_word_embeddings", True):
self.tie_word_embeddings = False
except Exception:
pass

try:
model_type = getattr(self, "model_type", None)
if model_type == "sam2_video":
mapping = dict(getattr(self, "export_model_type_map", {}) or {})
mapping.setdefault("feature-extraction", "sam2video_vision_encoder")
mapping.setdefault("image-segmentation", "sam2video_mask_decoder")
self.export_model_type_map = mapping
if getattr(self, "export_model_type", None) is None:
self.export_model_type = mapping.get("feature-extraction")
except Exception:
pass

cfg_cls.__init__ = patched_init
setattr(cfg_cls, "_optimum_config_patched", True)

_guard(Sam2Config)
_guard(Sam2VideoConfig)


def _patch_sam2_mark_tied_weights():
try:
from transformers.models.sam2.modeling_sam2 import Sam2Model # type: ignore
except Exception: # transformers may not ship sam2 yet
Sam2Model = None

try:
from transformers.models.sam2_video.modeling_sam2_video import Sam2VideoModel # type: ignore
except Exception:
Sam2VideoModel = None

def _guard(model_cls):
if model_cls is None:
return
original = getattr(model_cls, "mark_tied_weights_as_initialized", None)
if original is None or getattr(model_cls, "_optimum_mark_tied_weights_patched", False):
return

def patched(self, *args, **kwargs):
tied = getattr(self, "_tied_weights_keys", None)
if tied and not getattr(self, "_optimum_sam2_ties_filtered", False):
filtered = []
removed = False
for pair in tied:
keys = pair if isinstance(pair, (list, tuple, set)) else (pair,)
if any((_SAM2_ERROR_TOKEN in str(key)) for key in keys if key):
removed = True
continue
filtered.append(pair)
if removed:
try:
self._tied_weights_keys = type(tied)(filtered)
except Exception:
self._tied_weights_keys = filtered
setattr(self, "_optimum_sam2_ties_filtered", True)
config = getattr(self, "config", None)
if config is not None and getattr(config, "tie_word_embeddings", None):
try:
config.tie_word_embeddings = False
except Exception:
pass
try:
return original(self, *args, **kwargs)
except AttributeError as err:
if _SAM2_ERROR_TOKEN in str(err):
# Tied metadata can sporadically include buffers; skip them quietly.
return
raise

model_cls.mark_tied_weights_as_initialized = patched
setattr(model_cls, "_optimum_mark_tied_weights_patched", True)

_guard(Sam2Model)
_guard(Sam2VideoModel)


_patch_sam2_config()
_patch_sam2_mark_tied_weights()

__all__ = []
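A quick way to see the config patch in action, assuming a transformers build that ships Sam2VideoConfig (the printed values follow from the patched __init__ above):

from transformers.models.sam2_video.configuration_sam2_video import Sam2VideoConfig

cfg = Sam2VideoConfig()
# The patched __init__ disables embedding tying and installs the
# task -> export model type mapping consumed by the exporter dispatch.
print(cfg.tie_word_embeddings)  # False
print(cfg.export_model_type_map["image-segmentation"])  # sam2video_mask_decoder
print(cfg.export_model_type)  # defaults to the feature-extraction entry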
80 changes: 74 additions & 6 deletions optimum/exporters/openvino/convert.py
@@ -69,6 +69,7 @@
allow_skip_tracing_check,
clear_class_registry,
remove_none_from_dummy_inputs,
resolve_model_type,
save_config,
save_preprocessors,
set_simplified_chat_template,
@@ -552,10 +553,7 @@ def export_from_model(
if library_name != "open_clip":
TasksManager.standardize_model_attributes(model)

if hasattr(model.config, "export_model_type") and model.config.export_model_type is not None:
model_type = model.config.export_model_type
else:
model_type = getattr(model.config, "model_type", None) or ""
model_type = resolve_model_type(model.config, task)

custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE

@@ -698,11 +696,12 @@ def export_from_model(
else:
# save the subcomponent configuration
for model_name in models_and_export_configs:
target_dir = output / model_name
subcomponent = models_and_export_configs[model_name][0]
if hasattr(subcomponent, "save_config"):
subcomponent.save_config(output / model_name)
subcomponent.save_config(target_dir)
elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"):
subcomponent.config.save_pretrained(output / model_name)
subcomponent.config.save_pretrained(target_dir)

files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_export_configs]

@@ -913,6 +912,61 @@ def _get_multi_modal_submodels_and_export_configs(
return main_config, models_for_export, stateful_parts


def _get_sam2_video_submodels_and_export_configs(
model: "PreTrainedModel",
task: str,
library_name: str,
int_dtype: str,
float_dtype: str,
preprocessors: Optional[List[Any]] = None,
exporter: str = "openvino",
):
models_for_export: Dict[str, Tuple["PreTrainedModel", "OnnxConfig"]] = {}

def _component_export_name(name: str) -> str:
if name.startswith("sam2video_"):
return name[len("sam2video_"):]
if name.startswith("sam2_"):
return name[len("sam2_"):]
return name

normalized_task = task or ""
if normalized_task.startswith("feature-extraction"):
component_specs: List[Tuple[str, str]] = [
("sam2video_vision_encoder", "feature-extraction"),
("sam2video_prompt_encoder", "feature-extraction"),
]
elif normalized_task.startswith("image-segmentation"):
component_specs = [("sam2video_mask_decoder", "image-segmentation")]
else:
component_specs = [
("sam2video_vision_encoder", "feature-extraction"),
("sam2video_prompt_encoder", "feature-extraction"),
("sam2video_mask_decoder", "image-segmentation"),
]

for component_model_type, component_task in component_specs:
config_constructor = TasksManager.get_exporter_config_constructor(
model=model,
exporter=exporter,
library_name=library_name,
task=component_task,
model_type=component_model_type,
)
export_config = config_constructor(
model.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
preprocessors=preprocessors,
)
export_name = _component_export_name(component_model_type)
models_for_export[export_name] = (model, export_config)

export_config = next(iter(models_for_export.values()))[1] if models_for_export else None
stateful_parts = [False] * len(models_for_export)
return export_config, models_for_export, stateful_parts
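The helper above produces one export config per SAM2-video component and strips the `sam2video_`/`sam2_` prefix for the on-disk subfolder names. A standalone sketch of the task-to-component mapping it implements:

def sam2_video_components(task: str) -> list:
    # Mirrors the component_specs selection in
    # _get_sam2_video_submodels_and_export_configs above.
    if task.startswith("feature-extraction"):
        names = ["sam2video_vision_encoder", "sam2video_prompt_encoder"]
    elif task.startswith("image-segmentation"):
        names = ["sam2video_mask_decoder"]
    else:  # unknown or empty task: export every component
        names = [
            "sam2video_vision_encoder",
            "sam2video_prompt_encoder",
            "sam2video_mask_decoder",
        ]
    # The model-type prefix is dropped for the export subfolder name.
    return [name.split("_", 1)[1] for name in names]

print(sam2_video_components("image-segmentation"))  # ['mask_decoder']
print(sam2_video_components(""))  # all three components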


def _get_submodels_and_export_configs(
model: Union["PreTrainedModel", "DiffusionPipeline"],
task: str,
@@ -937,6 +991,20 @@ def _get_submodels_and_export_configs(
return _get_multi_modal_submodels_and_export_configs(
model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs, stateful
)
elif (
not custom_architecture
and library_name == "transformers"
and getattr(model.config, "model_type", None) == "sam2_video"
):
return _get_sam2_video_submodels_and_export_configs(
model,
task,
library_name,
int_dtype,
float_dtype,
preprocessors,
exporter=exporter,
)
elif not custom_architecture and library_name == "transformers" and model.config.model_type == "speecht5":
return _get_speecht5_tss_model_for_export(
model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs
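Taken together, the new dispatch means a SAM2-video checkpoint goes through the standard entry point; a hedged usage sketch (the model id is a placeholder, not something this PR pins down):

from optimum.exporters.openvino import main_export

main_export(
    "org/sam2-video-checkpoint",  # hypothetical model id or local path
    output="sam2_video_ov",
    task="image-segmentation",  # exports only the mask_decoder component
)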