6 changes: 6 additions & 0 deletions optimum/exporters/openvino/__init__.py
@@ -12,11 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import pkgutil

from . import _compat_sam2  # noqa: F401  # ensures SAM2 patches are applied before model config registration
import optimum.exporters.openvino.model_configs

from .__main__ import main_export
from .convert import export, export_from_model, export_models, export_pytorch_via_onnx
from .stateful import ensure_stateful_is_available, patch_stateful

__path__ = pkgutil.extend_path(__path__, __name__)

__all__ = ["main_export", "export", "export_models"]
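The `_compat_sam2` import is placed ahead of `optimum.exporters.openvino.model_configs` on purpose, so its patches land before any export configs register. A minimal sanity check of the tokenizer shim it installs — only meaningful on transformers releases that dropped MT5Tokenizer, and hedged accordingly:

import optimum.exporters.openvino  # importing the package applies the compat patches

# The legacy alias resolves via the shim when transformers no longer defines it.
from transformers import MT5Tokenizer

print(MT5Tokenizer.__name__)  # "MT5Tokenizer"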
8 changes: 3 additions & 5 deletions optimum/exporters/openvino/__main__.py
@@ -44,6 +44,7 @@
clear_class_registry,
deduce_diffusers_dtype,
load_preprocessors,
resolve_model_type,
)


@@ -274,7 +275,7 @@ def main_export(
do_gptq_patching = quant_method == "gptq"
do_bitnet_patching = quant_method == "bitnet"

model_type = config.model_type
model_type = resolve_model_type(config, task)
if model_type not in TasksManager._SUPPORTED_MODEL_TYPE:
custom_architecture = True
if custom_export_configs is None:
@@ -446,10 +447,7 @@ def bitnet_load_hook(self, state_dict, prefix, *args, **kwargs):
)
model.config.pad_token_id = pad_token_id

if hasattr(model.config, "export_model_type"):
model_type = model.config.export_model_type
else:
model_type = model.config.model_type
model_type = resolve_model_type(model.config, task)

if (
not custom_architecture
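`resolve_model_type` is imported from the exporter utils and its body is not shown in this diff. Inferred from the two branches it replaces and from the `export_model_type_map` installed by `_compat_sam2`, a plausible sketch of its contract might look like the following — the mapping attribute name and the fallback order are assumptions, not the helper's verified implementation:

def resolve_model_type(config, task=None):
    """Hypothetical sketch; the real helper lives in the exporter utils."""
    # Task-specific override, e.g. sam2_video maps feature-extraction and
    # image-segmentation to dedicated export model types.
    mapping = getattr(config, "export_model_type_map", None) or {}
    if task and task in mapping:
        return mapping[task]
    # Explicit export override, mirroring the hasattr() branch this PR removes.
    export_type = getattr(config, "export_model_type", None)
    if export_type is not None:
        return export_type
    # Fall back to the plain Transformers model type.
    return getattr(config, "model_type", None) or ""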
122 changes: 122 additions & 0 deletions optimum/exporters/openvino/_compat_sam2.py
@@ -0,0 +1,122 @@
"""SAM2 compatibility hooks for Optimum OpenVINO exporters."""

from __future__ import annotations

import transformers

try:  # older Transformers releases still expose MT5Tokenizer
    from transformers import MT5Tokenizer  # type: ignore[attr-defined]
except ImportError:  # newer Transformers releases dropped MT5Tokenizer; alias T5Tokenizer instead
    from transformers import T5Tokenizer

    class MT5Tokenizer(T5Tokenizer):  # type: ignore[misc]
        pass

    setattr(transformers, "MT5Tokenizer", MT5Tokenizer)

_SAM2_ERROR_TOKEN = "positional_embedding"


def _patch_sam2_config():
try:
from transformers.models.sam2.configuration_sam2 import Sam2Config # type: ignore
except Exception:
Sam2Config = None

try:
from transformers.models.sam2_video.configuration_sam2_video import Sam2VideoConfig # type: ignore
except Exception:
Sam2VideoConfig = None

def _guard(cfg_cls):
if cfg_cls is None or getattr(cfg_cls, "_optimum_config_patched", False):
return
original_init = cfg_cls.__init__

def patched_init(self, *args, **kwargs):
original_init(self, *args, **kwargs)
try:
if getattr(self, "tie_word_embeddings", True):
self.tie_word_embeddings = False
except Exception:
pass

try:
model_type = getattr(self, "model_type", None)
if model_type == "sam2_video":
mapping = dict(getattr(self, "export_model_type_map", {}) or {})
mapping.setdefault("feature-extraction", "sam2video_vision_encoder")
mapping.setdefault("image-segmentation", "sam2video_mask_decoder")
self.export_model_type_map = mapping
if getattr(self, "export_model_type", None) is None:
self.export_model_type = mapping.get("feature-extraction")
except Exception:
pass

cfg_cls.__init__ = patched_init
setattr(cfg_cls, "_optimum_config_patched", True)

_guard(Sam2Config)
_guard(Sam2VideoConfig)


def _patch_sam2_mark_tied_weights():
try:
from transformers.models.sam2.modeling_sam2 import Sam2Model # type: ignore
except Exception: # transformers may not ship sam2 yet
Sam2Model = None

try:
from transformers.models.sam2_video.modeling_sam2_video import Sam2VideoModel # type: ignore
except Exception:
Sam2VideoModel = None

def _guard(model_cls):
if model_cls is None:
return
original = getattr(model_cls, "mark_tied_weights_as_initialized", None)
if original is None or getattr(model_cls, "_optimum_mark_tied_weights_patched", False):
return

def patched(self, *args, **kwargs):
tied = getattr(self, "_tied_weights_keys", None)
if tied and not getattr(self, "_optimum_sam2_ties_filtered", False):
filtered = []
removed = False
for pair in tied:
keys = pair if isinstance(pair, (list, tuple, set)) else (pair,)
if any((_SAM2_ERROR_TOKEN in str(key)) for key in keys if key):
removed = True
continue
filtered.append(pair)
if removed:
try:
self._tied_weights_keys = type(tied)(filtered)
except Exception:
self._tied_weights_keys = filtered
setattr(self, "_optimum_sam2_ties_filtered", True)
config = getattr(self, "config", None)
if config is not None and getattr(config, "tie_word_embeddings", None):
try:
config.tie_word_embeddings = False
except Exception:
pass
try:
return original(self, *args, **kwargs)
except AttributeError as err:
if _SAM2_ERROR_TOKEN in str(err):
# Tied metadata can sporadically include buffers; skip them quietly.
return
raise

model_cls.mark_tied_weights_as_initialized = patched
setattr(model_cls, "_optimum_mark_tied_weights_patched", True)

_guard(Sam2Model)
_guard(Sam2VideoModel)


_patch_sam2_config()
_patch_sam2_mark_tied_weights()

__all__ = []
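A quick way to see the config patch in action, assuming a transformers build that ships Sam2VideoConfig (the printed values follow from the patched __init__ above):

from transformers.models.sam2_video.configuration_sam2_video import Sam2VideoConfig

cfg = Sam2VideoConfig()
# The patched __init__ disables embedding tying and installs the
# task -> export model type mapping consumed by the exporter dispatch.
print(cfg.tie_word_embeddings)  # False
print(cfg.export_model_type_map["image-segmentation"])  # sam2video_mask_decoder
print(cfg.export_model_type)  # defaults to the feature-extraction entry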
80 changes: 74 additions & 6 deletions optimum/exporters/openvino/convert.py
@@ -69,6 +69,7 @@
allow_skip_tracing_check,
clear_class_registry,
remove_none_from_dummy_inputs,
resolve_model_type,
save_config,
save_preprocessors,
set_simplified_chat_template,
@@ -552,10 +553,7 @@ def export_from_model(
if library_name != "open_clip":
TasksManager.standardize_model_attributes(model)

if hasattr(model.config, "export_model_type") and model.config.export_model_type is not None:
model_type = model.config.export_model_type
else:
model_type = getattr(model.config, "model_type", None) or ""
model_type = resolve_model_type(model.config, task)

custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE

@@ -698,11 +696,12 @@ def export_from_model(
else:
# save the subcomponent configuration
for model_name in models_and_export_configs:
target_dir = output / model_name
subcomponent = models_and_export_configs[model_name][0]
if hasattr(subcomponent, "save_config"):
subcomponent.save_config(output / model_name)
subcomponent.save_config(target_dir)
elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"):
subcomponent.config.save_pretrained(output / model_name)
subcomponent.config.save_pretrained(target_dir)

files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_export_configs]

@@ -913,6 +912,61 @@ def _get_multi_modal_submodels_and_export_configs(
return main_config, models_for_export, stateful_parts


def _get_sam2_video_submodels_and_export_configs(
model: "PreTrainedModel",
task: str,
library_name: str,
int_dtype: str,
float_dtype: str,
preprocessors: Optional[List[Any]] = None,
exporter: str = "openvino",
):
models_for_export: Dict[str, Tuple["PreTrainedModel", "OnnxConfig"]] = {}

def _component_export_name(name: str) -> str:
if name.startswith("sam2video_"):
return name[len("sam2video_"):]
if name.startswith("sam2_"):
return name[len("sam2_"):]
return name

normalized_task = task or ""
if normalized_task.startswith("feature-extraction"):
component_specs: List[Tuple[str, str]] = [
("sam2video_vision_encoder", "feature-extraction"),
("sam2video_prompt_encoder", "feature-extraction"),
]
elif normalized_task.startswith("image-segmentation"):
component_specs = [("sam2video_mask_decoder", "image-segmentation")]
else:
component_specs = [
("sam2video_vision_encoder", "feature-extraction"),
("sam2video_prompt_encoder", "feature-extraction"),
("sam2video_mask_decoder", "image-segmentation"),
]

for component_model_type, component_task in component_specs:
config_constructor = TasksManager.get_exporter_config_constructor(
model=model,
exporter=exporter,
library_name=library_name,
task=component_task,
model_type=component_model_type,
)
export_config = config_constructor(
model.config,
int_dtype=int_dtype,
float_dtype=float_dtype,
preprocessors=preprocessors,
)
export_name = _component_export_name(component_model_type)
models_for_export[export_name] = (model, export_config)

export_config = next(iter(models_for_export.values()))[1] if models_for_export else None
stateful_parts = [False] * len(models_for_export)
return export_config, models_for_export, stateful_parts
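The helper above produces one export config per SAM2-video component and strips the `sam2video_`/`sam2_` prefix for the on-disk subfolder names. A standalone sketch of the task-to-component mapping it implements:

def sam2_video_components(task: str) -> list:
    # Mirrors the component_specs selection in
    # _get_sam2_video_submodels_and_export_configs above.
    if task.startswith("feature-extraction"):
        names = ["sam2video_vision_encoder", "sam2video_prompt_encoder"]
    elif task.startswith("image-segmentation"):
        names = ["sam2video_mask_decoder"]
    else:  # unknown or empty task: export every component
        names = [
            "sam2video_vision_encoder",
            "sam2video_prompt_encoder",
            "sam2video_mask_decoder",
        ]
    # The model-type prefix is dropped for the export subfolder name.
    return [name.split("_", 1)[1] for name in names]

print(sam2_video_components("image-segmentation"))  # ['mask_decoder']
print(sam2_video_components(""))  # all three components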


def _get_submodels_and_export_configs(
model: Union["PreTrainedModel", "DiffusionPipeline"],
task: str,
@@ -937,6 +991,20 @@ def _get_submodels_and_export_configs(
return _get_multi_modal_submodels_and_export_configs(
model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs, stateful
)
elif (
not custom_architecture
and library_name == "transformers"
and getattr(model.config, "model_type", None) == "sam2_video"
):
return _get_sam2_video_submodels_and_export_configs(
model,
task,
library_name,
int_dtype,
float_dtype,
preprocessors,
exporter=exporter,
)
elif not custom_architecture and library_name == "transformers" and model.config.model_type == "speecht5":
return _get_speecht5_tss_model_for_export(
model, task, library_name, int_dtype, float_dtype, preprocessors, model_kwargs
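Taken together, the new dispatch means a SAM2-video checkpoint goes through the standard entry point; a hedged usage sketch (the model id is a placeholder, not something this PR pins down):

from optimum.exporters.openvino import main_export

main_export(
    "org/sam2-video-checkpoint",  # hypothetical model id or local path
    output="sam2_video_ov",
    task="image-segmentation",  # exports only the mask_decoder component
)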