From c6b768cf88e2ee40d71a433baa595757996bee1e Mon Sep 17 00:00:00 2001
From: ethan <ethan.yang@intel.com>
Date: Wed, 20 Aug 2025 22:19:14 -0700
Subject: [PATCH 1/7] add support for dinov3

---
 optimum/exporters/openvino/model_configs.py | 22 +++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 9c1684db81..335f987d80 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4537,3 +4537,25 @@ def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
         return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+@register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
+@register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
+class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
+    MIN_TRANSFORMERS_VERSION = version.parse("4.55.0")
+
+    DEFAULT_ONNX_OPSET = 14
+
+    NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        return {
+            "pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"},
+        }
+        
+    @property
+    def outputs(self) -> Dict[str, Dict[int, str]]:
+        return {
+            "last_hidden_state": {0: "image_batch_size"},
+            "pooler_output": {0: "image_batch_size"},
+        }
\ No newline at end of file

From 912f62475a39cdcddda3def3c3e7238f048d9234 Mon Sep 17 00:00:00 2001
From: ethan <ethan.yang@intel.com>
Date: Wed, 10 Sep 2025 01:26:36 -0700
Subject: [PATCH 2/7] add dinov3 pooler-output comparison test to test_modeling

---
 tests/openvino/test_modeling.py | 34 +++++++++++++++++++++++++++++++++
 tests/openvino/utils_tests.py   |  1 +
 2 files changed, 35 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index a7a80bde3e..41c46b35f1 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -3063,6 +3063,10 @@ def test_pipeline(self, model_arch: str):
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
     SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states"]
+    SUPPORTED_ARCHITECTURES_WITH_POOLER = []
+    if is_transformers_version(">=", "4.56"):
+        SUPPORTED_ARCHITECTURES_WITH_POOLER += ("vit-with-pooler",)
+
 
     def _get_sample_image(self):
         url = TEST_IMAGE_URL
@@ -3142,6 +3146,36 @@ def test_compare_output_hidden_states(self, model_arch):
         del transformers_model
         del ov_model
         gc.collect()
+        
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_POOLER)
+    def test_compare_output_pooler(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+
+        image = self._get_sample_image()
+        preprocessor = AutoImageProcessor.from_pretrained(model_id)
+        inputs = preprocessor(images=image, return_tensors="pt")
+
+        transformers_model = AutoModel.from_pretrained(model_id, attn_implementation="eager")
+        transformers_model.eval()
+        with torch.no_grad():
+            transformers_outputs = transformers_model(**inputs, output_attentions=True)
+
+        ov_model = OVModelForCustomTasks.from_pretrained(model_id, ov_config=F32_CONFIG)
+        self.assertIsInstance(ov_model.config, PretrainedConfig)
+
+        for input_type in ["pt", "np"]:
+            inputs = preprocessor(images=image, return_tensors=input_type)
+            ov_outputs = ov_model(**inputs)
+            self.assertIn("last_hidden_state", ov_outputs)
+            self.assertIsInstance(ov_outputs.last_hidden_state, TENSOR_ALIAS_TO_TYPE[input_type])
+            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4))
+            self.assertIn("pooler_output", ov_outputs)
+            self.assertIsInstance(ov_outputs.pooler_output, TENSOR_ALIAS_TO_TYPE[input_type])
+            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.pooler_output), transformers_outputs.pooler_output, atol=1e-4))
+
+        del transformers_model
+        del ov_model
+        gc.collect()
 
 
 class OVModelForOpenCLIPZeroShortImageClassificationTest(unittest.TestCase):
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 8d8ba3e098..b62b9abca0 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -197,6 +197,7 @@
     "sana": "katuni4ka/tiny-random-sana",
     "sana-sprint": "katuni4ka/tiny-random-sana-sprint",
     "ltx-video": "katuni4ka/tiny-random-ltx-video",
+    "vit-with-pooler": "snake7gun/tiny-random-dinov3",
 }
 
 

From b86eae5b35bbe763d27ef6227e9645ac65b12598 Mon Sep 17 00:00:00 2001
From: ethan <ethan.yang@intel.com>
Date: Wed, 10 Sep 2025 01:37:22 -0700
Subject: [PATCH 3/7] move dinov3 test coverage from test_modeling to test_exporters_cli

---
 tests/openvino/test_exporters_cli.py |  7 ++++++
 tests/openvino/test_modeling.py      | 34 ----------------------------
 2 files changed, 7 insertions(+), 34 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index eb51b99568..4bb4c6c88e 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -121,6 +121,13 @@ class OVCLIExportTestCase(unittest.TestCase):
                 ("text-generation-with-past", "ernie4_5"),
             ]
         )
+
+    # Gated on transformers >= 4.56. The architecture key must match the
+    # "dinov3_vit" entry of MODEL_NAMES in tests/openvino/utils_tests.py.
+    if is_transformers_version(">=", "4.56"):
+        SUPPORTED_ARCHITECTURES.extend(
+            [("feature-extraction", "dinov3_vit")]
+        )
 
     EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
         "gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 41c46b35f1..a7a80bde3e 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -3063,10 +3063,6 @@ def test_pipeline(self, model_arch: str):
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
     SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states"]
-    SUPPORTED_ARCHITECTURES_WITH_POOLER = []
-    if is_transformers_version(">=", "4.56"):
-        SUPPORTED_ARCHITECTURES_WITH_POOLER += ("vit-with-pooler",)
-
 
     def _get_sample_image(self):
         url = TEST_IMAGE_URL
@@ -3146,36 +3142,6 @@ def test_compare_output_hidden_states(self, model_arch):
         del transformers_model
         del ov_model
         gc.collect()
-        
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_POOLER)
-    def test_compare_output_pooler(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-
-        image = self._get_sample_image()
-        preprocessor = AutoImageProcessor.from_pretrained(model_id)
-        inputs = preprocessor(images=image, return_tensors="pt")
-
-        transformers_model = AutoModel.from_pretrained(model_id, attn_implementation="eager")
-        transformers_model.eval()
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**inputs, output_attentions=True)
-
-        ov_model = OVModelForCustomTasks.from_pretrained(model_id, ov_config=F32_CONFIG)
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
-
-        for input_type in ["pt", "np"]:
-            inputs = preprocessor(images=image, return_tensors=input_type)
-            ov_outputs = ov_model(**inputs)
-            self.assertIn("last_hidden_state", ov_outputs)
-            self.assertIsInstance(ov_outputs.last_hidden_state, TENSOR_ALIAS_TO_TYPE[input_type])
-            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4))
-            self.assertIn("pooler_output", ov_outputs)
-            self.assertIsInstance(ov_outputs.pooler_output, TENSOR_ALIAS_TO_TYPE[input_type])
-            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.pooler_output), transformers_outputs.pooler_output, atol=1e-4))
-
-        del transformers_model
-        del ov_model
-        gc.collect()
 
 
 class OVModelForOpenCLIPZeroShortImageClassificationTest(unittest.TestCase):

From 248869437790b42b58cbd03fe4f129c4f5134e80 Mon Sep 17 00:00:00 2001
From: Ethan Yang <ethan.yang@intel.com>
Date: Tue, 23 Sep 2025 10:57:59 +0800
Subject: [PATCH 4/7] rename dinov3 test model key to dinov3_vit in tests/openvino/utils_tests.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 tests/openvino/utils_tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index b62b9abca0..92f896a5cf 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -197,7 +197,7 @@
     "sana": "katuni4ka/tiny-random-sana",
     "sana-sprint": "katuni4ka/tiny-random-sana-sprint",
     "ltx-video": "katuni4ka/tiny-random-ltx-video",
-    "vit-with-pooler": "snake7gun/tiny-random-dinov3",
+    "dinov3_vit": "snake7gun/tiny-random-dinov3",
 }
 
 

From b6c8e887d3506f6c0e75385c1392ed6e68342200 Mon Sep 17 00:00:00 2001
From: Ethan Yang <ethan.yang@intel.com>
Date: Tue, 23 Sep 2025 10:58:11 +0800
Subject: [PATCH 5/7] use a plain version string for dinov3 MIN_TRANSFORMERS_VERSION

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/exporters/openvino/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 335f987d80..0a25283867 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4542,7 +4542,7 @@ def patch_model_for_export(
 @register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
 @register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
 class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
-    MIN_TRANSFORMERS_VERSION = version.parse("4.55.0")
+    MIN_TRANSFORMERS_VERSION = "4.55.0"
 
     DEFAULT_ONNX_OPSET = 14
 

From 6d8a9325248aa4a12ff21215015bc414826b51fe Mon Sep 17 00:00:00 2001
From: Ethan Yang <ethan.yang@intel.com>
Date: Tue, 23 Sep 2025 10:58:19 +0800
Subject: [PATCH 6/7] drop DEFAULT_ONNX_OPSET from the dinov3 export config

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/exporters/openvino/model_configs.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 0a25283867..30ab356672 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4544,7 +4544,6 @@ def patch_model_for_export(
 class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
     MIN_TRANSFORMERS_VERSION = "4.55.0"
 
-    DEFAULT_ONNX_OPSET = 14
 
     NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
     @property

From 4f8eaa58d14430766745ac8669f6e2cf3af6ea42 Mon Sep 17 00:00:00 2001
From: Ethan Yang <ethan.yang@intel.com>
Date: Tue, 23 Sep 2025 10:58:26 +0800
Subject: [PATCH 7/7] rename OpenDinoV3OpenVINOConfig to DinoV3OpenVINOConfig

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/exporters/openvino/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 30ab356672..23d77dea2d 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4541,7 +4541,7 @@ def patch_model_for_export(
 
 @register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
 @register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
-class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
+class DinoV3OpenVINOConfig(VisionOnnxConfig):
     MIN_TRANSFORMERS_VERSION = "4.55.0"