From c6b768cf88e2ee40d71a433baa595757996bee1e Mon Sep 17 00:00:00 2001
From: ethan
Date: Wed, 20 Aug 2025 22:19:14 -0700
Subject: [PATCH 1/7] add support for dinov3

---
 optimum/exporters/openvino/model_configs.py | 22 +++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 9c1684db81..335f987d80 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4537,3 +4537,25 @@ def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
         return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+@register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
+@register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
+class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
+    MIN_TRANSFORMERS_VERSION = version.parse("4.55.0")
+
+    DEFAULT_ONNX_OPSET = 14
+
+    NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        return {
+            "pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"},
+        }
+
+    @property
+    def outputs(self) -> Dict[str, Dict[int, str]]:
+        return {
+            "last_hidden_state": {0: "image_batch_size"},
+            "pooler_output": {0: "image_batch_size"},
+        }
\ No newline at end of file

From 912f62475a39cdcddda3def3c3e7238f048d9234 Mon Sep 17 00:00:00 2001
From: ethan
Date: Wed, 10 Sep 2025 01:26:36 -0700
Subject: [PATCH 2/7] update dinov3 test case

---
 tests/openvino/test_modeling.py | 34 +++++++++++++++++++++++++++++++++
 tests/openvino/utils_tests.py   |  1 +
 2 files changed, 35 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index a7a80bde3e..41c46b35f1 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -3063,6 +3063,10 @@ def test_pipeline(self, model_arch: str):
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
     SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states"]
+    SUPPORTED_ARCHITECTURES_WITH_POOLER = []
+    if is_transformers_version(">=", "4.56"):
+        SUPPORTED_ARCHITECTURES_WITH_POOLER += ("vit-with-pooler",)
+
 
     def _get_sample_image(self):
         url = TEST_IMAGE_URL
@@ -3142,6 +3146,36 @@ def test_compare_output_hidden_states(self, model_arch):
         del transformers_model
         del ov_model
         gc.collect()
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_POOLER)
+    def test_compare_output_pooler(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+
+        image = self._get_sample_image()
+        preprocessor = AutoImageProcessor.from_pretrained(model_id)
+        inputs = preprocessor(images=image, return_tensors="pt")
+
+        transformers_model = AutoModel.from_pretrained(model_id, attn_implementation="eager")
+        transformers_model.eval()
+        with torch.no_grad():
+            transformers_outputs = transformers_model(**inputs, output_attentions=True)
+
+        ov_model = OVModelForCustomTasks.from_pretrained(model_id, ov_config=F32_CONFIG)
+        self.assertIsInstance(ov_model.config, PretrainedConfig)
+
+        for input_type in ["pt", "np"]:
+            inputs = preprocessor(images=image, return_tensors=input_type)
+            ov_outputs = ov_model(**inputs)
+            self.assertIn("last_hidden_state", ov_outputs)
+            self.assertIsInstance(ov_outputs.last_hidden_state, TENSOR_ALIAS_TO_TYPE[input_type])
+            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4))
+            self.assertIn("pooler_output", ov_outputs)
+            self.assertIsInstance(ov_outputs.pooler_output, TENSOR_ALIAS_TO_TYPE[input_type])
+            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.pooler_output), transformers_outputs.pooler_output, atol=1e-4))
+
+        del transformers_model
+        del ov_model
+        gc.collect()
 
 
 class OVModelForOpenCLIPZeroShortImageClassificationTest(unittest.TestCase):
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 8d8ba3e098..b62b9abca0 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -197,6 +197,7 @@
     "sana": "katuni4ka/tiny-random-sana",
     "sana-sprint": "katuni4ka/tiny-random-sana-sprint",
     "ltx-video": "katuni4ka/tiny-random-ltx-video",
+    "vit-with-pooler": "snake7gun/tiny-random-dinov3",
 }
 
 

From b86eae5b35bbe763d27ef6227e9645ac65b12598 Mon Sep 17 00:00:00 2001
From: ethan
Date: Wed, 10 Sep 2025 01:37:22 -0700
Subject: [PATCH 3/7] update dinov3 test case

---
 tests/openvino/test_exporters_cli.py |  7 ++++++
 tests/openvino/test_modeling.py      | 34 ----------------------------
 2 files changed, 7 insertions(+), 34 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index eb51b99568..4bb4c6c88e 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -121,6 +121,13 @@ class OVCLIExportTestCase(unittest.TestCase):
                 ("text-generation-with-past", "ernie4_5"),
             ]
         )
+
+    if is_transformers_version(">=", "4.56"):
+        SUPPORTED_ARCHITECTURES.extend(
+            [
+                ("feature-extraction", "vit-with-pooler"),
+            ]
+        )
 
     EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
         "gpt2": 2 if is_tokenizers_version("<", "0.20") or is_openvino_version(">=", "2024.5") else 0,
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 41c46b35f1..a7a80bde3e 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -3063,10 +3063,6 @@ def test_pipeline(self, model_arch: str):
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
     SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states"]
-    SUPPORTED_ARCHITECTURES_WITH_POOLER = []
-    if is_transformers_version(">=", "4.56"):
-        SUPPORTED_ARCHITECTURES_WITH_POOLER += ("vit-with-pooler",)
-
 
     def _get_sample_image(self):
         url = TEST_IMAGE_URL
@@ -3146,36 +3142,6 @@ def test_compare_output_hidden_states(self, model_arch):
         del transformers_model
         del ov_model
         gc.collect()
-
-    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_POOLER)
-    def test_compare_output_pooler(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-
-        image = self._get_sample_image()
-        preprocessor = AutoImageProcessor.from_pretrained(model_id)
-        inputs = preprocessor(images=image, return_tensors="pt")
-
-        transformers_model = AutoModel.from_pretrained(model_id, attn_implementation="eager")
-        transformers_model.eval()
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**inputs, output_attentions=True)
-
-        ov_model = OVModelForCustomTasks.from_pretrained(model_id, ov_config=F32_CONFIG)
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
-
-        for input_type in ["pt", "np"]:
-            inputs = preprocessor(images=image, return_tensors=input_type)
-            ov_outputs = ov_model(**inputs)
-            self.assertIn("last_hidden_state", ov_outputs)
-            self.assertIsInstance(ov_outputs.last_hidden_state, TENSOR_ALIAS_TO_TYPE[input_type])
-            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4))
-            self.assertIn("pooler_output", ov_outputs)
-            self.assertIsInstance(ov_outputs.pooler_output, TENSOR_ALIAS_TO_TYPE[input_type])
-            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.pooler_output), transformers_outputs.pooler_output, atol=1e-4))
-
-        del transformers_model
-        del ov_model
-        gc.collect()
 
 
 class OVModelForOpenCLIPZeroShortImageClassificationTest(unittest.TestCase):

From 248869437790b42b58cbd03fe4f129c4f5134e80 Mon Sep 17 00:00:00 2001
From: Ethan Yang
Date: Tue, 23 Sep 2025 10:57:59 +0800
Subject: [PATCH 4/7] Update tests/openvino/utils_tests.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
Review note: this commit renames the model-table key "vit-with-pooler" to
"dinov3_vit", but tests/openvino/test_exporters_cli.py (added in patch 3/7)
still referenced the old key. The matching rename is folded in here so the
entry keeps resolving. This assumes the second tuple element is looked up in
the same table that test_modeling.py indexes as MODEL_NAMES[...]; confirm
against the test body. The new hunk carries no index line because its blob
ids were not regenerated.

 tests/openvino/test_exporters_cli.py | 2 +-
 tests/openvino/utils_tests.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -125,7 +125,7 @@ class OVCLIExportTestCase(unittest.TestCase):
     if is_transformers_version(">=", "4.56"):
         SUPPORTED_ARCHITECTURES.extend(
             [
-                ("feature-extraction", "vit-with-pooler"),
+                ("feature-extraction", "dinov3_vit"),
             ]
         )
 
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index b62b9abca0..92f896a5cf 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -197,7 +197,7 @@
     "sana": "katuni4ka/tiny-random-sana",
     "sana-sprint": "katuni4ka/tiny-random-sana-sprint",
     "ltx-video": "katuni4ka/tiny-random-ltx-video",
-    "vit-with-pooler": "snake7gun/tiny-random-dinov3",
+    "dinov3_vit": "snake7gun/tiny-random-dinov3",
 }
 
 

From b6c8e887d3506f6c0e75385c1392ed6e68342200 Mon Sep 17 00:00:00 2001
From: Ethan Yang
Date: Tue, 23 Sep 2025 10:58:11 +0800
Subject: [PATCH 5/7] Update optimum/exporters/openvino/model_configs.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
Review note: besides switching to a plain string, the minimum version is
raised from 4.55.0 to 4.56.0 so that it agrees with the
is_transformers_version(">=", "4.56") gate the tests in this series use for
the same model. The index blob ids in this patch and in patches 6/7 and 7/7
were not regenerated after this change.

 optimum/exporters/openvino/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 335f987d80..0a25283867 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4542,7 +4542,7 @@ def patch_model_for_export(
 @register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
 @register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
 class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
-    MIN_TRANSFORMERS_VERSION = version.parse("4.55.0")
+    MIN_TRANSFORMERS_VERSION = "4.56.0"
 
     DEFAULT_ONNX_OPSET = 14
 

From 6d8a9325248aa4a12ff21215015bc414826b51fe Mon Sep 17 00:00:00 2001
From: Ethan Yang
Date: Tue, 23 Sep 2025 10:58:19 +0800
Subject: [PATCH 6/7] Update optimum/exporters/openvino/model_configs.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/exporters/openvino/model_configs.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 0a25283867..30ab356672 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4544,7 +4544,6 @@ def patch_model_for_export(
 class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
     MIN_TRANSFORMERS_VERSION = "4.56.0"
 
-    DEFAULT_ONNX_OPSET = 14
 
     NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
     @property

From 4f8eaa58d14430766745ac8669f6e2cf3af6ea42 Mon Sep 17 00:00:00 2001
From: Ethan Yang
Date: Tue, 23 Sep 2025 10:58:26 +0800
Subject: [PATCH 7/7] Update optimum/exporters/openvino/model_configs.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/exporters/openvino/model_configs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 30ab356672..23d77dea2d 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4541,7 +4541,7 @@ def patch_model_for_export(
 
 @register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
 @register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
-class OpenDinoV3OpenVINOConfig(VisionOnnxConfig):
+class DinoV3OpenVINOConfig(VisionOnnxConfig):
     MIN_TRANSFORMERS_VERSION = "4.56.0"
 
 