diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index b8ffcdcf26..e9f265eba0 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -4267,6 +4267,30 @@ class GPT2OpenVINOConfig(GPT2OnnxConfig):
     _MODEL_PATCHER = OVDecoderModelPatcher
 
 
+@register_in_tasks_manager("dinov3_vit", *["feature-extraction"], library_name="transformers")
+@register_in_tasks_manager("dinov3_convnext", *["feature-extraction"], library_name="transformers")
+class DinoV3OpenVINOConfig(VisionOnnxConfig):
+    """Export config registered for `dinov3_vit` and `dinov3_convnext` feature extraction."""
+
+    MIN_TRANSFORMERS_VERSION = "4.55.0"
+    NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
+
+    @property
+    def inputs(self) -> Dict[str, Dict[int, str]]:
+        # Maps each input name to {axis index: axis name}.
+        return {
+            "pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"},
+        }
+
+    @property
+    def outputs(self) -> Dict[str, Dict[int, str]]:
+        # Only axis 0 (the batch axis) is named for either output.
+        return {
+            "last_hidden_state": {0: "image_batch_size"},
+            "pooler_output": {0: "image_batch_size"},
+        }
+
+
 @register_in_tasks_manager(
     "vision-encoder-decoder",
     *[
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 3cf5ead2d2..2c5c51c646 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -103,6 +103,7 @@ class OVCLIExportTestCase(unittest.TestCase):
         ("feature-extraction", "sam"),
         ("text-to-audio", "speecht5"),
         ("zero-shot-image-classification", "clip"),
+        ("feature-extraction", "vit-with-pooler"),
     ]
 
     EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 678f0c510a..129c879cac 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -1469,6 +1469,6 @@ def test_compare_to_transformers(self, model_arch):
 class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
-    SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states"]
+    SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states", "dinov3_vit"]
 
     def _get_sample_image(self):
         url = TEST_IMAGE_URL
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index deef0c4949..0db2f4c89b 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -200,6 +200,7 @@
     "sana": "katuni4ka/tiny-random-sana",
     "sana-sprint": "katuni4ka/tiny-random-sana-sprint",
     "ltx-video": "katuni4ka/tiny-random-ltx-video",
+    "dinov3_vit": "snake7gun/tiny-random-dinov3",
 }
 
 