From 9e80aba91a988256f6e649483b9feb30905d32d8 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 1 Oct 2025 16:53:40 +0200 Subject: [PATCH 01/28] add tests --- tests/openvino/test_decoder.py | 39 +++++++++++++++++++++++----- tests/openvino/test_export.py | 2 +- tests/openvino/test_exporters_cli.py | 4 +-- tests/openvino/utils_tests.py | 8 +++--- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index b58e894833..c34cf12126 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -16,10 +16,12 @@ pipeline, set_seed, ) +from transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES from transformers.testing_utils import slow from utils_tests import MODEL_NAMES, get_num_sdpa, mock_torch_cuda_is_available, patch_awq_for_inference from optimum.exporters.openvino.model_patcher import patch_update_causal_mask +from optimum.exporters.tasks import TasksManager from optimum.intel import OVModelForCausalLM, OVModelForSequenceClassification from optimum.intel.openvino.utils import _print_compiled_model_properties from optimum.intel.pipelines import pipeline as optimum_pipeline @@ -39,7 +41,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "bart", "baichuan2", "baichuan2-13b", - "gpt_bigcode", + "bigbird_pegasus", + "biogpt", "blenderbot", "blenderbot-small", "bloom", @@ -48,10 +51,13 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "codegen2", "gpt2", "gptj", + "gpt_bigcode", "gpt_neo", "gpt_neox", + "gpt_neox_japanese", "llama", "marian", + "mbart", "minicpm", "mistral", "mixtral", @@ -65,8 +71,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "falcon", "falcon-40b", "persimmon", - "biogpt", - "gpt_neox_japanese", "xglm", "aquila", "aquila2", @@ -88,10 +92,10 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "gemma2", "exaone", "granite", - "granite-moe", + "granitemoe", ) - 
SUPPORTED_SSM_ARCHITECTURES = ("mamba", "falcon-mamba") + SUPPORTED_SSM_ARCHITECTURES = ("mamba", "falcon_mamba") SUPPORTED_ARCHITECTURES += SUPPORTED_SSM_ARCHITECTURES @@ -201,7 +205,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "gemma2": 4, "exaone": 8, "granite": 6, - "granite-moe": 6, + "granitemoe": 6, "glm": 28, "mistral-nemo": 8, "minicpm3": 6, @@ -214,9 +218,30 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "qwen3": 2, "qwen3_moe": 2, "mamba": 0, - "falcon-mamba": 0, + "falcon_mamba": 0, "arcee": 2, } + TASK = "text-generation" + + def test_find_untested_architectures(self): + if len(self.SUPPORTED_ARCHITECTURES) != len(set(self.SUPPORTED_ARCHITECTURES)): + raise ValueError( + f"For the task `{self.TASK}`, some architectures are duplicated in the list of tested architectures: " + f"{self.SUPPORTED_ARCHITECTURES}.\n" + ) + + tested_architectures = set(self.SUPPORTED_ARCHITECTURES) + transformers_architectures = set(CONFIG_MAPPING_NAMES.keys()) + ov_architectures = set(TasksManager.get_supported_model_type_for_task(task=self.TASK, exporter="openvino")) + supported_architectures = ov_architectures & transformers_architectures + + untested_architectures = supported_architectures - tested_architectures + + if len(untested_architectures) > 0: + raise ValueError( + f"For the task `{self.TASK}`, the ONNX exporter supports {supported_architectures} but some of them are not " + f"tested: {untested_architectures}.\n" + ) # TODO: remove gptq/awq from here @parameterized.expand(SUPPORTED_ARCHITECTURES) diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py index 48a5d9a105..01fa6e98b7 100644 --- a/tests/openvino/test_export.py +++ b/tests/openvino/test_export.py @@ -81,7 +81,7 @@ class ExportModelTest(unittest.TestCase): "speecht5": OVModelForTextToSpeechSeq2Seq, "clip": OVModelForZeroShotImageClassification, "mamba": OVModelForCausalLM, - "falcon-mamba": OVModelForCausalLM, + "falcon_mamba": OVModelForCausalLM, 
"stable-diffusion-3": OVStableDiffusion3Pipeline, "flux": OVFluxPipeline, "ltx-video": OVLTXPipeline, diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index c7467ea70a..f8d27d5ef7 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -84,7 +84,7 @@ class OVCLIExportTestCase(unittest.TestCase): ("text2text-generation", "t5"), ("text2text-generation-with-past", "t5"), ("text-generation-with-past", "mamba"), - ("text-generation-with-past", "falcon-mamba"), + ("text-generation-with-past", "falcon_mamba"), ("text-classification", "albert"), ("question-answering", "distilbert"), ("token-classification", "roberta"), @@ -127,7 +127,7 @@ class OVCLIExportTestCase(unittest.TestCase): "speecht5": 2, "clip": 2 if is_tokenizers_version("<", "0.20.0") or is_openvino_version(">=", "2024.5") else 0, "mamba": 2, - "falcon-mamba": 2, + "falcon_mamba": 2, "qwen3": 2, } diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 8c37e434b0..1065e25e16 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -75,7 +75,7 @@ "gemma3": "katuni4ka/tiny-random-gemma3", "falcon": "fxmarty/really-tiny-falcon-testing", "falcon-40b": "katuni4ka/tiny-random-falcon-40b", - "falcon-mamba": "rkazants/tiny-falcon-mamba", + "falcon_mamba": "rkazants/tiny-falcon-mamba", "flaubert": "hf-internal-testing/tiny-random-flaubert", "flux": "katuni4ka/tiny-random-flux", "flux-fill": "katuni4ka/tiny-random-flux-fill", @@ -88,7 +88,7 @@ "gpt_oss_mxfp4": "echarlaix/tiny-random-gpt-oss-mxfp4", "gptj": "hf-internal-testing/tiny-random-GPTJModel", "granite": "katuni4ka/tiny-random-granite", - "granite-moe": "katuni4ka/tiny-random-granite-moe", + "granitemoe": "katuni4ka/tiny-random-granite-moe", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-ibert", "idefics3": "hf-internal-testing/tiny-random-Idefics3ForConditionalGeneration", @@ -326,7 
+326,7 @@ }, "clip": {"model": 130}, "mamba": {"model": 386}, - "falcon-mamba": {"model": 194}, + "falcon_mamba": {"model": 194}, } TEST_IMAGE_URL = "http://images.cocodataset.org/val2017/000000039769.jpg" @@ -457,10 +457,8 @@ def get_num_sdpa(model): "baichuan2-13b": "baichuan", "chatglm4": "chatglm", "codegen2": "codegen", - "falcon-mamba": "falcon_mamba", "falcon-40b": "falcon", "gpt_oss_mxfp4": "gpt_oss", - "granite-moe": "granitemoe", "llama_awq": "llama", "llava_next_mistral": "llava_next", "mistral-nemo": "mistral", From 563de33c116d00afca69956e8cc0a2e61c2cda0e Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 1 Oct 2025 17:03:00 +0200 Subject: [PATCH 02/28] reorder --- tests/openvino/test_decoder.py | 57 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index c34cf12126..381ab9154d 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -37,24 +37,42 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): + SUPPORTED_ARCHITECTURES = ( - "bart", + "aquila", + "aquila2", + "arctic", "baichuan2", "baichuan2-13b", + "bart", "bigbird_pegasus", "biogpt", "blenderbot", "blenderbot-small", "bloom", "chatglm", + "chatglm4", "codegen", "codegen2", + "cohere", + "dbrx", + "decilm", + "exaone", + "falcon", + "falcon-40b", + "gemma", + "gemma2", "gpt2", - "gptj", "gpt_bigcode", "gpt_neo", "gpt_neox", "gpt_neox_japanese", + "gptj", + "granite", + "granitemoe", + "internlm", + "internlm2", + "jais", "llama", "marian", "mbart", @@ -62,37 +80,20 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "mistral", "mixtral", "mpt", + "olmo", "opt", - "pegasus", - "qwen", - "phi", - "internlm2", "orion", - "falcon", - "falcon-40b", + "pegasus", "persimmon", - "xglm", - "aquila", - "aquila2", - "xverse", - "internlm", - "jais", - "chatglm4", - "decilm", - "gemma", - "olmo", - "stablelm", - "starcoder2", - "dbrx", - 
"cohere", + "phi", + "phi3", + "qwen", "qwen2", "qwen2_moe", - "arctic", - "phi3", - "gemma2", - "exaone", - "granite", - "granitemoe", + "stablelm", + "starcoder2", + "xglm", + "xverse", ) SUPPORTED_SSM_ARCHITECTURES = ("mamba", "falcon_mamba") From e280b281ab152614f0ca19c9499f0162da277136 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 1 Oct 2025 17:40:18 +0200 Subject: [PATCH 03/28] rename --- tests/openvino/test_decoder.py | 5 +++-- tests/openvino/utils_tests.py | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 381ab9154d..46984d10d4 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -101,7 +101,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES += SUPPORTED_SSM_ARCHITECTURES if is_transformers_version(">=", "4.46.0"): - SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3", "phi3-moe") + SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3", "phimoe") # openvino 2025.0 required for disabling check_trace if is_openvino_version(">=", "2025.0"): SUPPORTED_ARCHITECTURES += ("deepseek",) @@ -133,6 +133,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.55.0"): SUPPORTED_ARCHITECTURES += ("gpt_oss", "gpt_oss_mxfp4") + GENERATION_LENGTH = 100 REMOTE_CODE_MODELS = ( "chatglm", @@ -210,7 +211,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "glm": 28, "mistral-nemo": 8, "minicpm3": 6, - "phi3-moe": 2, + "phimoe": 2, "deepseek": 2, "opt_gptq": 12, "mixtral_awq": 2, diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 1065e25e16..4c8c7ac717 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -139,7 +139,7 @@ "pix2struct": "fxmarty/pix2struct-tiny-random", "phi": "echarlaix/tiny-random-PhiForCausalLM", "phi3": "Xenova/tiny-random-Phi3ForCausalLM", - "phi3-moe": 
"katuni4ka/phi-3.5-moe-tiny-random", + "phimoe": "katuni4ka/phi-3.5-moe-tiny-random", "phi3_v": "katuni4ka/tiny-random-phi3-vision", "phi4mm": "katuni4ka/tiny-random-phi-4-multimodal", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", @@ -468,7 +468,6 @@ def get_num_sdpa(model): "opt_gptq": "opt", "perceiver_text": "perceiver", "perceiver_vision": "perceiver", - "phi3-moe": "phimoe", "swin-window": "swin", "vit-with-attentions": "vit", "vit-with-hidden-states": "vit", From 252368cf64f265bb385c3f51dbb628032a864579 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 1 Oct 2025 17:43:39 +0200 Subject: [PATCH 04/28] add olmo and smollm3 --- tests/openvino/test_decoder.py | 7 ++++--- tests/openvino/utils_tests.py | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 46984d10d4..5bebcb43f4 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -37,7 +37,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES = ( "aquila", "aquila2", @@ -114,6 +113,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_openvino_version(">=", "2024.6.0") and platform.system() != "Windows": SUPPORTED_ARCHITECTURES += ("mixtral_awq",) + if is_transformers_version(">", "4.47"): + SUPPORTED_ARCHITECTURES += ("olmo2",) + if is_transformers_version(">", "4.49"): SUPPORTED_ARCHITECTURES += ("gemma3_text",) @@ -124,7 +126,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES += ("glm4",) if is_transformers_version(">=", "4.53.0"): - SUPPORTED_ARCHITECTURES += ("arcee",) + SUPPORTED_ARCHITECTURES += ("arcee", "smollm3") if is_transformers_version(">=", "4.54.0"): # remote code models differs after transformers v4.54 @@ -133,7 +135,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.55.0"): SUPPORTED_ARCHITECTURES += ("gpt_oss", 
"gpt_oss_mxfp4") - GENERATION_LENGTH = 100 REMOTE_CODE_MODELS = ( "chatglm", diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 4c8c7ac717..90a93acd2f 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -106,6 +106,7 @@ "llava_next_mistral": "optimum-internal-testing/tiny-random-llava-next-mistral", "llava_next_video": "katuni4ka/tiny-random-llava-next-video", "m2m_100": "hf-internal-testing/tiny-random-m2m_100", + "olmo2": "hf-internal-testing/tiny-random-Olmo2ForCausalLM", "opt": "hf-internal-testing/tiny-random-OPTModel", "opt125m": "facebook/opt-125m", "opt_gptq": "ybelkada/opt-125m-gptq-4bit", @@ -156,6 +157,7 @@ "segformer": "hf-internal-testing/tiny-random-SegformerModel", "sentence-transformers-bert": "sentence-transformers-testing/stsb-bert-tiny-safetensors", "sam": "fxmarty/sam-vit-tiny-random", + "smollm3": "optimum-internal-testing/tiny-random-SmolLM3ForCausalLM", "smolvlm": "katuni4ka/tiny-random-smolvlm2", "speecht5": "hf-internal-testing/tiny-random-SpeechT5ForTextToSpeech", "speech_to_text": "hf-internal-testing/tiny-random-Speech2TextModel", From e8ad17d85364c2c846b69a38e365bb2f945bd8b8 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Wed, 1 Oct 2025 18:47:00 +0200 Subject: [PATCH 05/28] fix --- tests/openvino/test_decoder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 5bebcb43f4..21615a0725 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -242,8 +242,7 @@ def test_find_untested_architectures(self): if len(untested_architectures) > 0: raise ValueError( - f"For the task `{self.TASK}`, the ONNX exporter supports {supported_architectures} but some of them are not " - f"tested: {untested_architectures}.\n" + f"For the task `{self.TASK}`, the OpenVINO exporter supports {untested_architectures} which are not tested" ) # TODO: remove gptq/awq from here From 
f978fce36010c1482f267fa3c6587e1736727ab4 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:10:33 +0200 Subject: [PATCH 06/28] add llama4 test --- tests/openvino/test_decoder.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 21615a0725..123169b7d4 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -20,6 +20,7 @@ from transformers.testing_utils import slow from utils_tests import MODEL_NAMES, get_num_sdpa, mock_torch_cuda_is_available, patch_awq_for_inference +from optimum.exporters.openvino.model_configs import DeepseekOpenVINOConfig from optimum.exporters.openvino.model_patcher import patch_update_causal_mask from optimum.exporters.tasks import TasksManager from optimum.intel import OVModelForCausalLM, OVModelForSequenceClassification @@ -73,6 +74,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "internlm2", "jais", "llama", + "llama4", + "llama4_text", "marian", "mbart", "minicpm", @@ -174,6 +177,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "gpt_neo": 4, "gpt_neox": 5, "llama": 2, + "llama4": 5, + "llama4_text": 2, "marian": 2, "minicpm": 4, "mistral": 2, @@ -238,6 +243,14 @@ def test_find_untested_architectures(self): ov_architectures = set(TasksManager.get_supported_model_type_for_task(task=self.TASK, exporter="openvino")) supported_architectures = ov_architectures & transformers_architectures + if is_transformers_version( + "<", str(DeepseekOpenVINOConfig.MIN_TRANSFORMERS_VERSION) + ) or is_transformers_version(">=", str(DeepseekOpenVINOConfig.MAX_TRANSFORMERS_VERSION)): + if "deepseek_v2" in supported_architectures: + supported_architectures.remove("deepseek_v2") + if "deepseek_v3" in supported_architectures: + supported_architectures.remove("deepseek_v3") + untested_architectures = supported_architectures - tested_architectures if len(untested_architectures) > 0: From 
226df98aeca34b220109cccd991161ffce8a99b3 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:19:05 +0200 Subject: [PATCH 07/28] add min deepseek transformers version --- optimum/exporters/openvino/model_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 56a4b6413b..2b4ae53275 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -3969,6 +3969,7 @@ class M2M100OpenVINOConfig(BartOpenVINOConfig): ) @register_in_tasks_manager("deepseek", *["text-generation", "text-generation-with-past"], library_name="transformers") class DeepseekOpenVINOConfig(MiniCPM3OpenVINOConfig): + MIN_TRANSFORMERS_VERSION = "4.46.0" MAX_TRANSFORMERS_VERSION = "4.54.0" def patch_model_for_export( From 7a8a1d751439a964252dd0b3ee295f28b5a0c970 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:19:52 +0200 Subject: [PATCH 08/28] remove min transformers constraint --- tests/openvino/test_decoder.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 123169b7d4..30aaef8d2a 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -243,9 +243,7 @@ def test_find_untested_architectures(self): ov_architectures = set(TasksManager.get_supported_model_type_for_task(task=self.TASK, exporter="openvino")) supported_architectures = ov_architectures & transformers_architectures - if is_transformers_version( - "<", str(DeepseekOpenVINOConfig.MIN_TRANSFORMERS_VERSION) - ) or is_transformers_version(">=", str(DeepseekOpenVINOConfig.MAX_TRANSFORMERS_VERSION)): + if is_transformers_version(">=", str(DeepseekOpenVINOConfig.MAX_TRANSFORMERS_VERSION)): if "deepseek_v2" in supported_architectures: supported_architectures.remove("deepseek_v2") if "deepseek_v3" in supported_architectures: From 
9b4b2e9cc79f50fdde80dc9ac87fae3ef32a421d Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:20:29 +0200 Subject: [PATCH 09/28] add tiny llama4 model --- tests/openvino/utils_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 90a93acd2f..5356673051 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -101,6 +101,7 @@ "llama": "HuggingFaceM4/tiny-random-LlamaForCausalLM", "llama_awq": "HuggingFaceH4/tiny-random-LlamaForCausalLM", "llama4": "hf-internal-testing/tiny-random-llama4", + "llama4_text": "trl-internal-testing/tiny-Llama4ForCausalLM", "llava": "katuni4ka/tiny-random-llava", "llava_next": "katuni4ka/tiny-random-llava-next", "llava_next_mistral": "optimum-internal-testing/tiny-random-llava-next-mistral", From 9559e2f0175adea5aaa1acfcfc74a975d5daf6f0 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:30:11 +0200 Subject: [PATCH 10/28] add gpt-oss to documentation --- docs/source/openvino/models.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/openvino/models.mdx b/docs/source/openvino/models.mdx index 705eca4b7d..e38a50cad8 100644 --- a/docs/source/openvino/models.mdx +++ b/docs/source/openvino/models.mdx @@ -64,6 +64,7 @@ Here is the list of the supported architectures : - GPT-Neo - GPT-NeoX - GPT-NeoX-Japanese +- GPT-OSS - Gemma - Gemma2 - Gemma3 From 2389be13d1b8294a13be886dad30b335dc360531 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:52:50 +0200 Subject: [PATCH 11/28] remove reorder as it makes things less clear for review --- tests/openvino/test_decoder.py | 64 +++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 30aaef8d2a..d8f9bd8322 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -39,63 +39,63 @@ class 
OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES = ( - "aquila", - "aquila2", - "arctic", + "bart", "baichuan2", "baichuan2-13b", - "bart", + "gpt_bigcode", "bigbird_pegasus", - "biogpt", "blenderbot", "blenderbot-small", "bloom", "chatglm", - "chatglm4", "codegen", "codegen2", - "cohere", - "dbrx", - "decilm", - "exaone", - "falcon", - "falcon-40b", - "gemma", - "gemma2", "gpt2", - "gpt_bigcode", + "gptj", "gpt_neo", "gpt_neox", - "gpt_neox_japanese", - "gptj", - "granite", - "granitemoe", - "internlm", - "internlm2", - "jais", "llama", "llama4", "llama4_text", "marian", - "mbart", "minicpm", "mistral", "mixtral", "mpt", - "olmo", + "mbart", "opt", - "orion", "pegasus", - "persimmon", - "phi", - "phi3", "qwen", - "qwen2", - "qwen2_moe", - "stablelm", - "starcoder2", + "phi", + "internlm2", + "orion", + "falcon", + "falcon-40b", + "persimmon", + "biogpt", + "gpt_neox_japanese", "xglm", + "aquila", + "aquila2", "xverse", + "internlm", + "jais", + "chatglm4", + "decilm", + "gemma", + "olmo", + "stablelm", + "starcoder2", + "dbrx", + "cohere", + "qwen2", + "qwen2_moe", + "arctic", + "phi3", + "gemma2", + "exaone", + "granite", + "granitemoe", ) SUPPORTED_SSM_ARCHITECTURES = ("mamba", "falcon_mamba") From 1819b85367c9794a15405983d9a3ff1843149110 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Thu, 2 Oct 2025 15:56:44 +0200 Subject: [PATCH 12/28] add tests expected sdpa --- tests/openvino/test_decoder.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index d8f9bd8322..8037f63269 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -55,8 +55,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "gpt_neo", "gpt_neox", "llama", - "llama4", - "llama4_text", "marian", "minicpm", "mistral", @@ -123,7 +121,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES += ("gemma3_text",) 
if is_transformers_version(">=", "4.51.0"): - SUPPORTED_ARCHITECTURES += ("qwen3", "qwen3_moe") + SUPPORTED_ARCHITECTURES += ("llama4", "llama4_text", "qwen3", "qwen3_moe") if is_transformers_version(">=", "4.51.3"): SUPPORTED_ARCHITECTURES += ("glm4",) @@ -165,6 +163,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "bart": 2, "baichuan2": 2, "baichuan2-13b": 2, + "bigbird_pegasus": 2, "gpt_bigcode": 5, "blenderbot": 2, "blenderbot-small": 2, @@ -180,10 +179,12 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "llama4": 5, "llama4_text": 2, "marian": 2, + "mbart": 2, "minicpm": 4, "mistral": 2, "mixtral": 2, "mpt": 5, + "olmo2": 2, "opt": 5, "pegasus": 2, "qwen": 2, @@ -204,6 +205,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "olmo": 2, "stablelm": 2, "starcoder2": 2, + "smollm3": 2, "dbrx": 2, "cohere": 2, "qwen2": 2, From 586d2e527298ad2d71cb7b2b6aeac996c3ad6d9a Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 14:08:01 +0200 Subject: [PATCH 13/28] add model doc --- docs/source/openvino/models.mdx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/openvino/models.mdx b/docs/source/openvino/models.mdx index e38a50cad8..cd9d49912e 100644 --- a/docs/source/openvino/models.mdx +++ b/docs/source/openvino/models.mdx @@ -25,6 +25,7 @@ Here is the list of the supported architectures : - Beit - Bert - BioGPT +- BigBirdPegasus - BlenderBot - BlenderBotSmall - Bloom @@ -103,6 +104,7 @@ Here is the list of the supported architectures : - MobileVit - Nystromformer - OLMo +- OLMo 2 - OPT - Orion - Pegasus From 65ce535cba5d1b248b3c6ec1e422c8c4bc58ac97 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 16:09:23 +0200 Subject: [PATCH 14/28] add ov attribute check for tests --- tests/openvino/test_seq2seq.py | 186 ++++++++++++++++----------------- 1 file changed, 93 insertions(+), 93 deletions(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index 
ac3aa7e8bf..073ea1f7bd 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -19,6 +19,7 @@ from tempfile import TemporaryDirectory import numpy as np +import openvino import pytest import requests import torch @@ -76,7 +77,30 @@ def __exit__(self, type, value, traceback): self.elapsed = (time.perf_counter() - self.elapsed) * 1e3 -class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase): +class OVSeq2SeqTestMixin(unittest.TestCase): + SUPPORTED_ARCHITECTURES = None + def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True): + self.assertIsInstance(openvino_model, self.OVMODEL_CLASS) + self.assertIsInstance(openvino_model.config, PretrainedConfig) + self.assertIsInstance(openvino_model.generation_config, GenerationConfig) + + self.assertIsInstance(openvino_model.encoder, OVEncoder) + self.assertIsInstance(openvino_model.decoder, OVDecoder) + self.assertIsInstance(openvino_model.encoder.model, openvino.Model) + self.assertIsInstance(openvino_model.decoder.model, openvino.Model) + + self.assertEqual(openvino_model.use_cache, use_cache) + self.assertEqual(openvino_model.decoder.stateful, stateful) + self.assertEqual(model_has_state(openvino_model.decoder.model), stateful) + + if not stateful and use_cache: + self.assertIsInstance(openvino_model.decoder_with_past, OVDecoder) + self.assertIsInstance(openvino_model.decoder_with_past.model, openvino.Model) + else: + self.assertIsNone(openvino_model.decoder_with_past) + + +class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ( "bart", # "bigbird_pegasus", @@ -89,14 +113,15 @@ class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase): "pegasus", "t5", ) + GENERATION_LENGTH = 100 + SPEEDUP_CACHE = 1.1 + OVMODEL_CLASS = OVModelForSeq2SeqLM + AUTOMODEL_CLASS = AutoModelForSeq2SeqLM if not (is_openvino_version(">=", "2025.3.0") and is_openvino_version("<", "2025.5.0")): # There are known issues with marian model 
on OpenVINO 2025.3.x and 2025.4.x SUPPORTED_ARCHITECTURES += ("marian",) - GENERATION_LENGTH = 100 - SPEEDUP_CACHE = 1.1 - SUPPORT_STATEFUL = ("t5", "mt5") if is_transformers_version(">=", "4.52.0"): SUPPORT_STATEFUL += ("bart", "blenderbot", "blenderbot-small", "m2m_100", "marian", "mbart") @@ -107,22 +132,15 @@ class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase): def test_compare_to_transformers(self, model_arch): model_id = MODEL_NAMES[model_arch] set_seed(SEED) - ov_model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG) - ov_stateless_model = OVModelForSeq2SeqLM.from_pretrained( - model_id, export=True, use_cache=False, stateful=False, ov_config=F32_CONFIG + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, ov_config=F32_CONFIG) + ov_stateless_model = self.OVMODEL_CLASS.from_pretrained( + model_id, use_cache=False, stateful=False, ov_config=F32_CONFIG ) expected_stateful = is_transformers_version(">", "4.46") and model_arch in self.SUPPORT_STATEFUL - self.assertEqual(ov_model.decoder.stateful, expected_stateful) - self.assertEqual(model_has_state(ov_model.decoder.model), expected_stateful) - check_with_past_available = self.assertIsNone if expected_stateful else self.assertIsNotNone - check_with_past_available(ov_model.decoder_with_past) - self.assertIsInstance(ov_model.encoder, OVEncoder) - self.assertIsInstance(ov_model.decoder, OVDecoder) - if not ov_model.decoder.stateful: - self.assertIsInstance(ov_model.decoder_with_past, OVDecoder) - self.assertIsInstance(ov_model.config, PretrainedConfig) - - transformers_model = AutoModelForSeq2SeqLM.from_pretrained(model_id) + self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=expected_stateful) + self.check_openvino_model_attributes(ov_stateless_model, use_cache=False, stateful=False) + + transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) tokens = tokenizer("This is a sample 
input", return_tensors="pt") decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2 @@ -169,7 +187,7 @@ def test_pipeline(self, model_arch): model_id = MODEL_NAMES[model_arch] tokenizer = AutoTokenizer.from_pretrained(model_id) inputs = "This is a test" - model = OVModelForSeq2SeqLM.from_pretrained(model_id, compile=False) + model = self.OVMODEL_CLASS.from_pretrained(model_id, compile=False) model.eval() model.half() model.to("cpu") @@ -206,7 +224,7 @@ def test_pipeline(self, model_arch): @slow def test_generate_utils(self, model_arch): model_id = MODEL_NAMES[model_arch] - model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True) + model = self.OVMODEL_CLASS.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) text = "This is a sample input" tokens = tokenizer(text, return_tensors="pt") @@ -229,15 +247,15 @@ def test_compare_with_and_without_past_key_values(self): tokenizer = AutoTokenizer.from_pretrained(model_id) text = "This is a sample input" tokens = tokenizer(text, return_tensors="pt") + model_with_pkv = self.OVMODEL_CLASS.from_pretrained(model_id, use_cache=True) - model_with_pkv = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, use_cache=True) _ = model_with_pkv.generate(**tokens) # warmup with Timer() as with_pkv_timer: outputs_model_with_pkv = model_with_pkv.generate( **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1 ) - model_without_pkv = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, use_cache=False) + model_without_pkv = self.OVMODEL_CLASS.from_pretrained(model_id, use_cache=False) _ = model_without_pkv.generate(**tokens) # warmup with Timer() as without_pkv_timer: outputs_model_without_pkv = model_without_pkv.generate( @@ -257,9 +275,12 @@ def test_compare_with_and_without_past_key_values(self): gc.collect() -class OVModelForSpeechSeq2SeqIntegrationTest(unittest.TestCase): +class 
OVModelForSpeechSeq2SeqIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ("whisper",) + OVMODEL_CLASS = OVModelForSpeechSeq2Seq + AUTOMODEL_CLASS = AutoModelForSpeechSeq2Seq + def _generate_random_audio_data(self): np.random.seed(10) t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False) @@ -271,19 +292,11 @@ def _generate_random_audio_data(self): def test_compare_to_transformers(self, model_arch): set_seed(SEED) model_id = MODEL_NAMES[model_arch] - transformers_model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id) - ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True, ov_config=F32_CONFIG) - ov_model_stateless = OVModelForSpeechSeq2Seq.from_pretrained( - model_id, export=True, ov_config=F32_CONFIG, stateful=False - ) - self.assertIsInstance(ov_model.config, PretrainedConfig) - # whisper cache class support implemented in 4.43 - expected_stateful = True - self.assertEqual(ov_model.decoder.stateful, expected_stateful) - self.assertEqual(model_has_state(ov_model.decoder.model), expected_stateful) - check_with_past_available = self.assertIsNone if expected_stateful else self.assertIsNotNone - check_with_past_available(ov_model.decoder_with_past) - + transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, ov_config=F32_CONFIG) + ov_model_stateless = self.OVMODEL_CLASS.from_pretrained(model_id, ov_config=F32_CONFIG, stateful=False) + self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) + self.check_openvino_model_attributes(ov_model_stateless, use_cache=True, stateful=False) processor = get_preprocessor(model_id) data = self._generate_random_audio_data() pt_features = processor.feature_extractor(data, return_tensors="pt") @@ -342,7 +355,7 @@ def test_compare_to_transformers(self, model_arch): def test_pipeline(self, model_arch): set_seed(SEED) model_id = MODEL_NAMES[model_arch] - model = 
OVModelForSpeechSeq2Seq.from_pretrained(model_id) + model = self.OVMODEL_CLASS.from_pretrained(model_id) processor = get_preprocessor(model_id) pipe = pipeline( "automatic-speech-recognition", @@ -363,9 +376,10 @@ def test_pipeline(self, model_arch): gc.collect() -class OVModelForVision2SeqIntegrationTest(unittest.TestCase): +class OVModelForVision2SeqIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ["vision-encoder-decoder", "trocr", "donut"] - + OVMODEL_CLASS = OVModelForVision2Seq + AUTOMODEL_CLASS = AutoModelForVision2Seq GENERATION_LENGTH = 100 SPEEDUP_CACHE = 1.1 @@ -382,7 +396,7 @@ def _get_preprocessors(self, model_id): def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: - _ = OVModelForVision2Seq.from_pretrained(MODEL_NAMES["bert"], export=True) + _ = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) self.assertIn("only supports the tasks", str(context.exception)) @@ -391,7 +405,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self): @slow def test_generate_utils(self, model_arch: str): model_id = MODEL_NAMES[model_arch] - model = OVModelForVision2Seq.from_pretrained(model_id, export=True) + model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True) feature_extractor, tokenizer = self._get_preprocessors(model_id) data = self._get_sample_image() @@ -406,17 +420,12 @@ def test_generate_utils(self, model_arch: str): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_compare_to_transformers(self, model_arch: str): model_id = MODEL_NAMES[model_arch] - ov_model = OVModelForVision2Seq.from_pretrained(model_id, export=True) - - self.assertIsInstance(ov_model.encoder, OVEncoder) + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id) - self.assertIsInstance(ov_model.decoder, OVDecoder) - self.assertIsInstance(ov_model.decoder_with_past, OVDecoder) - - self.assertIsInstance(ov_model.config, PretrainedConfig) + 
self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=False) set_seed(SEED) - transformers_model = AutoModelForVision2Seq.from_pretrained(model_id) + transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) feature_extractor, tokenizer = self._get_preprocessors(model_id) data = self._get_sample_image() @@ -449,7 +458,7 @@ def test_compare_to_transformers(self, model_arch: str): def test_pipeline(self, model_arch: str): set_seed(SEED) model_id = MODEL_NAMES[model_arch] - ov_model = OVModelForVision2Seq.from_pretrained(model_id, compile=False) + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, compile=False) feature_extractor, tokenizer = self._get_preprocessors(model_id) ov_model.reshape(1, -1) ov_model.compile() @@ -472,7 +481,7 @@ def test_pipeline(self, model_arch: str): gc.collect() -class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase): +class OVModelForVisualCausalLMIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = [ "internvl_chat", "llava", @@ -486,6 +495,7 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase): ] SUPPORT_VIDEO = ["llava_next_video", "qwen2_vl"] SUPPORT_AUDIO = [] + OVMODEL_CLASS = OVModelForVisualCausalLM if is_transformers_version(">=", "4.46.0"): SUPPORTED_ARCHITECTURES += ["maira2", "idefics3"] @@ -563,29 +573,27 @@ def test_compare_to_transformers(self, model_arch): set_seed(SEED) loading_kwargs = {} + trust_remote_code = model_arch in self.REMOTE_CODE_MODELS if "llama4" in model_arch: loading_kwargs = {"_attn_implementation": "sdpa"} transformers_model = self.get_transformer_model_class(model_arch).from_pretrained( - model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS, **loading_kwargs + model_id, trust_remote_code=trust_remote_code, **loading_kwargs ) transformers_model.eval() if "internvl_chat" in model_arch: - tokenizer = AutoTokenizer.from_pretrained( - model_id, trast_remote_code=model_arch in self.REMOTE_CODE_MODELS - ) + tokenizer = 
AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code)
             img_context_token_id = tokenizer.convert_tokens_to_ids("<IMG_CONTEXT>")
             transformers_model.img_context_token_id = img_context_token_id
         if "llava-qwen2" in model_arch:
             transformers_model.get_vision_tower().load_model()
         preprocessors = self.get_preprocessors(model_arch)
         set_seed(SEED)
-        ov_model = OVModelForVisualCausalLM.from_pretrained(
-            model_id, export=True, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS, compile=False
-        )
+        ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, trust_remote_code=trust_remote_code, compile=False)
         self.assertIsInstance(ov_model, MODEL_TYPE_TO_CLS_MAPPING[ov_model.config.model_type])
         for component_name, component in ov_model.components.items():
             self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
-        self.assertIsInstance(ov_model.config, PretrainedConfig)
+
+        self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=True)
 
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         transformers_inputs = copy.deepcopy(inputs)
@@ -722,7 +730,8 @@ def test_compare_to_transformers(self, model_arch):
     def test_llava_with_new_preprocessing(self, model_arch):
         prompt = "<image>\n What is shown in this image?"
         
model_id = MODEL_NAMES[model_arch] - config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) + trust_remote_code = model_arch in self.REMOTE_CODE_MODELS + config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code) processor = AutoProcessor.from_pretrained( model_id, patch_size=config.vision_config.patch_size, @@ -731,9 +740,7 @@ def test_llava_with_new_preprocessing(self, model_arch): num_additional_image_tokens=1, ) transformers_model = self.get_transformer_model_class(model_arch).from_pretrained(model_id) - ov_model = OVModelForVisualCausalLM.from_pretrained( - model_id, export=True, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS - ) + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, trust_remote_code=trust_remote_code) self.assertTrue(ov_model._support_new_processing) self.assertTrue(processor.patch_size is not None) self.assertTrue(processor.vision_feature_select_strategy is not None) @@ -776,11 +783,10 @@ def test_llava_with_new_preprocessing(self, model_arch): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_generate_utils(self, model_arch): model_id = MODEL_NAMES[model_arch] - model = OVModelForVisualCausalLM.from_pretrained( - model_id, export=True, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS - ) + trust_remote_code = model_arch in self.REMOTE_CODE_MODELS + model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, trust_remote_code=trust_remote_code) - tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS) + tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=trust_remote_code) question = "Describe image" preprocessors = self.get_preprocessors(model_arch) inputs = model.preprocess_inputs(**preprocessors, text=question, image=self.IMAGE.resize((600, 600))) @@ -871,18 +877,20 @@ def get_preprocessors(self, model_arch): def test_model_can_be_loaded_after_saving(self, 
model_arch): model_id = MODEL_NAMES[model_arch] with TemporaryDirectory() as save_dir: - ov_model = OVModelForVisualCausalLM.from_pretrained( + ov_model = self.OVMODEL_CLASS.from_pretrained( model_id, compile=False, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS ) ov_model.save_pretrained(save_dir) - ov_restored_model = OVModelForVisualCausalLM.from_pretrained( + ov_restored_model = self.OVMODEL_CLASS.from_pretrained( save_dir, compile=False, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS ) self.assertIsInstance(ov_restored_model, type(ov_model)) -class OVModelForTextToSpeechSeq2SeqIntegrationTest(unittest.TestCase): +class OVModelForTextToSpeechSeq2SeqIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ("speecht5",) + OVMODEL_CLASS = OVModelForTextToSpeechSeq2Seq + AUTOMODEL_CLASS = AutoModelForTextToSpectrogram def _generate_text(self): return "This text is converted to speech using OpenVINO backend" @@ -898,12 +906,6 @@ def _get_processor(self, model_id, model_arch): else: raise Exception("{} unknown processor for text-to-speech".format(model_arch)) - def _get_model(self, model_id, model_arch): - if model_arch == "speecht5": - return AutoModelForTextToSpectrogram.from_pretrained(model_id) - else: - raise Exception("{} unknown model for text-to-speech".format(model_arch)) - def _get_vocoder(self, vocoder_id, model_arch): if model_arch == "speecht5": from transformers import SpeechT5HifiGan @@ -925,7 +927,9 @@ def test_compare_to_transformers(self, model_arch): # generate model classes for reference generation vocoder_id = "fxmarty/speecht5-hifigan-tiny" processor = self._get_processor(model_id, model_arch) - model = self._get_model(model_id, model_arch) + + model = self.AUTOMODEL_CLASS.from_pretrained(model_id) + vocoder = self._get_vocoder(vocoder_id, model_arch) inputs = processor(text=text_data, return_tensors="pt") ref_speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder) @@ -933,11 +937,9 @@ def 
test_compare_to_transformers(self, model_arch): else: raise Exception("{} unknown model for text-to-speech".format(model_arch)) - ov_pipe = OVModelForTextToSpeechSeq2Seq.from_pretrained(model_id, vocoder=vocoder_id) - ov_speech = ov_pipe.generate(input_ids=inputs["input_ids"], speaker_embeddings=speaker_embeddings) - - self.assertIsInstance(ov_pipe.config, PretrainedConfig) - self.assertTrue(model_has_state(ov_pipe.decoder.model)) + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, vocoder=vocoder_id) + ov_speech = ov_model.generate(input_ids=inputs["input_ids"], speaker_embeddings=speaker_embeddings) + self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) self.assertTrue(torch.allclose(ov_speech, ref_speech, atol=1e-3)) del vocoder @@ -946,10 +948,11 @@ def test_compare_to_transformers(self, model_arch): gc.collect() -class OVModelForPix2StructIntegrationTest(unittest.TestCase): +class OVModelForPix2StructIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ["pix2struct"] TASK = "image-to-text" # is it fine as well with visual-question-answering? - + OVMODEL_CLASS = OVModelForPix2Struct + AUTOMODEL_CLASS = Pix2StructForConditionalGeneration GENERATION_LENGTH = 100 SPEEDUP_CACHE = 1.1 @@ -964,15 +967,12 @@ class OVModelForPix2StructIntegrationTest(unittest.TestCase): def test_compare_to_transformers(self, model_arch): model_id = MODEL_NAMES[model_arch] set_seed(SEED) - ov_model = OVModelForPix2Struct.from_pretrained(model_id, export=True, ov_config=F32_CONFIG) + ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, ov_config=F32_CONFIG) - self.assertIsInstance(ov_model.encoder, OVEncoder) - self.assertIsInstance(ov_model.decoder, OVDecoder) - self.assertIsInstance(ov_model.decoder_with_past, OVDecoder) - self.assertIsInstance(ov_model.config, PretrainedConfig) + self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=False) question = "Who am I?" 
- transformers_model = Pix2StructForConditionalGeneration.from_pretrained(model_id) + transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) preprocessor = get_preprocessor(model_id) inputs = preprocessor(images=self.IMAGE, text=question, padding=True, return_tensors="pt") @@ -993,7 +993,7 @@ def test_compare_to_transformers(self, model_arch): @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_generate_utils(self, model_arch): model_id = MODEL_NAMES[model_arch] - model = OVModelForPix2Struct.from_pretrained(model_id, export=True) + model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True) preprocessor = get_preprocessor(model_id) question = "Who am I?" inputs = preprocessor(images=self.IMAGE, text=question, return_tensors="pt") @@ -1012,14 +1012,14 @@ def test_compare_with_and_without_past_key_values(self): question = "Who am I?" inputs = preprocessor(images=self.IMAGE, text=question, return_tensors="pt") - model_with_pkv = OVModelForPix2Struct.from_pretrained(model_id, export=True, use_cache=True) + model_with_pkv = self.OVMODEL_CLASS.from_pretrained(model_id, use_cache=True) _ = model_with_pkv.generate(**inputs) # warmup with Timer() as with_pkv_timer: outputs_model_with_pkv = model_with_pkv.generate( **inputs, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1 ) - model_without_pkv = OVModelForPix2Struct.from_pretrained(model_id, export=True, use_cache=False) + model_without_pkv = self.OVMODEL_CLASS.from_pretrained(model_id, use_cache=False) _ = model_without_pkv.generate(**inputs) # warmup with Timer() as without_pkv_timer: outputs_model_without_pkv = model_without_pkv.generate( From 34ec242eac4f6bc01e4d020432d3ec288b9289de Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 16:11:15 +0200 Subject: [PATCH 15/28] style --- tests/openvino/test_seq2seq.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py 
index 073ea1f7bd..79bd03f45e 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -79,6 +79,7 @@ def __exit__(self, type, value, traceback): class OVSeq2SeqTestMixin(unittest.TestCase): SUPPORTED_ARCHITECTURES = None + def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True): self.assertIsInstance(openvino_model, self.OVMODEL_CLASS) self.assertIsInstance(openvino_model.config, PretrainedConfig) @@ -89,16 +90,16 @@ def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True self.assertIsInstance(openvino_model.encoder.model, openvino.Model) self.assertIsInstance(openvino_model.decoder.model, openvino.Model) - self.assertEqual(openvino_model.use_cache, use_cache) - self.assertEqual(openvino_model.decoder.stateful, stateful) - self.assertEqual(model_has_state(openvino_model.decoder.model), stateful) - if not stateful and use_cache: self.assertIsInstance(openvino_model.decoder_with_past, OVDecoder) self.assertIsInstance(openvino_model.decoder_with_past.model, openvino.Model) else: self.assertIsNone(openvino_model.decoder_with_past) + self.assertEqual(openvino_model.use_cache, use_cache) + self.assertEqual(openvino_model.decoder.stateful, stateful) + self.assertEqual(model_has_state(openvino_model.decoder.model), stateful) + class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ( From 9c307903ddfc66b4aac6cc3908f3582a8e4607c2 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 16:45:23 +0200 Subject: [PATCH 16/28] style --- tests/openvino/test_seq2seq.py | 84 ++++++++++++++++++++++++++-------- tests/openvino/utils_tests.py | 1 + 2 files changed, 66 insertions(+), 19 deletions(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index 79bd03f45e..2c779718df 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -42,6 +42,7 @@ pipeline, set_seed, ) +from 
transformers.models.auto.configuration_auto import CONFIG_MAPPING_NAMES from transformers.onnx.utils import get_preprocessor from transformers.testing_utils import slow from transformers.utils import http_user_agent @@ -49,6 +50,7 @@ from optimum.exporters.openvino.model_patcher import patch_update_causal_mask from optimum.exporters.openvino.stateful import model_has_state +from optimum.exporters.tasks import TasksManager from optimum.intel import ( OVModelForPix2Struct, OVModelForSeq2SeqLM, @@ -58,6 +60,12 @@ OVModelForVisualCausalLM, ) from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder +from optimum.intel.openvino.modeling_text2speech import ( + OVTextToSpeechDecoder, + OVTextToSpeechEncoder, + OVTextToSpeechPostNet, + OVTextToSpeechVocoder, +) from optimum.intel.openvino.modeling_visual_language import MODEL_PARTS_CLS_MAPPING, MODEL_TYPE_TO_CLS_MAPPING from optimum.intel.pipelines import pipeline as optimum_pipeline from optimum.intel.utils.import_utils import is_openvino_version, is_transformers_version @@ -100,24 +108,46 @@ def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True self.assertEqual(openvino_model.decoder.stateful, stateful) self.assertEqual(model_has_state(openvino_model.decoder.model), stateful) + def _test_find_untested_architectures(self): + if len(self.SUPPORTED_ARCHITECTURES) != len(set(self.SUPPORTED_ARCHITECTURES)): + raise ValueError( + f"For the task `{self.TASK}`, some architectures are duplicated in the list of tested architectures: " + f"{self.SUPPORTED_ARCHITECTURES}.\n" + ) + + tested_architectures = set(self.SUPPORTED_ARCHITECTURES) + transformers_architectures = set(CONFIG_MAPPING_NAMES.keys()) + ov_architectures = set(TasksManager.get_supported_model_type_for_task(task=self.TASK, exporter="openvino")) + supported_architectures = ov_architectures & transformers_architectures + + untested_architectures = supported_architectures - tested_architectures + + if len(untested_architectures) 
> 0: + raise ValueError( + f"For the task `{self.TASK}`, the OpenVINO exporter supports {untested_architectures} which are not tested" + ) + class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ( "bart", - # "bigbird_pegasus", + "bigbird_pegasus", "blenderbot", "blenderbot-small", - # "longt5", + "encoder-decoder", + "longt5", "m2m_100", + "marian", "mbart", "mt5", "pegasus", "t5", ) - GENERATION_LENGTH = 100 - SPEEDUP_CACHE = 1.1 OVMODEL_CLASS = OVModelForSeq2SeqLM AUTOMODEL_CLASS = AutoModelForSeq2SeqLM + TASK = "text2text-generation" + GENERATION_LENGTH = 100 + SPEEDUP_CACHE = 1.1 if not (is_openvino_version(">=", "2025.3.0") and is_openvino_version("<", "2025.5.0")): # There are known issues with marian model on OpenVINO 2025.3.x and 2025.4.x @@ -129,6 +159,9 @@ class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin): if is_transformers_version(">=", "4.53.0"): SUPPORT_STATEFUL += ("pegasus",) + def test_find_untested_architectures(self): + self._test_find_untested_architectures() + @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_compare_to_transformers(self, model_arch): model_id = MODEL_NAMES[model_arch] @@ -278,9 +311,9 @@ def test_compare_with_and_without_past_key_values(self): class OVModelForSpeechSeq2SeqIntegrationTest(OVSeq2SeqTestMixin): SUPPORTED_ARCHITECTURES = ("whisper",) - OVMODEL_CLASS = OVModelForSpeechSeq2Seq AUTOMODEL_CLASS = AutoModelForSpeechSeq2Seq + TASK = "automatic-speech-recognition" def _generate_random_audio_data(self): np.random.seed(10) @@ -916,6 +949,23 @@ def _get_vocoder(self, vocoder_id, model_arch): else: raise Exception("{} unknown model for text-to-speech".format(model_arch)) + def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True): + self.assertIsInstance(openvino_model, self.OVMODEL_CLASS) + self.assertIsInstance(openvino_model.config, PretrainedConfig) + self.assertIsInstance(openvino_model.generation_config, GenerationConfig) + + 
self.assertIsInstance(openvino_model.encoder, OVTextToSpeechEncoder) + self.assertIsInstance(openvino_model.decoder, OVTextToSpeechDecoder) + self.assertIsInstance(openvino_model.postnet, OVTextToSpeechPostNet) + self.assertIsInstance(openvino_model.vocoder, OVTextToSpeechVocoder) + self.assertIsInstance(openvino_model.encoder.model, openvino.Model) + self.assertIsInstance(openvino_model.decoder.model, openvino.Model) + self.assertIsInstance(openvino_model.postnet.model, openvino.Model) + self.assertIsInstance(openvino_model.vocoder.model, openvino.Model) + + self.assertEqual(openvino_model.use_cache, use_cache) + self.assertEqual(model_has_state(openvino_model.decoder.model), use_cache) + @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_compare_to_transformers(self, model_arch): set_seed(SEED) @@ -923,24 +973,20 @@ def test_compare_to_transformers(self, model_arch): speaker_embeddings = self._generate_speaker_embedding() model_id = MODEL_NAMES[model_arch] - if model_arch == "speecht5": - # since Auto class for text-to-audio is not implemented in optimum - # generate model classes for reference generation - vocoder_id = "fxmarty/speecht5-hifigan-tiny" - processor = self._get_processor(model_id, model_arch) - - model = self.AUTOMODEL_CLASS.from_pretrained(model_id) + # since Auto class for text-to-audio is not implemented in optimum + # generate model classes for reference generation + vocoder_id = "fxmarty/speecht5-hifigan-tiny" + processor = self._get_processor(model_id, model_arch) + vocoder = self._get_vocoder(vocoder_id, model_arch) + model = self.AUTOMODEL_CLASS.from_pretrained(model_id) - vocoder = self._get_vocoder(vocoder_id, model_arch) - inputs = processor(text=text_data, return_tensors="pt") - ref_speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder) - ref_speech = ref_speech.unsqueeze(0) if ref_speech.dim() == 1 else ref_speech - else: - raise Exception("{} unknown model for text-to-speech".format(model_arch)) + 
inputs = processor(text=text_data, return_tensors="pt") + ref_speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder) + ref_speech = ref_speech.unsqueeze(0) if ref_speech.dim() == 1 else ref_speech ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, vocoder=vocoder_id) ov_speech = ov_model.generate(input_ids=inputs["input_ids"], speaker_embeddings=speaker_embeddings) - self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) + self.check_openvino_model_attributes(ov_model, use_cache=True) self.assertTrue(torch.allclose(ov_speech, ref_speech, atol=1e-3)) del vocoder diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 90ab9ddff4..815010f69d 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -66,6 +66,7 @@ "donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel", "detr": "hf-internal-testing/tiny-random-DetrModel", "electra": "hf-internal-testing/tiny-random-electra", + "encoder-decoder": "optimum-internal-testing/tiny-random-encoder-decoder-gpt2-bert", "esm": "hf-internal-testing/tiny-random-EsmModel", "exaone": "katuni4ka/tiny-random-exaone", "gemma": "fxmarty/tiny-random-GemmaForCausalLM", From 206246e1a14515beda989e815cc5ea83d0a60653 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 17:21:44 +0200 Subject: [PATCH 17/28] add vlm check_openvino_model_attributes --- tests/openvino/test_seq2seq.py | 39 ++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index 2c779718df..b3578a60e0 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -88,7 +88,7 @@ def __exit__(self, type, value, traceback): class OVSeq2SeqTestMixin(unittest.TestCase): SUPPORTED_ARCHITECTURES = None - def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True): + def 
_check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True): self.assertIsInstance(openvino_model, self.OVMODEL_CLASS) self.assertIsInstance(openvino_model.config, PretrainedConfig) self.assertIsInstance(openvino_model.generation_config, GenerationConfig) @@ -171,8 +171,8 @@ def test_compare_to_transformers(self, model_arch): model_id, use_cache=False, stateful=False, ov_config=F32_CONFIG ) expected_stateful = is_transformers_version(">", "4.46") and model_arch in self.SUPPORT_STATEFUL - self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=expected_stateful) - self.check_openvino_model_attributes(ov_stateless_model, use_cache=False, stateful=False) + self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=expected_stateful) + self._check_openvino_model_attributes(ov_stateless_model, use_cache=False, stateful=False) transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) tokenizer = AutoTokenizer.from_pretrained(model_id) @@ -329,8 +329,8 @@ def test_compare_to_transformers(self, model_arch): transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, ov_config=F32_CONFIG) ov_model_stateless = self.OVMODEL_CLASS.from_pretrained(model_id, ov_config=F32_CONFIG, stateful=False) - self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) - self.check_openvino_model_attributes(ov_model_stateless, use_cache=True, stateful=False) + self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) + self._check_openvino_model_attributes(ov_model_stateless, use_cache=True, stateful=False) processor = get_preprocessor(model_id) data = self._generate_random_audio_data() pt_features = processor.feature_extractor(data, return_tensors="pt") @@ -456,7 +456,7 @@ def test_compare_to_transformers(self, model_arch: str): model_id = MODEL_NAMES[model_arch] ov_model = 
self.OVMODEL_CLASS.from_pretrained(model_id) - self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=False) + self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=False) set_seed(SEED) transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) @@ -600,6 +600,20 @@ def _check_device_and_request(self, ov_model, expected_device, has_request): self.assertEqual(component._device, expected_device) request_check_fn(component.request is None) + def _check_openvino_model_attributes(self, openvino_model, use_cache: bool = True, stateful: bool = True): + self.assertIsInstance(openvino_model, self.OVMODEL_CLASS) + self.assertIsInstance(openvino_model.config, PretrainedConfig) + self.assertIsInstance(openvino_model.generation_config, GenerationConfig) + self.assertIsInstance(openvino_model, MODEL_TYPE_TO_CLS_MAPPING[openvino_model.config.model_type]) + + for component_name, component in openvino_model.components.items(): + self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name]) + self.assertIsInstance(component.model, openvino.Model) + + self.assertEqual(openvino_model.use_cache, use_cache) + self.assertEqual(openvino_model.language_model.stateful, stateful) + self.assertEqual(model_has_state(openvino_model.language_model.model), stateful) + @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_compare_to_transformers(self, model_arch): prompt = "What is shown in this image?" 
@@ -623,13 +637,10 @@ def test_compare_to_transformers(self, model_arch): preprocessors = self.get_preprocessors(model_arch) set_seed(SEED) ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, trust_remote_code=trust_remote_code, compile=False) - self.assertIsInstance(ov_model, MODEL_TYPE_TO_CLS_MAPPING[ov_model.config.model_type]) - for component_name, component in ov_model.components.items(): - self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name]) - self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) + self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) - inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600))) + inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((100, 100))) transformers_inputs = copy.deepcopy(inputs) # llama4 preprocessing force bf16 dtype for pixel_values, that does not work on CPU with fp32 model # if past key values are not initialized, llama4 creates HybridCache with bf16 precision @@ -949,7 +960,7 @@ def _get_vocoder(self, vocoder_id, model_arch): else: raise Exception("{} unknown model for text-to-speech".format(model_arch)) - def check_openvino_model_attributes(self, openvino_model, use_cache: bool = True): + def _check_openvino_model_attributes(self, openvino_model, use_cache: bool = True): self.assertIsInstance(openvino_model, self.OVMODEL_CLASS) self.assertIsInstance(openvino_model.config, PretrainedConfig) self.assertIsInstance(openvino_model.generation_config, GenerationConfig) @@ -986,7 +997,7 @@ def test_compare_to_transformers(self, model_arch): ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, vocoder=vocoder_id) ov_speech = ov_model.generate(input_ids=inputs["input_ids"], speaker_embeddings=speaker_embeddings) - self.check_openvino_model_attributes(ov_model, use_cache=True) + self._check_openvino_model_attributes(ov_model, use_cache=True) 
self.assertTrue(torch.allclose(ov_speech, ref_speech, atol=1e-3)) del vocoder @@ -1016,7 +1027,7 @@ def test_compare_to_transformers(self, model_arch): set_seed(SEED) ov_model = self.OVMODEL_CLASS.from_pretrained(model_id, export=True, ov_config=F32_CONFIG) - self.check_openvino_model_attributes(ov_model, use_cache=True, stateful=False) + self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=False) question = "Who am I?" transformers_model = self.AUTOMODEL_CLASS.from_pretrained(model_id) From a6035e96e502fe54b80c50e225b321a6551a7c57 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 18:28:57 +0200 Subject: [PATCH 18/28] set back img size --- tests/openvino/test_seq2seq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index b3578a60e0..9bdfaf6ad9 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -640,7 +640,7 @@ def test_compare_to_transformers(self, model_arch): self._check_openvino_model_attributes(ov_model, use_cache=True, stateful=True) - inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((100, 100))) + inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600))) transformers_inputs = copy.deepcopy(inputs) # llama4 preprocessing force bf16 dtype for pixel_values, that does not work on CPU with fp32 model # if past key values are not initialized, llama4 creates HybridCache with bf16 precision From a2cbaede2c99ce301e782997d18eeb73f4606ade Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 18:40:23 +0200 Subject: [PATCH 19/28] fix longt5 --- tests/openvino/test_seq2seq.py | 2 +- tests/openvino/utils_tests.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index 9bdfaf6ad9..b29c96c873 100644 --- a/tests/openvino/test_seq2seq.py +++ 
b/tests/openvino/test_seq2seq.py @@ -153,7 +153,7 @@ class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin): # There are known issues with marian model on OpenVINO 2025.3.x and 2025.4.x SUPPORTED_ARCHITECTURES += ("marian",) - SUPPORT_STATEFUL = ("t5", "mt5") + SUPPORT_STATEFUL = ("t5", "mt5", "longt5") if is_transformers_version(">=", "4.52.0"): SUPPORT_STATEFUL += ("bart", "blenderbot", "blenderbot-small", "m2m_100", "marian", "mbart") if is_transformers_version(">=", "4.53.0"): diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 815010f69d..0230e5de6e 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -98,7 +98,7 @@ "internvl_chat": "katuni4ka/tiny-random-internvl2", "jais": "katuni4ka/tiny-random-jais", "levit": "hf-internal-testing/tiny-random-LevitModel", - "longt5": "hf-internal-testing/tiny-random-longt5", + "longt5": "hf-internal-testing/tiny-random-LongT5Model", "llama": "HuggingFaceM4/tiny-random-LlamaForCausalLM", "llama_awq": "HuggingFaceH4/tiny-random-LlamaForCausalLM", "llama4": "hf-internal-testing/tiny-random-llama4", From 11cf624b61e7b84b65c21cc45b8547e6eff06226 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Fri, 3 Oct 2025 19:02:46 +0200 Subject: [PATCH 20/28] marian should not be tested depednign on openvino version --- tests/openvino/test_seq2seq.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index b29c96c873..a2676b151c 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -85,6 +85,12 @@ def __exit__(self, type, value, traceback): self.elapsed = (time.perf_counter() - self.elapsed) * 1e3 +MODEL_NOT_TESTED = set() + +if is_openvino_version(">=", "2025.3.0") and is_openvino_version("<", "2025.5.0"): + MODEL_NOT_TESTED = {"marian"} + + class OVSeq2SeqTestMixin(unittest.TestCase): SUPPORTED_ARCHITECTURES = None @@ -122,7 +128,7 @@ def 
_test_find_untested_architectures(self): untested_architectures = supported_architectures - tested_architectures - if len(untested_architectures) > 0: + if len(untested_architectures - MODEL_NOT_TESTED) > 0: raise ValueError( f"For the task `{self.TASK}`, the OpenVINO exporter supports {untested_architectures} which are not tested" ) @@ -137,7 +143,6 @@ class OVModelForSeq2SeqLMIntegrationTest(OVSeq2SeqTestMixin): "encoder-decoder", "longt5", "m2m_100", - "marian", "mbart", "mt5", "pegasus", From 76b3405cae9984432475e49e23133efd229fe0a8 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 6 Oct 2025 15:24:59 +0200 Subject: [PATCH 21/28] add untested architectures for vision2seq models --- tests/openvino/test_seq2seq.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index a2676b151c..314c191795 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -416,7 +416,16 @@ def test_pipeline(self, model_arch): class OVModelForVision2SeqIntegrationTest(OVSeq2SeqTestMixin): - SUPPORTED_ARCHITECTURES = ["vision-encoder-decoder", "trocr", "donut"] + + SUPPORTED_ARCHITECTURES = [ + "donut", + "got_ocr2", + "pix2struct", + "trocr", + "vision-encoder-decoder", + ] + + TASK = "image-to-text" OVMODEL_CLASS = OVModelForVision2Seq AUTOMODEL_CLASS = AutoModelForVision2Seq GENERATION_LENGTH = 100 @@ -433,6 +442,10 @@ def _get_preprocessors(self, model_id): return image_processor, tokenizer + def test_find_untested_architectures(self): + self._test_find_untested_architectures() + + def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: _ = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) From a44664e51fd707682ee737d26857892e3cc5da84 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 6 Oct 2025 17:49:56 +0200 Subject: [PATCH 22/28] phi4_multimodal --- 
optimum/exporters/openvino/model_configs.py | 12 +++++++++--- tests/openvino/test_seq2seq.py | 9 +++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 2bc69c00b8..c72e2a9df8 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -2984,9 +2984,6 @@ class Phi4MMConfigBehavior(str, enum.Enum): @register_in_tasks_manager( "phi4mm", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers" ) -@register_in_tasks_manager( - "phi4_multimodal", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers" -) class Phi4MMOpenVINOConfig(BaseVLMOpenVINOConfig): SUPPORTED_BEHAVIORS = [model_type.value for model_type in Phi4MMConfigBehavior] NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig @@ -3210,6 +3207,15 @@ def rename_ambiguous_inputs(self, inputs): return inputs + +@register_in_tasks_manager( + "phi4_multimodal", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers" + ) +class Phi4MultimodalOpenVINOConfig(Phi4MMOpenVINOConfig): + MIN_TRANSFORMERS_VERSION = "4.51.0" + MAX_TRANSFORMERS_VERSION = "4.60.0" + + class DummyQwen2VLLMInputGenerator(DummyTextInputGenerator): def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): generated_input = super().generate(input_name, framework, int_dtype, float_dtype) diff --git a/tests/openvino/test_seq2seq.py b/tests/openvino/test_seq2seq.py index 314c191795..01ed7308bb 100644 --- a/tests/openvino/test_seq2seq.py +++ b/tests/openvino/test_seq2seq.py @@ -416,7 +416,6 @@ def test_pipeline(self, model_arch): class OVModelForVision2SeqIntegrationTest(OVSeq2SeqTestMixin): - SUPPORTED_ARCHITECTURES = [ "donut", "got_ocr2", @@ -445,7 +444,6 @@ def _get_preprocessors(self, model_id): def test_find_untested_architectures(self): 
self._test_find_untested_architectures() - def test_load_vanilla_transformers_which_is_not_supported(self): with self.assertRaises(Exception) as context: _ = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES["bert"], export=True) @@ -548,6 +546,7 @@ class OVModelForVisualCausalLMIntegrationTest(OVSeq2SeqTestMixin): SUPPORT_VIDEO = ["llava_next_video", "qwen2_vl"] SUPPORT_AUDIO = [] OVMODEL_CLASS = OVModelForVisualCausalLM + TASK = "image-text-to-text" if is_transformers_version(">=", "4.46.0"): SUPPORTED_ARCHITECTURES += ["maira2", "idefics3"] @@ -559,13 +558,12 @@ class OVModelForVisualCausalLMIntegrationTest(OVSeq2SeqTestMixin): if is_transformers_version(">", "4.49"): SUPPORTED_ARCHITECTURES += ["gemma3", "smolvlm"] if is_transformers_version(">=", "4.51"): - SUPPORTED_ARCHITECTURES += ["llama4"] + SUPPORTED_ARCHITECTURES += ["llama4", "phi4_multimodal"] if is_transformers_version(">=", "4.54.0"): # remote code models differs after transformers v4.54 SUPPORTED_ARCHITECTURES = set(SUPPORTED_ARCHITECTURES) - {"llava-qwen2", "phi3_v", "phi4mm"} - TASK = "image-text-to-text" REMOTE_CODE_MODELS = ["internvl_chat", "minicpmv", "llava-qwen2", "phi3_v", "maira2", "phi4mm"] IMAGE = Image.open( @@ -632,6 +630,9 @@ def _check_openvino_model_attributes(self, openvino_model, use_cache: bool = Tru self.assertEqual(openvino_model.language_model.stateful, stateful) self.assertEqual(model_has_state(openvino_model.language_model.model), stateful) + def test_find_untested_architectures(self): + self._test_find_untested_architectures() + @parameterized.expand(SUPPORTED_ARCHITECTURES) def test_compare_to_transformers(self, model_arch): prompt = "What is shown in this image?" 
From 21689cb88919f686adb57dd5e646fbfda86a3a22 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 6 Oct 2025 17:50:16 +0200 Subject: [PATCH 23/28] add phi4_multimodal tiny model for test --- tests/openvino/utils_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 0230e5de6e..b58a29276d 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -145,6 +145,7 @@ "phimoe": "katuni4ka/phi-3.5-moe-tiny-random", "phi3_v": "katuni4ka/tiny-random-phi3-vision", "phi4mm": "katuni4ka/tiny-random-phi-4-multimodal", + "phi4_multimodal": "echarlaix/tiny-random-phi-4-multimodal", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "qwen": "katuni4ka/tiny-random-qwen", "qwen2": "fxmarty/tiny-dummy-qwen2", From 6c4f99d23a0fa66244167d23259d90e5087e8a72 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 6 Oct 2025 17:51:58 +0200 Subject: [PATCH 24/28] style --- optimum/exporters/openvino/model_configs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index c72e2a9df8..05948a50b3 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -3207,10 +3207,9 @@ def rename_ambiguous_inputs(self, inputs): return inputs - @register_in_tasks_manager( "phi4_multimodal", *["image-text-to-text", "automatic-speech-recognition"], library_name="transformers" - ) +) class Phi4MultimodalOpenVINOConfig(Phi4MMOpenVINOConfig): MIN_TRANSFORMERS_VERSION = "4.51.0" MAX_TRANSFORMERS_VERSION = "4.60.0" From 63f22a96c31d3c0a53f8c4101df7b47876e65ac9 Mon Sep 17 00:00:00 2001 From: Ella Charlaix Date: Mon, 6 Oct 2025 18:10:10 +0200 Subject: [PATCH 25/28] add helium and nemotron --- tests/openvino/test_decoder.py | 5 ++++- tests/openvino/utils_tests.py | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git 
a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index f86fae371a..d912a751a0 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -117,8 +117,11 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">", "4.47"): SUPPORTED_ARCHITECTURES += ("olmo2",) + if is_transformers_version(">", "4.48"): + SUPPORTED_ARCHITECTURES += ("nemotron",) + if is_transformers_version(">", "4.49"): - SUPPORTED_ARCHITECTURES += ("gemma3_text",) + SUPPORTED_ARCHITECTURES += ("gemma3_text", "helium") if is_transformers_version(">=", "4.51.0"): SUPPORTED_ARCHITECTURES += ("llama4", "llama4_text", "qwen3", "qwen3_moe") diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index b58a29276d..1b9a154106 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -90,6 +90,7 @@ "gptj": "hf-internal-testing/tiny-random-GPTJModel", "granite": "katuni4ka/tiny-random-granite", "granitemoe": "katuni4ka/tiny-random-granite-moe", + "helium": "hf-internal-testing/tiny-random-HeliumForCausalLM", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-ibert", "idefics3": "hf-internal-testing/tiny-random-Idefics3ForConditionalGeneration", @@ -132,6 +133,7 @@ "mt5": "stas/mt5-tiny-random", "llava-qwen2": "katuni4ka/tiny-random-nanollava", "nanollava_vision_tower": "katuni4ka/tiny-random-siglip", + "nemotron": "badaoui/tiny-random-NemotronForCausalLM", "nystromformer": "hf-internal-testing/tiny-random-NystromformerModel", "olmo": "katuni4ka/tiny-random-olmo-hf", "orion": "katuni4ka/tiny-random-orion", From b96d316107e51690eb281cff0a94004dcb11345a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CElla?= Date: Wed, 22 Oct 2025 16:46:38 +0200 Subject: [PATCH 26/28] fix doc --- .github/workflows/build_pr_documentation.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_pr_documentation.yml 
b/.github/workflows/build_pr_documentation.yml index 41aa8ce9bd..a19356256c 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -45,6 +45,7 @@ jobs: run: | make doc BUILD_DIR=./doc-build VERSION=pr_${{ env.PR_NUMBER }} mv ./doc-build/optimum.intel optimum-intel + cd optimum-intel echo ${{ env.COMMIT_SHA }} > ./commit_sha echo ${{ env.PR_NUMBER }} > ./pr_number From f0f15d3fcacff640fa75ec881ac91aeda8b73fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CElla?= Date: Wed, 22 Oct 2025 16:52:14 +0200 Subject: [PATCH 27/28] fix nemotron --- tests/openvino/test_decoder.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 6db293cce1..d42c7b3aee 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -20,7 +20,7 @@ from transformers.testing_utils import slow from utils_tests import MODEL_NAMES, get_num_sdpa, mock_torch_cuda_is_available, patch_awq_for_inference -from optimum.exporters.openvino.model_configs import DeepseekOpenVINOConfig +from optimum.exporters.openvino.model_configs import DeepseekOpenVINOConfig,NemotronOnnxConfig from optimum.exporters.openvino.model_patcher import patch_update_causal_mask from optimum.exporters.tasks import TasksManager from optimum.intel import OVModelForCausalLM, OVModelForSequenceClassification @@ -166,7 +166,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "bart": 2, "baichuan2": 2, "baichuan2-13b": 2, - "bigbird_pegasus": 2, + "bigbird_pegasus": 2 if is_transformers_version(">=", "4.52") else 0, "gpt_bigcode": 5, "blenderbot": 2, "blenderbot-small": 2, @@ -187,8 +187,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "mistral": 2, "mixtral": 2, "mpt": 5, + "nemotron": 2, "olmo2": 2, - "opt": 5 if is_transformers_version(">=", "4.46.0") else 0, + "opt": 5 if is_transformers_version(">=", "4.46") else 0, "pegasus": 2, 
"qwen": 2, "phi": 2, @@ -228,6 +229,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "mixtral_awq": 2, "gemma3_text": 2, "glm4": 2, + "helium": 2, "qwen3": 2, "qwen3_moe": 2, "mamba": 0, @@ -253,6 +255,8 @@ def test_find_untested_architectures(self): supported_architectures.remove("deepseek_v2") if "deepseek_v3" in supported_architectures: supported_architectures.remove("deepseek_v3") + elif is_transformers_version("<", str(NemotronOnnxConfig.MIN_TRANSFORMERS_VERSION)): + supported_architectures.remove("nemotron") untested_architectures = supported_architectures - tested_architectures From d67b8c4ac3d798d5c45426da095eb65a862ea20f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CElla?= Date: Wed, 22 Oct 2025 18:51:03 +0200 Subject: [PATCH 28/28] revert --- .github/workflows/build_pr_documentation.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index a19356256c..41aa8ce9bd 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -45,7 +45,6 @@ jobs: run: | make doc BUILD_DIR=./doc-build VERSION=pr_${{ env.PR_NUMBER }} mv ./doc-build/optimum.intel optimum-intel - cd optimum-intel echo ${{ env.COMMIT_SHA }} > ./commit_sha echo ${{ env.PR_NUMBER }} > ./pr_number