huggingface · echarlaix · Sep 12, 2025 · Sep 12, 2025 · Sep 19, 2025 · Sep 19, 2025
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
@@ -3873,9 +3873,6 @@ class M2M100OpenVINOConfig(BartOpenVINOConfig):
     pass
 
 
-@register_in_tasks_manager(
-    "deepseek_v3", *["text-generation", "text-generation-with-past"], library_name="transformers"
-)
 @register_in_tasks_manager(
     "deepseek_v2", *["text-generation", "text-generation-with-past"], library_name="transformers"
 )
@@ -3887,6 +3884,15 @@ def patch_model_for_export(
         return DeepseekPatcher(self, model, model_kwargs=model_kwargs)
 
 
+@register_in_tasks_manager(
+    "deepseek_v3", *["text-generation", "text-generation-with-past"], library_name="transformers"
+)
+class DeepseekVOpenVINOConfig(LlamaOpenVINOConfig):  # export config registered for "deepseek_v3"
+    MIN_TRANSFORMERS_VERSION = "4.51.0"  # NOTE(review): tests enable deepseek_v3 only from 4.54.0 - confirm
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, OVMiniCPM3DummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = OVMiniCPM3DummyPastKeyValuesGenerator  # same PKV generator as listed above
+
+
 @register_in_tasks_manager("got_ocr2", *["image-to-text", "image-text-to-text"], library_name="transformers")
 class GotOCR2OpenVINOConfig(BaseVLMOpenVINOConfig):
     MIN_TRANSFORMERS_VERSION = "4.49.0"
@@ -4526,14 +4532,8 @@ def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
 
 
 @register_in_tasks_manager("ernie4_5", *["text-generation", "text-generation-with-past"], library_name="transformers")
-class ErnieOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+class ErnieOpenVINOConfig(LlamaOpenVINOConfig):  # no local patch_model_for_export; presumably inherited - confirm
     MIN_TRANSFORMERS_VERSION = "4.54.0"
-
     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
-
-    def patch_model_for_export(
-        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
-    ) -> "ModelPatcher":
-        return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py
@@ -103,9 +103,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
 
     if is_transformers_version(">=", "4.46.0"):
         SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3", "phi3-moe")
-        # openvino 2025.0 required for disabling check_trace
-        if is_openvino_version(">=", "2025.0"):
-            SUPPORTED_ARCHITECTURES += ("deepseek",)
 
         # gptq and awq install disabled for windows test environment
         if platform.system() != "Windows":
@@ -128,7 +125,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         SUPPORTED_ARCHITECTURES += ("arcee",)
 
     if is_transformers_version(">=", "4.54.0"):
-        SUPPORTED_ARCHITECTURES += ("ernie4_5",)
+        SUPPORTED_ARCHITECTURES += ("deepseek_v3", "ernie4_5")
 
     GENERATION_LENGTH = 100
     REMOTE_CODE_MODELS = (
@@ -150,7 +147,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "exaone",
         "decilm",
         "minicpm3",
-        "deepseek",
     )
 
     EXPECTED_NUM_SDPA = {
@@ -164,6 +160,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "chatglm": 2,
         "codegen": 5,
         "codegen2": 2,
+        "deepseek_v3": 2,
         "gpt2": 5,
         "gptj": 5,
         "gpt_neo": 4,
@@ -208,7 +205,6 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "mistral-nemo": 8,
         "minicpm3": 6,
         "phi3-moe": 2,
-        "deepseek": 2,
         "opt_gptq": 12,
         "mixtral_awq": 2,
         "gemma3_text": 2,
@@ -325,10 +321,6 @@ def test_compare_to_transformers(self, model_arch):
 
         ov_outputs = ov_model.generate(**tokens, generation_config=gen_config)
 
-        # TODO: add back once https://huggingface.co/katuni4ka/tiny-random-minicpm3/discussions/1 merged (for all models) as current mdoeling incompatible with transformers >= v4.49
-        if model_arch in {"deepseek"} and is_transformers_version(">=", "4.49"):
-            self.skipTest("Incompatible modeling code")
-
         additional_inputs = {}
         # gemma2 does not support dynamic cache, it is unfair to compare dynamic cache result vs hybrid cache,
         # align cache representation in torch model
@@ -536,10 +528,6 @@ def test_beam_search(self, model_arch):
         if model_arch in ["qwen", "chatglm", "chatglm4"]:
             return
 
-        # TODO: add back once https://huggingface.co/katuni4ka/tiny-random-minicpm3/discussions/1 merged (for all models) as current mdoeling incompatible with transformers >= v4.49
-        if model_arch in {"deepseek"} and is_transformers_version(">=", "4.49"):
-            self.skipTest("Incompatible modeling code")
-
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
         if model_arch == "persimmon":
             tokenizer.pad_token_id = tokenizer.bos_token_id

diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
@@ -56,7 +56,7 @@
     "deberta": "hf-internal-testing/tiny-random-deberta",
     "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model",
     "decilm": "katuni4ka/tiny-random-decilm",
-    "deepseek": "katuni4ka/tiny-random-deepseek-v3",
+    "deepseek_v3": "hf-internal-testing/tiny-random-DeepseekV3ForCausalLM",
     "deit": "hf-internal-testing/tiny-random-DeiTModel",
     "convnext": "hf-internal-testing/tiny-random-convnext",
     "convnextv2": "hf-internal-testing/tiny-random-ConvNextV2Model",