From 800aae924e049af5ca9fe6815a79d18176af89b2 Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 28 Jul 2025 18:34:23 -0700 Subject: [PATCH 01/10] update hunyuan patcher --- optimum/exporters/openvino/model_configs.py | 15 ++++ optimum/exporters/openvino/model_patcher.py | 93 +++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 2577836967..ee5601fd11 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -38,6 +38,7 @@ GPTJOnnxConfig, GPTNeoOnnxConfig, GPTNeoXOnnxConfig, + HunyuanModelPatcher, IBertOnnxConfig, LlamaOnnxConfig, MarianOnnxConfig, @@ -67,6 +68,7 @@ DummyVisionInputGenerator, FalconDummyPastKeyValuesGenerator, GemmaDummyPastKeyValuesGenerator, + HunyuanDummyPastKeyValuesGenerator, MistralDummyPastKeyValuesGenerator, ) from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig @@ -4490,3 +4492,16 @@ def generate_dummy_inputs(self, framework: str = "pt", **kwargs): ) return dummy_inputs + +@register_in_tasks_manager("hunyuan_v1_dense", *["text-generation", "text-generation-with-past"], library_name="transformers") +class HunyuanOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig): + MIN_TRANSFORMERS_VERSION = "4.54.0" + + DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, HunyuanDummyPastKeyValuesGenerator) + DUMMY_PKV_GENERATOR_CLASS = HunyuanDummyPastKeyValuesGenerator + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + + def patch_model_for_export( + self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None + ) -> "ModelPatcher": + return HunyuanModelPatcher(self, model, model_kwargs=model_kwargs) \ No newline at end of file diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 0ffb612d6b..23638fba54 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -6754,3 +6754,96 @@ def __exit__(self, exc_type, exc_value, traceback): setattr(self._model, self.orig_forward_name, self.orig_forward) for layer in self._model.backbone.layers: layer.mixer.forward = layer.mixer._orig_forward + +def _hunyuan_attention_forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + output_attentions: bool = False, + use_cache: bool = False, + kv_states: torch.Tensor = None, +) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: + if output_attentions: + return super().forward( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + from transformers.models.hunyuan_v1_dense.hunyuan_v1_dense import apply_rotary_pos_emb, repeat_kv + + + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + if self.attention_type == "cross" and kv_states is not None and isinstance(kv_states, tuple): + orig_key_states, orig_value_states = kv_states + key_states, value_states = kv_states + else: + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + orig_key_states, orig_value_states = key_states, value_states + + query_states = 
query_states.reshape(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.reshape(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.reshape(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + if self.use_rotary_pos_emb: + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) + + if self.use_qk_norm: + query_states = self.query_layernorm(query_states) + key_states = self.key_layernorm(key_states) + + if past_key_value is not None: + cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" + ) + + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=attention_mask, + dropout_p=self.attention_dropout if self.training else 0.0, + # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a + # causal mask in case q_len == 1. + is_causal=self.is_causal and attention_mask is None and q_len > 1, + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(bsz, q_len, -1) + + attn_output = self.o_proj(attn_output) + + return attn_output, None, past_key_value, (orig_key_states, orig_value_states) + +class HunyuanModelPatcher(DecoderModelPatcher): + def __enter__(self): + super().__enter__() + for layer in self._model.model.layers: + layer.self_attn._orig_forward = layer.self_attn.forward + layer.self_attn.forward = types.MethodType(_hunyuan_attention_forward, layer.self_attn) + + def __exit__(self, exc_type, exc_value, traceback): + super().__exit__(exc_type, exc_value, traceback) + for layer in self._model.model.layers: + layer.self_attn.forward = layer.self_attn._orig_forward \ No newline at end of file From 73193b216ffde42f3ba1fa9abd17ef94a6ae6f6d Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 28 Jul 2025 18:53:15 -0700 Subject: [PATCH 02/10] update hunyuan patcher --- optimum/exporters/openvino/model_configs.py | 40 ++++++++++++++++++++- optimum/exporters/openvino/model_patcher.py | 2 +- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index ee5601fd11..6bfa290228 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -38,7 +38,6 @@ GPTJOnnxConfig, GPTNeoOnnxConfig, GPTNeoXOnnxConfig, - HunyuanModelPatcher, IBertOnnxConfig, LlamaOnnxConfig, MarianOnnxConfig, @@ -104,6 +103,7 @@ GptNeoxJapaneseModelPatcher, GptNeoxModelPatcher, GraniteMoEModelPatcher, + HunyuanModelPatcher, IBertModelPatcher, Idefics3ImageEmbeddingsModelPatcher, InputEmbeddingPatcher, @@ -4493,6 +4493,44 @@ def generate_dummy_inputs(self, framework: str = "pt", **kwargs): return dummy_inputs + +class 
HunyuanDummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator): + def __init__( + self, + task: str, + normalized_config: NormalizedTextConfig, + batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"], + sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"], + random_batch_size_range: Optional[Tuple[int, int]] = None, + random_sequence_length_range: Optional[Tuple[int, int]] = None, + **kwargs, + ): + super().__init__( + task=task, + normalized_config=normalized_config, + batch_size=batch_size, + sequence_length=sequence_length, + random_batch_size_range=random_batch_size_range, + random_sequence_length_range=random_sequence_length_range, + ) + self.num_key_value_heads = normalized_config.num_key_value_heads + self.head_dim = normalized_config.attention_head_dim + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + shape = ( + self.batch_size, + self.num_key_value_heads, + self.sequence_length, + self.head_dim, + ) + return [ + ( + self.random_float_tensor(shape, framework=framework, dtype=float_dtype), + self.random_float_tensor(shape, framework=framework, dtype=float_dtype), + ) + for _ in range(self.num_layers) + ] + @register_in_tasks_manager("hunyuan_v1_dense", *["text-generation", "text-generation-with-past"], library_name="transformers") class HunyuanOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig): MIN_TRANSFORMERS_VERSION = "4.54.0" diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 23638fba54..a3d4af8568 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -6760,7 +6760,7 @@ def _hunyuan_attention_forward( hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_value: Optional[Cache] = None, + past_key_value: Optional["Cache"] = None, output_attentions: bool = False, use_cache: bool = False, kv_states: torch.Tensor = None, From 150db501a9683660ecb847e555734b4d1c6a995e Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 28 Jul 2025 19:51:57 -0700 Subject: [PATCH 03/10] update hunyuan patcher --- optimum/exporters/openvino/model_patcher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index a3d4af8568..3d6e6f6779 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -6796,7 +6796,8 @@ def _hunyuan_attention_forward( kv_seq_len = key_states.shape[-2] if past_key_value is not None: - kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + previous_seq_length = past_key_value.get_seq_length(self.layer_idx) + kv_seq_len += previous_seq_length if self.use_rotary_pos_emb: cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) From f051f080c561d740f649d05b5fd68a37c653e239 Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 3 Aug 2025 17:52:40 -0700 Subject: [PATCH 04/10] align with tranformers --- optimum/exporters/openvino/model_configs.py | 2 +- optimum/exporters/openvino/model_patcher.py | 96 +-------------------- 2 files changed, 2 insertions(+), 96 deletions(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 6bfa290228..23cab389c0 100644 --- 
a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -4542,4 +4542,4 @@ class HunyuanOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig): def patch_model_for_export( self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None ) -> "ModelPatcher": - return HunyuanModelPatcher(self, model, model_kwargs=model_kwargs) \ No newline at end of file + return UpdateCausalMaskModelPatcher(self, model, model_kwargs=model_kwargs) \ No newline at end of file diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 3d6e6f6779..3f86cd54bb 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -6753,98 +6753,4 @@ def __exit__(self, exc_type, exc_value, traceback): super().__exit__(exc_type, exc_value, traceback) setattr(self._model, self.orig_forward_name, self.orig_forward) for layer in self._model.backbone.layers: - layer.mixer.forward = layer.mixer._orig_forward - -def _hunyuan_attention_forward( - self, - hidden_states: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - past_key_value: Optional["Cache"] = None, - output_attentions: bool = False, - use_cache: bool = False, - kv_states: torch.Tensor = None, -) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]: - if output_attentions: - return super().forward( - hidden_states=hidden_states, - attention_mask=attention_mask, - position_ids=position_ids, - past_key_value=past_key_value, - output_attentions=output_attentions, - use_cache=use_cache, - ) - - from transformers.models.hunyuan_v1_dense.hunyuan_v1_dense import apply_rotary_pos_emb, repeat_kv - - - bsz, q_len, _ = hidden_states.size() - - query_states = self.q_proj(hidden_states) - if self.attention_type == "cross" and kv_states is not None and isinstance(kv_states, tuple): - orig_key_states, orig_value_states = kv_states - key_states, value_states = kv_states - else: - query_states = self.q_proj(hidden_states) - key_states = self.k_proj(hidden_states) - value_states = self.v_proj(hidden_states) - orig_key_states, orig_value_states = key_states, value_states - - query_states = query_states.reshape(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) - key_states = key_states.reshape(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - value_states = value_states.reshape(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - - kv_seq_len = key_states.shape[-2] - if past_key_value is not None: - previous_seq_length = past_key_value.get_seq_length(self.layer_idx) - kv_seq_len += previous_seq_length - if self.use_rotary_pos_emb: - cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) - query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids) - - if self.use_qk_norm: - query_states = self.query_layernorm(query_states) - key_states = self.key_layernorm(key_states) - - if past_key_value is not None: - cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models - key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) - - key_states = repeat_kv(key_states, self.num_key_value_groups) - value_states = repeat_kv(value_states, self.num_key_value_groups) - - if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): - raise ValueError( - f"Attention 
mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" - ) - - attn_output = torch.nn.functional.scaled_dot_product_attention( - query_states, - key_states, - value_states, - attn_mask=attention_mask, - dropout_p=self.attention_dropout if self.training else 0.0, - # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a - # causal mask in case q_len == 1. - is_causal=self.is_causal and attention_mask is None and q_len > 1, - ) - - attn_output = attn_output.transpose(1, 2).contiguous() - attn_output = attn_output.reshape(bsz, q_len, -1) - - attn_output = self.o_proj(attn_output) - - return attn_output, None, past_key_value, (orig_key_states, orig_value_states) - -class HunyuanModelPatcher(DecoderModelPatcher): - def __enter__(self): - super().__enter__() - for layer in self._model.model.layers: - layer.self_attn._orig_forward = layer.self_attn.forward - layer.self_attn.forward = types.MethodType(_hunyuan_attention_forward, layer.self_attn) - - def __exit__(self, exc_type, exc_value, traceback): - super().__exit__(exc_type, exc_value, traceback) - for layer in self._model.model.layers: - layer.self_attn.forward = layer.self_attn._orig_forward \ No newline at end of file + layer.mixer.forward = layer.mixer._orig_forward \ No newline at end of file From 2c013976673bbee9094d35ad8caa8f3b1e49d723 Mon Sep 17 00:00:00 2001 From: ethan Date: Sun, 3 Aug 2025 22:45:55 -0700 Subject: [PATCH 05/10] update update --- optimum/exporters/openvino/model_configs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 23cab389c0..3341a76495 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -67,7 +67,6 @@ DummyVisionInputGenerator, FalconDummyPastKeyValuesGenerator, GemmaDummyPastKeyValuesGenerator, - HunyuanDummyPastKeyValuesGenerator, MistralDummyPastKeyValuesGenerator, ) from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig @@ -103,7 +102,6 @@ GptNeoxJapaneseModelPatcher, GptNeoxModelPatcher, GraniteMoEModelPatcher, - HunyuanModelPatcher, IBertModelPatcher, Idefics3ImageEmbeddingsModelPatcher, InputEmbeddingPatcher, From 116b098ed2ee6ce2db473695921fdc7ca564167a Mon Sep 17 00:00:00 2001 From: ethan Date: Wed, 10 Sep 2025 18:12:57 -0700 Subject: [PATCH 06/10] update test case --- tests/openvino/test_decoder.py | 4 ++++ tests/openvino/test_export.py | 3 +++ tests/openvino/test_exporters_cli.py | 1 + tests/openvino/utils_tests.py | 1 + 4 files changed, 9 insertions(+) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 247ce8d491..bc5d54e691 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -129,6 +129,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.54.0"): SUPPORTED_ARCHITECTURES += ("ernie4_5",) + + if is_transformers_version(">=", "4.56.0"): + SUPPORTED_ARCHITECTURES += ("hunyuan_v1_dense",) GENERATION_LENGTH = 100 REMOTE_CODE_MODELS = ( @@ -219,6 +222,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "falcon-mamba": 0, "arcee": 2, "ernie4_5": 2, + "hunyuan_v1_dense": 2, } # TODO: remove gptq/awq from here diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py index ae272f4baa..2f58621cdb 100644 --- a/tests/openvino/test_export.py +++ b/tests/openvino/test_export.py @@ -103,6 
+103,9 @@ class ExportModelTest(unittest.TestCase): if is_transformers_version(">=", "4.54"): SUPPORTED_ARCHITECTURES.update({"ernie4_5": OVModelForCausalLM}) + + if is_transformers_version(">=", "4.56"): + SUPPORTED_ARCHITECTURES.update({"hunyuan_v1_dense": OVModelForCausalLM}) GENERATIVE_MODELS = ("pix2struct", "t5", "bart", "gpt2", "whisper", "llava", "speecht5") diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 22f4a692d2..9a90a12e06 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -147,6 +147,7 @@ class OVCLIExportTestCase(unittest.TestCase): "falcon-mamba": 2, "ernie4_5": 2, "qwen3": 2, + "hunyuan_v1_dense": 2, } TOKENIZER_CHAT_TEMPLATE_TESTS_MODELS = { diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 8d8ba3e098..8e4f07afe3 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -197,6 +197,7 @@ "sana": "katuni4ka/tiny-random-sana", "sana-sprint": "katuni4ka/tiny-random-sana-sprint", "ltx-video": "katuni4ka/tiny-random-ltx-video", + "hunyuan_v1_dense": "snake7gun/tiny-random-hunyuan", } From c4f7cc379cb82edf9236cbcbbfdf4ca4c654e01d Mon Sep 17 00:00:00 2001 From: ethan Date: Wed, 10 Sep 2025 18:24:17 -0700 Subject: [PATCH 07/10] update test case --- optimum/exporters/openvino/model_patcher.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index f67f1eb117..851308e29e 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -6803,9 +6803,6 @@ def __exit__(self, exc_type, exc_value, traceback): super().__exit__(exc_type, exc_value, traceback) setattr(self._model, self.orig_forward_name, self.orig_forward) for layer in self._model.backbone.layers: -<<<<<<< HEAD - layer.mixer.forward = layer.mixer._orig_forward -======= layer.mixer.forward = layer.mixer._orig_forward @@ -6864,4 +6861,3 @@ def __exit__(self, exc_type, exc_value, traceback): if is_transformers_version(">=", "4.53"): Qwen3MoeSparseMoeBlock.forward = self.original_moe_forward ->>>>>>> upstream/main From 4127d04f0c66a1d840a8e5dc030d8a58ebbce53f Mon Sep 17 00:00:00 2001 From: Ethan Yang Date: Mon, 15 Sep 2025 22:27:34 +0800 Subject: [PATCH 08/10] Update optimum/exporters/openvino/model_configs.py Co-authored-by: Roman Kazantsev --- optimum/exporters/openvino/model_configs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 23fa451f1a..3d104445db 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -4552,6 +4552,7 @@ class HunyuanOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig): DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, HunyuanDummyPastKeyValuesGenerator) DUMMY_PKV_GENERATOR_CLASS = HunyuanDummyPastKeyValuesGenerator + @register_in_tasks_manager("ernie4_5", *["text-generation", "text-generation-with-past"], library_name="transformers") class ErnieOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig): MIN_TRANSFORMERS_VERSION = "4.54.0" From c50b7a9f36be98e9100f00a69ba4321e207bf5c2 Mon Sep 17 00:00:00 2001 From: Ethan Yang Date: Fri, 10 Oct 2025 22:55:30 +0800 Subject: [PATCH 09/10] Update model_configs.py --- optimum/exporters/openvino/model_configs.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git 
a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 4983f7d058..612584d221 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -4645,7 +4645,7 @@ def generate_dummy_inputs(self, framework: str = "pt", **kwargs): return dummy_inputs -class HunyuanDummyPastKeyValuesGenerator(MistralDummyPastKeyValuesGenerator): +class HunyuanDummyPastKeyValuesGenerator(GemmaDummyPastKeyValuesGenerator): def __init__( self, task: str, @@ -4668,10 +4668,17 @@ def __init__( @register_in_tasks_manager("hunyuan_v1_dense", *["text-generation", "text-generation-with-past"], library_name="transformers") class HunyuanOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig): - MIN_TRANSFORMERS_VERSION = "4.56.0" + MIN_TRANSFORMERS_VERSION = "4.55.0.dev0" DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, HunyuanDummyPastKeyValuesGenerator) DUMMY_PKV_GENERATOR_CLASS = HunyuanDummyPastKeyValuesGenerator + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + + def patch_model_for_export( + self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None + ) -> "ModelPatcher": + return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs) + @register_in_tasks_manager( "gpt2", *[ From 4a34ab50cb925a9e2be5dbe921311ac6f8ed89f3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 16 Oct 2025 12:03:11 +0800 Subject: [PATCH 10/10] remove artifacts of merge --- tests/openvino/test_decoder.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/openvino/test_decoder.py b/tests/openvino/test_decoder.py index 24aa4c4cfc..04465d48de 100644 --- a/tests/openvino/test_decoder.py +++ b/tests/openvino/test_decoder.py @@ -227,11 +227,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "mamba": 0, "falcon-mamba": 0, "arcee": 2, -<<<<<<< HEAD - "ernie4_5": 2, "hunyuan_v1_dense": 2, -======= ->>>>>>> upstream/main } # TODO: remove gptq/awq from here
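
Note on using the exported model (not part of the patch series itself): taken together, these patches register the "hunyuan_v1_dense" architecture with the OpenVINO exporter through HunyuanOpenVINOConfig and HunyuanDummyPastKeyValuesGenerator, route it through the standard decoder patcher, and add it to the decoder, export, and CLI test suites with the tiny checkpoint "snake7gun/tiny-random-hunyuan". The sketch below is a minimal usage example of the ordinary optimum-intel export path applied to that checkpoint; it assumes a transformers release that ships the hunyuan_v1_dense modeling code (>= 4.55 per the final MIN_TRANSFORMERS_VERSION in patch 09) and an optimum-intel build that contains this series. Nothing in it is specific to these patches beyond the architecture name.

# Minimal sketch: export a hunyuan_v1_dense checkpoint to OpenVINO IR and run
# generation with the exported model. The checkpoint id is the tiny random
# test model referenced in tests/openvino/utils_tests.py above; substitute a
# real Hunyuan dense checkpoint for actual use.
from transformers import AutoTokenizer

from optimum.intel import OVModelForCausalLM

model_id = "snake7gun/tiny-random-hunyuan"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly; with
# this series applied, the "hunyuan_v1_dense" architecture resolves to
# HunyuanOpenVINOConfig and is exported with KV-cache inputs
# ("text-generation-with-past").
model = OVModelForCausalLM.from_pretrained(model_id, export=True)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Equivalently, the command-line exporter exercised by tests/openvino/test_exporters_cli.py should work once the architecture is registered, along the lines of: optimum-cli export openvino --model <hunyuan checkpoint> <output dir>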