9999 GptBigCodeModelPatcher ,
100100 GptJModelPatcher ,
101101 GptNeoModelPatcher ,
102- GptNeoxJapaneseModelPatcher ,
103102 GptNeoxModelPatcher ,
104103 GraniteMoEModelPatcher ,
105104 IBertModelPatcher ,
112111 JaisModelPatcher ,
113112 Llama4ImageEmbeddingsModelPatcher ,
114113 Llama4TextModelPatcher ,
115- LlamaModelPatcher ,
116114 LlavaImageEmbeddingModelPatcher ,
117115 LlavaNextVideoImageEmbeddingModelPatcher ,
118116 LlavaQwen2ImageEmbeddingsModelPatcher ,
127125 MistralModelPatcher ,
128126 MixtralModelPatcher ,
129127 MPTModelPatcher ,
128+ OVDecoderModelPatcher ,
130129 OVSpeechT5ModelPatcher ,
131130 PegasusModelPatcher ,
132131 PegasusStatefulSeq2SeqDecoderPatcher ,
142141 Qwen2MoEPatcher ,
143142 Qwen2VLLanguageModelPatcher ,
144143 Qwen2VLVisionEmbMergerPatcher ,
144+ Qwen3MoeModelPatcher ,
145145 QwenModelPatcher ,
146- RotaryEmbPatcher ,
147146 SanaTextEncoderModelPatcher ,
148147 StatefulSeq2SeqDecoderPatcher ,
149- UpdateCausalMaskModelPatcher ,
150148 XverseModelPatcher ,
151149)
152150
@@ -289,7 +287,7 @@ class Qwen2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
289287 def patch_model_for_export (
290288 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
291289 ) -> "ModelPatcher" :
292- return UpdateCausalMaskModelPatcher (self , model , model_kwargs = model_kwargs )
290+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
293291
294292
295293@register_in_tasks_manager ("qwen2_moe" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers" )
@@ -307,7 +305,6 @@ def patch_model_for_export(
307305
308306
309307@register_in_tasks_manager ("qwen3" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers" )
310- @register_in_tasks_manager ("qwen3_moe" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers" )
311308class Qwen3OpenVINOConfig (TextDecoderWithPositionIdsOnnxConfig ):
312309 MIN_TRANSFORMERS_VERSION = "4.51.0"
313310
@@ -318,7 +315,15 @@ class Qwen3OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
318315 def patch_model_for_export (
319316 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
320317 ) -> "ModelPatcher" :
321- return UpdateCausalMaskModelPatcher (self , model , model_kwargs = model_kwargs )
318+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
319+
320+
321+ @register_in_tasks_manager ("qwen3_moe" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers" )
322+ class Qwen3MoEOpenVINOConfig (Qwen3OpenVINOConfig ):
323+ def patch_model_for_export (
324+ self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
325+ ) -> "ModelPatcher" :
326+ return Qwen3MoeModelPatcher (self , model , model_kwargs = model_kwargs )
322327
323328
324329@register_in_tasks_manager ("minicpm" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers" )
@@ -400,7 +405,7 @@ class StableLMOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
400405 def patch_model_for_export (
401406 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
402407 ) -> "ModelPatcher" :
403- return UpdateCausalMaskModelPatcher (self , model , model_kwargs = model_kwargs )
408+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
404409
405410
406411class ChatGLM2DummyPastKeyValuesGenerator (DummyPastKeyValuesGenerator ):
@@ -576,7 +581,7 @@ class GemmaOpenVINOConfig(GemmaOnnxConfig):
576581 def patch_model_for_export (
577582 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
578583 ) -> "ModelPatcher" :
579- return LlamaModelPatcher (self , model , model_kwargs = model_kwargs )
584+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
580585
581586
582587@register_in_tasks_manager (
@@ -594,7 +599,7 @@ class LlamaOpenVINOConfig(LlamaOnnxConfig):
594599 def patch_model_for_export (
595600 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
596601 ) -> "ModelPatcher" :
597- return LlamaModelPatcher (self , model , model_kwargs = model_kwargs )
602+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
598603
599604
600605@register_in_tasks_manager (
@@ -671,7 +676,6 @@ class QwenOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
671676 )
672677 DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator , QwenDummyPastKeyValuesGenerator )
673678 DUMMY_PKV_GENERATOR_CLASS = QwenDummyPastKeyValuesGenerator
674- no_position_ids = False
675679
676680 def generate_dummy_inputs (self , framework : str = "pt" , ** kwargs ):
677681 dummy_inputs_generators = self ._create_dummy_input_generator_classes (** kwargs )
@@ -734,7 +738,7 @@ def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], dire
734738 decoder_sequence_name = "past_sequence_length"
735739 name = "past_key_values"
736740 else :
737- decoder_sequence_name = "past_sequence_length + 1 "
741+ decoder_sequence_name = "past_sequence_length + sequence_length "
738742 name = "present"
739743
740744 for i in range (self ._normalized_config .num_layers ):
@@ -760,13 +764,7 @@ class Starcoder2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
760764 def patch_model_for_export (
761765 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
762766 ) -> "ModelPatcher" :
763- return UpdateCausalMaskModelPatcher (self , model , model_kwargs = model_kwargs )
764-
765-
766- def patch_model_for_export (
767- self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
768- ) -> "ModelPatcher" :
769- return RotaryEmbPatcher (self , model , model_kwargs = model_kwargs )
767+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
770768
771769
772770@register_in_tasks_manager ("internlm2" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers" )
@@ -867,7 +865,7 @@ class PhiOpenVINOConfig(PhiOnnxConfig):
867865 def patch_model_for_export (
868866 self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
869867 ) -> "ModelPatcher" :
870- return UpdateCausalMaskModelPatcher (self , model , model_kwargs = model_kwargs )
868+ return OVDecoderModelPatcher (self , model , model_kwargs = model_kwargs )
871869
872870
873871class OVFalconDummyPastKeyValuesGenerator (FalconDummyPastKeyValuesGenerator ):
@@ -952,20 +950,6 @@ class BioGPTOpenVINOConfig(
952950 NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
953951
954952
955- @register_in_tasks_manager (
956- "gpt_neox_japanese" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers"
957- )
958- class GPTNeoxJapaneseOpenVINOConfig (TextDecoderOnnxConfig ):
959- # GPTNeoxJapanese does not require position_ids input.
960- DEFAULT_ONNX_OPSET = 13
961- NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
962-
963- def patch_model_for_export (
964- self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
965- ) -> "ModelPatcher" :
966- return GptNeoxJapaneseModelPatcher (self , model , model_kwargs = model_kwargs )
967-
968-
969953@register_in_tasks_manager (
970954 "gpt_neo" ,
971955 * [
@@ -1295,6 +1279,20 @@ def patch_model_for_export(
12951279 return GptNeoxModelPatcher (self , model , model_kwargs = model_kwargs )
12961280
12971281
1282+ @register_in_tasks_manager (
1283+ "gpt_neox_japanese" , * ["text-generation" , "text-generation-with-past" ], library_name = "transformers"
1284+ )
1285+ class GPTNeoxJapaneseOpenVINOConfig (TextDecoderOnnxConfig ):
1286+ # GPTNeoxJapanese does not require position_ids input.
1287+ DEFAULT_ONNX_OPSET = 13
1288+ NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
1289+
1290+ def patch_model_for_export (
1291+ self , model : Union ["PreTrainedModel" , "TFPreTrainedModel" ], model_kwargs : Optional [Dict [str , Any ]] = None
1292+ ) -> "ModelPatcher" :
1293+ return GptNeoxModelPatcher (self , model , model_kwargs = model_kwargs )
1294+
1295+
12981296@register_in_tasks_manager (
12991297 "gemma2" ,
13001298 * [
@@ -1544,6 +1542,7 @@ def patch_model_for_export(
15441542 return IBertModelPatcher (self , model , model_kwargs = model_kwargs )
15451543
15461544
1545+ # TODO: this class is confusing; consider decomposing the VLM into separate components instead, as is done for diffusion models.
15471546class LMInputEmbedsConfigHelper (TextDecoderWithPositionIdsOnnxConfig ):
15481547 def __init__ (self , export_config , patcher_cls = None , dummy_input_generator = None , inputs_update = None ):
15491548 self .orig_export_config = export_config
@@ -1586,15 +1585,20 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
15861585 def generate_dummy_inputs (self , framework : str = "pt" , ** kwargs ):
15871586 dummy_inputs = self .orig_export_config .generate_dummy_inputs (framework , ** kwargs )
15881587 input_ids = dummy_inputs .pop ("input_ids" )
1588+ pask_key_values = dummy_inputs .get ("past_key_values" )
15891589 inputs_embed_shape = (input_ids .shape [0 ], input_ids .shape [1 ], self ._normalized_config .hidden_size )
15901590 inputs_embeds = self .orig_export_config .DUMMY_INPUT_GENERATOR_CLASSES [0 ].random_float_tensor (
15911591 inputs_embed_shape
15921592 )
15931593 dummy_inputs ["inputs_embeds" ] = inputs_embeds
15941594 if "token_type_ids" in self .inputs :
1595+ if is_transformers_version (">=" , "4.53" ):
1596+ token_type_ids_shape = (input_ids .shape [0 ], input_ids .shape [1 ] + pask_key_values [0 ][0 ].shape [- 2 ])
1597+ else :
1598+ token_type_ids_shape = (input_ids .shape [0 ], input_ids .shape [1 ])
15951599 dummy_inputs ["token_type_ids" ] = self .orig_export_config .DUMMY_INPUT_GENERATOR_CLASSES [
15961600 0
1597- ].random_int_tensor (input_ids . shape , min_value = 0 , max_value = 2 )
1601+ ].random_int_tensor (token_type_ids_shape , min_value = 0 , max_value = 2 )
15981602 return dummy_inputs
15991603
16001604
0 commit comments