Skip to content

Commit 92f51a1

Browse files
IlyasMoutawwakil and echarlaix
authored and committed
Add Transformers 4.53 support (#1377)
* test * fix inc * Update setup.py * fixes * fix maira2 * nikita model * Update tests/openvino/utils_tests.py * Apply suggestions from code review * fix pegasus stateful test * update whisper quant values * more whispe fixes * more whisper quantization fixes * fix sana using eager attention * add smollm3 * add _is_stateful * fix pipeline tests * big patch fixing gemma3 and patching eager mask for OpenVINO * fix sana withput using eager attention, only using eager mask * skip notebooks * always apply eager finfo fix * revert tests removal until refactor * build openvino specific qwen3_moe patcher instead of using onnx patcher * update branch * skip smollm3 stateless beam search * remove smollm3 :/ * remove scaled_dot_product_attention patch * make sure we unpatch attention mask * fix * main optimum branch * Update setup.py Co-authored-by: Ella Charlaix <[email protected]> * fix quant layers number --------- Co-authored-by: Ella Charlaix <[email protected]>
1 parent 1ac67bd commit 92f51a1

File tree

13 files changed

+534
-443
lines changed

13 files changed

+534
-443
lines changed

.github/workflows/test_openvino.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
run: |
4242
pip install --upgrade pip
4343
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
44-
pip install .[openvino,openvino-tokenizers,diffusers,tests]
44+
pip install .[openvino,diffusers,tests]
4545
4646
- if: ${{ matrix.transformers-version != 'latest' }}
4747
name: Install specific dependencies and versions required for older transformers

optimum/exporters/openvino/convert.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,9 @@ def export_pytorch(
381381
logger.info(f"Using framework PyTorch: {torch.__version__}")
382382
output = Path(output)
383383

384+
# TODO: temporary solution but statefulness should be added to the export config earlier
385+
config.stateful = stateful
386+
384387
if stateful:
385388
# Trigger bettertransformer together with stateful model because OpenVINO HW-dependent transformations expect
386389
# both of them are applied to demonstrate the best performance.

optimum/exporters/openvino/model_configs.py

Lines changed: 39 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@
9999
GptBigCodeModelPatcher,
100100
GptJModelPatcher,
101101
GptNeoModelPatcher,
102-
GptNeoxJapaneseModelPatcher,
103102
GptNeoxModelPatcher,
104103
GraniteMoEModelPatcher,
105104
IBertModelPatcher,
@@ -112,7 +111,6 @@
112111
JaisModelPatcher,
113112
Llama4ImageEmbeddingsModelPatcher,
114113
Llama4TextModelPatcher,
115-
LlamaModelPatcher,
116114
LlavaImageEmbeddingModelPatcher,
117115
LlavaNextVideoImageEmbeddingModelPatcher,
118116
LlavaQwen2ImageEmbeddingsModelPatcher,
@@ -127,6 +125,7 @@
127125
MistralModelPatcher,
128126
MixtralModelPatcher,
129127
MPTModelPatcher,
128+
OVDecoderModelPatcher,
130129
OVSpeechT5ModelPatcher,
131130
PegasusModelPatcher,
132131
PegasusStatefulSeq2SeqDecoderPatcher,
@@ -142,11 +141,10 @@
142141
Qwen2MoEPatcher,
143142
Qwen2VLLanguageModelPatcher,
144143
Qwen2VLVisionEmbMergerPatcher,
144+
Qwen3MoeModelPatcher,
145145
QwenModelPatcher,
146-
RotaryEmbPatcher,
147146
SanaTextEncoderModelPatcher,
148147
StatefulSeq2SeqDecoderPatcher,
149-
UpdateCausalMaskModelPatcher,
150148
XverseModelPatcher,
151149
)
152150

@@ -289,7 +287,7 @@ class Qwen2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
289287
def patch_model_for_export(
290288
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
291289
) -> "ModelPatcher":
292-
return UpdateCausalMaskModelPatcher(self, model, model_kwargs=model_kwargs)
290+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
293291

294292

295293
@register_in_tasks_manager("qwen2_moe", *["text-generation", "text-generation-with-past"], library_name="transformers")
@@ -307,7 +305,6 @@ def patch_model_for_export(
307305

308306

309307
@register_in_tasks_manager("qwen3", *["text-generation", "text-generation-with-past"], library_name="transformers")
310-
@register_in_tasks_manager("qwen3_moe", *["text-generation", "text-generation-with-past"], library_name="transformers")
311308
class Qwen3OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
312309
MIN_TRANSFORMERS_VERSION = "4.51.0"
313310

@@ -318,7 +315,15 @@ class Qwen3OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
318315
def patch_model_for_export(
319316
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
320317
) -> "ModelPatcher":
321-
return UpdateCausalMaskModelPatcher(self, model, model_kwargs=model_kwargs)
318+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
319+
320+
321+
@register_in_tasks_manager("qwen3_moe", *["text-generation", "text-generation-with-past"], library_name="transformers")
322+
class Qwen3MoEOpenVINOConfig(Qwen3OpenVINOConfig):
323+
def patch_model_for_export(
324+
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
325+
) -> "ModelPatcher":
326+
return Qwen3MoeModelPatcher(self, model, model_kwargs=model_kwargs)
322327

323328

324329
@register_in_tasks_manager("minicpm", *["text-generation", "text-generation-with-past"], library_name="transformers")
@@ -400,7 +405,7 @@ class StableLMOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
400405
def patch_model_for_export(
401406
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
402407
) -> "ModelPatcher":
403-
return UpdateCausalMaskModelPatcher(self, model, model_kwargs=model_kwargs)
408+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
404409

405410

406411
class ChatGLM2DummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator):
@@ -576,7 +581,7 @@ class GemmaOpenVINOConfig(GemmaOnnxConfig):
576581
def patch_model_for_export(
577582
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
578583
) -> "ModelPatcher":
579-
return LlamaModelPatcher(self, model, model_kwargs=model_kwargs)
584+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
580585

581586

582587
@register_in_tasks_manager(
@@ -594,7 +599,7 @@ class LlamaOpenVINOConfig(LlamaOnnxConfig):
594599
def patch_model_for_export(
595600
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
596601
) -> "ModelPatcher":
597-
return LlamaModelPatcher(self, model, model_kwargs=model_kwargs)
602+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
598603

599604

600605
@register_in_tasks_manager(
@@ -671,7 +676,6 @@ class QwenOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
671676
)
672677
DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, QwenDummyPastKeyValuesGenerator)
673678
DUMMY_PKV_GENERATOR_CLASS = QwenDummyPastKeyValuesGenerator
674-
no_position_ids = False
675679

676680
def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
677681
dummy_inputs_generators = self._create_dummy_input_generator_classes(**kwargs)
@@ -734,7 +738,7 @@ def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], dire
734738
decoder_sequence_name = "past_sequence_length"
735739
name = "past_key_values"
736740
else:
737-
decoder_sequence_name = "past_sequence_length + 1"
741+
decoder_sequence_name = "past_sequence_length + sequence_length"
738742
name = "present"
739743

740744
for i in range(self._normalized_config.num_layers):
@@ -760,13 +764,7 @@ class Starcoder2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
760764
def patch_model_for_export(
761765
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
762766
) -> "ModelPatcher":
763-
return UpdateCausalMaskModelPatcher(self, model, model_kwargs=model_kwargs)
764-
765-
766-
def patch_model_for_export(
767-
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
768-
) -> "ModelPatcher":
769-
return RotaryEmbPatcher(self, model, model_kwargs=model_kwargs)
767+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
770768

771769

772770
@register_in_tasks_manager("internlm2", *["text-generation", "text-generation-with-past"], library_name="transformers")
@@ -867,7 +865,7 @@ class PhiOpenVINOConfig(PhiOnnxConfig):
867865
def patch_model_for_export(
868866
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
869867
) -> "ModelPatcher":
870-
return UpdateCausalMaskModelPatcher(self, model, model_kwargs=model_kwargs)
868+
return OVDecoderModelPatcher(self, model, model_kwargs=model_kwargs)
871869

872870

873871
class OVFalconDummyPastKeyValuesGenerator(FalconDummyPastKeyValuesGenerator):
@@ -952,20 +950,6 @@ class BioGPTOpenVINOConfig(
952950
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
953951

954952

955-
@register_in_tasks_manager(
956-
"gpt_neox_japanese", *["text-generation", "text-generation-with-past"], library_name="transformers"
957-
)
958-
class GPTNeoxJapaneseOpenVINOConfig(TextDecoderOnnxConfig):
959-
# GPTNeoxJapanese does not require position_ids input.
960-
DEFAULT_ONNX_OPSET = 13
961-
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
962-
963-
def patch_model_for_export(
964-
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
965-
) -> "ModelPatcher":
966-
return GptNeoxJapaneseModelPatcher(self, model, model_kwargs=model_kwargs)
967-
968-
969953
@register_in_tasks_manager(
970954
"gpt_neo",
971955
*[
@@ -1295,6 +1279,20 @@ def patch_model_for_export(
12951279
return GptNeoxModelPatcher(self, model, model_kwargs=model_kwargs)
12961280

12971281

1282+
@register_in_tasks_manager(
1283+
"gpt_neox_japanese", *["text-generation", "text-generation-with-past"], library_name="transformers"
1284+
)
1285+
class GPTNeoxJapaneseOpenVINOConfig(TextDecoderOnnxConfig):
1286+
# GPTNeoxJapanese does not require position_ids input.
1287+
DEFAULT_ONNX_OPSET = 13
1288+
NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
1289+
1290+
def patch_model_for_export(
1291+
self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
1292+
) -> "ModelPatcher":
1293+
return GptNeoxModelPatcher(self, model, model_kwargs=model_kwargs)
1294+
1295+
12981296
@register_in_tasks_manager(
12991297
"gemma2",
13001298
*[
@@ -1544,6 +1542,7 @@ def patch_model_for_export(
15441542
return IBertModelPatcher(self, model, model_kwargs=model_kwargs)
15451543

15461544

1545+
# TODO: this is a very confusing class TBH, why not simply decompose the VLM into components, like diffusion models ?
15471546
class LMInputEmbedsConfigHelper(TextDecoderWithPositionIdsOnnxConfig):
15481547
def __init__(self, export_config, patcher_cls=None, dummy_input_generator=None, inputs_update=None):
15491548
self.orig_export_config = export_config
@@ -1586,15 +1585,20 @@ def inputs(self) -> Dict[str, Dict[int, str]]:
15861585
def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
15871586
dummy_inputs = self.orig_export_config.generate_dummy_inputs(framework, **kwargs)
15881587
input_ids = dummy_inputs.pop("input_ids")
1588+
pask_key_values = dummy_inputs.get("past_key_values")
15891589
inputs_embed_shape = (input_ids.shape[0], input_ids.shape[1], self._normalized_config.hidden_size)
15901590
inputs_embeds = self.orig_export_config.DUMMY_INPUT_GENERATOR_CLASSES[0].random_float_tensor(
15911591
inputs_embed_shape
15921592
)
15931593
dummy_inputs["inputs_embeds"] = inputs_embeds
15941594
if "token_type_ids" in self.inputs:
1595+
if is_transformers_version(">=", "4.53"):
1596+
token_type_ids_shape = (input_ids.shape[0], input_ids.shape[1] + pask_key_values[0][0].shape[-2])
1597+
else:
1598+
token_type_ids_shape = (input_ids.shape[0], input_ids.shape[1])
15951599
dummy_inputs["token_type_ids"] = self.orig_export_config.DUMMY_INPUT_GENERATOR_CLASSES[
15961600
0
1597-
].random_int_tensor(input_ids.shape, min_value=0, max_value=2)
1601+
].random_int_tensor(token_type_ids_shape, min_value=0, max_value=2)
15981602
return dummy_inputs
15991603

16001604

0 commit comments

Comments (0)