Commit cd44f82

Add transformers 4.48 support (#1136)
* test 4.48
* fix llava offline test
* a couple of initial fixes
* fix type hint error, vision language model tests and phi3
* fix preprocess_inputs
* fix streamer
* fix tests
* remove unnecessary code
* fix llava new processing test
1 parent faeebf3 · commit cd44f82

File tree: 5 files changed (+48, -22 lines)


optimum/exporters/openvino/model_patcher.py

Lines changed: 18 additions & 14 deletions
@@ -718,14 +718,15 @@ def _mistral_update_causal_mask(
 class MistralModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             # apply fix https://github.com/huggingface/transformers/commit/57d7594a79a9f5d835abf2d4d384db0e4818e548
             self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
             self._model.model._update_causal_mask = types.MethodType(_mistral_update_causal_mask, self._model.model)
 
         else:
             for layer in self._model.model.layers:
-                _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)
+                if hasattr(layer.self_attn, "rotary_emb"):
+                    _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)
 
     def __exit__(self, exc_type, exc_value, traceback):
         super().__exit__(exc_type, exc_value, traceback)

@@ -734,7 +735,7 @@ def __exit__(self, exc_type, exc_value, traceback):
         self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask
 
         for layer in self._model.model.layers:
-            if hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
+            if hasattr(layer.self_attn, "rotary_emb") and hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
                 layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward

@@ -1580,19 +1581,19 @@ def __enter__(self):
         ):
             self._model.config.max_position_embeddings = self._model.config.original_max_position_embeddings
 
-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             self._model.model._orig_forward = self._model.model.forward
             self._model.model.forward = types.MethodType(phi3_442_forward, self._model.model)
 
         # https://github.com/huggingface/transformers/blob/30ee508c6c92a1c0aa0281d193c7c0fb815b8d2f/src/transformers/models/phi3/modeling_phi3.py#L113
         # init inv_freq for torchscript tracing
         for layer in self._model.model.layers:
-            if is_torch_version(">=", "2.1.0"):
+            if is_torch_version(">=", "2.1.0") and is_transformers_version("<", "4.48.0"):
                 orig_self_attn_fwd = layer.self_attn.forward
                 layer.self_attn.forward = types.MethodType(_phi3_self_attn_sdpa_forward, layer.self_attn)
                 layer.self_attn._orig_forward = orig_self_attn_fwd
 
-            if layer.self_attn.rotary_emb.inv_freq is None:
+            if hasattr(layer.self_attn, "rotary_emb") and layer.self_attn.rotary_emb.inv_freq is None:
                 rotary_emb = layer.self_attn.rotary_emb
                 layer.self_attn.rotary_emb.inv_freq = 1.0 / (
                     rotary_emb.base ** (torch.arange(0, rotary_emb.dim, 2, dtype=torch.int64).float() / rotary_emb.dim)

@@ -2493,7 +2494,9 @@ class UpdateCausalMaskModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
         patch_update_causal_mask(self._model, "4.42.0")
-        if hasattr(self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"):
+        if hasattr(self._model.model.layers[0].self_attn, "rotary_emb") and hasattr(
+            self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"
+        ):
             for layer in self._model.model.layers:
                 _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

@@ -3045,15 +3048,16 @@ def patched_forward(self, fn):
     def __enter__(self):
         if is_torch_version(">=", "2.1.0"):
             if self._model.config.model_type == "qwen2" and self._model.config._attn_implementation != "sdpa":
-                from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES
+                if is_transformers_version("<", "4.48"):
+                    from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES
 
-                sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
-                self._model.config._orig_attn_implementation = self._model.config._attn_implementation
-                self._model.config._attn_implementation = "sdpa"
+                    sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
+                    self._model.config._orig_attn_implementation = self._model.config._attn_implementation
+                    self._model.config._attn_implementation = "sdpa"
 
-                for layer in self._model.model.layers:
-                    layer.self_attn._orig_forward = layer.self_attn.forward
-                    layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
+                    for layer in self._model.model.layers:
+                        layer.self_attn._orig_forward = layer.self_attn.forward
+                        layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
 
             if self._model.config.model_type == "llama" and self._model.config._attn_implementation != "sdpa":
                 self._model.config._orig_attn_implementation = self._model.config._attn_implementation

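The pattern repeated throughout this file is a version gate plus an attribute probe: starting with transformers 4.48, the attention refactor computes rotary position embeddings once at the model level, so `layer.self_attn.rotary_emb` may no longer exist on Mistral, Phi-3, or Qwen2 layers. A minimal sketch of that guard, assuming the `is_transformers_version` helper from `optimum.intel.utils.import_utils`; `patch_rotary_embeddings` and `reinit_fn` are illustrative names, not part of this commit:

from optimum.intel.utils.import_utils import is_transformers_version


def patch_rotary_embeddings(model, reinit_fn):
    # Illustrative sketch: on transformers >= 4.48 rotary embeddings live on
    # the model itself, so there is nothing to patch on individual layers.
    if is_transformers_version(">=", "4.48.0"):
        return
    for layer in model.model.layers:
        # Older releases keep a rotary_emb module on each attention layer;
        # probe for it so the same code runs across versions.
        if hasattr(layer.self_attn, "rotary_emb"):
            reinit_fn(layer.self_attn.rotary_emb)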
optimum/intel/openvino/modeling_decoder.py

Lines changed: 5 additions & 1 deletion
@@ -56,7 +56,11 @@
 
 
 if TYPE_CHECKING:
-    from transformers.generation.streamers import BaseStreamer
+    try:
+        from transformers.generation.streamers import BaseStreamer
+    except Exception:
+        from typing import Generator as BaseStreamer
+
     from transformers.modeling_utils import PreTrainedModel
 

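`BaseStreamer` is imported only for type hints, so the new try/except keeps annotations resolvable even if the import path moves between transformers releases. A minimal sketch of the same pattern in isolation (`stream_generate` is a hypothetical function, not part of this commit):

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    try:
        from transformers.generation.streamers import BaseStreamer
    except Exception:
        # Stand-in so the quoted annotation below still resolves.
        from typing import Generator as BaseStreamer


def stream_generate(streamer: Optional["BaseStreamer"] = None):
    # The string annotation is only evaluated by type checkers, so a failed
    # import degrades gracefully instead of breaking at runtime.
    ...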
optimum/intel/openvino/modeling_visual_language.py

Lines changed: 13 additions & 0 deletions
@@ -1020,6 +1020,18 @@ def preprocess_inputs(
             prompt = "<image>\n" + text
         else:
             prompt = text
+
+        if getattr(processor, "patch_size", None) is None:
+            if (
+                getattr(config, "vision_config", None) is not None
+                and getattr(config.vision_config, "patch_size", None) is not None
+            ):
+                processor.patch_size = config.vision_config.patch_size
+            else:
+                raise ValueError(
+                    "Processor does not have `patch_size` attribute. Please fix the processor or provide `patch_size` in the config."
+                )
+
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         return inputs

@@ -1915,6 +1927,7 @@ def preprocess_inputs(
         input_ids = tokenizer(text, return_tensors="pt").input_ids
         attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
         result = {"input_ids": input_ids, "attention_mask": attention_mask}
+
         if image is not None:
             result["images"] = processor(images=[image], return_tensors="pt")["pixel_values"]
         return result

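With this fallback, a llava-style processor saved before `patch_size` became a processor attribute still works: `preprocess_inputs` backfills the value from the model config before calling the processor. A usage sketch under those assumptions (checkpoint id and image path are placeholders):

from PIL import Image
from transformers import AutoConfig, AutoProcessor

from optimum.intel import OVModelForVisualCausalLM

model_id = "llava-hf/llava-1.5-7b-hf"  # placeholder checkpoint
processor = AutoProcessor.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id)
model = OVModelForVisualCausalLM.from_pretrained(model_id, export=True)

# Even if processor.patch_size is None, preprocess_inputs falls back to
# config.vision_config.patch_size before calling the processor.
inputs = model.preprocess_inputs(
    text="What is in the image?",
    image=Image.open("example.png"),  # placeholder image
    processor=processor,
    config=config,
)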
setup.py

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "optimum~=1.24",
-    "transformers>=4.36,<4.48",
+    "transformers>=4.36,<4.49",
     "datasets>=1.4.0",
     "sentencepiece",
     "setuptools",

tests/openvino/test_modeling.py

Lines changed: 11 additions & 6 deletions
@@ -271,6 +271,7 @@ def test_load_from_hub_and_save_visual_language_model(self):
         else:
             self.assertEqual(component.request.get_property("PERFORMANCE_HINT"), "LATENCY")
 
+        processor.patch_size = loaded_model.config.vision_config.patch_size
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         set_seed(SEED)
         loaded_model_outputs = loaded_model(**inputs)

@@ -2170,6 +2171,7 @@ def test_compare_to_transformers(self, model_arch):
         for component_name, component in ov_model.components.items():
             self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
         self.assertIsInstance(ov_model.config, PretrainedConfig)
+
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         transformers_inputs = copy.deepcopy(inputs)
         test_device = "AUTO"

@@ -2235,6 +2237,7 @@ def test_llava_with_new_preprocessing(self, model_arch):
             patch_size=config.vision_config.patch_size,
             vision_feature_select_strategy=config.vision_feature_select_strategy,
             trust_remote_code=model_arch in self.REMOTE_CODE_MODELS,
+            num_additional_image_tokens=1,
         )
         transformers_model = self.get_transformer_model_class(model_arch).from_pretrained(model_id)
         ov_model = OVModelForVisualCausalLM.from_pretrained(

@@ -2244,8 +2247,9 @@ def test_llava_with_new_preprocessing(self, model_arch):
         self.assertTrue(processor.patch_size is not None)
         self.assertTrue(processor.vision_feature_select_strategy is not None)
         inputs = processor(images=self.IMAGE, text=prompt, return_tensors="pt")
-        self.assertTrue(
-            (inputs.input_ids == ov_model.config.image_token_index).sum(1).max() >= ov_model.config.image_seq_length
+        self.assertGreaterEqual(
+            (inputs.input_ids == ov_model.config.image_token_index).sum().max().item(),
+            ov_model.config.image_seq_length,
         )
         set_seed(SEED)
         with torch.no_grad():

@@ -2308,17 +2312,17 @@ def test_generate_utils(self, model_arch):
 
     def get_preprocessors(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
+        config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
+
         if model_arch == "nanollava":
-            config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
             processor = AutoProcessor.from_pretrained(
                 config.mm_vision_tower, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
             tokenizer = AutoTokenizer.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
-            preprocessors = {"processor": processor, "tokenizer": tokenizer}
+            preprocessors = {"processor": processor, "tokenizer": tokenizer, "config": config}
         elif model_arch == "internvl2":
-            config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
             tokenizer = AutoTokenizer.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )

@@ -2327,7 +2331,8 @@ def get_preprocessors(self, model_arch):
             processor = AutoProcessor.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
-            preprocessors = {"processor": processor, "tokenizer": None}
+            preprocessors = {"processor": processor, "tokenizer": None, "config": config}
+
         return preprocessors
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)

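The reworked llava assertion counts image placeholder tokens over the whole batch instead of per row, and `assertGreaterEqual` reports both operands on failure. The equivalent check in isolation, with toy values (the token id and expected length are made up):

import torch

input_ids = torch.tensor([[1, 32000, 32000, 32000, 2]])  # toy ids; 32000 plays the image token
image_token_index = 32000
image_seq_length = 3  # stands in for ov_model.config.image_seq_length

num_image_tokens = (input_ids == image_token_index).sum().item()
assert num_image_tokens >= image_seq_length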