
Commit ca4eb76

more patches
1 parent b97f9fa commit ca4eb76

7 files changed (+171 -70 lines changed)


CHANGELOGS.rst

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@ Change Logs
 0.7.0
 +++++

+* :pr:`146`: patch for IdeficsAttention, IdeficsEmbedding
 * :pr:`145`: patch for _compute_dynamic_ntk_parameters (Phi3RotaryEmbedding)
 * :pr:`144`: support for second inputs with different dimension,
   rename test_helper into validate,

_doc/examples/plot_export_hub_codellama.py

Lines changed: 7 additions & 1 deletion
@@ -20,6 +20,7 @@
 import pprint
 import torch
 from onnx_diagnostic import doc
+from onnx_diagnostic.ext_test_case import unit_test_going
 from onnx_diagnostic.helpers import string_type
 from onnx_diagnostic.torch_models.hghub import (
     get_untrained_model_with_inputs,
@@ -32,7 +33,12 @@
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str

-model_id = "codellama/CodeLlama-7b-Python-hf"
+model_id = (
+    "HuggingFaceM4/tiny-random-idefics"
+    if unit_test_going()
+    else "codellama/CodeLlama-7b-Python-hf"
+)
+print(f"model_id={model_id!r}")
 print("info", get_model_info(model_id))

 # %%
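The hunk stops at the next cell marker, so the rest of the example is not shown here. A minimal sketch of how the imports above fit together, assuming the usual get_untrained_model_with_inputs flow used elsewhere in this commit; the actual script may differ:

import torch
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str

# build an untrained copy of the model together with a consistent set of inputs
data = get_untrained_model_with_inputs(model_id, verbose=1)
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
print("inputs:", string_type(inputs))

# export with the transformers patches enabled; use_dyn_not_str converts
# string dimension names into dynamic dimensions understood by torch.export
with torch_export_patches(patch_transformers=True):
    exported = torch.export.export(
        model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
    )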

_unittests/ut_tasks/test_tasks_image_text_to_text.py

Lines changed: 8 additions & 5 deletions
@@ -1,13 +1,20 @@
 import unittest
 import torch
-from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, has_transformers, has_torch
+from onnx_diagnostic.ext_test_case import (
+    ExtTestCase,
+    hide_stdout,
+    requires_transformers,
+    requires_torch,
+)
 from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str


 class TestTasks(ExtTestCase):
     @hide_stdout()
+    @requires_transformers("4.52")
+    @requires_torch("2.7.99")
     def test_image_text_to_text(self):
         mid = "HuggingFaceM4/tiny-random-idefics"
         data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
@@ -16,10 +23,6 @@ def test_image_text_to_text(self):
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
         model(**data["inputs2"])
-        if not has_transformers("4.55"):
-            raise unittest.SkipTest("The model has control flow.")
-        if not has_torch("2.7.99"):
-            raise unittest.SkipTest("sym_max does not work with dynamic dimension")
         with torch_export_patches(patch_transformers=True, verbose=10):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
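The two new decorators replace the inline SkipTest checks removed in the second hunk. A minimal sketch of what such a version-gating decorator typically does, assuming it compares the installed version against the requested minimum; the name requires_version_sketch is illustrative, and the real helpers live in onnx_diagnostic.ext_test_case:

import functools
import unittest
from packaging.version import Version


def requires_version_sketch(module_name: str, min_version: str):
    """Skips the decorated test when module_name is older than min_version."""

    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            module = __import__(module_name)
            if Version(module.__version__) < Version(min_version):
                raise unittest.SkipTest(
                    f"{module_name}>={min_version} needed, found {module.__version__}"
                )
            return fn(*args, **kwargs)

        return wrapper

    return decorator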

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 149 additions & 0 deletions
@@ -818,3 +818,152 @@ def forward(self, x, position_ids):
         sin = emb.sin() * self.attention_scaling

         return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)
+
+
+class patched_IdeficsEmbedding(torch.nn.Module):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsEmbedding
+
+    def forward(self, x, seq_len=None):
+        # x: [bs, num_attention_heads, seq_len, head_size]
+        # if seq_len > self.max_seq_len_cached:
+        #     self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
+
+        def _set_cos_sin_cache_then(x, inv_freq, seq_len, _cos_cached, _sin_cached):
+            t = torch.arange(seq_len, device=x.device, dtype=torch.int64).type_as(inv_freq)
+            freqs = torch.einsum("i,j->ij", t, inv_freq)
+            emb = torch.cat((freqs, freqs), dim=-1)
+            return emb.cos().to(x.dtype), emb.sin().to(x.dtype)
+
+        def _set_cos_sin_cache_else(_x, _inv_freq, _seq_len, cos_cached, sin_cached):
+            torch._check(seq_len.item() <= cos_cached.shape[0])
+            co = cos_cached[: seq_len.item()].detach().clone()
+            torch._check(seq_len.item() <= sin_cached.shape[0])
+            si = sin_cached[: seq_len.item()].detach().clone()
+            return co.to(dtype=x.dtype), si.to(dtype=x.dtype)
+
+        cos_cached, sin_cached = torch.cond(
+            (seq_len > self.max_seq_len_cached).item(),
+            _set_cos_sin_cache_then,
+            _set_cos_sin_cache_else,
+            [x, self.inv_freq, seq_len, self.cos_cached, self.sin_cached],
+        )
+        return cos_cached, sin_cached
+
+
+class patched_IdeficsAttention(torch.nn.Module):
+    _PATCHES_ = ["forward"]
+    _PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsAttention
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        key_value_states: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_value: Optional[Tuple[torch.Tensor]] = None,
+        output_attentions: bool = False,
+        use_cache: bool = False,
+        cache_position: Optional[torch.LongTensor] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
+        # if key_value_states are provided this layer is used as a cross-attention layer
+        is_cross_attention = self.is_cross_attention or key_value_states is not None
+
+        bsz, q_len, _ = hidden_states.size()
+
+        query_states = (
+            self.q_proj(hidden_states)
+            .view(bsz, q_len, self.num_heads, self.head_dim)
+            .transpose(1, 2)
+        )
+        if not is_cross_attention:
+            key_states = (
+                self.k_proj(hidden_states)
+                .view(bsz, q_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+            value_states = (
+                self.v_proj(hidden_states)
+                .view(bsz, q_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+        else:
+            _, kv_len, _ = (
+                key_value_states.size()
+            )  # Note that, in this case, `kv_len` == `kv_seq_len`
+            key_states = (
+                self.k_proj(key_value_states)
+                .view(bsz, kv_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+            value_states = (
+                self.v_proj(key_value_states)
+                .view(bsz, kv_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
+
+        kv_seq_len = key_states.shape[-2]
+        if past_key_value is not None:
+            kv_seq_len += cache_position[0]
+
+        if not is_cross_attention:
+            rotary_length = torch.maximum(
+                torch.tensor(kv_seq_len, dtype=torch.int64),
+                torch.tensor(q_len, dtype=torch.int64),
+            )
+            cos, sin = self.rotary_emb(value_states, seq_len=rotary_length)
+            query_states, key_states = (
+                transformers.models.idefics.modeling_idefics.apply_rotary_pos_emb(
+                    query_states, key_states, cos, sin, position_ids
+                )
+            )
+        # [bsz, nh, t, hd]
+
+        if past_key_value is not None:
+            # sin and cos are specific to RoPE models;
+            # cache_position needed for the static cache
+            cache_kwargs = {"cache_position": cache_position}
+            key_states, value_states = past_key_value.update(
+                key_states, value_states, self.layer_idx, cache_kwargs
+            )
+
+        if self.qk_layer_norms:
+            query_states = self.q_layer_norm(query_states)
+            key_states = self.k_layer_norm(key_states)
+
+        attention_interface: Callable = (
+            transformers.models.idefics.modeling_idefics.eager_attention_forward
+        )
+
+        if self.config._attn_implementation != "eager":
+            if self.config._attn_implementation == "sdpa" and output_attentions:
+                transformers.models.idefics.modeling_idefics.logger.warning_once(
+                    "`torch.nn.functional.scaled_dot_product_attention` does not support "
+                    "`output_attentions=True`. Falling back to "
+                    "eager attention. This warning can be removed using the argument "
+                    '`attn_implementation="eager"` when loading the model.'
+                )
+            else:
+                attention_interface = transformers.modeling_utils.ALL_ATTENTION_FUNCTIONS[
+                    self.config._attn_implementation
+                ]
+
+        attn_output, attn_weights = attention_interface(
+            self,
+            query_states,
+            key_states,
+            value_states,
+            attention_mask,
+            dropout=0.0 if not self.training else self.dropout,
+            scaling=self.scaling,
+            **kwargs,
+        )
+
+        attn_output = attn_output.reshape(bsz, q_len, -1).contiguous()
+        attn_output = self.o_proj(attn_output)
+
+        if output_attentions:
+            attn_weights = None
+
+        return attn_output, attn_weights, past_key_value
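patched_IdeficsEmbedding turns the data-dependent branch on seq_len into a torch.cond so that torch.export can trace both paths. A minimal standalone sketch of the torch.cond contract the patch relies on; the functions recompute and reuse are illustrative, not taken from the patch: both branches receive the same operands and must return tensors with matching structure.

import torch


def recompute(t, cached):
    # "cache miss" branch: rebuild the values from the positions
    return torch.cos(t)


def reuse(t, cached):
    # "cache hit" branch: slice the precomputed buffer to the requested length
    return cached[: t.shape[0]]


t = torch.arange(8, dtype=torch.float32)
cached = torch.cos(torch.arange(16, dtype=torch.float32))
# both branches take (t, cached) and return a single tensor of shape (8,)
out = torch.cond(t.shape[0] > cached.shape[0], recompute, reuse, (t, cached))

In the patch itself, the two branches share the signature (x, inv_freq, seq_len, cos_cached, sin_cached) and return the cos/sin tensors either recomputed from scratch or sliced from the cached buffers.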

onnx_diagnostic/torch_models/hghub/hub_api.py

Lines changed: 5 additions & 3 deletions
@@ -59,9 +59,11 @@ def get_cached_configuration(
         conf = copy.deepcopy(conf)
         update_config(conf, kwargs)
         return conf
-    assert not exc and not os.environ.get(
-        "NOHTTP", ""
-    ), f"Unable to find {name!r} in {pprint.pformat(sorted(cached))}"
+    assert not exc and not os.environ.get("NOHTTP", ""), (
+        f"Unable to find {name!r} (exc={exc}, "
+        f"NOHTTP={os.environ.get('NOHTTP', '')!r}) "
+        f"in {pprint.pformat(sorted(cached))}"
+    )
     return None

onnx_diagnostic/torch_models/hghub/hub_data_cached_configs.py

Lines changed: 0 additions & 60 deletions
@@ -4160,63 +4160,3 @@ def _ccached_ydshieh_tiny_random_vit_for_image_classification():
             "transformers_version": "4.24.0.dev0",
         }
     )
-
-
-def _ccached_huggingface_tiny_random_idefics():
-    "HuggingFaceM4/tiny-random-idefics"
-    return transformers.Phi3Config(
-        **{
-            "additional_vocab_size": 2,
-            "alpha_initializer": "ones",
-            "alpha_type": "vector",
-            "alphas_initializer_range": 0.0,
-            "architectures": ["IdeficsForVisionText2Text"],
-            "bos_token_id": 1,
-            "cross_layer_activation_function": "swiglu",
-            "cross_layer_interval": 1,
-            "dropout": 0.0,
-            "eos_token_id": 2,
-            "ffn_dim": 64,
-            "freeze_lm_head": false,
-            "freeze_text_layers": false,
-            "freeze_text_module_exceptions": [],
-            "freeze_vision_layers": false,
-            "freeze_vision_module_exceptions": [],
-            "hidden_act": "silu",
-            "hidden_size": 16,
-            "initializer_range": 0.02,
-            "intermediate_size": 11008,
-            "max_new_tokens": 128,
-            "max_position_embeddings": 128,
-            "model_type": "idefics",
-            "num_attention_heads": 4,
-            "num_hidden_layers": 2,
-            "pad_token_id": 0,
-            "qk_layer_norms": false,
-            "rms_norm_eps": 1e-06,
-            "tie_word_embeddings": false,
-            "torch_dtype": "float16",
-            "transformers_version": "4.27.0.dev0",
-            "use_cache": true,
-            "use_resampler": true,
-            "vocab_size": 32000,
-            "word_embed_proj_dim": 16,
-            "vision_config": {
-                "hidden_act": "gelu",
-                "embed_dim": 32,
-                "image_size": 30,
-                "intermediate_size": 37,
-                "patch_size": 2,
-                "num_attention_heads": 4,
-                "num_hidden_layers": 5,
-                "vision_model_name": "hf-internal-testing/tiny-random-clip",
-            },
-            "perceiver_config": {
-                "qk_layer_norms_perceiver": false,
-                "resampler_depth": 2,
-                "resampler_head_dim": 8,
-                "resampler_n_heads": 2,
-                "resampler_n_latents": 16,
-            },
-        }
-    )

onnx_diagnostic/torch_models/hghub/model_inputs.py

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ def get_untrained_model_with_inputs(
     use_preinstalled: bool = True,
     add_second_input: bool = False,
     subfolder: Optional[str] = None,
-    use_only_preinstalled: bool = True,
+    use_only_preinstalled: bool = False,
 ) -> Dict[str, Any]:
     """
     Gets a non initialized model similar to the original model
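A short usage sketch of the changed default; the reading that use_only_preinstalled=False lets the function fall back beyond the preinstalled configurations is an assumption, and the call pattern mirrors the unit test added in this commit:

from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs

# the keyword is spelled out for clarity; False is now the default
data = get_untrained_model_with_inputs(
    "HuggingFaceM4/tiny-random-idefics",
    verbose=1,
    add_second_input=True,
    use_only_preinstalled=False,
)
model, inputs = data["model"], data["inputs"]
model(**inputs)
model(**data["inputs2"])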
