2 changes: 2 additions & 0 deletions CHANGELOGS.rst
@@ -4,6 +4,8 @@ Change Logs
0.7.0
+++++

* :pr:`146`: patch for IdeficsAttention, IdeficsEmbedding
* :pr:`145`: patch for _compute_dynamic_ntk_parameters (Phi3RotaryEmbedding)
* :pr:`144`: support for second inputs with different dimension,
rename test_helper into validate,
support ``interpolate_pos_encoding`` for ``VitModel``,
2 changes: 1 addition & 1 deletion README.rst
@@ -22,7 +22,7 @@ onnx-diagnostic: investigate onnx models
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
:target: https://github.com/psf/black

.. image:: https://codecov.io/gh/sdpython/onnx-diagnostic/branch/main/graph/badge.svg?token=Wb9ZGDta8J
.. image:: https://codecov.io/gh/sdpython/onnx-diagnostic/graph/badge.svg?token=91T5ZVIP96
:target: https://codecov.io/gh/sdpython/onnx-diagnostic

The main feature is about `patches <https://github.com/sdpython/onnx-diagnostic/tree/main/onnx_diagnostic/torch_export_patches>`_:
8 changes: 7 additions & 1 deletion _doc/examples/plot_export_hub_codellama.py
@@ -20,6 +20,7 @@
import pprint
import torch
from onnx_diagnostic import doc
from onnx_diagnostic.ext_test_case import unit_test_going
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.torch_models.hghub import (
get_untrained_model_with_inputs,
@@ -32,7 +33,12 @@
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str

model_id = "codellama/CodeLlama-7b-Python-hf"
model_id = (
"HuggingFaceM4/tiny-random-idefics"
if unit_test_going()
else "codellama/CodeLlama-7b-Python-hf"
)
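# When the example is executed as part of the test suite (unit_test_going()
# returns True), a tiny random checkpoint is used instead of the full CodeLlama
# model so the run stays short.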
print(f"model_id={model_id!r}")
print("info", get_model_info(model_id))

# %%
2 changes: 1 addition & 1 deletion _doc/index.rst
@@ -15,7 +15,7 @@ onnx-diagnostic: investigate onnx models
.. image:: https://img.shields.io/badge/code%20style-black-000000.svg
:target: https://github.com/psf/black

.. image:: https://codecov.io/gh/sdpython/onnx-diagnostic/branch/main/graph/badge.svg?token=Wb9ZGDta8J
.. image:: https://codecov.io/gh/sdpython/onnx-diagnostic/graph/badge.svg?token=91T5ZVIP96
:target: https://codecov.io/gh/sdpython/onnx-diagnostic

The main feature is about `patches <https://github.com/sdpython/onnx-diagnostic/tree/main/onnx_diagnostic/torch_export_patches>`_:
2 changes: 1 addition & 1 deletion _unittests/ut_helpers/test_doc_helper.py
@@ -56,7 +56,7 @@ def test_custom_doc_kernels_layer_normalization(self):
)
expected = torch_sess.run(None, feeds)
got = torch_sess_custom.run(None, feeds)
self.assertEqualAny(expected, got, atol=1e-3)
self.assertEqualAny(expected, got, atol=2e-3)

def test_custom_doc_kernels_matmul(self):
model = oh.make_model(
2 changes: 1 addition & 1 deletion _unittests/ut_helpers/test_helper.py
@@ -584,7 +584,7 @@ def test_flatten_encoder_decoder_cache(self):
self.assertIn("EncoderDecoderCache", s)

def test_string_typeçconfig(self):
conf = get_pretrained_config("microsoft/phi-2")
conf = get_pretrained_config("microsoft/phi-2", use_only_preinstalled=True)
s = string_type(conf)
self.assertStartsWith("PhiConfig(**{", s)

13 changes: 8 additions & 5 deletions _unittests/ut_tasks/test_tasks_image_text_to_text.py
@@ -1,13 +1,20 @@
import unittest
import torch
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, has_transformers, has_torch
from onnx_diagnostic.ext_test_case import (
ExtTestCase,
hide_stdout,
requires_transformers,
requires_torch,
)
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str


class TestTasks(ExtTestCase):
@hide_stdout()
@requires_transformers("4.52")
@requires_torch("2.7.99")
def test_image_text_to_text(self):
mid = "HuggingFaceM4/tiny-random-idefics"
data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
@@ -16,10 +23,6 @@ def test_image_text_to_text(self):
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
model(**inputs)
model(**data["inputs2"])
if not has_transformers("4.55"):
raise unittest.SkipTest("The model has control flow.")
if not has_torch("2.7.99"):
raise unittest.SkipTest("sym_max does not work with dynamic dimension")
with torch_export_patches(patch_transformers=True, verbose=10):
torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
6 changes: 4 additions & 2 deletions _unittests/ut_torch_models/test_hghub_api.py
@@ -72,14 +72,16 @@ def test_task_from_id_long(self):
@requires_torch("2.7")
@hide_stdout()
def test_get_pretrained_config(self):
conf = get_pretrained_config("microsoft/phi-2")
conf = get_pretrained_config("microsoft/phi-2", use_only_preinstalled=True)
self.assertNotEmpty(conf)

@requires_transformers("4.50")
@requires_torch("2.7")
@hide_stdout()
def test_get_pretrained_config_options(self):
conf = get_pretrained_config("microsoft/phi-2", num_key_value_heads=16)
conf = get_pretrained_config(
"microsoft/phi-2", num_key_value_heads=16, use_only_preinstalled=True
)
self.assertNotEmpty(conf)
self.assertEqual(conf.num_key_value_heads, 16)

16 changes: 3 additions & 13 deletions _unittests/ut_torch_models/test_hghub_model.py
@@ -75,16 +75,6 @@ def test_get_untrained_model_with_inputs_beit(self):
# different expected value for different version of transformers
self.assertIn((data["size"], data["n_weights"]), [(111448, 27862), (56880, 14220)])

@hide_stdout()
@ignore_errors(OSError)
def test_get_untrained_model_with_inputs_codellama(self):
mid = "codellama/CodeLlama-7b-Python-hf"
data = get_untrained_model_with_inputs(mid, verbose=1)
model, inputs = data["model"], data["inputs"]
model(**inputs)
# different expected value for different version of transformers
self.assertIn((data["size"], data["n_weights"]), [(547377152, 136844288)])

@hide_stdout()
@ignore_errors(OSError)
def test_get_untrained_model_with_inputs_clip_vit(self):
@@ -129,11 +119,11 @@ def _diff(c1, c2):
try:
model(**inputs)
except Exception as e:
diff = _diff(get_pretrained_config(mid), data["configuration"])
cf = get_pretrained_config(mid, use_only_preinstalled=True)
diff = _diff(cf, data["configuration"])
raise AssertionError(
f"Computation failed due to {e}.\n--- pretrained\n"
f"{pprint.pformat(get_pretrained_config(mid))}\n"
f"--- modified\n{data['configuration']}\n"
f"{pprint.pformat(cf)}\n--- modified\n{data['configuration']}\n"
f"--- diff\n{diff}"
) from e
# different expected value for different version of transformers
25 changes: 24 additions & 1 deletion _unittests/ut_torch_models/test_validate_whole_models.py
@@ -270,7 +270,30 @@ def test_validate_phi35_mini_instruct(self):
inputs2=True,
patch=True,
rewrite=True,
# model_options={"rope_scaling": {"rope_type": "dynamic", "factor": 10.0}},
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
onnx_filename = data["onnx_filename"]
onx = onnx.load(onnx_filename)
op_types = set(n.op_type for n in onx.graph.node)
self.assertIn("If", op_types)

@requires_torch("2.7")
@hide_stdout()
@ignore_warnings(FutureWarning)
@requires_transformers("4.51")
def test_validate_phi35_4k_mini_instruct(self):
mid = "microsoft/Phi-3-mini-4k-instruct"
summary, data = validate_model(
mid,
do_run=True,
verbose=10,
exporter="custom",
dump_folder="dump_test/validate_phi35_mini_instruct",
inputs2=True,
patch=True,
rewrite=True,
model_options={"rope_scaling": {"rope_type": "dynamic", "factor": 10.0}},
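# rope_scaling with rope_type="dynamic" exercises _compute_dynamic_ntk_parameters,
# the code path patched by PR 145 (see the changelog entry above).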
)
self.assertIsInstance(summary, dict)
self.assertIsInstance(data, dict)
18 changes: 15 additions & 3 deletions _unittests/ut_xrun_doc/test_documentation_examples.py
@@ -92,10 +92,22 @@ def add_test_methods(cls):

if (
not reason
and name in {"plot_export_locate_issue.py", "plot_export_with_auto.py"}
and not has_torch("4.7")
and name in {"plot_export_hub_codellama.py"}
and not has_transformers("4.52")
):
reason = "torch<2.7"
reason = "transformers<4.52"

if (
not reason
and name
in {
"plot_export_locate_issue.py",
"plot_export_with_auto.py",
"plot_export_hub_codellama.py",
}
and not has_torch("2.8")
):
reason = "torch<2.8"

if reason:

149 changes: 149 additions & 0 deletions onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
@@ -818,3 +818,152 @@ def forward(self, x, position_ids):
sin = emb.sin() * self.attention_scaling

return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)


class patched_IdeficsEmbedding(torch.nn.Module):
_PATCHES_ = ["forward"]
_PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsEmbedding

def forward(self, x, seq_len=None):
# x: [bs, num_attention_heads, seq_len, head_size]
# if seq_len > self.max_seq_len_cached:
# self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
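# The data-dependent `if` above cannot be captured by torch.export, so the
# patch expresses the same logic with torch.cond: one branch recomputes the
# cos/sin cache when seq_len exceeds the cached length, the other slices the
# existing cache.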

def _set_cos_sin_cache_then(x, inv_freq, seq_len, _cos_cached, _sin_cached):
t = torch.arange(seq_len, device=x.device, dtype=torch.int64).type_as(inv_freq)
freqs = torch.einsum("i,j->ij", t, inv_freq)
emb = torch.cat((freqs, freqs), dim=-1)
return emb.cos().to(x.dtype), emb.sin().to(x.dtype)

def _set_cos_sin_cache_else(_x, _inv_freq, _seq_len, cos_cached, sin_cached):
torch._check(seq_len.item() <= cos_cached.shape[0])
co = cos_cached[: seq_len.item()].detach().clone()
torch._check(seq_len.item() <= sin_cached.shape[0])
si = sin_cached[: seq_len.item()].detach().clone()
return co.to(dtype=x.dtype), si.to(dtype=x.dtype)
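# torch.cond requires both branch functions to accept the same operand list and
# to return tensors with the same structure, hence the unused parameters above.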

cos_cached, sin_cached = torch.cond(
(seq_len > self.max_seq_len_cached).item(),
_set_cos_sin_cache_then,
_set_cos_sin_cache_else,
[x, self.inv_freq, seq_len, self.cos_cached, self.sin_cached],
)
return cos_cached, sin_cached


class patched_IdeficsAttention(torch.nn.Module):
_PATCHES_ = ["forward"]
_PATCHED_CLASS_ = transformers.models.idefics.modeling_idefics.IdeficsAttention

def forward(
self,
hidden_states: torch.Tensor,
key_value_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
# if key_value_states are provided this layer is used as a cross-attention layer
is_cross_attention = self.is_cross_attention or key_value_states is not None

bsz, q_len, _ = hidden_states.size()

query_states = (
self.q_proj(hidden_states)
.view(bsz, q_len, self.num_heads, self.head_dim)
.transpose(1, 2)
)
if not is_cross_attention:
key_states = (
self.k_proj(hidden_states)
.view(bsz, q_len, self.num_heads, self.head_dim)
.transpose(1, 2)
)
value_states = (
self.v_proj(hidden_states)
.view(bsz, q_len, self.num_heads, self.head_dim)
.transpose(1, 2)
)
else:
_, kv_len, _ = (
key_value_states.size()
) # Note that, in this case, `kv_len` == `kv_seq_len`
key_states = (
self.k_proj(key_value_states)
.view(bsz, kv_len, self.num_heads, self.head_dim)
.transpose(1, 2)
)
value_states = (
self.v_proj(key_value_states)
.view(bsz, kv_len, self.num_heads, self.head_dim)
.transpose(1, 2)
)

kv_seq_len = key_states.shape[-2]
if past_key_value is not None:
kv_seq_len += cache_position[0]

if not is_cross_attention:
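# Patch: the rotary cache length is built with tensor ops (torch.maximum over
# 0-d int64 tensors) instead of a Python max(), so the patched IdeficsEmbedding
# above can consume it inside torch.cond.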
rotary_length = torch.maximum(
torch.tensor(kv_seq_len, dtype=torch.int64),
torch.tensor(q_len, dtype=torch.int64),
)
cos, sin = self.rotary_emb(value_states, seq_len=rotary_length)
query_states, key_states = (
transformers.models.idefics.modeling_idefics.apply_rotary_pos_emb(
query_states, key_states, cos, sin, position_ids
)
)
# [bsz, nh, t, hd]

if past_key_value is not None:
# sin and cos are specific to RoPE models;
# cache_position needed for the static cache
cache_kwargs = {"cache_position": cache_position}
key_states, value_states = past_key_value.update(
key_states, value_states, self.layer_idx, cache_kwargs
)

if self.qk_layer_norms:
query_states = self.q_layer_norm(query_states)
key_states = self.k_layer_norm(key_states)

attention_interface: Callable = (
transformers.models.idefics.modeling_idefics.eager_attention_forward
)

if self.config._attn_implementation != "eager":
if self.config._attn_implementation == "sdpa" and output_attentions:
transformers.models.idefics.modeling_idefics.logger.warning_once(
"`torch.nn.functional.scaled_dot_product_attention` does not support "
"`output_attentions=True`. Falling back to "
"eager attention. This warning can be removed using the argument "
'`attn_implementation="eager"` when loading the model.'
)
else:
attention_interface = transformers.modeling_utils.ALL_ATTENTION_FUNCTIONS[
self.config._attn_implementation
]

attn_output, attn_weights = attention_interface(
self,
query_states,
key_states,
value_states,
attention_mask,
dropout=0.0 if not self.training else self.dropout,
scaling=self.scaling,
**kwargs,
)

attn_output = attn_output.reshape(bsz, q_len, -1).contiguous()
attn_output = self.o_proj(attn_output)

if output_attentions:
attn_weights = None

return attn_output, attn_weights, past_key_value
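
For context, here is a minimal usage sketch, condensed from the updated ``test_image_text_to_text`` above, showing how these Idefics patches are exercised; the model id and helper functions are the ones appearing in this diff:

import torch
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str

# untrained tiny Idefics model with a consistent set of inputs and dynamic shapes
mid = "HuggingFaceM4/tiny-random-idefics"
data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]

# torch_export_patches temporarily swaps in the patched classes defined above
# (patched_IdeficsEmbedding, patched_IdeficsAttention, ...) during the export.
with torch_export_patches(patch_transformers=True, verbose=10):
    torch.export.export(
        model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
    )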