1 change: 1 addition & 0 deletions CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
0.7.13
++++++

* :pr:`237`: dummy inputs for google/gemma-3-4b-it
* :pr:`244`: add a patch to bypass the exception raised when the dynamic dimension is in {0,1}

0.7.12
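A minimal sketch of the situation :pr:`244` targets, assuming the bypass is active through the plain `torch_export_patches` context manager: a dimension is declared dynamic while the sample input has size 1 there, which vanilla `torch.export` may reject or specialize away.

```python
import torch
from onnx_diagnostic.torch_export_patches import torch_export_patches


class Model(torch.nn.Module):
    def forward(self, x):
        return x * 2


# The first dimension is declared dynamic even though the sample input has
# size 1; without the patch, export may raise because the size is in {0, 1}.
x = torch.rand(1, 4)
with torch_export_patches():  # assumed to enable the bypass from :pr:`244`
    ep = torch.export.export(
        Model(), (x,), dynamic_shapes=({0: torch.export.Dim.DYNAMIC},)
    )
```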
33 changes: 33 additions & 0 deletions _unittests/ut_helpers/test_torch_helper.py
@@ -181,6 +181,39 @@ def forward(self, x, y):
set(restored),
)

@hide_stdout()
def test_steal_forward_dump_file_steal_append_drop(self):
class SubModel(torch.nn.Module):
def forward(self, x):
return x * x

class Model(torch.nn.Module):
def __init__(self):
super().__init__()
self.s1 = SubModel()
self.s2 = SubModel()

def forward(self, x, y):
sx = self.s1(x)
steal_append("sx", sx)
return sx + self.s2(y)

inputs = dict(x=torch.rand(3, 4), y=torch.rand(3, 4))
model = Model()
dump_file = self.get_dump_file("test_steal_forward_dump_file_drop.onnx")
with steal_forward(model, dump_file=dump_file, dump_drop={"x"}):
model(**inputs)
model(**inputs)
self.assertExists(dump_file)
restored = create_input_tensors_from_onnx_model(dump_file)
self.assertEqual(
{("", 1, "I"), ("", 1, "O"), "sx", ("", 0, "O"), "sx_1", ("", 0, "I")},
set(restored),
)
first = restored[("", 0, "I")]
_a, kws = first
self.assertNotIn("x", kws)

@hide_stdout()
def test_steal_forward_submodules(self):
class SubModel(torch.nn.Module):
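A hedged sketch of how a dump produced this way can be read back (key layout inferred from the assertions above, import path assumed): `create_input_tensors_from_onnx_model` appears to return a dict keyed by `(module_name, call_index, "I"/"O")` for stolen inputs and outputs, plus one entry per tensor recorded with `steal_append`.

```python
from onnx_diagnostic.helpers.torch_helper import (  # import path assumed
    create_input_tensors_from_onnx_model,
)

restored = create_input_tensors_from_onnx_model("dump.onnx")
args, kwargs = restored[("", 0, "I")]  # inputs of the first recorded call
outputs = restored[("", 0, "O")]       # outputs of the first recorded call
sx = restored["sx"]                    # tensor saved via steal_append("sx", sx)
assert "x" not in kwargs               # "x" was filtered out by dump_drop={"x"}
```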
14 changes: 14 additions & 0 deletions _unittests/ut_tasks/test_data.py
@@ -0,0 +1,14 @@
import unittest
from onnx_diagnostic.ext_test_case import ExtTestCase
from onnx_diagnostic.tasks.data import get_data


class TestTasks(ExtTestCase):
def test_get_data(self):
name = "dummies_imagetext2text_generation_gemma3.onnx"
data = get_data(name)
print(data)


if __name__ == "__main__":
unittest.main(verbosity=2)
31 changes: 29 additions & 2 deletions _unittests/ut_tasks/test_tasks_image_text_to_text.py
@@ -22,6 +22,7 @@ def test_image_text_to_text_idefics(self):
self.assertEqual(data["task"], "image-text-to-text")
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
model(**torch_deepcopy(inputs))
print("***", self.string_type(data["inputs2"], with_shape=True))
model(**data["inputs2"])
with torch_export_patches(patch_transformers=True, verbose=10):
torch.export.export(
@@ -31,14 +32,13 @@
@hide_stdout()
@requires_transformers("4.57.99")
@requires_torch("2.7.99")
def test_image_text_to_text_gemma3(self):
def test_image_text_to_text_tiny_gemma3(self):
"""
If the model fails because of
``if inputs_embeds[special_image_mask].numel() != image_features.numel():``,
make sure this PR was merged:
https://github.com/huggingface/transformers/pull/39962.
"""
# mid = "google/gemma-3-4b-it"
mid = "tiny-random/gemma-3"
data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
self.assertEqual(data["task"], "image-text-to-text")
@@ -52,6 +52,33 @@
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
@requires_transformers("4.56.99")
@requires_torch("2.8.99")
def test_image_text_to_text_gemma3_4b_it(self):
mid = "google/gemma-3-4b-it"
data = get_untrained_model_with_inputs(
mid,
verbose=1,
add_second_input=False,
# inputs_kwargs={
# "sequence_length": 281,
# "batch_size": 1,
# "max_sequence_length": 580,
# "n_images": 1,
# },
)
self.assertEqual(data["task"], "image-text-to-text")
# self.assertIn((data["size"], data["n_weights"]), [(17248576, 4312144)])
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
# inputs.pop("attention_mask")
# ds.pop("attention_mask")
model(**torch_deepcopy(inputs))
with torch_export_patches(patch_transformers=True, verbose=10):
torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
@requires_transformers("4.57.99")
@requires_torch("2.7.99")
9 changes: 5 additions & 4 deletions _unittests/ut_tasks/test_tasks_image_to_video.py
@@ -54,10 +54,11 @@ def test_image_to_video_oblivious(self):
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
model(**inputs)
model(**data["inputs2"])
with torch.fx.experimental._config.patch(
backed_size_oblivious=True
), torch_export_patches(
patch_transformers=True, patch_diffusers=True, verbose=10, stop_if_static=1
with (
torch.fx.experimental._config.patch(backed_size_oblivious=True),
torch_export_patches(
patch_transformers=True, patch_diffusers=True, verbose=10, stop_if_static=1
),
):
torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
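This change (and two similar ones below) rewrites stacked context managers into the parenthesized `with` form, which is valid from Python 3.10 onward; both spellings are equivalent:

```python
from contextlib import nullcontext

# Single-line stacking (works on older Python versions too)
with nullcontext() as a, nullcontext() as b:
    pass

# Parenthesized form used by the updated tests (Python 3.10+)
with (
    nullcontext() as a,
    nullcontext() as b,
):
    pass
```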
155 changes: 153 additions & 2 deletions _unittests/ut_tasks/try_tasks.py
@@ -1,10 +1,12 @@
import os
import unittest
import torch
from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
from onnx_diagnostic.helpers.torch_helper import steal_forward
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches


class TestHuggingFaceHubModel(ExtTestCase):
@@ -137,8 +139,9 @@ def test_text_generation_phi4_mini(self):
import torch
from transformers import RobertaTokenizer, T5ForConditionalGeneration

tokenizer = RobertaTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")
model = T5ForConditionalGeneration.from_pretrained("microsoft/Phi-4-mini-instruct")
model_id = "microsoft/Phi-4-mini-instruct"
tokenizer = RobertaTokenizer.from_pretrained(model_id)
model = T5ForConditionalGeneration.from_pretrained(model_id)

text = "def greet(user): print(f'hello <extra_id_0>!')"
input_ids = tokenizer(text, return_tensors="pt").input_ids
@@ -156,6 +159,41 @@ def test_text_generation_phi4_mini(self):
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

@never_test()
def test_text_generation_phi3_mini(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi3_mini

from transformers import Phi3ForCausalLM, AutoTokenizer

model_id = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Phi3ForCausalLM.from_pretrained(model_id)

messages = [
{
"role": "system",
"content": (
"You are a helpful digital assistant. Please provide safe, "
"ethical and accurate information to the user."
),
},
{
"role": "user",
"content": (
"Can you provide ways to eat combinations of bananas and dragonfruits?"
),
},
]
inputs = tokenizer.apply_chat_template(
messages, add_generation_prompt=True, return_tensors="pt"
)

# simply generate a single sequence
print()
with steal_forward(model):
generated_ids = model.generate(inputs, max_length=100)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

@never_test()
@unittest.skip(
reason="AttributeError: 'Phi4MMModel' object has no attribute "
@@ -791,6 +829,119 @@ def test_sentence_similary_alibaba_nlp_gte(self):
scores = (embeddings[:1] @ embeddings[1:].T) * 100
print(scores.tolist())

@never_test()
def test_imagetext2text_generation_gemma3_4b_it(self):
"""
clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k gemma3_4b_it
"""
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

model_id = "google/gemma-3-4b-it"
if os.environ.get("PRETRAINED", ""):
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, device_map="cpu"
).eval()
else:
data = get_untrained_model_with_inputs(
model_id,
verbose=1,
add_second_input=False,
# same_as_pretrained=True, #use_pretrained=True
inputs_kwargs={
"sequence_length": 281,
"batch_size": 1,
"max_sequence_length": 580,
"n_images": 1,
},
)
model = data["model"]

print(f"-- model.device={model.device}")
processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
print(f"-- processor={type(processor)}")

messages = [
{
"role": "system",
"content": [{"type": "text", "text": "You are a helpful assistant."}],
},
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg",
},
{"type": "text", "text": "Describe this image in detail."},
],
},
]
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt",
).to(model.device, dtype=torch.bfloat16)
# if "token_type_ids" in inputs:
# print(
# f"-- remove token_type_ids: "
# f"{self.string_type(inputs['token_type_ids'], with_shape=True)}"
# )
# inputs.pop("token_type_ids", None)
print(f"-- inputs={self.string_type(inputs)}")

# iteration merge = sequence > 1, cache not empty
# iteration 1 = sequence > 1, no cache
# cache_position:T7s281,
# past_key_values:StaticCache(key_cache=#0[], value_cache=#0[]),
# input_ids:T7s1x281,
# inputs_embeds:None,
# token_type_ids:T7s1x281,
# attention_mask:dict(sliding_attention:T9s1x1x281x580,
# full_attention:T9s1x1x281x580),
# position_ids:None,
# use_cache:bool,
# logits_to_keep:None,
# pixel_values:T16s1x3x896x896,
# return_dict:bool)
# iteration 2 = sequence = 1, cache not empty
# cache_position:T7s1,
# past_key_values:StaticCache(key_cache=#34[T1s1x4x580x256,...],
# value_cache=#34[T1s1x4x580x256,...]),
# input_ids:T7s1x1,
# inputs_embeds:None,
# token_type_ids:T7s1x1,
# attention_mask:dict(sliding_attention:T9s1x1x1x580,full_attention:T9s1x1x1x580),
# position_ids:None,
# use_cache:bool,logits_to_keep:None,return_dict:bool)

print()
with (
torch_export_patches(
patch_torch=False, patch_sympy=False, patch_transformers=True
),
steal_forward(
model,
dump_file=self.get_dump_file(
"test_imagetext2text_generation_gemma3_4b_it.onnx"
),
dump_drop={"attention_mask", "past_key_values", "pixel_values"},
save_as_external_data=False,
),
):
generated_ids = model.generate(
**inputs,
# 282 = value high enough to trigger multiple iterations of the model
max_new_tokens=282,
do_sample=False,
cache_implementation="static",
)
output_text = processor.decode(
generated_ids[0][inputs["input_ids"].shape[1] :], skip_special_tokens=False
)
print(output_text)


if __name__ == "__main__":
unittest.main(verbosity=2)
5 changes: 3 additions & 2 deletions _unittests/ut_torch_export_patches/test_patch_torch.py
@@ -309,8 +309,9 @@ def forward(self, x, ind1, ind2):
with self.subTest(
name="patch for 0/1 with oblivious", dynamic_shapes=dynamic_shapes
):
with torch_export_patches(), torch.fx.experimental._config.patch(
backed_size_oblivious=True
with (
torch_export_patches(),
torch.fx.experimental._config.patch(backed_size_oblivious=True),
):
ep = torch.export.export(model, inputs, dynamic_shapes=dynamic_shapes)
got = ep.module()(*inputs)
7 changes: 4 additions & 3 deletions _unittests/ut_torch_models/test_llm_phi2.py
@@ -33,9 +33,10 @@ def test_export_phi2_1_batch_size_1_oblivious(self):
self.assertEqual(
{"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
)
with torch.fx.experimental._config.patch(
backed_size_oblivious=True
), torch_export_patches(patch_transformers=True):
with (
torch.fx.experimental._config.patch(backed_size_oblivious=True),
torch_export_patches(patch_transformers=True),
):
ep = torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds)
)
13 changes: 11 additions & 2 deletions onnx_diagnostic/export/dynamic_shapes.py
@@ -56,6 +56,14 @@ def __init__(
self.kwargs = kwargs
self.dynamic_shapes = dynamic_shapes
self.args_names = args_names
if not self.kwargs and isinstance(self.dynamic_shapes, dict):
# This assumes the dictionary for the dynamic shapes is ordered
# the same way the args are. The input names are not known.
assert len(self.dynamic_shapes) == len(self.args), (
f"Length mismatch, kwargs is empty, len(dynamic_shapes)="
f"{len(self.dynamic_shapes)}, len(args)={len(self.args)}"
)
self.dynamic_shapes = tuple(self.dynamic_shapes.values())

def __str__(self) -> str:
return "\n".join(
@@ -232,8 +240,9 @@ def _generic_walker(
"""
if not self.args:
assert isinstance(self.kwargs, dict) and isinstance(self.dynamic_shapes, dict), (
f"Type mismatch, args={string_type(self.args)} and "
f"dynamic_shapes={self.dynamic_shapes} should have the same type."
f"Type mismatch, args={string_type(self.args)}, "
f"kwargs={string_type(self.kwargs)} and dynamic_shapes="
f"{string_type(self.dynamic_shapes)} should have the same type."
)
res = self._generic_walker_step(
processor,
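A small sketch of the normalization added in `__init__` (names invented for illustration): when only positional args are given but `dynamic_shapes` arrives as a dict, the dict is assumed to be ordered like the args and is converted to a tuple.

```python
import torch

args = (torch.rand(2, 4), torch.rand(2, 4))
dynamic_shapes = {"x": {0: "batch"}, "y": {0: "batch"}}  # hypothetical keys

# Mirrors the new code path: kwargs is empty and dynamic_shapes is a dict.
assert len(dynamic_shapes) == len(args)
as_tuple = tuple(dynamic_shapes.values())
# as_tuple == ({0: "batch"}, {0: "batch"}), aligned positionally with args
```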
4 changes: 2 additions & 2 deletions onnx_diagnostic/helpers/helper.py
@@ -397,7 +397,7 @@ def string_type(
return "AUTO"
if verbose:
print(f"[string_type] Y7:{type(obj)}")
return str(obj)
return str(obj).replace("DimHint(DYNAMIC)", "DYNAMIC").replace("DimHint(AUTO)", "AUTO")

if isinstance(obj, bool):
if with_min_max:
@@ -939,7 +939,7 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any:
return flatten_object(list(x.values()), drop_keys=drop_keys)
return flatten_object(list(x.items()), drop_keys=drop_keys)

if x.__class__.__name__ in {"DynamicCache", "StaticCache"}:
if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
from .cache_helper import CacheKeyValue

kc = CacheKeyValue(x)
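A quick sketch of the `string_type` tweak (expected rendering inferred from the `replace` calls, not verified against every torch version): dynamic-dimension hints now print as plain `DYNAMIC`/`AUTO` instead of leaking their `DimHint(...)` repr into the output.

```python
import torch
from onnx_diagnostic.helpers import string_type

# The new replace() strips the DimHint() wrapper from the fallback str(obj).
print(string_type(torch.export.Dim.DYNAMIC))  # expected: "DYNAMIC"
```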