1 change: 1 addition & 0 deletions CHANGELOGS.rst
@@ -4,6 +4,7 @@ Change Logs
0.7.13
++++++

* :pr:`237`: dummy inputs for google/gemma-3-4b-it
* :pr:`244`: add a patch to bypass the exception raised when the dynamic dimension is in {0,1}

0.7.12
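A minimal sketch of the situation :pr:`244` targets, assuming the bypass is active through the plain `torch_export_patches` context manager: a dimension is declared dynamic while the sample input has size 1 there, which vanilla `torch.export` may reject or specialize away.

```python
import torch
from onnx_diagnostic.torch_export_patches import torch_export_patches


class Model(torch.nn.Module):
    def forward(self, x):
        return x * 2


# The first dimension is declared dynamic even though the sample input has
# size 1; without the patch, export may raise because the size is in {0, 1}.
x = torch.rand(1, 4)
with torch_export_patches():  # assumed to enable the bypass from :pr:`244`
    ep = torch.export.export(
        Model(), (x,), dynamic_shapes=({0: torch.export.Dim.DYNAMIC},)
    )
```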
33 changes: 33 additions & 0 deletions _unittests/ut_helpers/test_torch_helper.py
@@ -181,6 +181,39 @@ def forward(self, x, y):
set(restored),
)

@hide_stdout()
def test_steal_forward_dump_file_steal_append_drop(self):
class SubModel(torch.nn.Module):
def forward(self, x):
return x * x

class Model(torch.nn.Module):
def __init__(self):
super().__init__()
self.s1 = SubModel()
self.s2 = SubModel()

def forward(self, x, y):
sx = self.s1(x)
steal_append("sx", sx)
return sx + self.s2(y)

inputs = dict(x=torch.rand(3, 4), y=torch.rand(3, 4))
model = Model()
dump_file = self.get_dump_file("test_steal_forward_dump_file_drop.onnx")
with steal_forward(model, dump_file=dump_file, dump_drop={"x"}):
model(**inputs)
model(**inputs)
self.assertExists(dump_file)
restored = create_input_tensors_from_onnx_model(dump_file)
self.assertEqual(
{("", 1, "I"), ("", 1, "O"), "sx", ("", 0, "O"), "sx_1", ("", 0, "I")},
set(restored),
)
first = restored[("", 0, "I")]
_a, kws = first
self.assertNotIn("x", kws)

@hide_stdout()
def test_steal_forward_submodules(self):
class SubModel(torch.nn.Module):
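A hedged sketch of how a dump produced this way can be read back (key layout inferred from the assertions above, import path assumed): `create_input_tensors_from_onnx_model` appears to return a dict keyed by `(module_name, call_index, "I"/"O")` for stolen inputs and outputs, plus one entry per tensor recorded with `steal_append`.

```python
from onnx_diagnostic.helpers.torch_helper import (  # import path assumed
    create_input_tensors_from_onnx_model,
)

restored = create_input_tensors_from_onnx_model("dump.onnx")
args, kwargs = restored[("", 0, "I")]  # inputs of the first recorded call
outputs = restored[("", 0, "O")]       # outputs of the first recorded call
sx = restored["sx"]                    # tensor saved via steal_append("sx", sx)
assert "x" not in kwargs               # "x" was filtered out by dump_drop={"x"}
```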
14 changes: 14 additions & 0 deletions _unittests/ut_tasks/test_data.py
@@ -0,0 +1,14 @@
import unittest
from onnx_diagnostic.ext_test_case import ExtTestCase
from onnx_diagnostic.tasks.data import get_data


class TestTasks(ExtTestCase):
def test_get_data(self):
name = "dummies_imagetext2text_generation_gemma3.onnx"
data = get_data(name)
print(data)


if __name__ == "__main__":
unittest.main(verbosity=2)
31 changes: 29 additions & 2 deletions _unittests/ut_tasks/test_tasks_image_text_to_text.py
@@ -22,6 +22,7 @@ def test_image_text_to_text_idefics(self):
self.assertEqual(data["task"], "image-text-to-text")
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
model(**torch_deepcopy(inputs))
print("***", self.string_type(data["inputs2"], with_shape=True))
model(**data["inputs2"])
with torch_export_patches(patch_transformers=True, verbose=10):
torch.export.export(
@@ -31,14 +32,13 @@
@hide_stdout()
@requires_transformers("4.57.99")
@requires_torch("2.7.99")
def test_image_text_to_text_gemma3(self):
def test_image_text_to_text_tiny_gemma3(self):
"""
If the model fails because of
``if inputs_embeds[special_image_mask].numel() != image_features.numel():``,
make sure this PR was merged:
https://github.com/huggingface/transformers/pull/39962.
"""
# mid = "google/gemma-3-4b-it"
mid = "tiny-random/gemma-3"
data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
self.assertEqual(data["task"], "image-text-to-text")
@@ -52,6 +52,33 @@
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
@requires_transformers("4.56.99")
@requires_torch("2.8.99")
def test_image_text_to_text_gemma3_4b_it(self):
mid = "google/gemma-3-4b-it"
data = get_untrained_model_with_inputs(
mid,
verbose=1,
add_second_input=False,
# inputs_kwargs={
# "sequence_length": 281,
# "batch_size": 1,
# "max_sequence_length": 580,
# "n_images": 1,
# },
)
self.assertEqual(data["task"], "image-text-to-text")
# self.assertIn((data["size"], data["n_weights"]), [(17248576, 4312144)])
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
# inputs.pop("attention_mask")
# ds.pop("attention_mask")
model(**torch_deepcopy(inputs))
with torch_export_patches(patch_transformers=True, verbose=10):
torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
)

@hide_stdout()
@requires_transformers("4.57.99")
@requires_torch("2.7.99")
9 changes: 5 additions & 4 deletions _unittests/ut_tasks/test_tasks_image_to_video.py
@@ -54,10 +54,11 @@ def test_image_to_video_oblivious(self):
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
model(**inputs)
model(**data["inputs2"])
with torch.fx.experimental._config.patch(
backed_size_oblivious=True
), torch_export_patches(
patch_transformers=True, patch_diffusers=True, verbose=10, stop_if_static=1
with (
torch.fx.experimental._config.patch(backed_size_oblivious=True),
torch_export_patches(
patch_transformers=True, patch_diffusers=True, verbose=10, stop_if_static=1
),
):
torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
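This change (and two similar ones below) rewrites stacked context managers into the parenthesized `with` form, which is valid from Python 3.10 onward; both spellings are equivalent:

```python
from contextlib import nullcontext

# Single-line stacking (works on older Python versions too)
with nullcontext() as a, nullcontext() as b:
    pass

# Parenthesized form used by the updated tests (Python 3.10+)
with (
    nullcontext() as a,
    nullcontext() as b,
):
    pass
```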
155 changes: 153 additions & 2 deletions _unittests/ut_tasks/try_tasks.py
@@ -1,10 +1,12 @@
import os
import unittest
import torch
from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
from onnx_diagnostic.helpers.torch_helper import steal_forward
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches


class TestHuggingFaceHubModel(ExtTestCase):
@@ -137,8 +139,9 @@ def test_text_generation_phi4_mini(self):
import torch
from transformers import RobertaTokenizer, T5ForConditionalGeneration

tokenizer = RobertaTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")
model = T5ForConditionalGeneration.from_pretrained("microsoft/Phi-4-mini-instruct")
model_id = "microsoft/Phi-4-mini-instruct"
tokenizer = RobertaTokenizer.from_pretrained(model_id)
model = T5ForConditionalGeneration.from_pretrained(model_id)

text = "def greet(user): print(f'hello <extra_id_0>!')"
input_ids = tokenizer(text, return_tensors="pt").input_ids
@@ -156,6 +159,41 @@ def test_text_generation_phi4_mini(self):
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

@never_test()
def test_text_generation_phi3_mini(self):
# clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi3_mini

from transformers import Phi3ForCausalLM, AutoTokenizer

model_id = "microsoft/Phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = Phi3ForCausalLM.from_pretrained(model_id)

messages = [
{
"role": "system",
"content": (
"You are a helpful digital assistant. Please provide safe, "
"ethical and accurate information to the user."
),
},
{
"role": "user",
"content": (
"Can you provide ways to eat combinations of bananas and dragonfruits?"
),
},
]
inputs = tokenizer.apply_chat_template(
messages, add_generation_prompt=True, return_tensors="pt"
)

# simply generate a single sequence
print()
with steal_forward(model):
generated_ids = model.generate(inputs, max_length=100)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

@never_test()
@unittest.skip(
reason="AttributeError: 'Phi4MMModel' object has no attribute "
@@ -791,6 +829,119 @@ def test_sentence_similary_alibaba_nlp_gte(self):
scores = (embeddings[:1] @ embeddings[1:].T) * 100
print(scores.tolist())

@never_test()
def test_imagetext2text_generation_gemma3_4b_it(self):
"""
clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k gemma3_4b_it
"""
from transformers import AutoProcessor, Gemma3ForConditionalGeneration

model_id = "google/gemma-3-4b-it"
if os.environ.get("PRETRAINED", ""):
model = Gemma3ForConditionalGeneration.from_pretrained(
model_id, device_map="cpu"
).eval()
else:
data = get_untrained_model_with_inputs(
model_id,
verbose=1,
add_second_input=False,
# same_as_pretrained=True, #use_pretrained=True
inputs_kwargs={
"sequence_length": 281,
"batch_size": 1,
"max_sequence_length": 580,
"n_images": 1,
},
)
model = data["model"]

print(f"-- model.device={model.device}")
processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
print(f"-- processor={type(processor)}")

messages = [
{
"role": "system",
"content": [{"type": "text", "text": "You are a helpful assistant."}],
},
{
"role": "user",
"content": [
{
"type": "image",
"image": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg",
},
{"type": "text", "text": "Describe this image in detail."},
],
},
]
inputs = processor.apply_chat_template(
messages,
tokenize=True,
add_generation_prompt=True,
return_dict=True,
return_tensors="pt",
).to(model.device, dtype=torch.bfloat16)
# if "token_type_ids" in inputs:
# print(
# f"-- remove token_type_ids: "
# f"{self.string_type(inputs['token_type_ids'], with_shape=True)}"
# )
# inputs.pop("token_type_ids", None)
print(f"-- inputs={self.string_type(inputs)}")

# iteration merge = sequence > 1, cache not empty
# iteration 1 = sequence > 1, no cache
# cache_position:T7s281,
# past_key_values:StaticCache(key_cache=#0[], value_cache=#0[]),
# input_ids:T7s1x281,
# inputs_embeds:None,
# token_type_ids:T7s1x281,
# attention_mask:dict(sliding_attention:T9s1x1x281x580,
# full_attention:T9s1x1x281x580),
# position_ids:None,
# use_cache:bool,
# logits_to_keep:None,
# pixel_values:T16s1x3x896x896,
# return_dict:bool)
# iteration 2 = sequence = 1, cache not empty
# cache_position:T7s1,
# past_key_values:StaticCache(key_cache=#34[T1s1x4x580x256,...],
# value_cache=#34[T1s1x4x580x256,...]),
# input_ids:T7s1x1,
# inputs_embeds:None,
# token_type_ids:T7s1x1,
# attention_mask:dict(sliding_attention:T9s1x1x1x580,full_attention:T9s1x1x1x580),
# position_ids:None,
# use_cache:bool,logits_to_keep:None,return_dict:bool)

print()
with (
torch_export_patches(
patch_torch=False, patch_sympy=False, patch_transformers=True
),
steal_forward(
model,
dump_file=self.get_dump_file(
"test_imagetext2text_generation_gemma3_4b_it.onnx"
),
dump_drop={"attention_mask", "past_key_values", "pixel_values"},
save_as_external_data=False,
),
):
generated_ids = model.generate(
**inputs,
# 282 = value high enough to trigger multiple iterations of the model
max_new_tokens=282,
do_sample=False,
cache_implementation="static",
)
output_text = processor.decode(
generated_ids[0][inputs["input_ids"].shape[1] :], skip_special_tokens=False
)
print(output_text)


if __name__ == "__main__":
unittest.main(verbosity=2)
5 changes: 3 additions & 2 deletions _unittests/ut_torch_export_patches/test_patch_torch.py
@@ -309,8 +309,9 @@ def forward(self, x, ind1, ind2):
with self.subTest(
name="patch for 0/1 with oblivious", dynamic_shapes=dynamic_shapes
):
with torch_export_patches(), torch.fx.experimental._config.patch(
backed_size_oblivious=True
with (
torch_export_patches(),
torch.fx.experimental._config.patch(backed_size_oblivious=True),
):
ep = torch.export.export(model, inputs, dynamic_shapes=dynamic_shapes)
got = ep.module()(*inputs)
7 changes: 4 additions & 3 deletions _unittests/ut_torch_models/test_llm_phi2.py
@@ -33,9 +33,10 @@ def test_export_phi2_1_batch_size_1_oblivious(self):
self.assertEqual(
{"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
)
with torch.fx.experimental._config.patch(
backed_size_oblivious=True
), torch_export_patches(patch_transformers=True):
with (
torch.fx.experimental._config.patch(backed_size_oblivious=True),
torch_export_patches(patch_transformers=True),
):
ep = torch.export.export(
model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds)
)
13 changes: 11 additions & 2 deletions onnx_diagnostic/export/dynamic_shapes.py
@@ -56,6 +56,14 @@ def __init__(
self.kwargs = kwargs
self.dynamic_shapes = dynamic_shapes
self.args_names = args_names
if not self.kwargs and isinstance(self.dynamic_shapes, dict):
# This assumes the dictionary for the dynamic shapes is ordered
# the same way the args are. The input names are not known.
assert len(self.dynamic_shapes) == len(self.args), (
f"Length mismatch, kwargs is empty, len(dynamic_shapes)="
f"{len(self.dynamic_shapes)}, len(args)={len(self.args)}"
)
self.dynamic_shapes = tuple(self.dynamic_shapes.values())

def __str__(self) -> str:
return "\n".join(
@@ -232,8 +240,9 @@ def _generic_walker(
"""
if not self.args:
assert isinstance(self.kwargs, dict) and isinstance(self.dynamic_shapes, dict), (
f"Type mismatch, args={string_type(self.args)} and "
f"dynamic_shapes={self.dynamic_shapes} should have the same type."
f"Type mismatch, args={string_type(self.args)}, "
f"kwargs={string_type(self.kwargs)} and dynamic_shapes="
f"{string_type(self.dynamic_shapes)} should have the same type."
)
res = self._generic_walker_step(
processor,
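A small sketch of the normalization added in `__init__` (names invented for illustration): when only positional args are given but `dynamic_shapes` arrives as a dict, the dict is assumed to be ordered like the args and is converted to a tuple.

```python
import torch

args = (torch.rand(2, 4), torch.rand(2, 4))
dynamic_shapes = {"x": {0: "batch"}, "y": {0: "batch"}}  # hypothetical keys

# Mirrors the new code path: kwargs is empty and dynamic_shapes is a dict.
assert len(dynamic_shapes) == len(args)
as_tuple = tuple(dynamic_shapes.values())
# as_tuple == ({0: "batch"}, {0: "batch"}), aligned positionally with args
```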
4 changes: 2 additions & 2 deletions onnx_diagnostic/helpers/helper.py
@@ -397,7 +397,7 @@ def string_type(
return "AUTO"
if verbose:
print(f"[string_type] Y7:{type(obj)}")
return str(obj)
return str(obj).replace("DimHint(DYNAMIC)", "DYNAMIC").replace("DimHint(AUTO)", "AUTO")

if isinstance(obj, bool):
if with_min_max:
@@ -939,7 +939,7 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any:
return flatten_object(list(x.values()), drop_keys=drop_keys)
return flatten_object(list(x.items()), drop_keys=drop_keys)

if x.__class__.__name__ in {"DynamicCache", "StaticCache"}:
if x.__class__.__name__ in {"DynamicCache", "StaticCache", "HybridCache"}:
from .cache_helper import CacheKeyValue

kc = CacheKeyValue(x)
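A quick sketch of the `string_type` tweak (expected rendering inferred from the `replace` calls, not verified against every torch version): dynamic-dimension hints now print as plain `DYNAMIC`/`AUTO` instead of leaking their `DimHint(...)` repr into the output.

```python
import torch
from onnx_diagnostic.helpers import string_type

# The new replace() strips the DimHint() wrapper from the fallback str(obj).
print(string_type(torch.export.Dim.DYNAMIC))  # expected: "DYNAMIC"
```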