sdpython · sdpython · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
@@ -4,6 +4,9 @@ Change Logs
 0.3.0
 +++++
 
+* :pr:`24`: dummy inputs for ``text2text-generation``, add new function
+  ``convert_dynamic_axes_into_dynamic_shapes`` to convert dynamic axes
+  into dynamic shapes, add support for ``T5ForConditionalGeneration``
 * :pr:`23`: dummy inputs for ``image-classification``
 * :pr:`22`: api to create untrained model copying the architecture
   of the trained models and dummy inputs for them,

diff --git a/_doc/api/torch_export_patches/index.rst b/_doc/api/torch_export_patches/index.rst
@@ -6,6 +6,8 @@ onnx_diagnostic.torch_export_patches
     :caption: submodules
 
     patches/index
+    patch_inputs
+
 
 .. automodule:: onnx_diagnostic.torch_export_patches
     :members:

diff --git a/_doc/api/torch_export_patches/patch_inputs.rst b/_doc/api/torch_export_patches/patch_inputs.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.torch_export_patches.patch_inputs
+=================================================
+
+.. automodule:: onnx_diagnostic.torch_export_patches.patch_inputs
+    :members:
+    :no-undoc-members:
diff --git a/_doc/conf.py b/_doc/conf.py
@@ -121,6 +121,7 @@
     ("py:class", "transformers.LlamaConfig"),
     ("py:class", "transformers.cache_utils.Cache"),
     ("py:class", "transformers.cache_utils.DynamicCache"),
+    ("py:class", "transformers.cache_utils.EncoderDecoderCache"),
     ("py:class", "transformers.cache_utils.MambaCache"),
     ("py:func", "torch.export._draft_export.draft_export"),
     ("py:func", "torch._export.tools.report_exportability"),

diff --git a/_doc/examples/plot_export_tiny_llm.py b/_doc/examples/plot_export_tiny_llm.py
@@ -32,6 +32,7 @@
 from onnx_diagnostic import doc
 from onnx_diagnostic.helpers import string_type
 from onnx_diagnostic.torch_models.llms import get_tiny_llm
+from onnx_diagnostic.torch_test_helper import steel_forward
 
 
 MODEL_NAME = "arnir0/Tiny-LLM"
@@ -49,7 +50,7 @@ def _forward_(*args, _f=None, **kwargs):
         print("<-", string_type((args, kwargs), with_shape=True, with_min_max=True))
     res = _f(*args, **kwargs)
     if not hasattr(torch.compiler, "is_exporting") or not torch.compiler.is_exporting():
-        print("->", string_type((args, kwargs), with_shape=True, with_min_max=True))
+        print("->", string_type(res, with_shape=True, with_min_max=True))
     return res
 
 
@@ -75,6 +76,12 @@ def _forward_(*args, _f=None, **kwargs):
 # Let's restore the forward as it was.
 model.forward = keep_model_forward
 
+# %%
+# Another syntax with :func:`onnx_diagnostic.torch_test_helper.steel_forward`.
+
+with steel_forward(model):
+    model.generate(inputs, max_length=50, temperature=1, top_k=50, top_p=0.95, do_sample=True)
+
 # %%
 # Untrained model
 # +++++++++++++++

diff --git a/_unittests/ut_torch_export_patches/test_patch_inputs.py b/_unittests/ut_torch_export_patches/test_patch_inputs.py
@@ -0,0 +1,116 @@
+import unittest
+import torch
+import transformers
+from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
+from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.torch_export_patches.patch_inputs import (
+    convert_dynamic_axes_into_dynamic_shapes,
+)
+
+
+class TestPatchInputs(ExtTestCase):
+    @hide_stdout()
+    def test_convert_dynamic_axes_into_dynamic_shapes_1(self):
+        """Axes keyed ``past_key_values.<i>.key/value`` are grouped under one entry.
+
+        ``logits`` and ``present.*`` do not appear in the expected dynamic shapes.
+        """
+        ids, mask, positions = (
+            torch.randint(0, 10, (2, 8), dtype=torch.int64) for _ in range(3)
+        )
+        cache = [(torch.rand((2, 1, 3, 96)), torch.rand((2, 1, 3, 96)))]
+        axes = {
+            "attention_mask": {0: "batch_size", 1: "total_sequence_length"},
+            "input_ids": {0: "batch_size", 1: "sequence_length"},
+            "logits": {0: "batch_size", 1: "sequence_length"},
+            "past_key_values.0.key": {0: "batch_size", 2: "past_sequence_length"},
+            "past_key_values.0.value": {0: "batch_size", 2: "past_sequence_length"},
+            "position_ids": {0: "batch_size", 1: "sequence_length"},
+            "present.0.key": {0: "batch_size", 2: "total_sequence_length"},
+            "present.0.value": {0: "batch_size", 2: "total_sequence_length"},
+        }
+
+        converted = convert_dynamic_axes_into_dynamic_shapes(
+            transformers.LlamaModel,
+            args=(ids, mask, positions, cache),
+            dynamic_axes=axes,
+            verbose=1,
+        )
+        expected_kwargs = (
+            "dict(input_ids:T7s2x8,attention_mask:T7s2x8,position_ids:T7s2x8,"
+            "past_key_values:DynamicCache(key_cache=#1[T1s2x1x3x96], "
+            "value_cache=#1[T1s2x1x3x96]))"
+        )
+        expected_shapes = {
+            "attention_mask": {0: "batch_size", 1: "total_sequence_length"},
+            "input_ids": {0: "batch_size", 1: "sequence_length"},
+            "past_key_values": [
+                [{0: "batch_size", 2: "past_sequence_length"}],
+                [{0: "batch_size", 2: "past_sequence_length"}],
+            ],
+            "position_ids": {0: "batch_size", 1: "sequence_length"},
+        }
+        self.assertEqual((), converted[0])
+        self.assertEqual(expected_kwargs, string_type(converted[1], with_shape=True))
+        self.assertEqual(expected_shapes, converted[2])
+
+    @hide_stdout()
+    def test_convert_dynamic_axes_into_dynamic_shapes_2(self):
+        """``prefix_mapping`` sends the ``present.*`` axes to ``past_key_values``.
+
+        The converted arguments and dynamic shapes are checked piece by piece, then whole.
+        """
+        ids, mask, positions = (
+            torch.randint(0, 10, (2, 8), dtype=torch.int64) for _ in range(3)
+        )
+        cache = [(torch.rand((2, 1, 3, 96)), torch.rand((2, 1, 3, 96)))]
+        axes = {
+            "input_ids": {0: "batch_size", 1: "sequence_length"},
+            "attention_mask": {0: "batch_size", 1: "sequence_length"},
+            "position_ids": {0: "batch_size", 1: "sequence_length"},
+            "logits": {0: "batch_size", 1: "sequence_length"},
+            "present.0.key": {0: "batch_size", 2: "past_sequence_length"},
+            "present.0.value": {0: "batch_size", 2: "past_sequence_length"},
+        }
+
+        converted = convert_dynamic_axes_into_dynamic_shapes(
+            transformers.LlamaModel,
+            args=(ids, mask, positions, cache),
+            dynamic_axes=axes,
+            verbose=1,
+            prefix_mapping={"present": "past_key_values"},
+        )
+        self.assertEqual((), converted[0])
+        # only the names of the entries
+        expected_names = {"attention_mask", "input_ids", "past_key_values", "position_ids"}
+        self.assertEqual(expected_names, set(converted[2]))
+        # the cache entry on its own
+        cache_shapes = [
+            [{0: "batch_size", 2: "past_sequence_length"}],
+            [{0: "batch_size", 2: "past_sequence_length"}],
+        ]
+        self.assertEqual(cache_shapes, converted[2]["past_key_values"])
+        # the whole mapping
+        self.assertEqual(
+            {
+                "attention_mask": {0: "batch_size", 1: "sequence_length"},
+                "input_ids": {0: "batch_size", 1: "sequence_length"},
+                "past_key_values": [
+                    [{0: "batch_size", 2: "past_sequence_length"}],
+                    [{0: "batch_size", 2: "past_sequence_length"}],
+                ],
+                "position_ids": {0: "batch_size", 1: "sequence_length"},
+            },
+            converted[2],
+        )
+        # positional inputs come back as keyword arguments
+        expected_kwargs = (
+            "dict(input_ids:T7s2x8,attention_mask:T7s2x8,position_ids:T7s2x8,"
+            "past_key_values:DynamicCache(key_cache=#1[T1s2x1x3x96], "
+            "value_cache=#1[T1s2x1x3x96]))"
+        )
+        self.assertEqual(expected_kwargs, string_type(converted[1], with_shape=True))
+
+
+if __name__ == "__main__":  # allows running this test file directly
+    unittest.main(verbosity=2)  # verbosity=2 reports every test individually
diff --git a/_unittests/ut_torch_models/test_hghub_model.py b/_unittests/ut_torch_models/test_hghub_model.py
@@ -4,7 +4,6 @@
 from onnx_diagnostic.ext_test_case import (
     ExtTestCase,
     hide_stdout,
-    long_test,
     requires_torch,
     requires_transformers,
 )
@@ -79,7 +78,7 @@ def test_get_untrained_model_with_inputs_beit(self):
         model, inputs = data["model"], data["inputs"]
         model(**inputs)
         # different expected value for different version of transformers
-        self.assertIn((data["size"], data["n_weights"]), [(111448, 27862)])
+        self.assertIn((data["size"], data["n_weights"]), [(111448, 27862), (56880, 14220)])
 
     @hide_stdout()
     def test_get_untrained_model_with_inputs_codellama(self):
@@ -91,8 +90,18 @@ def test_get_untrained_model_with_inputs_codellama(self):
         self.assertIn((data["size"], data["n_weights"]), [(410532864, 102633216)])
 
     @hide_stdout()
-    @long_test()
+    def test_get_untrained_model_with_inputs_text2text_generation(self):
+        mid = "sshleifer/tiny-marian-en-de"
+        # alternative checkpoint to try: mid = "Salesforce/codet5-small"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
+        model, inputs = data["model"], data["inputs"]
+        raise unittest.SkipTest(f"not working for {mid!r}")  # the call below is skipped
+        model(**inputs)  # unreachable while the skip above is in place
+
+    @hide_stdout()
     def test_get_untrained_model_Ltesting_models(self):
+        # UNHIDE=1 python _unittests/ut_torch_models/test_hghub_model.py -k L -f
         def _diff(c1, c2):
             rows = [f"types {c1.__class__.__name__} <> {c2.__class__.__name__}"]
             for k, v in c1.__dict__.items():
@@ -102,11 +111,22 @@ def _diff(c1, c2):
                     rows.append(f"{k} :: -- {v} ++ {getattr(c2, k, 'MISS')}")
             return "\n".join(rows)
 
-        # UNHIDE=1 LONGTEST=1 python _unittests/ut_torch_models/test_hghub_model.py -k L -f
         for mid in load_models_testing():
             with self.subTest(mid=mid):
+                if mid in {
+                    "hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation",
+                    "hf-internal-testing/tiny-random-MoonshineForConditionalGeneration",
+                    "fxmarty/pix2struct-tiny-random",
+                    "hf-internal-testing/tiny-random-ViTMSNForImageClassification",
+                    "hf-internal-testing/tiny-random-YolosModel",
+                }:
+                    print(f"-- not implemented yet for {mid!r}")
+                    continue
                 data = get_untrained_model_with_inputs(mid, verbose=1)
                 model, inputs = data["model"], data["inputs"]
+                if mid in {"sshleifer/tiny-marian-en-de"}:
+                    print(f"-- not fully implemented yet for {mid!r}")
+                    continue
                 try:
                     model(**inputs)
                 except Exception as e:

diff --git a/_unittests/ut_torch_models/try_models.py b/_unittests/ut_torch_models/try_models.py
diff --git a/_unittests/ut_torch_models/try_tasks.py b/_unittests/ut_torch_models/try_tasks.py
@@ -0,0 +1,56 @@
+import unittest
+from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
+from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.torch_test_helper import steel_forward
+
+
+class TestHuggingFaceHubModel(ExtTestCase):
+    """Manual experiments on hub checkpoints, each one guarded by ``never_test``."""
+
+    @never_test()
+    def test_image_classification(self):
+        """Run a pretrained ViT on one COCO image and print its inputs and outputs."""
+        # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k image_c
+
+        from transformers import ViTImageProcessor, ViTModel
+        from PIL import Image
+        import requests
+
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True, timeout=30).raw)
+
+        processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
+        model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")
+        inputs = processor(images=image, return_tensors="pt")
+        print()
+        print("-- inputs", string_type(inputs, with_shape=True, with_min_max=True))
+
+        outputs = model(**inputs)
+        print("-- outputs", string_type(outputs, with_shape=True, with_min_max=True))
+
+    @never_test()
+    def test_text2text_generation(self):
+        """Generate with ``codet5-small`` inside a ``steel_forward`` context."""
+        # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k text2t
+
+        import torch
+        from transformers import RobertaTokenizer, T5ForConditionalGeneration
+
+        tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5-small")
+        model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-small")
+
+        text = "def greet(user): print(f'hello <extra_id_0>!')"
+        input_ids = tokenizer(text, return_tensors="pt").input_ids
+        mask = torch.ones((1, input_ids.shape[1]), dtype=torch.int64)
+
+        # simply generate a single sequence
+        print()
+        with steel_forward(model):
+            generated_ids = model.generate(
+                decoder_input_ids=input_ids, attention_mask=mask, max_length=100
+            )
+        print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
+
+
+if __name__ == "__main__":  # allows running this file directly
+    unittest.main(verbosity=2)  # verbosity=2 reports every test individually
diff --git a/_unittests/ut_xrun_doc/test_helpers.py b/_unittests/ut_xrun_doc/test_helpers.py
@@ -36,7 +36,7 @@
     rename_dynamic_dimensions,
     rename_dynamic_expression,
 )
-from onnx_diagnostic.cache_helpers import make_dynamic_cache
+from onnx_diagnostic.cache_helpers import make_dynamic_cache, make_encoder_decoder_cache
 
 TFLOAT = onnx.TensorProto.FLOAT
 
@@ -164,6 +164,8 @@ def test_flatten(self):
                 },
             ],
         )
+        diff = max_diff(inputs, inputs, flatten=True, verbose=10)
+        self.assertEqual(diff["abs"], 0)
         flat = flatten_object(inputs, drop_keys=True)
         diff = max_diff(inputs, flat, flatten=True, verbose=10)
         self.assertEqual(diff["abs"], 0)
@@ -442,6 +444,32 @@ def test_from_tensor(self):
             convert_endian(proto)
             dtype_to_tensor_dtype(dt)
 
+    @hide_stdout()
+    def test_flatten_encoder_decoder_cache(self):
+        """Check helpers on nested inputs that embed an ``EncoderDecoderCache``.
+
+        ``max_diff`` must report a zero absolute difference against the inputs themselves
+        and against their flattened form; ``string_type`` must name the cache class.
+        """
+        head = torch.rand((3, 4), dtype=torch.float16)
+        matrix = torch.rand((5, 6), dtype=torch.float16)
+        cube = torch.rand((5, 6, 7), dtype=torch.float16)
+        vector = torch.rand((2,), dtype=torch.float16)
+        first = make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))])
+        second = make_dynamic_cache([(torch.rand((5, 5, 5)), torch.rand((5, 5, 5)))])
+        cache = make_encoder_decoder_cache(first, second)
+        inputs = (head, [matrix, cube, {"a": vector, "cache": cache}])
+
+        # the structure compared with itself
+        same = max_diff(inputs, inputs, flatten=True, verbose=10)
+        self.assertEqual(same["abs"], 0)
+        # the structure compared with its flattened version
+        flat = flatten_object(inputs, drop_keys=True)
+        diff = max_diff(inputs, flat, flatten=True, verbose=10)
+        self.assertEqual(diff["abs"], 0)
+        self.assertIsInstance(string_diff(diff), str)
+        self.assertIn("EncoderDecoderCache", string_type(inputs))
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)