sdpython
diff --git a/‎_doc/examples/plot_export_hub_codellama.py‎
Lines changed: 1 addition & 3 deletions b/‎_doc/examples/plot_export_hub_codellama.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎_doc/examples/plot_export_tiny_phi2.py‎
Lines changed: 1 addition & 3 deletions b/‎_doc/examples/plot_export_tiny_phi2.py‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎_doc/technical/plot_generate.py‎
Lines changed: 1 addition & 1 deletion b/‎_doc/technical/plot_generate.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎_unittests/ut_export/test_api.py‎
Lines changed: 73 additions & 3 deletions b/‎_unittests/ut_export/test_api.py‎
Lines changed: 73 additions & 3 deletions
diff --git a/‎_unittests/ut_export/test_dynamic_shapes.py‎
Lines changed: 99 additions & 27 deletions b/‎_unittests/ut_export/test_dynamic_shapes.py‎
Lines changed: 99 additions & 27 deletions
diff --git a/‎_unittests/ut_export/test_serialization.py‎
Lines changed: 7 additions & 14 deletions b/‎_unittests/ut_export/test_serialization.py‎
Lines changed: 7 additions & 14 deletions
@@ -22,9 +22,7 @@
 from onnx_diagnostic import doc
 from onnx_diagnostic.ext_test_case import unit_test_going
 from onnx_diagnostic.helpers import string_type
-from onnx_diagnostic.torch_models.hghub import (
-    get_untrained_model_with_inputs,
-)
+from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_models.hghub.hub_api import (
     get_model_info,
     get_pretrained_config,
 
@@ -33,9 +33,7 @@
 from onnx_diagnostic.helpers.rt_helper import make_feeds
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
-from onnx_diagnostic.torch_models.hghub import (
-    get_untrained_model_with_inputs,
-)
+from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
 
 warnings.simplefilter("ignore")
 
 
@@ -155,7 +155,7 @@ def simple_generate_with_cache(
 dtype = get_weight_type(model)
 print("-- model dtype:", dtype)
 export_inputs["past_key_values"] = to_any(export_inputs["past_key_values"], dtype)
-exporter = "custom" if "custom" in sys.argv else "onnx-dynamo"
+exporter = "onnx-dynamo" if "dynamo" in sys.argv else "custom"
 model_name = f"model_{model_id.replace('/', '-')}.{exporter}.onnx"
 if not os.path.exists(model_name):
     # This step is slow so let's skip it if it was already done.
 
@@ -1,6 +1,12 @@
 import unittest
 import torch
-from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
+from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, has_transformers
+from onnx_diagnostic.helpers import max_diff
+from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
+from onnx_diagnostic.helpers.rt_helper import make_feeds
+from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache
+from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
+from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.export.api import to_onnx
 
 
@@ -19,16 +25,80 @@ def forward(self, x, y):
             (x, y),
             dynamic_shapes=ds,
             exporter="custom",
-            filename=self.get_dump_file("custom.onnx"),
+            filename=self.get_dump_file("to_onnx_custom.onnx"),
         )
         to_onnx(
             Model(),
             (x, y),
             dynamic_shapes=ds,
             exporter="onnx-dynamo",
-            filename=self.get_dump_file("onnx-dynamo.onnx"),
+            filename=self.get_dump_file("to_onnx_onnx-dynamo.onnx"),
         )
 
+    @hide_stdout()
+    def test_tiny_llm_to_onnx(self):  # export Tiny-LLM with each exporter, then check onnxruntime vs torch
+        import onnxruntime
+
+        data = get_untrained_model_with_inputs("arnir0/Tiny-LLM")
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        b1 = data["inputs_batch1"]
+        filenames = {  # exporter name -> ONNX file written by to_onnx and reloaded below
+            "custom": self.get_dump_file("test_tiny_llm_to_onnx-custom.onnx"),
+            "onnx-dynamo": self.get_dump_file("test_tiny_llm_to_onnx-dynamo.onnx"),
+            "modelbuilder": self.get_dump_file("model.onnx"),
+        }
+        if not has_transformers("4.55"):
+            # <4.55: torch._check(causal_mask.shape[3] != 33)
+            #        torch._check(causal_mask.shape[3] == 33)
+            del filenames["onnx-dynamo"]  # onnx-dynamo is then neither exported nor validated
+        del inputs["position_ids"]  # position_ids is removed from the export inputs,
+        del ds["position_ids"]  # from the dynamic shapes,
+        del b1["position_ids"]  # and from the batch-1 inputs used for validation
+
+        expected = model(**torch_deepcopy(b1))  # torch reference, computed on a deep copy of b1
+
+        with torch_export_patches(patch_transformers=True):  # every export runs under the patches
+            for exporter, filename in filenames.items():
+                with self.subTest(exporter=exporter):
+                    to_onnx(
+                        model,
+                        kwargs=inputs,
+                        dynamic_shapes=ds,
+                        exporter=exporter,
+                        filename=filename,
+                    )
+        for exporter, filename in filenames.items():  # validation runs outside the patch context
+            with self.subTest(exporter=f"validate-{exporter}"):
+                sess = onnxruntime.InferenceSession(
+                    filename, providers=["CPUExecutionProvider"]
+                )
+                feeds = make_feeds(sess, b1, use_numpy=True)
+                got = sess.run(None, feeds)
+                diff = max_diff(expected, got)
+                assert diff["abs"] <= 1e-5, f"diff={diff}"  # absolute discrepancy capped at 1e-5
+
+        problem = dict(  # second input set: one token, mask of four ones, cache tensors (1, 1, 3, 96)
+            input_ids=torch.tensor([[24320]], dtype=torch.int64),
+            attention_mask=torch.tensor([[1, 1, 1, 1]], dtype=torch.int64),
+            past_key_values=make_dynamic_cache(
+                [
+                    torch.rand((1, 1, 3, 96), dtype=torch.float32),
+                    torch.rand((1, 1, 3, 96), dtype=torch.float32),
+                ]
+            ),
+        )
+
+        expected = model(**torch_deepcopy(problem))  # torch reference for the second input set
+        for exporter, filename in filenames.items():  # same check as above, on the second input set
+            with self.subTest(exporter=f"full-mask-{exporter}"):
+                sess = onnxruntime.InferenceSession(
+                    filename, providers=["CPUExecutionProvider"]
+                )
+                feeds = make_feeds(sess, problem, use_numpy=True)
+                got = sess.run(None, feeds)
+                diff = max_diff(expected, got)
+                assert diff["abs"] <= 1e-5, f"diff={diff}"  # same 1e-5 tolerance as the first check
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
@@ -452,19 +452,18 @@ def forward(self, cache, z):
             (
                 (
                     [
-                        [{}, {}],
-                        [
-                            {
-                                0: torch.export.Dim.DYNAMIC,
-                                2: torch.export.Dim.DYNAMIC,
-                                3: torch.export.Dim.DYNAMIC,
-                            },
-                            {
-                                0: torch.export.Dim.DYNAMIC,
-                                2: torch.export.Dim.DYNAMIC,
-                                3: torch.export.Dim.DYNAMIC,
-                            },
-                        ],
+                        {},
+                        {
+                            0: torch.export.Dim.DYNAMIC,
+                            2: torch.export.Dim.DYNAMIC,
+                            3: torch.export.Dim.DYNAMIC,
+                        },
+                        {},
+                        {
+                            0: torch.export.Dim.DYNAMIC,
+                            2: torch.export.Dim.DYNAMIC,
+                            3: torch.export.Dim.DYNAMIC,
+                        },
                     ],
                     {3: torch.export.Dim.DYNAMIC},
                 ),
@@ -520,11 +519,10 @@ def forward(self, cache, z):
             (
                 (
                     [
-                        [{}, {}],
-                        [
-                            {0: "dim_0I_1o_0l0", 2: "dim_0I_1o_0l2", 3: "dim_0I_1o_0l3"},
-                            {0: "dim_0I_1o_1l0", 2: "dim_0I_1o_1l2", 3: "dim_0I_1o_1l3"},
-                        ],
+                        {},
+                        {0: "dim_0I_1o0", 2: "dim_0I_1o2", 3: "dim_0I_1o3"},
+                        {},
+                        {0: "dim_0I_3o0", 2: "dim_0I_3o2", 3: "dim_0I_3o3"},
                     ],
                     {3: "dim_1I3"},
                 ),
@@ -641,18 +639,18 @@ def test_couple_input_ds_cache(self):
                     kwargs,
                     {
                         "A": ds_batch,
-                        "B": (ds_batch, [[ds_batch, ds_batch], [ds_batch, ds_batch]]),
+                        "B": (ds_batch, [ds_batch, ds_batch, ds_batch, ds_batch]),
                     },
                 ).invalid_dimensions_for_export(),
             )
             self.assertEqual(
-                {"B": (None, [[None, {2: "d=[1]"}], [None, {2: "d=[1]"}]])},
+                {"B": (None, [None, {2: "d=[1]"}, None, {2: "d=[1]"}])},
                 Cls(
                     (),
                     kwargs,
                     {
                         "A": ds_batch,
-                        "B": (ds_batch, [[ds_batch, ds_batch_seq], [ds_batch, ds_batch_seq]]),
+                        "B": (ds_batch, [ds_batch, ds_batch_seq, ds_batch, ds_batch_seq]),
                     },
                 ).invalid_dimensions_for_export(),
             )
@@ -831,18 +829,17 @@ def test_dynamic_cache_replace_by_string(self):
 
         DYN = torch.export.Dim.DYNAMIC
         ds = {
-            "cache": [
-                [{0: DYN, 1: DYN}, {0: DYN, 1: DYN}],
-                [{0: DYN, 1: DYN}, {0: DYN, 1: DYN}],
-            ]
+            "cache": [{0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}]
         }
         inst = CoupleInputsDynamicShapes((), dict(cache=cache), ds)
         as_string = inst.replace_by_string()
         self.assertEqual(
             {
                 "cache": [
-                    [{0: "Dim0", 1: "Dim1"}, {0: "Dim2", 1: "Dim3"}],
-                    [{0: "Dim4", 1: "Dim5"}, {0: "Dim6", 1: "Dim7"}],
+                    {0: "Dim0", 1: "Dim1"},
+                    {0: "Dim2", 1: "Dim3"},
+                    {0: "Dim4", 1: "Dim5"},
+                    {0: "Dim6", 1: "Dim7"},
                 ]
             },
             as_string,
@@ -865,6 +862,81 @@ def test_unbatch_inputs(self):
             s,
         )
 
+    def test_guess_dynamic_cache_without_patches(self):  # guess_dynamic_shapes, no torch_export_patches
+        n_layers = 2
+        bsize, nheads, slen, dim = 2, 4, 3, 7
+        cache = make_dynamic_cache(  # first input set: n_layers pairs of (2, 4, 3, 7) tensors
+            [
+                (torch.randn(bsize, nheads, slen, dim), torch.randn(bsize, nheads, slen, dim))
+                for i in range(n_layers)
+            ]
+        )
+        z = torch.randn((1, 1, 1, 7))
+        cache2 = make_dynamic_cache(  # second input set: nheads unchanged, other three sizes + 1
+            [
+                (
+                    torch.randn(bsize + 1, nheads, slen + 1, dim + 1),
+                    torch.randn(bsize + 1, nheads, slen + 1, dim + 1),
+                )
+                for i in range(n_layers)
+            ]
+        )
+        inputs = [
+            (cache, z),
+            (cache2, torch.randn((1, 1, 1, 8))),  # last size is 8 here versus 7 for z
+        ]
+
+        class Model(torch.nn.Module):
+            def forward(self, cache, z):
+                cache = CacheKeyValue(cache)  # wrapper exposing key_cache / value_cache lists
+                return (
+                    z
+                    + cache.key_cache[0]
+                    + cache.key_cache[1]
+                    + cache.value_cache[0]
+                    + cache.value_cache[1]
+                )
+
+        mi = ModelInputs(Model(), inputs)  # receives both input sets
+        ds = mi.guess_dynamic_shapes()
+        DYN = torch.export.Dim.DYNAMIC
+        self.assertEqual(  # expected: a flat list of four dicts for the cache, then z, then an empty dict
+            (
+                (
+                    [
+                        {0: DYN, 2: DYN, 3: DYN},  # axes 0, 2, 3 are those differing between cache and cache2
+                        {0: DYN, 2: DYN, 3: DYN},
+                        {0: DYN, 2: DYN, 3: DYN},
+                        {0: DYN, 2: DYN, 3: DYN},
+                    ],
+                    {3: DYN},  # z: only the last size differs between the two input sets
+                ),
+                {},
+            ),
+            ds,
+        )
+
+    def test_invalid_dimensions_for_export(self):  # invalid_dimensions_for_export() must be falsy here
+        ags = []  # passed as the first argument of CoupleInputsDynamicShapes, kws as the second
+        kws = dict(
+            input_ids=torch.randint(0, 10, (2, 3)),
+            attention_mask=torch.randint(0, 1, (2, 33)),  # NOTE(review): exclusive upper bound 1 -> all zeros
+            position_ids=torch.randint(0, 10, (2, 3)),
+            past_key_values=make_dynamic_cache(
+                [torch.rand((2, 1, 30, 96)), torch.rand((2, 1, 30, 96))]
+            ),
+        )
+        ds = dict(  # named dynamic axes, one entry per key of kws
+            input_ids={0: "batch", 1: "seq_length"},
+            attention_mask={0: "batch", 1: "seq_length"},
+            position_ids={0: "batch", 1: "seq_length"},
+            past_key_values=[{0: "batch", 2: "cache_length"}, {0: "batch", 2: "cache_length"}],
+        )
+        with torch_export_patches(patch_transformers=True):  # built and checked under the patches
+            cpl = CoupleInputsDynamicShapes(ags, kws, ds)
+            backed_size_oblivious = cpl.invalid_dimensions_for_export()
+            self.assertFalse(backed_size_oblivious)  # result must be falsy for this configuration
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
@@ -30,7 +30,7 @@ def forward(self, cache):
         cache = self._get_cache()
         DYN = torch.export.Dim.DYNAMIC
         ds = {0: DYN, 1: DYN, 3: DYN}
-        dynamic_shapes = ([[ds, ds], [ds, ds]],)
+        dynamic_shapes = ([ds, ds, ds, ds],)
         with torch_export_patches(patch_transformers=True):
             exp = torch.export.export(Model(), (cache,), dynamic_shapes=dynamic_shapes)
         self.assertNotEmpty(exp)
@@ -44,7 +44,7 @@ def forward(self, cache):
         cache = self._get_cache()
         flat_unflat = flatten_unflatten_for_dynamic_shapes(cache)
         s = string_type(flat_unflat, with_shape=True)
-        self.assertEqual("#2[#2[T1s2x4x1x7,T1s2x4x1x7],#2[T1s2x4x1x7,T1s2x4x1x7]]", s)
+        self.assertEqual("#4[T1s2x4x1x7,T1s2x4x1x7,T1s2x4x1x7,T1s2x4x1x7]", s)
 
     def test_dynamic_cache_bypass(self):
         class Model(torch.nn.Module):
@@ -55,7 +55,7 @@ def forward(self, cache):
         with torch_export_patches(patch_transformers=True):
             flat_unflat = flatten_unflatten_for_dynamic_shapes(cache)
             s = string_type(flat_unflat, with_shape=True)
-            self.assertEqual("#2[#2[T1s2x4x1x7,T1s2x4x1x7],#2[T1s2x4x1x7,T1s2x4x1x7]]", s)
+            self.assertEqual("#4[T1s2x4x1x7,T1s2x4x1x7,T1s2x4x1x7,T1s2x4x1x7]", s)
 
     def test_dynamic_cache_guess_static(self):
         class Model(torch.nn.Module):
@@ -65,7 +65,7 @@ def forward(self, cache):
         cache = self._get_cache()
         md = ModelInputs(Model(), [(cache,)])
         guessed = md.guess_dynamic_shapes()
-        self.assertEqual(guessed, (([[{}, {}], [{}, {}]],), {}))
+        self.assertEqual(guessed, (([{}, {}, {}, {}],), {}))
 
     def test_dynamic_cache_guess_auto(self):
         class Model(torch.nn.Module):
@@ -77,7 +77,7 @@ def forward(self, cache):
         guessed = md.guess_dynamic_shapes(auto=True)
         AUTO = torch.export.Dim.AUTO
         ds = {i: AUTO for i in range(4)}  # noqa: C420
-        self.assertEqual(guessed, (([[ds, ds], [ds, ds]],), {}))
+        self.assertEqual(guessed, (([ds, ds, ds, ds],), {}))
 
     def test_dynamic_cache_guess_dynamic(self):
         class Model(torch.nn.Module):
@@ -88,18 +88,11 @@ def forward(self, cache):
             Model(), [(self._get_cache(),), (self._get_cache(bsize=3, nheads=5),)]
         )
         guessed = md.guess_dynamic_shapes()
+        print("****", guessed)
         DYN = torch.export.Dim.DYNAMIC
         self.assertEqual(
+            (([{0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}],), {}),
             guessed,
-            (
-                (
-                    [
-                        [{0: DYN, 1: DYN}, {0: DYN, 1: DYN}],
-                        [{0: DYN, 1: DYN}, {0: DYN, 1: DYN}],
-                    ],
-                ),
-                {},
-            ),
         )