Commit 63a1408: Support for static cache
1 parent 3974d97

22 files changed: +466, -182 lines

_unittests/ut_helpers/test_cache_helper.py
43 additions, 1 deletion
@@ -2,13 +2,14 @@
 import torch
 import transformers
 from onnx_diagnostic.ext_test_case import ExtTestCase, requires_transformers
-from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.helpers import string_type, max_diff
 from onnx_diagnostic.helpers.cache_helper import (
     flatten_unflatten_for_dynamic_shapes,
     make_dynamic_cache,
     make_encoder_decoder_cache,
     make_mamba_cache,
     make_sliding_window_cache,
+    make_static_cache,
 )
 from onnx_diagnostic.export import CoupleInputsDynamicShapes
 from onnx_diagnostic.torch_export_patches.patch_inputs import (
@@ -104,6 +105,7 @@ def test_unflatten_flatten_encoder_decoder_cache(self):
                 ]
             ),
         )
+        self.assertEqual(0, max_diff(c2, c2)["abs"])
         self.assertIsInstance(c2, transformers.cache_utils.EncoderDecoderCache)
         flat, _spec = torch.utils._pytree.tree_flatten(c2)
         self.assertIsInstance(flat, list)
@@ -149,6 +151,7 @@ def test_make_mamba_cache(self):
             "ssm_states=#3[T1s4x4x4,T1s4x4x4,T1s4x4x4])",
             text,
         )
+        self.assertEqual(0, max_diff(cache, cache)["abs"])
 
     def test_make_sliding_window_cache(self):
         cache = make_sliding_window_cache(
@@ -164,6 +167,45 @@ def test_make_sliding_window_cache(self):
             "value_cache=#3[T1s4x5x6x7,T1s4x5x6x7,T1s4x5x6x7])",
             text,
         )
+        self.assertEqual(0, max_diff(cache, cache)["abs"])
+
+    def test_make_static_cache(self):
+        cache = make_static_cache(
+            [
+                (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
+                (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
+                (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
+            ]
+        )
+        text = self.string_type(cache, with_shape=True)
+        self.assertEqual(
+            "StaticCache(key_cache=#3[T1s4x5x6x7,T1s4x5x6x7,T1s4x5x6x7], "
+            "value_cache=#3[T1s4x5x6x7,T1s4x5x6x7,T1s4x5x6x7])",
+            text,
+        )
+        self.assertEqual(0, max_diff(cache, cache)["abs"])
+
+    def test_unflatten_flatten_static_cache(self):
+        with torch_export_patches(patch_transformers=True):
+            c2 = make_static_cache(
+                [
+                    (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
+                    (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
+                    (torch.rand((4, 5, 6, 7)), torch.rand((4, 5, 6, 7))),
+                ]
+            )
+            self.assertEqual(0, max_diff(c2, c2)["abs"])
+            self.assertIsInstance(c2, transformers.cache_utils.StaticCache)
+            flat, _spec = torch.utils._pytree.tree_flatten(c2)
+            self.assertIsInstance(flat, list)
+            self.assertEqual(len(flat), 6)
+            unflat = flatten_unflatten_for_dynamic_shapes(c2)
+            self.assertIsInstance(unflat, list)
+            self.assertEqual(len(unflat), 2)
+            self.assertEqual(
+                "#2[#3[T1s4x5x6x7,T1s4x5x6x7,T1s4x5x6x7],#3[T1s4x5x6x7,T1s4x5x6x7,T1s4x5x6x7]]",
+                self.string_type(unflat, with_shape=True),
+            )
 
 
 if __name__ == "__main__":
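
Note: the flatten/unflatten round-trip exercised above can be sketched outside the unittest harness as below. This is an illustration, not code from the commit; it assumes torch_export_patches(patch_transformers=True) registers StaticCache with torch.utils._pytree (as the with block in test_unflatten_flatten_static_cache suggests) and that max_diff accepts a pair of caches, which is how the new assertions use it.

import torch
from onnx_diagnostic.helpers import max_diff
from onnx_diagnostic.helpers.cache_helper import make_static_cache
from onnx_diagnostic.torch_export_patches import torch_export_patches

# three layers of (key, value) pairs, same shapes as in the tests above
cache = make_static_cache(
    [(torch.rand(4, 5, 6, 7), torch.rand(4, 5, 6, 7)) for _ in range(3)]
)

with torch_export_patches(patch_transformers=True):
    # one pytree leaf per key and per value tensor: 3 layers -> 6 leaves
    flat, spec = torch.utils._pytree.tree_flatten(cache)
    assert len(flat) == 6
    # unflatten rebuilds an equivalent StaticCache from the leaves
    restored = torch.utils._pytree.tree_unflatten(flat, spec)
    assert max_diff(cache, restored)["abs"] == 0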

_unittests/ut_tasks/try_tasks.py
29 additions, 0 deletions
@@ -98,6 +98,35 @@ def test_text2text_generation(self):
         )
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 
+    @never_test()
+    def test_text2text_generation_static(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k text2t
+
+        import torch
+        from transformers import AutoTokenizer, AutoModelForCausalLM
+
+        tokenizer = AutoTokenizer.from_pretrained("arnir0/Tiny-LLM")
+        model = AutoModelForCausalLM.from_pretrained("arnir0/Tiny-LLM")
+
+        text = "def greet(user): print(f'hello <extra_id_0>!')"
+        input_ids = tokenizer(text, return_tensors="pt").input_ids
+        mask = (
+            torch.tensor([1 for i in range(input_ids.shape[1])])
+            .to(torch.int64)
+            .reshape((1, -1))
+        )
+
+        # simply generate a single sequence
+        print()
+        with steal_forward(model):
+            generated_ids = model.generate(
+                input_ids=input_ids,
+                attention_mask=mask,
+                max_new_tokens=117,
+                cache_implementation="static",
+            )
+        print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
+
     @never_test()
     def test_text_generation_phi4_mini(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_mini
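
Stripped of the steal_forward instrumentation (which captures the inputs of each forward call), the new test reduces to stock transformers generation with a preallocated cache. A minimal standalone equivalent using the same public API:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("arnir0/Tiny-LLM")
model = AutoModelForCausalLM.from_pretrained("arnir0/Tiny-LLM")

inputs = tokenizer("def greet(user):", return_tensors="pt")
# cache_implementation="static" asks generate to preallocate a StaticCache
# of fixed length instead of growing a DynamicCache token by token
generated_ids = model.generate(
    **inputs,
    max_new_tokens=32,
    cache_implementation="static",
)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))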

_unittests/ut_torch_models/test_tiny_llms.py
37 additions, 8 deletions
@@ -1,32 +1,61 @@
 import copy
 import unittest
 import torch
-from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings, requires_transformers
+from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings
 from onnx_diagnostic.torch_models.llms import get_tiny_llm
 from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.torch_export_patches import torch_export_patches
+from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
 
 
 class TestTinyLlm(ExtTestCase):
-    def test_get_tiny_llm(self):
+    def test_tiny_llm_run_dynamic(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
         self.assertIn("DynamicCache", string_type(inputs))
         model(**inputs)
 
     @ignore_warnings(UserWarning)
-    @requires_transformers("4.53")
-    def test_export_tiny_llm_1(self):
+    def test_tiny_llm_export_dynamic(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
         expected = model(**copy.deepcopy(inputs))
         self.assertEqual(
             {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
         )
-        ep = torch.export.export(
-            model, (), kwargs=copy.deepcopy(inputs), dynamic_shapes=data["dynamic_shapes"]
+        with torch_export_patches(patch_transformers=True):
+            ep = torch.export.export(
+                model,
+                (),
+                kwargs=copy.deepcopy(inputs),
+                dynamic_shapes=use_dyn_not_str(data["dynamic_shapes"]),
+            )
+            got = ep.module()(**inputs)
+            self.assertEqualArrayAny(expected, got)
+
+    def test_tiny_llm_run_static(self):
+        data = get_tiny_llm(use_static_cache=True)
+        model, inputs = data["model"], data["inputs"]
+        self.assertIn("StaticCache", string_type(inputs))
+        model(**inputs)
+
+    @ignore_warnings(UserWarning)
+    def test_tiny_llm_export_static(self):
+        data = get_tiny_llm(use_static_cache=True)
+        model, inputs = data["model"], data["inputs"]
+        expected = model(**copy.deepcopy(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "cache_position"}, set(inputs)
         )
-        got = ep.module()(**inputs)
-        self.assertEqualArrayAny(expected, got)
+        with torch_export_patches(patch_transformers=True, stop_if_static=1):
+            ep = torch.export.export(
+                model,
+                (),
+                kwargs=copy.deepcopy(inputs),
+                dynamic_shapes=use_dyn_not_str(data["dynamic_shapes"]),
+            )
+            got = ep.module()(**inputs)
+            self.assertEqualArrayAny(expected, got)
 
 
 if __name__ == "__main__":
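
Pulled out of the test harness, the static-cache export path amounts to the sketch below. Same imports as the diff; stop_if_static=1 presumably makes the patch context raise as soon as a dimension declared dynamic is specialized to a constant, which is the failure mode a fixed-size cache is most likely to provoke.

import copy
import torch
from onnx_diagnostic.torch_models.llms import get_tiny_llm
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str

# model, inputs and dynamic shapes prepared with a StaticCache
data = get_tiny_llm(use_static_cache=True)
model, inputs = data["model"], data["inputs"]

with torch_export_patches(patch_transformers=True, stop_if_static=1):
    ep = torch.export.export(
        model,
        (),
        kwargs=copy.deepcopy(inputs),
        dynamic_shapes=use_dyn_not_str(data["dynamic_shapes"]),
    )

# the exported program can be run like the original module
print(ep.module()(**inputs))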
