Commit 28f3515

Changes Cache serialization
1 parent 7979496 commit 28f3515

File tree

4 files changed: +177 -117 lines changed

_unittests/ut_torch_export_patches/test_patch_serialization_transformers.py

Lines changed: 14 additions & 9 deletions
@@ -8,6 +8,7 @@
     make_static_cache,
     make_sliding_window_cache,
     flatten_unflatten_for_dynamic_shapes,
+    make_dynamic_shapes_kv_cache,
     CacheKeyValue,
 )
 from onnx_diagnostic.torch_export_patches.onnx_export_errors import (
@@ -64,8 +65,8 @@ def forward(self, cache):
         model(cache)
         DYN = torch.export.Dim.DYNAMIC
         ds = [
-            [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]],
-            [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]],
+            make_dynamic_shapes_kv_cache(cache1, {0: DYN}),
+            make_dynamic_shapes_kv_cache(cache2, {0: DYN}),
         ]

         with torch_export_patches(patch_transformers=True):
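For context, a quick sketch (illustration only, not part of the commit) of what this swap amounts to for a 3-layer cache: the hand-written nested dynamic-shape lists are replaced by one flat spec per key/value tensor, which is what the new helper produces.

# Sketch assuming a 3-layer DynamicCache, as in this test.
import torch

DYN = torch.export.Dim.DYNAMIC
n_layers = 3  # assumed layer count for illustration
# old, hand-written nested form: one sub-list for keys, one for values
ds_old = [[{0: DYN}] * n_layers, [{0: DYN}] * n_layers]
# new flat form returned by make_dynamic_shapes_kv_cache: 2 * n_layers entries
ds_new = [{0: DYN} for _ in range(2 * n_layers)]
assert len(ds_new) == 6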
@@ -99,9 +100,15 @@ def forward(self, cache):
         model = Model()
         model(cache)
         DYN = torch.export.Dim.DYNAMIC
-        ds = [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]]
+        ds = make_dynamic_shapes_kv_cache(cache, {0: DYN})
+        self.assertEqual(len(ds), 6)

-        with torch_export_patches():
+        with torch_export_patches(patch_transformers=True):
+            flat, _spec = torch.utils._pytree.tree_flatten(cache)
+            self.assertEqual(len(flat), len(ds))
+            unflat = torch.utils._pytree.tree_unflatten(flat, _spec)
+            if hasattr(unflat, "layers"):
+                self.assertEqual(len(unflat.layers), 3)
             torch.export.export(model, (cache,), dynamic_shapes=(ds,))

     @ignore_warnings(UserWarning)
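The new assertions lean on the pytree registration that the patches install for transformers caches. A minimal sketch of that round trip (shapes and the torch_export_patches import path are assumptions, not taken from the commit):

# Sketch only: assumes the patched pytree registration is active.
import torch
import transformers
from onnx_diagnostic.torch_export_patches import torch_export_patches  # assumed import path

cache = transformers.cache_utils.DynamicCache()
for layer in range(3):
    cache.update(torch.randn(1, 2, 4, 8), torch.randn(1, 2, 4, 8), layer)

with torch_export_patches(patch_transformers=True):
    flat, spec = torch.utils._pytree.tree_flatten(cache)
    # one leaf per key/value tensor: 2 * 3 layers = 6, matching len(ds) in the test
    print(len(flat))
    cache2 = torch.utils._pytree.tree_unflatten(flat, spec)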
@@ -195,7 +202,7 @@ def forward(self, cache):
         model = Model()
         model(cache)
         DYN = torch.export.Dim.DYNAMIC
-        ds = [[{0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}]]
+        ds = make_dynamic_shapes_kv_cache(cache, {0: DYN})

         with torch_export_patches(patch_transformers=True):
             torch.export.export(model, (cache,), dynamic_shapes=(ds,))
@@ -265,9 +272,7 @@ def test_static_cache(self):
         flat, _spec = torch.utils._pytree.tree_flatten(bo)
         unflat = flatten_unflatten_for_dynamic_shapes(bo, use_dict=True)
         self.assertIsInstance(unflat, list)
-        self.assertEqual(
-            "#2[#3[T1r4,T1r4,T1r4],#3[T1r4,T1r4,T1r4]]", self.string_type(unflat)
-        )
+        self.assertEqual("#6[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]", self.string_type(unflat))

         # export
         class Model(torch.nn.Module):
@@ -278,7 +283,7 @@ def forward(self, cache):
         model = Model()
         model(bo)
         DYN = torch.export.Dim.DYNAMIC
-        ds = [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]]
+        ds = make_dynamic_shapes_kv_cache(bo, {0: DYN})

         with torch_export_patches(patch_transformers=True, stop_if_static=1):
             torch.export.export(model, (bo,), dynamic_shapes=(ds,))

onnx_diagnostic/helpers/cache_helper.py

Lines changed: 20 additions & 2 deletions
@@ -1,4 +1,4 @@
-from typing import Any, Callable, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
 import packaging.version as pv
 import torch
 import transformers
@@ -46,9 +46,14 @@ def __init__(self, cache=None):
         raise NotImplementedError(f"type(cache)={type(cache)}")

     def make_dynamic_cache(self):
-        """Do the reverse operation."""
+        """Does the reverse operation."""
         return make_dynamic_cache(list(zip(self.key_cache, self.value_cache)))

+    @property
+    def n_layers(self) -> int:
+        """Returns the number of layers."""
+        return len(self.key_cache) if self.key_cache else 0
+

 def flatten_unflatten_for_dynamic_shapes(
     obj: Any,
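As a usage note, the new property just counts the entries of key_cache. A small sketch (cache contents chosen arbitrarily):

# Sketch: build a 2-layer cache and check the new property.
import torch
from onnx_diagnostic.helpers.cache_helper import CacheKeyValue, make_dynamic_cache

cache = make_dynamic_cache(
    [(torch.randn(1, 2, 4, 8), torch.randn(1, 2, 4, 8)) for _ in range(2)]
)
kv = CacheKeyValue(cache)
assert kv.n_layers == len(kv.key_cache) == 2  # 0 for an empty cache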
@@ -134,6 +139,19 @@ def is_cache_dynamic_registered(fast: bool = False) -> bool:
     return len(cache2.key_cache) == len(cache.value_cache)


+def make_dynamic_shapes_kv_cache(
+    cache: transformers.cache_utils.Cache, shape_of_one: Dict[str, Any]
+) -> List[Dict[int, Any]]:
+    """
+    Returns the dynamic shapes for key-value cache
+
+    :param cache: a cache
+    :param shape_of_one: shape of one element
+    :return: dynamic shapes
+    """
+    return [shape_of_one for _ in range(CacheKeyValue(cache).n_layers * 2)]
+
+
 if pv.Version(transformers.__version__) > pv.Version("4.49.99999"):

     def make_dynamic_cache(
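A short usage sketch mirroring the updated tests (tensor shapes chosen arbitrarily): the helper simply repeats the per-tensor spec once per flattened key/value tensor.

import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,
    make_dynamic_shapes_kv_cache,
)

DYN = torch.export.Dim.DYNAMIC
cache = make_dynamic_cache(
    [(torch.randn(1, 2, 4, 8), torch.randn(1, 2, 4, 8)) for _ in range(3)]
)
ds = make_dynamic_shapes_kv_cache(cache, {0: DYN})
assert ds == [{0: DYN}] * 6  # 2 tensors per layer * 3 layers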

onnx_diagnostic/helpers/helper.py

Lines changed: 3 additions & 2 deletions
@@ -1,6 +1,7 @@
 import ast
 import enum
 import inspect
+import itertools
 from dataclasses import is_dataclass, fields
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 import numpy as np
@@ -948,8 +949,8 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any:
         from .cache_helper import CacheKeyValue

         kc = CacheKeyValue(x)
-        res = flatten_object(kc.key_cache) + flatten_object(kc.value_cache)
-        return tuple(res)
+        return list(itertools.chain.from_iterable(zip(kc.key_cache, kc.value_cache)))
+
     if x.__class__.__name__ == "EncoderDecoderCache":
         res = flatten_object(x.self_attention_cache) + flatten_object(x.cross_attention_cache)
         return tuple(res)
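The replacement changes the flattening order as well as the container type: keys and values are now interleaved per layer instead of being concatenated as two blocks. A tiny sketch of the difference (plain strings stand in for the cached tensors):

import itertools

key_cache = ["k0", "k1", "k2"]
value_cache = ["v0", "v1", "v2"]

# previous behaviour, roughly: all keys, then all values, as a tuple
old = tuple(key_cache + value_cache)  # ('k0', 'k1', 'k2', 'v0', 'v1', 'v2')
# new behaviour: key/value pairs interleaved per layer, as a list
new = list(itertools.chain.from_iterable(zip(key_cache, value_cache)))
print(new)  # ['k0', 'v0', 'k1', 'v1', 'k2', 'v2']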
