
Commit 16b9ff4

eccache

1 parent 19b025c

File tree

6 files changed: +342 −178 lines changed
Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
import unittest
import torch
from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings
from onnx_diagnostic.helpers.cache_helper import make_encoder_decoder_cache, make_dynamic_cache
from onnx_diagnostic.torch_export_patches.onnx_export_errors import (
    bypass_export_some_errors,
)


class TestPatchSerialization(ExtTestCase):
    @ignore_warnings(UserWarning)
    def test_flatten_encoder_decoder_cache(self):
        cache = make_encoder_decoder_cache(
            make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
            make_dynamic_cache([(torch.rand((5, 5, 5)), torch.rand((5, 5, 5)))]),
        )
        with bypass_export_some_errors():
            flat, _spec = torch.utils._pytree.tree_flatten(cache)
            self.assertEqual(
                "#4[T1s4x4x4,T1s4x4x4,T1s5x5x5,T1s5x5x5]",
                self.string_type(flat, with_shape=True),
            )
            cache2 = torch.utils._pytree.tree_unflatten(flat, _spec)
            self.assertEqual(
                self.string_type(cache, with_shape=True, with_min_max=True),
                self.string_type(cache2, with_shape=True, with_min_max=True),
            )

    @ignore_warnings(UserWarning)
    def test_export_encoder_decoder_cache(self):
        class Model(torch.nn.Module):
            def forward(self, cache):
                return cache.self_attention_cache.key_cache[0]

        cache1 = make_dynamic_cache(
            [(torch.randn(2, 4, 3, 7), torch.randn(2, 4, 3, 7)) for i in range(3)]
        )
        cache2 = make_dynamic_cache(
            [(torch.randn(2, 4, 3, 7), torch.randn(2, 4, 3, 7)) for i in range(3)]
        )

        cache = make_encoder_decoder_cache(cache1, cache2)
        model = Model()
        model(cache)
        DYN = torch.export.Dim.DYNAMIC
        ds = [
            [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]],
            [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]],
        ]

        with bypass_export_some_errors():
            torch.export.export(model, (cache,), dynamic_shapes=(ds,))

    @ignore_warnings(UserWarning)
    def test_flatten_dynamic_cache(self):
        cache = make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))])
        with bypass_export_some_errors():
            flat, _spec = torch.utils._pytree.tree_flatten(cache)
            self.assertEqual(
                "#2[T1s4x4x4,T1s4x4x4]",
                self.string_type(flat, with_shape=True),
            )
            cache2 = torch.utils._pytree.tree_unflatten(flat, _spec)
            self.assertEqual(
                self.string_type(cache, with_shape=True, with_min_max=True),
                self.string_type(cache2, with_shape=True, with_min_max=True),
            )

    @ignore_warnings(UserWarning)
    def test_export_dynamic_cache(self):
        class Model(torch.nn.Module):
            def forward(self, cache):
                return cache.key_cache[0]

        cache = make_dynamic_cache(
            [(torch.randn(2, 4, 3, 7), torch.randn(2, 4, 3, 7)) for i in range(3)]
        )
        model = Model()
        model(cache)
        DYN = torch.export.Dim.DYNAMIC
        ds = [[{0: DYN}, {0: DYN}, {0: DYN}], [{0: DYN}, {0: DYN}, {0: DYN}]]

        with bypass_export_some_errors():
            torch.export.export(model, (cache,), dynamic_shapes=(ds,))


if __name__ == "__main__":
    unittest.main(verbosity=2)
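
A note on the expected strings in the asserts above, for readers decoding them: in onnx_diagnostic's string_type notation, "#2[...]" counts the leaves of a flattened list, "T1" marks a float32 tensor, and "s4x4x4" or "r4" give its shape or rank. A minimal sketch, assuming the module-level helper onnx_diagnostic.helpers.string_type behaves like the test's self.string_type:

import torch
from onnx_diagnostic.helpers import string_type  # assumed equivalent to self.string_type

tensors = [torch.rand((4, 4, 4)), torch.rand((4, 4, 4))]
# "#2[...]" -> two leaves; "T1" -> float32; "s4x4x4" -> the shape
print(string_type(tensors, with_shape=True))  # expected: #2[T1s4x4x4,T1s4x4x4]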

_unittests/ut_torch_models/test_hghub_model.py

Lines changed: 16 additions & 1 deletion
@@ -1,5 +1,6 @@
 import pprint
 import unittest
+import torch
 import transformers
 from onnx_diagnostic.ext_test_case import (
     ExtTestCase,
@@ -14,6 +15,7 @@
 )
 from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config
 from onnx_diagnostic.torch_models.hghub.hub_data import load_models_testing
+from onnx_diagnostic.torch_export_patches import bypass_export_some_errors


 class TestHuggingFaceHubModel(ExtTestCase):
@@ -109,8 +111,21 @@ def test_get_untrained_model_with_inputs_automatic_speech_recognition(self):
         mid = "openai/whisper-tiny"
         data = get_untrained_model_with_inputs(mid, verbose=1)
         self.assertIn((data["size"], data["n_weights"]), [(132115968, 33028992)])
-        model, inputs = data["model"], data["inputs"]
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
+        self.assertEqual(
+            "#1[T1r3]",
+            self.string_type(torch.utils._pytree.tree_flatten(inputs["encoder_outputs"])[0]),
+        )
+        with bypass_export_some_errors(patch_transformers=True):
+            flat = torch.utils._pytree.tree_flatten(inputs["past_key_values"])[0]
+            self.assertIsInstance(flat, list)
+            self.assertIsInstance(flat[0], torch.Tensor)
+            self.assertEqual(
+                "#8[T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4,T1r4]",
+                self.string_type(flat),
+            )
+            torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds)

     @hide_stdout()
     def test_get_untrained_model_with_inputs_imagetext2text_generation(self):
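
As context for the tree_flatten assertions above: torch's pytree utilities flatten any registered container into leaf tensors plus a spec that can rebuild the structure, which is what the patches make work for the cache classes. A small self-contained sketch using only built-in containers:

import torch

# tree_flatten walks registered pytrees (dicts, lists, tuples, and classes
# registered via register_pytree_node) and returns the leaves plus a spec.
inputs = {"a": [torch.ones(2, 3), torch.zeros(4)], "b": (torch.rand(5),)}
flat, spec = torch.utils._pytree.tree_flatten(inputs)
assert all(isinstance(t, torch.Tensor) for t in flat)  # three leaf tensors
rebuilt = torch.utils._pytree.tree_unflatten(flat, spec)
assert torch.equal(rebuilt["a"][0], inputs["a"][0])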

onnx_diagnostic/helpers/cache_helper.py

Lines changed: 1 addition & 3 deletions
@@ -132,9 +132,7 @@ def make_encoder_decoder_cache(
     self_attention_cache: transformers.cache_utils.DynamicCache,
     cross_attention_cache: transformers.cache_utils.DynamicCache,
 ) -> transformers.cache_utils.EncoderDecoderCache:
-    """
-    Creates an EncoderDecoderCache.
-    """
+    """Creates an EncoderDecoderCache."""
     return transformers.cache_utils.EncoderDecoderCache(
         self_attention_cache=self_attention_cache, cross_attention_cache=cross_attention_cache
     )
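
For context on the companion helper used throughout the tests, here is a minimal sketch of what make_dynamic_cache plausibly does. The real helper lives in onnx_diagnostic.helpers.cache_helper; this reimplementation (make_dynamic_cache_sketch) is an assumption for illustration only:

import torch
import transformers

def make_dynamic_cache_sketch(key_value_pairs):
    # Build a DynamicCache and feed it one (key, value) pair per layer.
    cache = transformers.cache_utils.DynamicCache()
    for layer_idx, (key, value) in enumerate(key_value_pairs):
        cache.update(key, value, layer_idx)
    return cache

cache = make_dynamic_cache_sketch([(torch.rand(4, 4, 4), torch.rand(4, 4, 4))])
assert len(cache.key_cache) == 1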

onnx_diagnostic/torch_export_patches/onnx_export_errors.py

Lines changed: 3 additions & 158 deletions
@@ -1,13 +1,8 @@
 import contextlib
-import pprint
-from typing import Any, Callable, Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, Optional
 from .onnx_export_serialization import (
-    flatten_with_keys_dynamic_cache,
-    flatten_dynamic_cache,
-    unflatten_dynamic_cache,
-    flatten_mamba_cache,
-    flatten_with_keys_mamba_cache,
-    unflatten_mamba_cache,
+    _register_cache_serialization,
+    _unregister_cache_serialization,
 )
 from .patches import patch_transformers as patch_transformers_list

@@ -84,156 +79,6 @@ def unpatch_module_or_classes(mod, info: Dict[type, Dict[type, Callable]], verbo
         setattr(original, n, v)


-PATCH_OF_PATCHES: Set[Any] = set()
-
-
-def _register_cache_serialization(verbose: int = 0) -> Dict[str, bool]:
-    # Cache serialization: to be moved into appropriate packages
-    import torch
-    import transformers
-    import packaging.version as pv
-
-    try:
-        from transformers.cache_utils import DynamicCache
-    except ImportError:
-        DynamicCache = None
-
-    try:
-        from transformers.cache_utils import MambaCache
-    except ImportError:
-        MambaCache = None
-
-    # MambaCache
-    unregistered_mamba_cache = True
-    if MambaCache is not None and MambaCache in torch.utils._pytree.SUPPORTED_NODES:
-        if verbose > 1:
-            print(f"[_register_cache_serialization] {MambaCache} already registered")
-        # It is already registered because bypass_export_some_errors was called
-        # within a section already calling bypass_export_some_errors or transformers
-        # has updated its code to do it.
-        # No need to register and unregister then.
-        unregistered_mamba_cache = False
-    else:
-        if verbose:
-            print("[_register_cache_serialization] register MambaCache")
-        torch.utils._pytree.register_pytree_node(
-            MambaCache,
-            flatten_mamba_cache,
-            unflatten_mamba_cache,
-            serialized_type_name=f"{MambaCache.__module__}.{MambaCache.__name__}",
-            flatten_with_keys_fn=flatten_with_keys_mamba_cache,
-        )
-
-    # DynamicCache serialization is different in transformers and does not
-    # play well with torch.export.export.
-    # see test test_export_dynamic_cache_cat with NOBYPASS=1
-    # :: NOBYPASS=1 python _unittests/ut_torch_export_patches/test_dynamic_class.py -k e_c
-    # This is caused by this line:
-    # torch.fx._pytree.register_pytree_flatten_spec(
-    #     DynamicCache, _flatten_dynamic_cache_for_fx)
-    # so we remove it anyway
-    if (
-        DynamicCache in torch.fx._pytree.SUPPORTED_NODES
-        and not PATCH_OF_PATCHES
-        # and pv.Version(torch.__version__) < pv.Version("2.7")
-        and pv.Version(transformers.__version__) >= pv.Version("4.50")
-    ):
-        if verbose:
-            print(
-                "[_register_cache_serialization] DynamicCache "
-                "is unregistered and re-registered first."
-            )
-        _unregister(DynamicCache)
-        torch.utils._pytree.register_pytree_node(
-            DynamicCache,
-            flatten_dynamic_cache,
-            unflatten_dynamic_cache,
-            serialized_type_name=f"{DynamicCache.__module__}.{DynamicCache.__name__}",
-            flatten_with_keys_fn=flatten_with_keys_dynamic_cache,
-        )
-        if pv.Version(torch.__version__) < pv.Version("2.7"):
-            torch.fx._pytree.register_pytree_flatten_spec(
-                DynamicCache, lambda x, _: [x.key_cache, x.value_cache]
-            )
-        # To avoid doing it multiple times.
-        PATCH_OF_PATCHES.add(DynamicCache)
-
-    unregistered_dynamic_cache = True
-    if DynamicCache is not None and DynamicCache in torch.utils._pytree.SUPPORTED_NODES:
-        if verbose > 1:
-            print(f"[_register_cache_serialization] {DynamicCache} already registered")
-        unregistered_dynamic_cache = False
-    else:
-        if verbose:
-            print("[_register_cache_serialization] register DynamicCache")
-        torch.utils._pytree.register_pytree_node(
-            DynamicCache,
-            flatten_dynamic_cache,
-            unflatten_dynamic_cache,
-            serialized_type_name=f"{DynamicCache.__module__}.{DynamicCache.__name__}",
-            flatten_with_keys_fn=flatten_with_keys_dynamic_cache,
-        )
-        if pv.Version(torch.__version__) < pv.Version("2.7"):
-            torch.fx._pytree.register_pytree_flatten_spec(
-                DynamicCache, lambda x, _: [x.key_cache, x.value_cache]
-            )
-
-    # check
-    from ..helpers.cache_helper import make_dynamic_cache
-
-    cache = make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))])
-    values, spec = torch.utils._pytree.tree_flatten(cache)
-    cache2 = torch.utils._pytree.tree_unflatten(values, spec)
-    # torch.fx._pytree.tree_flatten(cache)
-    assert len(cache2.key_cache) == 1
-
-    return dict(DynamicCache=unregistered_dynamic_cache, MambaCache=unregistered_mamba_cache)
-
-
-def _unregister(cls: type, verbose: int = 0):
-    import optree
-    import torch
-
-    # torch.fx._pytree._deregister_pytree_flatten_spec(cls)
-    if cls in torch.fx._pytree.SUPPORTED_NODES:
-        del torch.fx._pytree.SUPPORTED_NODES[cls]
-    if cls in torch.fx._pytree.SUPPORTED_NODES_EXACT_MATCH:
-        del torch.fx._pytree.SUPPORTED_NODES_EXACT_MATCH[cls]
-    if hasattr(torch.utils._pytree, "_deregister_pytree_node"):
-        # torch >= 2.7
-        torch.utils._pytree._deregister_pytree_node(cls)
-        optree.unregister_pytree_node(cls, namespace="torch")
-    if cls in torch.utils._pytree.SUPPORTED_NODES:
-        import packaging.version as pv
-
-        if pv.Version(torch.__version__) < pv.Version("2.7.0"):
-            del torch.utils._pytree.SUPPORTED_NODES[cls]
-    assert cls not in torch.utils._pytree.SUPPORTED_NODES, (
-        f"{cls} was not successfully unregistered "
-        f"from torch.utils._pytree.SUPPORTED_NODES="
-        f"{pprint.pformat(list(torch.utils._pytree.SUPPORTED_NODES))}"
-    )
-    if verbose:
-        print(f"[_unregister_cache_serialization] unregistered {cls.__name__}")
-
-
-def _unregister_cache_serialization(undo: Dict[str, bool], verbose: int = 0):
-
-    if undo.get("MambaCache", False):
-        from transformers.cache_utils import MambaCache
-
-        _unregister(MambaCache, verbose)
-    elif verbose > 1:
-        print("[_unregister_cache_serialization] skip unregister MambaCache")
-
-    if undo.get("DynamicCache", False):
-        from transformers.cache_utils import DynamicCache
-
-        _unregister(DynamicCache, verbose)
-    elif verbose > 1:
-        print("[_unregister_cache_serialization] skip unregister DynamicCache")
-
-
 @contextlib.contextmanager
 def register_additional_serialization_functions(
     patch_transformers: bool = False, verbose: int = 0
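
The registration logic deleted above did not disappear; it moved into onnx_export_serialization as _register_cache_serialization and _unregister_cache_serialization. As a reference for the pattern those helpers implement, here is a self-contained sketch; MyCache is a hypothetical stand-in for DynamicCache/MambaCache, not part of the library:

import torch

class MyCache:
    # Hypothetical stand-in for a transformers cache class.
    def __init__(self, key_cache=None, value_cache=None):
        self.key_cache = key_cache if key_cache is not None else []
        self.value_cache = value_cache if value_cache is not None else []

def flatten_my_cache(cache):
    # Return the dynamic children plus a static context.
    return [cache.key_cache, cache.value_cache], ["key_cache", "value_cache"]

def unflatten_my_cache(values, context):
    cache = MyCache()
    cache.key_cache, cache.value_cache = values
    return cache

torch.utils._pytree.register_pytree_node(
    MyCache,
    flatten_my_cache,
    unflatten_my_cache,
    serialized_type_name=f"{MyCache.__module__}.{MyCache.__name__}",
)

# Once registered, tree_flatten sees through the class down to the tensors.
flat, spec = torch.utils._pytree.tree_flatten(MyCache([torch.ones(2)], [torch.zeros(2)]))
assert len(flat) == 2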
