Commit 26ee1ce

add custom

1 parent 7b8eb85 commit 26ee1ce

8 files changed: +353 -116 lines changed

_unittests/ut_torch_export_patches/test_patch_serialization.py

Lines changed: 44 additions & 7 deletions
@@ -1,16 +1,17 @@
 import unittest
 import torch
+from transformers.modeling_outputs import BaseModelOutput
 from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings
 from onnx_diagnostic.helpers.cache_helper import make_encoder_decoder_cache, make_dynamic_cache
 from onnx_diagnostic.torch_export_patches.onnx_export_errors import (
     bypass_export_some_errors,
 )
-from transformers.modeling_outputs import BaseModelOutput
+from onnx_diagnostic.helpers.torch_test_helper import torch_deepcopy


 class TestPatchSerialization(ExtTestCase):
     @ignore_warnings(UserWarning)
-    def test_flatten_encoder_decoder_cache(self):
+    def test_encoder_decoder_cache_flatten(self):
         cache = make_encoder_decoder_cache(
             make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
             make_dynamic_cache([(torch.rand((5, 5, 5)), torch.rand((5, 5, 5)))]),
@@ -28,7 +29,17 @@ def test_flatten_encoder_decoder_cache(self):
         )

     @ignore_warnings(UserWarning)
-    def test_export_encoder_decoder_cache(self):
+    def test_encoder_decoder_cache_deepcopy(self):
+        cache = make_encoder_decoder_cache(
+            make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
+            make_dynamic_cache([(torch.rand((5, 5, 5)), torch.rand((5, 5, 5)))]),
+        )
+        with bypass_export_some_errors():
+            cache2 = torch_deepcopy([cache])
+        self.assertEqualAny([cache], cache2)
+
+    @ignore_warnings(UserWarning)
+    def test_encoder_decoder_cache_export(self):
         class Model(torch.nn.Module):
             def forward(self, cache):
                 return cache.self_attention_cache.key_cache[0]
@@ -53,7 +64,7 @@ def forward(self, cache):
         torch.export.export(model, (cache,), dynamic_shapes=(ds,))

     @ignore_warnings(UserWarning)
-    def test_flatten_dynamic_cache(self):
+    def test_dynamic_cache_flatten(self):
         cache = make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))])
         with bypass_export_some_errors():
             flat, _spec = torch.utils._pytree.tree_flatten(cache)
@@ -68,7 +79,7 @@ def test_flatten_dynamic_cache(self):
         )

     @ignore_warnings(UserWarning)
-    def test_export_dynamic_cache(self):
+    def test_dynamic_cache_export(self):
         class Model(torch.nn.Module):
             def forward(self, cache):
                 return cache.key_cache[0]
@@ -85,7 +96,33 @@ def forward(self, cache):
         torch.export.export(model, (cache,), dynamic_shapes=(ds,))

     @ignore_warnings(UserWarning)
-    def test_base_model_output(self):
+    def test_dynamic_cache_deepcopy(self):
+        cache = make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))])
+        with bypass_export_some_errors():
+            cache2 = torch_deepcopy([cache])
+        self.assertEqualAny([cache], cache2)
+
+    @ignore_warnings(UserWarning)
+    def test_base_model_output_deepcopy(self):
+        bo = BaseModelOutput(last_hidden_state=torch.rand((4, 4, 4)))
+        self.assertEqual(bo.__class__.__name__, "BaseModelOutput")
+        with bypass_export_some_errors():
+            bo2 = torch_deepcopy([bo])
+        self.assertIsInstance(bo2, list)
+        self.assertEqual(bo2[0].__class__.__name__, "BaseModelOutput")
+        self.assertEqualAny([bo], bo2)
+
+    @ignore_warnings(UserWarning)
+    def test_base_model_output_string_type(self):
+        bo = BaseModelOutput(last_hidden_state=torch.rand((4, 4, 4)))
+        with bypass_export_some_errors():
+            self.assertEqual(
+                "BaseModelOutput(last_hidden_state:T1s4x4x4)",
+                self.string_type(bo, with_shape=True),
+            )
+
+    @ignore_warnings(UserWarning)
+    def test_base_model_output_flatten(self):
         bo = BaseModelOutput(last_hidden_state=torch.rand((4, 4, 4)))
         with bypass_export_some_errors():
             flat, _spec = torch.utils._pytree.tree_flatten(bo)
@@ -100,7 +137,7 @@ def test_base_model_output(self):
             )

     @ignore_warnings(UserWarning)
-    def test_export_base_model_output(self):
+    def test_base_model_output_export(self):
         class Model(torch.nn.Module):
             def forward(self, cache):
                 return cache.last_hidden_state[0]
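
Note: the round-trip these tests exercise, shown as a minimal standalone sketch. It assumes bypass_export_some_errors registers BaseModelOutput with torch's pytree while active, which is what this commit wires up; only the variable names are new here.

import torch
from transformers.modeling_outputs import BaseModelOutput
from onnx_diagnostic.torch_export_patches.onnx_export_errors import (
    bypass_export_some_errors,
)

bo = BaseModelOutput(last_hidden_state=torch.rand((4, 4, 4)))
with bypass_export_some_errors():
    # once registered, the output flattens to its tensor leaves...
    flat, spec = torch.utils._pytree.tree_flatten(bo)
    # ...and unflattens back to the same class, not a plain dict
    restored = torch.utils._pytree.tree_unflatten(flat, spec)
assert type(restored) is BaseModelOutput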

onnx_diagnostic/_command_lines_parser.py

Lines changed: 4 additions & 0 deletions
@@ -315,6 +315,10 @@ def _cmd_validate(argv: List[Any]):
         for k, v in data["dynamic_shapes"].items():
             print(f" + {k.ljust(max_length)}: {_ds_clean(v)}")
     else:
+        # Skip any combination of options known to be unsupported.
+        if "onnx" not in args.export and "custom" not in args.export and args.opt:
+            print(f"validate - unsupported args: export={args.export!r}, opt={args.opt!r}")
+            return
         summary, _data = validate_model(
             model_id=args.mid,
             task=args.task,
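
Note: the early exit in isolation, as a hedged sketch. skip_unsupported and the Namespace values are hypothetical; only the attribute names (args.export, args.opt) come from the diff.

from argparse import Namespace

def skip_unsupported(args) -> bool:
    # mirrors the guard above: opt is set while the exporter is
    # neither "onnx" nor "custom"
    return bool(args.opt) and "onnx" not in args.export and "custom" not in args.export

print(skip_unsupported(Namespace(export="eager", opt="ir")))   # True -> skipped
print(skip_unsupported(Namespace(export="custom", opt="ir")))  # False -> validated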

onnx_diagnostic/ext_test_case.py

Lines changed: 22 additions & 1 deletion
@@ -887,7 +887,18 @@ def assertEqual(self, expected: Any, value: Any, msg: str = ""):
     def assertEqualAny(
         self, expected: Any, value: Any, atol: float = 0, rtol: float = 0, msg: str = ""
     ):
-        if isinstance(expected, (tuple, list, dict)):
+        if expected.__class__.__name__ == "BaseModelOutput":
+            self.assertEqual(type(expected), type(value), msg=msg)
+            self.assertEqual(len(expected), len(value), msg=msg)
+            self.assertEqual(list(expected), list(value), msg=msg)  # checks the order
+            self.assertEqualAny(
+                {k: v for k, v in expected.items()},  # noqa: C416
+                {k: v for k, v in value.items()},  # noqa: C416
+                atol=atol,
+                rtol=rtol,
+                msg=msg,
+            )
+        elif isinstance(expected, (tuple, list, dict)):
             self.assertIsInstance(value, type(expected), msg=msg)
             self.assertEqual(len(expected), len(value), msg=msg)
             if isinstance(expected, dict):
@@ -898,13 +909,23 @@ def assertEqualAny(
             for e, g in zip(expected, value):
                 self.assertEqualAny(e, g, msg=msg, atol=atol, rtol=rtol)
         elif expected.__class__.__name__ == "DynamicCache":
+            self.assertEqual(type(expected), type(value), msg=msg)
             atts = ["key_cache", "value_cache"]
             self.assertEqualAny(
                 {k: expected.__dict__.get(k, None) for k in atts},
                 {k: value.__dict__.get(k, None) for k in atts},
                 atol=atol,
                 rtol=rtol,
             )
+        elif expected.__class__.__name__ == "EncoderDecoderCache":
+            self.assertEqual(type(expected), type(value), msg=msg)
+            atts = ["self_attention_cache", "cross_attention_cache"]
+            self.assertEqualAny(
+                {k: expected.__dict__.get(k, None) for k in atts},
+                {k: value.__dict__.get(k, None) for k in atts},
+                atol=atol,
+                rtol=rtol,
+            )
         elif isinstance(expected, (int, float, str)):
             self.assertEqual(expected, value, msg=msg)
         elif hasattr(expected, "shape"):
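
Note on why list(expected) "checks the order": iterating a BaseModelOutput yields its keys in insertion order, so comparing the two lists compares key order before the dict comparison checks the values. A small illustration, assuming transformers is installed:

import torch
from transformers.modeling_outputs import BaseModelOutput

bo = BaseModelOutput(last_hidden_state=torch.rand((2, 2)))
print(list(bo))                          # ['last_hidden_state'] - the keys, in order
print([v.shape for _, v in bo.items()])  # [torch.Size([2, 2])]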

onnx_diagnostic/helpers/rt_helper.py

Lines changed: 7 additions & 3 deletions
@@ -11,6 +11,7 @@ def make_feeds(
     inputs: Any,
     use_numpy: bool = False,
     copy: bool = False,
+    check_flatten: bool = True,
 ) -> Dict[str, Union[torch.Tensor, np.ndarray]]:
     """
     Serializes the inputs to produce feeds expected
@@ -21,17 +22,20 @@ def make_feeds(
     :param use_numpy: if True, converts torch tensors into numpy arrays
     :param copy: a copy is made, this should be the case if the inputs is ingested
         by ``OrtValue``
+    :param check_flatten: if True, checks that ``torch.utils._pytree.tree_flatten``
+        returns the same number of outputs
     :return: feeds dictionary
     """
     flat = flatten_object(inputs, drop_keys=True)
     assert (
-        not all(isinstance(obj, torch.Tensor) for obj in flat)
+        not check_flatten
+        or not all(isinstance(obj, torch.Tensor) for obj in flat)
         or not is_cache_dynamic_registered(fast=True)
         or len(flat) == len(torch.utils._pytree.tree_flatten(inputs)[0])
     ), (
         f"Unexpected number of flattened objects, "
-        f"{string_type(flat, with_shape=True, limit=20)} != "
-        f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True,limit=20)}"
+        f"{string_type(flat, with_shape=True)} != "
+        f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True)}"
     )
     if use_numpy:
        flat = [t.detach().cpu().numpy() if isinstance(t, torch.Tensor) else t for t in flat]
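
Note on the motivation for check_flatten, sketched: the two flattening paths can disagree on the number of leaves when a type is handled by flatten_object but not registered with torch's pytree (or the reverse), which is exactly what the old assertion rejected. A hedged illustration:

import torch
from transformers.modeling_outputs import BaseModelOutput

bo = BaseModelOutput(last_hidden_state=torch.rand((2, 2)))
leaves, _spec = torch.utils._pytree.tree_flatten(bo)
# one leaf if BaseModelOutput is not pytree-registered by the installed
# torch/transformers combination, otherwise one leaf per tensor;
# make_feeds(..., check_flatten=False) tolerates either outcome
print(len(leaves))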

onnx_diagnostic/helpers/torch_test_helper.py

Lines changed: 4 additions & 1 deletion
@@ -351,7 +351,10 @@ def torch_deepcopy(value: Any) -> Any:
     if isinstance(value, set):
         return {torch_deepcopy(v) for v in value}
     if isinstance(value, dict):
-        return {k: torch_deepcopy(v) for k, v in value.items()}
+        if type(value) is dict:
+            return {k: torch_deepcopy(v) for k, v in value.items()}
+        # for dict subclasses such as BaseModelOutput
+        return value.__class__(**{k: torch_deepcopy(v) for k, v in value.items()})
     if isinstance(value, np.ndarray):
         return value.copy()
     if hasattr(value, "clone"):
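
Note on the exact-type test: a plain dict comprehension silently downcasts dict subclasses such as BaseModelOutput, while rebuilding through value.__class__ preserves the type. A small illustration (clone of the values omitted to keep the point visible):

import torch
from transformers.modeling_outputs import BaseModelOutput

bo = BaseModelOutput(last_hidden_state=torch.rand((2, 2)))
downcast = {k: v for k, v in bo.items()}                 # type information lost
rebuilt = bo.__class__(**{k: v for k, v in bo.items()})  # type preserved
print(type(downcast).__name__)  # dict
print(type(rebuilt).__name__)   # BaseModelOutput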

onnx_diagnostic/torch_export_patches/onnx_export_errors.py

Lines changed: 3 additions & 0 deletions
@@ -424,6 +424,9 @@ def replacement_before_exporting(args: Any) -> Any:
         return None
     if isinstance(args, (int, float)):
         return args
+    if type(args) not in {dict, tuple, list}:
+        # BaseModelOutput is a dict subclass: leave it untouched
+        return args
     if isinstance(args, dict):
         return {k: replacement_before_exporting(v) for k, v in args.items()}
     if isinstance(args, tuple):
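
Note on what the new guard prevents: BaseModelOutput passes isinstance(args, dict), so without the exact-type check the dict branch below would rebuild it as a plain dict. A quick check:

import torch
from transformers.modeling_outputs import BaseModelOutput

bo = BaseModelOutput(last_hidden_state=torch.rand((2, 2)))
print(isinstance(bo, dict))             # True: it is a dict subclass
print(type(bo) in {dict, tuple, list})  # False: the guard returns it unchanged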

onnx_diagnostic/torch_export_patches/onnx_export_serialization.py

Lines changed: 2 additions & 27 deletions
@@ -188,36 +188,13 @@ def _unregister_cache_serialization(undo: Dict[str, bool], verbose: int = 0):
 ############


-# self.conv_states: torch.Tensor = torch.zeros(
-#     config.num_hidden_layers,
-#     self.max_batch_size,
-#     self.intermediate_size,
-#     self.conv_kernel_size,
-#     device=device,
-#     dtype=dtype,
-# )
-# self.ssm_states: torch.Tensor = torch.zeros(
-#     config.num_hidden_layers,
-#     self.max_batch_size,
-#     self.intermediate_size,
-#     self.ssm_state_size,
-#     device=device,
-#     dtype=dtype,
-# )
 def flatten_mamba_cache(
     mamba_cache: MambaCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.MambaCache` with python objects."""
     flat = [
         (k, getattr(mamba_cache, k))
-        for k in [
-            # "max_batch_size",  # new in transformers==4.47
-            # "intermediate_size",
-            # "ssm_state_size",
-            # "conv_kernel_size",
-            "conv_states",
-            "ssm_states",
-        ]
+        for k in ["conv_states", "ssm_states"]
         if hasattr(mamba_cache, k)
     ]
     return [f[1] for f in flat], [f[0] for f in flat]
@@ -242,8 +219,6 @@ def __init__(self):
             self.conv_kernel = conv_states.shape[3]
             self.num_hidden_layers = conv_states.shape[0]

-    from transformers.cache_utils import MambaCache
-
     cache = MambaCache(
         _config(),
         max_batch_size=1,
@@ -348,7 +323,7 @@ def unflatten_encoder_decoder_cache(
 ) -> EncoderDecoderCache:
     """Restores a :class:`transformers.cache_utils.EncoderDecoderCache` from python objects."""
     dictionary = torch.utils._pytree._dict_unflatten(values, context)
-    return transformers.cache_utils.EncoderDecoderCache(**dictionary)
+    return EncoderDecoderCache(**dictionary)


 #################
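
Note on the flatten/unflatten contract these functions follow, reduced to a toy stand-in: a flatten function returns (leaves, context) and its unflatten partner rebuilds the object from them. ToyCache and the two helpers below are hypothetical; the real classes are transformers' MambaCache and EncoderDecoderCache.

from typing import Any, List, Tuple

class ToyCache:
    # hypothetical stand-in with the same two fields flatten_mamba_cache keeps
    def __init__(self, conv_states, ssm_states):
        self.conv_states = conv_states
        self.ssm_states = ssm_states

def flatten_toy_cache(cache: ToyCache) -> Tuple[List[Any], List[str]]:
    # leaves in a fixed order, field names as the context
    flat = [(k, getattr(cache, k)) for k in ["conv_states", "ssm_states"]]
    return [f[1] for f in flat], [f[0] for f in flat]

def unflatten_toy_cache(values: List[Any], context: List[str]) -> ToyCache:
    # pair the context back with the leaves and rebuild the object
    return ToyCache(**dict(zip(context, values)))

cache = ToyCache(conv_states=[1.0], ssm_states=[2.0])
leaves, ctx = flatten_toy_cache(cache)
assert unflatten_toy_cache(leaves, ctx).conv_states == [1.0]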
