
Commit f483e37

Commit message: other fixes
Parent: c8dd61a

3 files changed: +71 -57 lines changed

onnx_diagnostic/export/dynamic_shapes.py

Lines changed: 1 addition & 1 deletion
@@ -319,7 +319,7 @@ def guess_dynamic_shape_object(self, *objs: Any, msg: Optional[Callable] = None)
             f"All instances of argument {i} are not of the same class but {kcl}, "
             f"types should be the same."
         )
-        col_args = [torch.utils._pytree.tree_flatten(o) for o in objs]
+        col_args = [torch.utils._pytree.tree_flatten(o)[0] for o in objs]
         kc = set(len(col_args) for o in objs)
         assert len(kc) == 1, (
             f"All instances of type {kcl.pop()} are not serialized into the same number "

onnx_diagnostic/helpers/helper.py

Lines changed: 57 additions & 54 deletions
@@ -343,65 +343,23 @@ def string_type(

     # others classes

-    if type(obj).__name__ == "MambaCache":
-        c = string_type(
-            obj.conv_states,
-            with_shape=with_shape,
-            with_min_max=with_min_max,
-            with_device=with_device,
-            limit=limit,
-        )
-        d = string_type(
-            obj.ssm_states,
+    if obj.__class__ in torch.utils._pytree.SUPPORTED_NODES:
+        args, _spec = torch.utils._pytree.tree_flatten(obj)
+        att = string_type(
+            args,
             with_shape=with_shape,
             with_min_max=with_min_max,
             with_device=with_device,
             limit=limit,
         )
-        return f"MambaCache(conv_states={c}, ssm_states={d})"
+        return f"{obj.__class__.__name__}[serialized]({att})"
+
     if type(obj).__name__ == "Node" and hasattr(obj, "meta"):
         # torch.fx.node.Node
         return f"%{obj.target}"
     if type(obj).__name__ == "ValueInfoProto":
         return f"OT{obj.type.tensor_type.elem_type}"

-    if obj.__class__.__name__ == "DynamicCache":
-        kc = string_type(
-            obj.key_cache,
-            with_shape=with_shape,
-            with_min_max=with_min_max,
-            with_device=with_device,
-            limit=limit,
-        )
-        vc = string_type(
-            obj.value_cache,
-            with_shape=with_shape,
-            with_min_max=with_min_max,
-            with_device=with_device,
-            limit=limit,
-        )
-        return f"{obj.__class__.__name__}(key_cache={kc}, value_cache={vc})"
-
-    if obj.__class__.__name__ == "EncoderDecoderCache":
-        att = string_type(
-            obj.self_attention_cache,
-            with_shape=with_shape,
-            with_min_max=with_min_max,
-            with_device=with_device,
-            limit=limit,
-        )
-        cross = string_type(
-            obj.cross_attention_cache,
-            with_shape=with_shape,
-            with_min_max=with_min_max,
-            with_device=with_device,
-            limit=limit,
-        )
-        return (
-            f"{obj.__class__.__name__}(self_attention_cache={att}, "
-            f"cross_attention_cache={cross})"
-        )
-
     if obj.__class__.__name__ == "BatchFeature":
         s = string_type(
             obj.data,
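
The new branch keys off torch.utils._pytree.SUPPORTED_NODES, the registry PyTorch keeps of every class registered as a pytree node, so any container with a registered flatten function now gets the generic "[serialized]" rendering instead of a hand-written one. A minimal sketch of how a class lands in that registry, using a hypothetical Pair container (assumes a recent PyTorch where register_pytree_node is available):

import torch

class Pair:
    # Hypothetical two-slot container, used only to illustrate registration.
    def __init__(self, left, right):
        self.left, self.right = left, right

# Registration adds Pair to torch.utils._pytree.SUPPORTED_NODES, which is
# exactly the membership test the new string_type branch performs.
torch.utils._pytree.register_pytree_node(
    Pair,
    lambda p: ([p.left, p.right], None),     # flatten: (children, context)
    lambda children, _ctx: Pair(*children),  # unflatten: rebuild from children
)

assert Pair in torch.utils._pytree.SUPPORTED_NODES
leaves, _spec = torch.utils._pytree.tree_flatten(Pair(torch.zeros(2), torch.ones(2)))
assert len(leaves) == 2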
@@ -440,19 +398,64 @@ def string_type(
     if isinstance(obj, torch.utils._pytree.TreeSpec):
         return repr(obj).replace(" ", "").replace("\n", " ")

-    if ignore:
-        return f"{obj.__class__.__name__}(...)"
+    # to avoid failures

-    if obj.__class__ in torch.utils._pytree.SUPPORTED_NODES:
-        args, _spec = torch.utils._pytree.tree_flatten(obj)
+    if type(obj).__name__ == "MambaCache":
+        c = string_type(
+            obj.conv_states,
+            with_shape=with_shape,
+            with_min_max=with_min_max,
+            with_device=with_device,
+            limit=limit,
+        )
+        d = string_type(
+            obj.ssm_states,
+            with_shape=with_shape,
+            with_min_max=with_min_max,
+            with_device=with_device,
+            limit=limit,
+        )
+        return f"MambaCache(conv_states={c}, ssm_states={d})"
+
+    if obj.__class__.__name__ == "DynamicCache":
+        kc = string_type(
+            obj.key_cache,
+            with_shape=with_shape,
+            with_min_max=with_min_max,
+            with_device=with_device,
+            limit=limit,
+        )
+        vc = string_type(
+            obj.value_cache,
+            with_shape=with_shape,
+            with_min_max=with_min_max,
+            with_device=with_device,
+            limit=limit,
+        )
+        return f"{obj.__class__.__name__}(key_cache={kc}, value_cache={vc})"
+
+    if obj.__class__.__name__ == "EncoderDecoderCache":
         att = string_type(
-            args,
+            obj.self_attention_cache,
             with_shape=with_shape,
             with_min_max=with_min_max,
             with_device=with_device,
             limit=limit,
         )
-        return f"{obj.__class__.__name__}({att})"
+        cross = string_type(
+            obj.cross_attention_cache,
+            with_shape=with_shape,
+            with_min_max=with_min_max,
+            with_device=with_device,
+            limit=limit,
+        )
+        return (
+            f"{obj.__class__.__name__}(self_attention_cache={att}, "
+            f"cross_attention_cache={cross})"
+        )
+
+    if ignore:
+        return f"{obj.__class__.__name__}(...)"

     raise AssertionError(f"Unsupported type {type(obj).__name__!r} - {type(obj)}")
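
To make the resulting dispatch order concrete: the generic pytree branch (moved earlier in this commit) fires when the cache class is registered, and the class-name branches above now only run as fallbacks for unregistered classes, so string_type no longer raises on a plain DynamicCache either way. A hedged usage sketch, assuming the import path implied by this repository's layout and an installed transformers:

import torch
from transformers.cache_utils import DynamicCache
from onnx_diagnostic.helpers.helper import string_type

cache = DynamicCache()
cache.update(torch.zeros(1, 2, 3, 8), torch.zeros(1, 2, 3, 8), layer_idx=0)

# If DynamicCache is registered in torch.utils._pytree.SUPPORTED_NODES (recent
# transformers, or after applying this package's export patches), the generic
# branch yields "DynamicCache[serialized](...)"; otherwise the fallback spells
# out key_cache and value_cache. Neither path hits the final AssertionError.
print(string_type(cache, with_shape=True))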

onnx_diagnostic/torch_export_patches/onnx_export_serialization.py

Lines changed: 13 additions & 2 deletions
@@ -97,6 +97,10 @@ def flatten_dynamic_cache(
     dynamic_cache: transformers.cache_utils.DynamicCache,
 ) -> Tuple[List[Any], torch.utils._pytree.Context]:
     """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
+    import transformers.cache_utils
+
+    if hasattr(transformers.cache_utils, "_flatten_dynamic_cache"):
+        return transformers.cache_utils._flatten_dynamic_cache(dynamic_cache)
     flat = [
         (k, getattr(dynamic_cache, k))
         for k in ["key_cache", "value_cache"]
@@ -111,7 +115,10 @@ def flatten_with_keys_dynamic_cache(d: Dict[Any, Any]) -> Tuple[
 ]:
     """Serializes a :class:`transformers.cache_utils.DynamicCache` with python objects."""
     import torch
+    import transformers.cache_utils

+    if hasattr(transformers.cache_utils, "_flatten_with_keys_dynamic_cache"):
+        return transformers.cache_utils._flatten_with_keys_dynamic_cache(d)
     values, context = flatten_dynamic_cache(d)
     return [(torch.utils._pytree.MappingKey(k), v) for k, v in zip(context, values)], context

@@ -122,9 +129,13 @@ def unflatten_dynamic_cache(
     output_type=None,
 ) -> transformers.cache_utils.DynamicCache:
     """Restores a :class:`transformers.cache_utils.DynamicCache` from python objects."""
-    from transformers.cache_utils import DynamicCache
+    import transformers.cache_utils
+
+    if hasattr(transformers.cache_utils, "_unflatten_dynamic_cache"):
+        assert output_type is None, f"output_type={output_type} not supported"
+        return transformers.cache_utils._unflatten_dynamic_cache(values, context)

-    cache = DynamicCache()
+    cache = transformers.cache_utils.DynamicCache()
     values = dict(zip(context, values))
     for k, v in values.items():
         setattr(cache, k, v)
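
As a sanity check, the helpers compose into a round trip whichever branch executes. A hedged sketch, assuming this repository's module path and a transformers version compatible with these helpers:

import torch
from transformers.cache_utils import DynamicCache
from onnx_diagnostic.torch_export_patches.onnx_export_serialization import (
    flatten_dynamic_cache,
    unflatten_dynamic_cache,
)

cache = DynamicCache()
cache.update(torch.ones(1, 2, 3, 8), torch.ones(1, 2, 3, 8), layer_idx=0)

values, context = flatten_dynamic_cache(cache)
restored = unflatten_dynamic_cache(values, context)

# Whether the native transformers helpers ran or the local fallback did,
# the rebuilt object is a DynamicCache holding equal tensors.
assert isinstance(restored, DynamicCache)
assert all(torch.equal(a, b) for a, b in zip(cache.key_cache, restored.key_cache))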
