Commit 85960d6

dump stolen

1 parent ab387bf commit 85960d6
File tree

3 files changed: +118 -17 lines

_unittests/ut_helpers/test_torch_test_helper.py
Lines changed: 56 additions & 0 deletions

@@ -20,6 +20,7 @@
     make_mamba_cache,
     make_sliding_window_cache,
 )
+from onnx_diagnostic.helpers.mini_onnx_builder import create_input_tensors_from_onnx_model

 TFLOAT = onnx.TensorProto.FLOAT

@@ -88,6 +89,61 @@ def forward(self, x, y):
         ):
             model(*inputs)

+    @hide_stdout()
+    def test_steal_forward_dump_file(self):
+        class SubModel(torch.nn.Module):
+            def forward(self, x):
+                return x * x
+
+        class Model(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.s1 = SubModel()
+                self.s2 = SubModel()
+
+            def forward(self, x, y):
+                return self.s1(x) + self.s2(y)
+
+        inputs = torch.rand(3, 4), torch.rand(3, 4)
+        model = Model()
+        dump_file = self.get_dump_file("test_steal_forward_dump_file.onnx")
+        with steal_forward(
+            [
+                (
+                    "main",
+                    model,
+                ),
+                (" s1", model.s1),
+                (" s2", model.s2),
+            ],
+            dump_file=dump_file,
+        ):
+            res1 = model(*inputs)
+            res2 = model(*inputs)
+        self.assertExists(dump_file)
+        restored = create_input_tensors_from_onnx_model(dump_file)
+        self.assertEqual(
+            [
+                ("main", 0, "I"),
+                ("main", 0, "O"),
+                ("main", 1, "I"),
+                ("main", 1, "O"),
+                ("s1", 0, "I"),
+                ("s1", 0, "O"),
+                ("s1", 1, "I"),
+                ("s1", 1, "O"),
+                ("s2", 0, "I"),
+                ("s2", 0, "O"),
+                ("s2", 1, "I"),
+                ("s2", 1, "O"),
+            ],
+            sorted(restored),
+        )
+        self.assertEqualAny(restored["main", 0, "I"], (inputs, {}))
+        self.assertEqualAny(restored["main", 1, "I"], (inputs, {}))
+        self.assertEqualAny(restored["main", 0, "O"], res1)
+        self.assertEqualAny(restored["main", 1, "O"], res2)
+
     def test_replace_string_by_dynamic(self):
         example = {
             "input_ids": {0: "batch_size", 1: "sequence_length"},

onnx_diagnostic/helpers/mini_onnx_builder.py
Lines changed: 36 additions & 14 deletions

@@ -2,7 +2,7 @@
 import sys
 from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 import numpy as np
-from onnx import GraphProto, ModelProto, TensorProto
+from onnx import GraphProto, ModelProto, NodeProto, TensorProto
 import onnx.helper as oh
 import torch
 from .onnx_helper import dtype_to_tensor_dtype, tensor_dtype_to_np_dtype, from_array_extended

@@ -34,10 +34,7 @@ def proto_from_array(
     )

     # arr.contiguous() is slow after a transpose, maybe there is a way to optimize this.
-    if arr.is_contiguous():
-        arr_cpu = arr.cpu()
-    else:
-        arr_cpu = arr.contiguous().cpu()
+    arr_cpu = arr.cpu() if arr.is_contiguous() else arr.contiguous().cpu()

     numel = torch.numel(arr_cpu)
     element_size = arr_cpu.element_size()

@@ -91,10 +88,10 @@ class MiniOnnxBuilder:
     """

     def __init__(self, target_opset: int = 18, ir_version: int = 10, sep: str = "___"):
-        self.initializers_dict = {}
-        self.inputs = []
-        self.outputs = []
-        self.nodes = []
+        self.initializers_dict: Dict[str, Any] = {}
+        self.inputs: List[Any] = []
+        self.outputs: List[Any] = []
+        self.nodes: List[NodeProto] = []
         self.opsets = {"": target_opset}
         self.ir_version = ir_version
         self.torch = torch

@@ -270,7 +267,7 @@ def _build_initializers(
             return initializer

-        res = []
+        res: List[TensorProto] = []
         for k, v in init_dict.items():
             if isinstance(v, TensorProto):
                 res.append(v)

@@ -354,12 +351,19 @@ def _flatten_iterator(obj: Any, sep: str) -> Iterator:
                 f"Key {k!r} cannot contain '{sep}'. "
                 f"It would interfere with the serialization."
             )
+
+            def _mk(k):
+                if isinstance(k, tuple):
+                    # this assumes the tuple contains simple types
+                    return f"(({','.join(map(str,k))}))"
+                return str(k)
+
             if i == len(obj) - 1:
                 for p, o in _flatten_iterator(v, sep):
-                    yield f"dict._{k}{sep}{p}", o
+                    yield f"dict._{_mk(k)}{sep}{p}", o
             else:
                 for p, o in _flatten_iterator(v, sep):
-                    yield f"dict_{k}{sep}{p}", o
+                    yield f"dict_{_mk(k)}{sep}{p}", o
     elif obj.__class__.__name__ == "DynamicCache":
         # transformers
         import transformers

@@ -420,7 +424,7 @@ def _unflatten(
     pos: int = 0,
     level: int = 0,
     device: str = "cpu",
-) -> Tuple[int, Tuple[Any, ...]]:
+) -> Tuple[int, Any]:
     """Unflattens a list of outputs flattened with :func:`flatten_iterator`."""
     name = names[pos]
     spl = name.split(sep)

@@ -465,7 +469,7 @@ def _unflatten(

     if end:
         if prefix.startswith("dict"):
-            ty = dict
+            ty: type = dict
         elif prefix.startswith("list"):
             ty = list
         elif prefix.startswith("tuple"):

@@ -479,12 +483,30 @@ def _unflatten(
             break
         pos = next_pos

+    def _tryint(s):
+        try:
+            return int(s)
+        except (ValueError, TypeError):
+            if s in {"True", "False"}:
+                return s == "True"
+            return s
+
     def _make(ty: type, res: Any) -> Any:
         if ty.__name__ == "DynamicCache":
             r = ty()
             for k, v in res:
                 setattr(r, k, v)
             return r
+        if ty is dict:
+            d = {}
+            for k, v in res:
+                if k.startswith("((") and k.endswith("))"):
+                    spl = k[2:-2].split(",")
+                    key = tuple(_tryint(s) for s in spl)
+                else:
+                    key = _tryint(k)
+                d[key] = v
+            return d
         return ty(res)

     return next_pos, (
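
The new `_mk` / `_tryint` pair is the heart of this change: dictionary keys that are tuples of simple types get encoded into the flattened tensor names and decoded back when `_make` rebuilds the dict. A standalone re-statement of that encoding for illustration only; the `_decode` helper below is hypothetical and mirrors the branch added to `_make`:

    # encode: tuple keys become "((a,b,c))", everything else goes through str()
    def _mk(k):
        if isinstance(k, tuple):
            # assumes the tuple contains simple types (str, int, bool)
            return f"(({','.join(map(str, k))}))"
        return str(k)

    # decode one component back to int, bool, or str
    def _tryint(s):
        try:
            return int(s)
        except (ValueError, TypeError):
            if s in {"True", "False"}:
                return s == "True"
            return s

    # hypothetical inverse of _mk, mirroring the dict branch of _make
    def _decode(name):
        if name.startswith("((") and name.endswith("))"):
            return tuple(_tryint(s) for s in name[2:-2].split(","))
        return _tryint(name)

    assert _decode(_mk(("main", 0, "I"))) == ("main", 0, "I")
    assert _decode(_mk(7)) == 7

Since the encoding goes through str(), an int-like string key such as "0" would come back as the int 0; the diff's own comment, "this assumes the tuple contains simple types", flags the same limitation.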

onnx_diagnostic/helpers/torch_test_helper.py
Lines changed: 26 additions & 3 deletions

@@ -2,6 +2,7 @@
 from collections.abc import Iterable
 from typing import Any, Callable, List, Optional, Tuple, Union
 import numpy as np
+import onnx
 import torch
 from .helper import string_type
 from .cache_helper import (

@@ -10,9 +11,12 @@
     make_sliding_window_cache,
     make_mamba_cache,
 )
+from .mini_onnx_builder import create_onnx_model_from_input_tensors


-def _forward_(*args, _f=None, _fprint=string_type, _prefix="", _context=None, **kwargs):
+def _forward_(
+    *args, _f=None, _fprint=string_type, _prefix="", _context=None, _storage=None, **kwargs
+):
     assert _f is not None, "_f cannot be None"
     assert _context is not None, "_context cannot be None"
     indent = " " * (len(_prefix) - len(_prefix.lstrip()))

@@ -28,10 +32,16 @@ def _forward_(*args, _f=None, _fprint=string_type, _prefix="", _context=None, **
     if not hasattr(torch.compiler, "is_exporting") or not torch.compiler.is_exporting():
         # torch.compiler.is_exporting requires torch>=2.7
         print(f"{indent} <- args={_fprint(args, **kws)} --- kwargs={_fprint(kwargs, **kws)}")
+    if _storage is not None:
+        it = _context["iteration"]
+        key = (_prefix, it)
+        _storage[(*key, "I")] = (torch_deepcopy(args), torch_deepcopy(kwargs))
     res = _f(*args, **kwargs)
     if not hasattr(torch.compiler, "is_exporting") or not torch.compiler.is_exporting():
         print(f"{indent} -> {_fprint(res, **kws)}")
         print(f"{indent}-{_prefix}.")
+    if _storage is not None:
+        _storage[(*key, "O")] = torch_deepcopy(res)
     _context["iteration"] += 1
     return res

@@ -43,6 +53,7 @@ def steal_forward(
         List[Union[torch.nn.Module, Tuple[str, torch.nn.Module]]],
     ],
     fprint: Callable = string_type,
+    dump_file: Optional[str] = None,
     **kwargs,
 ):
     """

@@ -56,26 +67,38 @@ def steal_forward(
         :func:`onnx_diagnostic.helpers.string_type`
     :param kwargs: additional parameters sent to :func:`onnx_diagnostic.helpers.string_type`
         or any other function defined by ``fprint``
+    :param dump_file: dumps stolen inputs and outputs in an onnx model,
+        they can be restored with :func:`create_input_tensors_from_onnx_model
+        <onnx_diagnostic.helpers.mini_onnx_builder.create_input_tensors_from_onnx_model>`
     """
     context = dict(iteration=0, **kwargs)
     if "with_shape" not in context and fprint == string_type:
         context["with_shape"] = True
     if not isinstance(model, list):
         model = [model]
     keep_model_forward = {}
+    storage = {} if dump_file else None
     for mt in model:
         name, m = mt if isinstance(mt, tuple) else ("", mt)
         keep_model_forward[id(m)] = (m, m.forward)
         c = context.copy()
         c["class_name"] = m.__class__.__name__
-        m.forward = lambda *args, _f=m.forward, _fp=fprint, _c=c, _p=name, **kws: _forward_(
-            *args, _f=_f, _fprint=_fp, _context=_c, _prefix=_p, **kws
+        m.forward = lambda *args, _f=m.forward, _fp=fprint, _c=c, _p=name, _s=storage, **kws: _forward_(  # noqa: E501
+            *args, _f=_f, _fprint=_fp, _context=_c, _prefix=_p, _storage=_s, **kws
         )
     try:
         yield
     finally:
         for f in keep_model_forward.values():
             f[0].forward = f[1]
+        if dump_file:
+            proto = create_onnx_model_from_input_tensors(storage)
+            onnx.save(
+                proto,
+                dump_file,
+                save_as_external_data=False,
+                all_tensors_to_one_file=True,
+            )


 def is_torchdynamo_exporting() -> bool:
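
Putting the three files together, the new dump_file argument turns steal_forward into a capture-and-replay tool: every forward call made inside the context is deep-copied into storage, then serialized as a single onnx file on exit. A minimal usage sketch, under the assumption that the import paths follow the file layout above and with a placeholder model:

    import torch
    from onnx_diagnostic.helpers.torch_test_helper import steal_forward
    from onnx_diagnostic.helpers.mini_onnx_builder import create_input_tensors_from_onnx_model

    model = torch.nn.Linear(4, 4)
    x = torch.rand(2, 4)

    # every call made inside the context is recorded, then dumped on exit
    with steal_forward([("main", model)], dump_file="stolen.onnx"):
        model(x)

    restored = create_input_tensors_from_onnx_model("stolen.onnx")
    args, kwargs = restored["main", 0, "I"]  # inputs of the first recorded call
    output = restored["main", 0, "O"]        # its output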
