diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index 47a01f9c..2f02ddd7 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -4,6 +4,8 @@ Change Logs 0.5.0 +++++ +* :pr:`92`: support error distribution in ``max_diff`` +* :pr:`91`: enable strings in ``guess_dynamic_shapes`` * :pr:`88`, :pr:`89`: extends ``steal_forward`` to dump input, outputs in onnx models * :pr:`83`, :pr:`85`: improves the automated rewriting of control flow (test) diff --git a/_unittests/ut_helpers/test_helper.py b/_unittests/ut_helpers/test_helper.py index 42a0d395..5a380bdb 100644 --- a/_unittests/ut_helpers/test_helper.py +++ b/_unittests/ut_helpers/test_helper.py @@ -127,7 +127,7 @@ def test_print_pretty_onnx(self): ) self.print_onnx(proto) self.print_model(proto) - self.dump_onnx("test_print_pretty_onnx", proto) + self.dump_onnx("test_print_pretty.onnx", proto) self.check_ort(proto) self.assertNotEmpty(proto) self.assertEmpty(None) @@ -203,6 +203,101 @@ def test_max_diff_verbose(self): d = string_diff(diff) self.assertIsInstance(d, str) + def test_max_diff_hist_array(self): + x = np.arange(12).reshape((3, 4)).astype(dtype=np.float32) + y = x.copy() + y[0, 1] += 0.1 + y[0, 2] += 0.01 + y[0, 3] += 0.001 + y[1, 1] += 0.0001 + y[1, 2] += 1 + y[2, 2] += 10 + y[1, 3] += 100 + y[2, 1] += 1000 + diff = max_diff(x, y, hist=True) + self.assertEqual( + diff["rep"], + { + ">0.0": 8, + ">0.0001": 8, + ">0.001": 6, + ">0.01": 5, + ">0.1": 5, + ">1.0": 3, + ">10.0": 2, + ">100.0": 1, + }, + ) + + def test_max_diff_hist_array_string_diff(self): + x = np.arange(12).reshape((3, 4)).astype(dtype=np.float32) + y = x.copy() + y[0, 1] += 0.1 + y[0, 2] += 0.01 + y[0, 3] += 0.001 + y[1, 1] += 0.0001 + y[1, 2] += 1 + y[2, 2] += 10 + y[1, 3] += 100 + y[2, 1] += 1000 + diff = max_diff(x, y, hist=True) + s = string_diff(diff) + self.assertEndsWith( + "/#8>0.0-#8>0.0001-#6>0.001-#5>0.01-#5>0.1-#3>1.0-#2>10.0-#1>100.0", s + ) + + def test_max_diff_hist_tensor(self): + x = torch.arange(12).reshape((3, 4)).to(dtype=torch.float32) + y = x.clone() + y[0, 1] += 0.1 + y[0, 2] += 0.01 + y[0, 3] += 0.001 + y[1, 1] += 0.0001 + y[1, 2] += 1 + y[2, 2] += 10 + y[1, 3] += 100 + y[2, 1] += 1000 + diff = max_diff(x, y, hist=True) + self.assertEqual( + diff["rep"], + { + ">0.0": 8, + ">0.0001": 8, + ">0.001": 6, + ">0.01": 5, + ">0.1": 5, + ">1.0": 3, + ">10.0": 2, + ">100.0": 1, + }, + ) + + def test_max_diff_hist_tensor_composed(self): + x = torch.arange(12).reshape((3, 4)).to(dtype=torch.float32) + y = x.clone() + y[0, 1] += 0.1 + y[0, 2] += 0.01 + y[0, 3] += 0.001 + y[1, 1] += 0.0001 + y[1, 2] += 1 + y[2, 2] += 10 + y[1, 3] += 100 + y[2, 1] += 1000 + diff = max_diff([x, (x, {"e": x})], [y, (y, {"e": y})], hist=True) + self.assertEqual( + diff["rep"], + { + ">0.0": 24, + ">0.0001": 24, + ">0.001": 18, + ">0.01": 15, + ">0.1": 15, + ">1.0": 9, + ">10.0": 6, + ">100.0": 3, + }, + ) + def test_type_info(self): for tt in [ onnx.TensorProto.FLOAT, diff --git a/_unittests/ut_helpers/test_torch_test_helper.py b/_unittests/ut_helpers/test_torch_test_helper.py index ccbef435..6416188d 100644 --- a/_unittests/ut_helpers/test_torch_test_helper.py +++ b/_unittests/ut_helpers/test_torch_test_helper.py @@ -9,6 +9,7 @@ dummy_llm, to_numpy, is_torchdynamo_exporting, + model_statistics, steal_forward, replace_string_by_dynamic, to_any, @@ -172,14 +173,15 @@ def forward(self, x, y): else: print("output", k, v) print(string_type(restored, with_shape=True)) + l1, l2 = 151, 160 self.assertEqual( [ - ("-Model-159", 0, "I"), - ("-Model-159", 0, "O"), - ("s1-SubModel-150", 0, "I"), - 
("s1-SubModel-150", 0, "O"), - ("s2-SubModel-150", 0, "I"), - ("s2-SubModel-150", 0, "O"), + (f"-Model-{l2}", 0, "I"), + (f"-Model-{l2}", 0, "O"), + (f"s1-SubModel-{l1}", 0, "I"), + (f"s1-SubModel-{l1}", 0, "O"), + (f"s2-SubModel-{l1}", 0, "I"), + (f"s2-SubModel-{l1}", 0, "O"), ], sorted(restored), ) @@ -279,6 +281,32 @@ def test_torch_deepcopy_sliding_windon_cache(self): def test_torch_deepcopy_none(self): self.assertEmpty(torch_deepcopy(None)) + def test_model_statistics(self): + class Model(torch.nn.Module): + def __init__(self): + super().__init__() + self.p1 = torch.nn.Parameter(torch.tensor([1], dtype=torch.float32)) + self.b1 = torch.nn.Buffer(torch.tensor([1], dtype=torch.float32)) + + def forward(self, x, y=None): + return x + y + self.p1 + self.b1 + + model = Model() + x, y = torch.rand((3, 4)), torch.rand((3, 4)) + model(x, y) + stat = model_statistics(model) + self.assertEqual( + { + "type": "Model", + "n_modules": 1, + "param_size": 4, + "buffer_size": 4, + "float32": 8, + "size_mb": 0, + }, + stat, + ) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/onnx_diagnostic/ext_test_case.py b/onnx_diagnostic/ext_test_case.py index 66e468c4..7e006e57 100644 --- a/onnx_diagnostic/ext_test_case.py +++ b/onnx_diagnostic/ext_test_case.py @@ -1070,6 +1070,11 @@ def assertStartsWith(self, prefix: str, full: str): if not full.startswith(prefix): raise AssertionError(f"prefix={prefix!r} does not start string {full!r}.") + def assertEndsWith(self, suffix: str, full: str): + """In the name""" + if not full.endswith(suffix): + raise AssertionError(f"suffix={suffix!r} does not end string {full!r}.") + def capture(self, fct: Callable): """ Runs a function and capture standard output and error. diff --git a/onnx_diagnostic/helpers/helper.py b/onnx_diagnostic/helpers/helper.py index b4ec9326..c2a69195 100644 --- a/onnx_diagnostic/helpers/helper.py +++ b/onnx_diagnostic/helpers/helper.py @@ -2,7 +2,7 @@ import enum import inspect from dataclasses import is_dataclass, fields -from typing import Any, Callable, Dict, List, Optional, Set +from typing import Any, Callable, Dict, List, Optional, Set, Union import numpy as np @@ -854,6 +854,12 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any: ) +def _make_debug_info(msg, level, debug_info, verbose) -> Optional[List[str]]: + return ( + [*(debug_info if debug_info else []), f"{' ' * level}{msg}"] if verbose > 5 else None + ) + + def max_diff( expected: Any, got: Any, @@ -865,6 +871,7 @@ def max_diff( end: int = -1, _index: int = 0, allow_unique_tensor_with_list_of_one_element: bool = True, + hist: Optional[Union[bool, List[float]]] = None, ) -> Dict[str, float]: """ Returns the maximum discrepancy. 
@@ -880,6 +887,7 @@ :param _index: used with begin and end :param allow_unique_tensor_with_list_of_one_element: allow a comparison between a single tensor and a list of one tensor + :param hist: compute a histogram of the discrepancies (``True`` or a list of thresholds) :return: dictionary with many values * abs: max absolute error @@ -894,6 +902,20 @@ """ if expected is None and got is None: return dict(abs=0, rel=0, sum=0, n=0, dnan=0) + + _dkws_ = dict( + verbose=verbose, + level=level + 1, + begin=begin, + end=end, + _index=_index, + hist=hist, + ) + _dkws = {**_dkws_, "flatten": flatten} + _dkwsf = {**_dkws_, "flatten": False} + + _debug = lambda msg: _make_debug_info(msg, level, debug_info, verbose) # noqa: E731 + if allow_unique_tensor_with_list_of_one_element: if hasattr(expected, "shape") and isinstance(got, (list, tuple)) and len(got) == 1: return max_diff( @@ -904,6 +926,7 @@ flatten=False, debug_info=debug_info, allow_unique_tensor_with_list_of_one_element=False, + hist=hist, ) return max_diff( expected, @@ -916,44 +939,17 @@ end=end, _index=_index, allow_unique_tensor_with_list_of_one_element=False, + hist=hist, ) if hasattr(expected, "to_tuple"): if verbose >= 6: print(f"[max_diff] to_tuple1: {string_type(expected)} ? {string_type(got)}") - return max_diff( - expected.to_tuple(), - got, - verbose=verbose, - level=level + 1, - debug_info=( - [*(debug_info if debug_info else []), f"{' ' * level}to_tupleA"] - if verbose > 5 - else None - ), - begin=begin, - end=end, - _index=_index, - flatten=flatten, - ) + return max_diff(expected.to_tuple(), got, debug_info=_debug("to_tuple1"), **_dkws) if hasattr(got, "to_tuple"): if verbose >= 6: print(f"[max_diff] to_tuple2: {string_type(expected)} ? {string_type(got)}") - return max_diff( - expected, - got.to_tuple(), - verbose=verbose, - level=level + 1, - debug_info=( - [*(debug_info if debug_info else []), f"{' ' * level}to_tupleB"] - if verbose > 5 - else None - ), - begin=begin, - end=end, - _index=_index, - flatten=flatten, - ) + return max_diff(expected, got.to_tuple(), debug_info=_debug("to_tuple2"), **_dkws) if isinstance(got, (list, tuple)): if len(got) != 1: @@ -983,17 +979,7 @@ return dict(abs=np.inf, rel=np.inf, sum=np.inf, n=np.inf, dnan=np.inf) if verbose >= 6: print(f"[max_diff] list,tuple,1: {string_type(expected)} ? {string_type(got)}") - return max_diff( - expected, - got[0], - verbose=verbose, - level=level + 1, - begin=begin, - end=end, - _index=_index, - debug_info=debug_info, - flatten=flatten, - ) + return max_diff(expected, got[0], debug_info=_debug("lt1"), **_dkws) if isinstance(expected, (tuple, list)): if verbose >= 6: @@ -1001,17 +987,7 @@ if len(expected) == 1 and not isinstance(got, type(expected)): if verbose >= 6: print(f"[max_diff] list,tuple,3: {string_type(expected)} ? {string_type(got)}") - return max_diff( - expected[0], - got, - verbose=verbose, - level=level + 1, - begin=begin, - end=end, - _index=_index, - debug_info=debug_info, - flatten=flatten, - ) + return max_diff(expected[0], got, debug_info=_debug("lt2"), **_dkws) if not isinstance(got, (tuple, list)): if verbose >= 6: print(f"[max_diff] list,tuple,4: {string_type(expected)} ? 
{string_type(got)}") @@ -1047,11 +1023,6 @@ def max_diff( return max_diff( flat_a, flat_b, - verbose=verbose, - level=level, - begin=begin, - end=end, - _index=_index, debug_info=( [ *(debug_info if debug_info else []), @@ -1063,7 +1034,7 @@ def max_diff( if verbose > 5 else None ), - flatten=False, + **_dkwsf, ) if verbose > 2: @@ -1085,7 +1056,7 @@ def max_diff( if verbose >= 6: print(f"[max_diff] list,tuple,6: {string_type(expected)} ? {string_type(got)}") - am, rm, sm, n, dn = 0, 0, 0.0, 0.0, 0 + am, rm, sm, n, dn, drep = 0, 0, 0.0, 0.0, 0, None for ip, (e, g) in enumerate(zip(expected, got)): d = max_diff( e, @@ -1104,20 +1075,31 @@ def max_diff( end=end, _index=_index + ip, flatten=flatten, + hist=hist, ) am = max(am, d["abs"]) dn = max(dn, d["dnan"]) rm = max(rm, d["rel"]) sm += d["sum"] n += d["n"] - return dict(abs=am, rel=rm, sum=sm, n=n, dnan=dn) + if "rep" in d: + if drep is None: + drep = d["rep"].copy() + else: + for k, v in d["rep"].items(): + drep[k] += v + res = dict(abs=am, rel=rm, sum=sm, n=n, dnan=dn) + if drep: + res["rep"] = drep + return res if isinstance(expected, dict): if verbose >= 6: print(f"[max_diff] dict: {string_type(expected)} ? {string_type(got)}") - assert ( - begin == 0 and end == -1 - ), f"begin={begin}, end={end} not compatible with dictionaries" + assert begin == 0 and end == -1, ( + f"begin={begin}, end={end} not compatible with dictionaries, " + f"keys={sorted(expected)}" + ) if isinstance(got, dict): if len(expected) != len(got): return dict(abs=np.inf, rel=np.inf, sum=np.inf, n=np.inf, dnan=np.inf) @@ -1127,30 +1109,15 @@ def max_diff( return max_diff( [expected[k] for k in keys], [got[k] for k in keys], - level=level, - flatten=flatten, - debug_info=debug_info, - begin=begin, - end=end, - _index=_index, - verbose=verbose, + debug_info=_debug("dict1"), + **_dkws, ) if not isinstance(got, (tuple, list)): return dict(abs=np.inf, rel=np.inf, sum=np.inf, n=np.inf, dnan=np.inf) if len(expected) != len(got): return dict(abs=np.inf, rel=np.inf, sum=np.inf, n=np.inf, dnan=np.inf) - return max_diff( - list(expected.values()), - got, - level=level, - flatten=flatten, - debug_info=debug_info, - begin=begin, - end=end, - _index=_index, - verbose=verbose, - ) + return max_diff(list(expected.values()), got, debug_info=_debug("dict2"), **_dkws) import torch @@ -1159,7 +1126,6 @@ def max_diff( expected = expected.detach().cpu().numpy() if isinstance(got, torch.Tensor): got = got.detach().cpu().numpy() - if verbose >= 6: print(f"[max_diff] tensor: {string_type(expected)} ? 
{string_type(got)}") @@ -1253,7 +1219,16 @@ def max_diff( f"_index={_index}" ) - return dict(abs=abs_diff, rel=rel_diff, sum=sum_diff, n=n_diff, dnan=nan_diff) + res = dict(abs=abs_diff, rel=rel_diff, sum=sum_diff, n=n_diff, dnan=nan_diff) + if hist: + if isinstance(hist, bool): + hist = np.array([0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100], dtype=diff.dtype) + ind = np.digitize(diff.reshape((-1,)), hist, right=True) + cou = np.bincount(ind, minlength=ind.shape[0] + 1) + res["rep"] = dict( + zip([f">{x}" for x in hist], [int(i) for i in (cou.sum() - np.cumsum(cou))]) + ) + return res if isinstance(expected, torch.Tensor) and isinstance(got, torch.Tensor): if verbose >= 6: @@ -1339,7 +1314,21 @@ def max_diff( f"_index={_index}" ) - return dict(abs=abs_diff, rel=rel_diff, sum=sum_diff, n=n_diff, dnan=nan_diff) + res = dict(abs=abs_diff, rel=rel_diff, sum=sum_diff, n=n_diff, dnan=nan_diff) + if hist: + if isinstance(hist, bool): + hist = torch.tensor( + [0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100], dtype=diff.dtype + ) + ind = torch.bucketize(diff.reshape((-1,)), hist, right=False) + cou = torch.bincount(ind, minlength=ind.shape[0] + 1) + res["rep"] = dict( + zip( + [f">{x}" for x in hist], + [int(i) for i in (cou.sum() - torch.cumsum(cou, 0))], + ) + ) + return res if "SquashedNormal" in expected.__class__.__name__: if verbose >= 6: @@ -1348,16 +1337,7 @@ def max_diff( expected.mean.detach().to("cpu"), expected.scale.detach().to("cpu"), ) - return max_diff( - values, - got, - verbose=verbose, - level=level + 1, - begin=begin, - end=end, - _index=_index, - flatten=flatten, - ) + return max_diff(values, got, debug_info=_debug("SquashedNormal"), **_dkws) if expected.__class__ in torch.utils._pytree.SUPPORTED_NODES: if got.__class__ not in torch.utils._pytree.SUPPORTED_NODES: @@ -1370,15 +1350,7 @@ def max_diff( expected_args, _spec = torch.utils._pytree.tree_flatten(expected) got_args, _spec = torch.utils._pytree.tree_flatten(got) return max_diff( - expected_args, - got_args, - level=level, - flatten=flatten, - debug_info=debug_info, - begin=begin, - end=end, - _index=_index, - verbose=verbose, + expected_args, got_args, debug_info=_debug(expected.__class__.__name__), **_dkws ) # backup function in case pytorch does not know how to serialize. 
@@ -1390,12 +1362,14 @@ [expected.key_cache, expected.value_cache], [got.key_cache, got.value_cache], verbose=verbose, + hist=hist, ) if isinstance(got, tuple) and len(got) == 2: return max_diff( [expected.key_cache, expected.value_cache], [got[0], got[1]], - verbose=verbose, + debug_info=_debug(expected.__class__.__name__), + **_dkws, ) raise AssertionError( f"DynamicCache not fully implemented with classes " @@ -1412,12 +1386,14 @@ [expected.key_cache, expected.value_cache], [got.key_cache, got.value_cache], verbose=verbose, + hist=hist, ) if isinstance(got, tuple) and len(got) == 2: return max_diff( [expected.key_cache, expected.value_cache], [got[0], got[1]], - verbose=verbose, + debug_info=_debug(expected.__class__.__name__), + **_dkws, ) raise AssertionError( f"SlidingWindowCache not fully implemented with classes " @@ -1437,12 +1413,14 @@ [expected.self_attention_cache, expected.cross_attention_cache], [got.self_attention_cache, got.cross_attention_cache], verbose=verbose, + hist=hist, ) if isinstance(got, tuple) and len(got) == 2: return max_diff( [expected.self_attention_cache, expected.cross_attention_cache], [got[0], got[1]], - verbose=verbose, + debug_info=_debug(expected.__class__.__name__), + **_dkws, ) raise AssertionError( f"EncoderDecoderCache not fully implemented with classes " @@ -1466,13 +1444,8 @@ return max_diff( [getattr(expected, k) for k in atts], [getattr(got, k) for k in atts], - level=level, - flatten=flatten, - debug_info=debug_info, - begin=begin, - end=end, - _index=_index, - verbose=verbose, + debug_info=_debug(expected.__class__.__name__), + **_dkws, ) raise AssertionError( @@ -1485,10 +1458,20 @@ def string_diff(diff: Dict[str, Any]) -> str: """Renders discrepancies return by :func:`max_diff` into one string.""" # dict(abs=, rel=, sum=, n=n_diff, dnan=) + suffix = "" + if "rep" in diff: + rows = [] + for k, v in diff["rep"].items(): + if v > 0: + rows.append(f"#{v}{k}") + suffix = "-".join(rows) + suffix = f"/{suffix}" if suffix else "" if diff.get("dnan", None): if diff["abs"] == 0 or diff["rel"] == 0: - return f"abs={diff['abs']}, rel={diff['rel']}, dnan={diff['dnan']}" - return f"abs={diff['abs']}, rel={diff['rel']}, n={diff['n']}, dnan={diff['dnan']}" + return f"abs={diff['abs']}, rel={diff['rel']}, dnan={diff['dnan']}{suffix}" + return ( + f"abs={diff['abs']}, rel={diff['rel']}, n={diff['n']}, dnan={diff['dnan']}{suffix}" + ) if diff["abs"] == 0 or diff["rel"] == 0: - return f"abs={diff['abs']}, rel={diff['rel']}" - return f"abs={diff['abs']}, rel={diff['rel']}, n={diff['n']}" + return f"abs={diff['abs']}, rel={diff['rel']}{suffix}" + return f"abs={diff['abs']}, rel={diff['rel']}, n={diff['n']}{suffix}" diff --git a/onnx_diagnostic/helpers/torch_test_helper.py b/onnx_diagnostic/helpers/torch_test_helper.py index e3f60a68..aa349eb7 100644 --- a/onnx_diagnostic/helpers/torch_test_helper.py +++ b/onnx_diagnostic/helpers/torch_test_helper.py @@ -487,3 +487,36 @@ def torch_deepcopy(value: Any) -> Any: # We should have a code using serialization, deserialization assuming a model # cannot be exported without them. 
raise NotImplementedError(f"torch_deepcopy not implemented for type {type(value)}") + + +def model_statistics(model: torch.nn.Module): + """Returns statistics on a model in a dictionary.""" + n_subs = len(list(model.modules())) + sizes = {} + param_size = 0 + for param in model.parameters(): + size = param.nelement() * param.element_size() + param_size += size + name = str(param.dtype).replace("torch.", "") + if name not in sizes: + sizes[name] = 0 + sizes[name] += size + + buffer_size = 0 + for buffer in model.buffers(): + size = buffer.nelement() * buffer.element_size() + buffer_size += size + name = str(buffer.dtype).replace("torch.", "") + if name not in sizes: + sizes[name] = 0 + sizes[name] += size + + res = dict( + type=model.__class__.__name__, + n_modules=n_subs, + param_size=param_size, + buffer_size=buffer_size, + size_mb=(param_size + buffer_size) // 2**20, + ) + res.update(sizes) + return res