add a `stats` command line, fix issues in tensor_statistics and onnx_dtype_name

xadupre · xadupre · commit 90c9064ac02a · 2025-05-13T13:16:27.000+02:00
diff --git a/_unittests/ut_helpers/test_onnx_helper.py b/_unittests/ut_helpers/test_onnx_helper.py
@@ -246,7 +246,10 @@ def test_iterate_function(self):
     def test_statistics(self):
         rnd = np.random.rand(40, 50).astype(np.float16)
         stat = tensor_statistics(rnd)
-        print(stat)
+        self.assertEqual(stat["stype"], "FLOAT16")  # stype is the ONNX type name
+        rnd = np.random.rand(40, 50).astype(np.float32)  # same check with float32
+        stat = tensor_statistics(rnd)
+        self.assertEqual(stat["stype"], "FLOAT")  # float32 is expected as FLOAT
 
 
 if __name__ == "__main__":
diff --git a/_unittests/ut_xrun_doc/test_command_lines.py b/_unittests/ut_xrun_doc/test_command_lines.py
@@ -8,6 +8,7 @@
     get_parser_find,
     get_parser_lighten,
     get_parser_print,
+    get_parser_stats,
     get_parser_unlighten,
     get_parser_validate,
 )
@@ -63,6 +64,13 @@ def test_parser_validate(self):
         text = st.getvalue()
         self.assertIn("mid", text)
 
+    def test_parser_stats(self):  # the help of the stats parser must be printable
+        st = StringIO()
+        with redirect_stdout(st):  # print_help writes to stdout, capture it
+            get_parser_stats().print_help()
+        text = st.getvalue()
+        self.assertIn("input", text)  # the --input option must be listed
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/_unittests/ut_xrun_doc/test_command_lines_exe.py b/_unittests/ut_xrun_doc/test_command_lines_exe.py
@@ -20,6 +20,15 @@ def test_parser_print(self):
         text = st.getvalue()
         self.assertIn("Add", text)
 
+    def test_parser_stats(self):  # runs the stats command end to end
+        output = self.get_dump_file("test_parser_stats.xlsx")  # .xlsx: Excel writer
+        st = StringIO()
+        with redirect_stdout(st):
+            main(["stats", "-i", self.dummy_path, "-o", output])
+        text = st.getvalue()
+        self.assertIn("processing", text)  # printed per tensor when verbose (default 1)
+        self.assertExists(output)  # the statistics file must have been written
+
     def test_parser_find(self):
         st = StringIO()
         with redirect_stdout(st):
diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py
@@ -1,5 +1,7 @@
 import argparse
 import json
+import os
+import re
 import sys
 import textwrap
 import onnx
@@ -425,6 +427,106 @@ def _cmd_validate(argv: List[Any]):
             print(f":{k},{v};")
 
 
+def get_parser_stats() -> ArgumentParser:  # builds the parser of the `stats` command
+    parser = ArgumentParser(
+        prog="stats",
+        description=dedent(
+            """
+        Prints out statistics on an ONNX model.
+        """
+        ),
+        epilog="",
+    )
+    parser.add_argument(
+        "-i",
+        "--input",
+        type=str,
+        required=True,
+        help="ONNX file",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        required=False,
+        default="",  # empty: the statistics are printed instead of being saved
+        help="outputs the statistics in a file",
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        required=False,
+        default=1,  # verbose by default, 0 silences the progress messages
+        type=int,
+        help="verbosity",
+    )
+    parser.add_argument(
+        "-e",
+        "--end",
+        required=False,
+        default=-1,  # the bound is only applied when the value is > 0
+        type=int,
+        help="ends after this many tensors",
+    )
+    parser.add_argument(
+        "-b",
+        "--begin",
+        required=False,
+        default=0,
+        type=int,
+        help="starts after this many tensors",
+    )
+    parser.add_argument(
+        "-r",
+        "--regex",
+        required=False,
+        default="",
+        type=str,
+        help="keeps only tensors whose name verifies "
+        "this regular expression, empty = no filter",
+    )
+    return parser
+
+
+def _cmd_stats(argv: List[Any]):  # runs `stats`: per-tensor statistics of a model
+    from .helpers.onnx_helper import iterator_initializer_constant, tensor_statistics
+
+    parser = get_parser_stats()
+    args = parser.parse_args(argv[1:])  # skips argv[0], presumably the command name
+    assert os.path.exists(args.input), f"Missing filename {args.input!r}"
+    if args.verbose:
+        print(f"Loading {args.input}")
+    onx = onnx.load(args.input)
+    reg = re.compile(args.regex) if args.regex else None  # empty regex = no filter
+    data = []
+    for index, (name, init) in enumerate(iterator_initializer_constant(onx)):
+        if reg and not reg.search(name):  # drop names the regex does not match
+            continue
+        if index < args.begin:  # index counts every tensor, filtered out or not
+            continue
+        if args.end > 0 and index >= args.end:  # end <= 0 means no upper bound
+            break
+        if args.verbose:
+            print(f"processing {index + 1}: {name!r}")
+        stats = tensor_statistics(init)
+        if not args.output:  # no output file: print the statistics instead
+            print(f"{name}: {stats}")
+        stats["name"] = name
+        data.append(stats)
+    if args.output:
+        if args.verbose:
+            print(f"saving into {args.output!r}")
+        import pandas
+
+        df = pandas.DataFrame(data)
+        ext = os.path.splitext(args.output)
+        if ext[-1] == ".xlsx":
+            df.to_excel(args.output, index=False)
+        else:  # every other extension is written as CSV
+            df.to_csv(args.output, index=False)
+    if args.verbose:
+        print("done.")
+
+
 def get_main_parser() -> ArgumentParser:
     parser = ArgumentParser(
         prog="onnx_diagnostic",
@@ -441,12 +543,13 @@ def get_main_parser() -> ArgumentParser:
         unlighten  - restores an onnx model produces by the previous experiment
         print      - prints the model on standard output
         validate   - validate a model
+        stats      - produces statistics on a model
         """
         ),
     )
     parser.add_argument(
         "cmd",
-        choices=["config", "find", "lighten", "print", "unlighten", "validate"],
+        choices=["config", "find", "lighten", "print", "stats", "unlighten", "validate"],
         help="Selects a command.",
     )
     return parser
@@ -460,6 +563,7 @@ def main(argv: Optional[List[Any]] = None):
         find=_cmd_find,
         config=_cmd_config,
         validate=_cmd_validate,
+        stats=_cmd_stats,
     )
 
     if argv is None:
@@ -480,6 +584,7 @@ def main(argv: Optional[List[Any]] = None):
                 find=get_parser_find,
                 config=get_parser_config,
                 validate=get_parser_validate,
+                stats=get_parser_stats,
             )
             cmd = argv[0]
             if cmd not in parsers:
diff --git a/onnx_diagnostic/helpers/onnx_helper.py b/onnx_diagnostic/helpers/onnx_helper.py
@@ -2,6 +2,7 @@
 import json
 import os
 import sys
+import warnings
 from typing import Any, Dict, Iterator, List, Optional, Sequence, Set, Tuple, Union
 import numpy as np
 import numpy.typing as npt
@@ -330,9 +331,10 @@ def onnx_dtype_name(itype: int) -> str:
         print(onnx_dtype_name(7))
     """
     for k in dir(TensorProto):
-        v = getattr(TensorProto, k)
-        if v == itype:
-            return k
+        if "FLOAT" in k or "INT" in k or "TEXT" in k or "BOOL" in k:
+            v = getattr(TensorProto, k)
+            if v == itype:
+                return k
     raise ValueError(f"Unexpected value itype: {itype}")
 
 
@@ -841,47 +843,61 @@ def tensor_statistics(tensor: Union[np.ndarray, TensorProto]) -> Dict[str, Union
 
     if isinstance(tensor, TensorProto):
         tensor = to_array_extended(tensor)
+    itype = np_dtype_to_tensor_dtype(tensor.dtype)
     stat = dict(
         mean=float(tensor.mean()),
         std=float(tensor.std()),
         shape="x".join(map(str, tensor.shape)),
         numel=tensor.size,
         size=tensor.size * size_type(tensor.dtype),
-        itype=np_dtype_to_tensor_dtype(tensor.dtype),
-        stype=onnx_dtype_name(np_dtype_to_tensor_dtype(tensor.dtype)),
+        itype=itype,
+        stype=onnx_dtype_name(itype),
         min=float(tensor.min()),
         max=float(tensor.max()),
-        nnan=np.isnan(tensor).sum(),
+        nnan=float(np.isnan(tensor).sum()),
     )
 
-    hist = np.array(
-        [
-            0,
-            1e-10,
-            1e-8,
-            1e-7,
-            1e-6,
-            1e-5,
-            0.0001,
-            0.001,
-            0.01,
-            0.1,
-            0.5,
-            1,
-            1.96,
-            10,
-            100,
-            1e3,
-            1e4,
-            1e5,
-            1e6,
-            1e7,
-            1e8,
-            1e10,
-            1e50,
-        ],
-        dtype=tensor.dtype,
-    )
+    if tensor.size < 8:
+        return stat
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        try:
+            hist = np.array(
+                [
+                    0,
+                    1e-10,
+                    1e-8,
+                    1e-7,
+                    1e-6,
+                    1e-5,
+                    0.0001,
+                    0.001,
+                    0.01,
+                    0.1,
+                    0.5,
+                    1,
+                    1.96,
+                    10,
+                    1e2,
+                    1e3,
+                    1e4,
+                    1e5,
+                    1e6,
+                    1e7,
+                    1e8,
+                    1e10,
+                    1e50,
+                ],
+                dtype=tensor.dtype,
+            )
+        except OverflowError as e:
+            from .helper import string_type
+
+            raise ValueError(
+                f"Unable to convert one value into {tensor.dtype}, "
+                f"tensor={string_type(tensor, with_shape=True)}"
+            ) from e
     hist = np.array(sorted(set(hist[~np.isinf(hist)])), dtype=tensor.dtype)
     ind = np.digitize(np.abs(tensor).reshape((-1,)), hist, right=True)
     cou = np.bincount(ind, minlength=ind.shape[0] + 1)