"""
Reproducible Parallelized Reduction is difficult
================================================

A reduction is a frequent operation in neural networks. It appears in layer
normalization or softmax for example. Because of the finite float precision,
the result of the computation changes with the order of the elements.
The following examples show the variation under different hypotheses
on the vector distribution.
We consider a vector :math:`X = (x_1, ..., x_n)`.
We compute its average:

.. math::

    mean(X) = \\frac{\\sum_{i=1}^n x_i}{n}

Or the normalization of the vector:

.. math::

    norm(X)_i = \\frac{x_i - \\mathbb{E}X}{\\sqrt{\\mathbb{V}X}}

We draw 128 random permutations of X. The average should not change,
and the normalized vector should contain the same values. In the first case, we compute
the difference between the highest and the lowest values obtained for the average.
In the second case, we look for the maximum difference between the original normalized
vector and the permuted one (both sorted).

The computation code
++++++++++++++++++++
"""

import itertools
from tqdm import tqdm
import numpy as np
import pandas

DATA = []

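# A minimal aside, not part of the measured experiment below: float additions are
# not associative, so the grouping chosen by a (parallel) reduction changes the
# result. The constants here are arbitrary, chosen only to make the effect visible
# in float32.
x, y, z = np.float32(1e8), np.float32(-1e8), np.float32(0.125)
print((x + y) + z)  # 0.125
print(x + (y + z))  # 0.0 because z is absorbed when added to y first
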

def str_dtype(dtype):
    """Displays numpy dtype in a nicer way."""
    if dtype == np.float64:
        return "fp64"
    if dtype == np.float32:
        return "fp32"
    if dtype == np.float16:
        return "fp16"
    raise ValueError(f"Unexpected value {dtype}")


def layer_norm(a, eps=1e-6):
    """
    Normalizes the vector a.
    The computation is done in float32 or float64.
    """
    ctype = np.float32 if a.dtype == np.float16 else a.dtype
    a32 = a.astype(ctype)
    m = a32.mean(axis=-1, keepdims=True)
    c = a32 - m
    va = np.sqrt((c * c).mean(axis=-1, keepdims=True))
    va += eps
    return (c / va).astype(a.dtype)

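# A quick, illustrative sanity check (not part of the original experiment): the
# normalized vector should have approximately zero mean and unit variance.
check = layer_norm(np.random.randn(8).astype(np.float32))
print(check.mean(), check.std())
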

def compute(values, fct):
    """
    Compare the results of function ``fct`` on a sample.
    Loops over multiple sizes, dtypes. Tries 128 times.
    """

    def make_value(base, value):
        if value.size > 1:
            return np.abs(np.sort(base) - np.sort(value)).max()
        return value

    sizes = [2, 4, 8, 16, 512, 1024, 2048, 4096, 8192]
    dtypes = [np.float64, np.float32, np.float16]
    N = list(range(128))
    exps = list(itertools.product(sizes, dtypes, N))
    data = []
    ech = None
    for size, dtype, n in tqdm(exps):
        if n == 0:
            # First trial for this (size, dtype): build the sample and the baseline result.
            ech = values[:size].astype(dtype)
            base = fct(ech)
            assert base.dtype == ech.dtype
        obs = dict(
            n=n, size=size, dtype=str_dtype(ech.dtype), value=make_value(base, fct(ech))
        )
        data.append(obs)

        # Trials 1 and 2 use sorted orders, the remaining trials use random permutations.
        if n == 1:
            new_ech = np.sort(ech)
        elif n == 2:
            new_ech = np.sort(ech)[::-1]
        else:
            new_ech = np.random.permutation(ech)
        assert new_ech.dtype == ech.dtype
        assert new_ech.shape == ech.shape
        obs = dict(
            n=n + 1,
            size=size,
            dtype=str_dtype(new_ech.dtype),
            value=make_value(base, fct(new_ech)),
        )
        data.append(obs)

    df = pandas.DataFrame(data)
    agg = df.drop("n", axis=1).groupby(["dtype", "size"], as_index=False).agg(["min", "max"])
    agg["value", "delta"] = agg["value", "max"] - agg["value", "min"]
    piv = agg.pivot(index="size", columns="dtype", values=("value", "delta"))
    return piv


# %%
# Normal Law
# ++++++++++
#
# Let's see what it returns on a random sample following a normal law.
# First the average.

values = np.random.randn(4096)
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
mean["name"] = "normal"
print(mean)

# %%
# Then the layer normalization.

ln = compute(values, layer_norm)
ln["name"] = "normal"
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)

# %%
# Fixed values
# ++++++++++++
#
# We try a fixed vector where a few values are very high and all the others are small.

values[:] = -1e-4
values[::128] = 100
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
mean["name"] = "fixed"
print(mean)


ln = compute(values, layer_norm)
ln["name"] = "fixed"
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)

# %%
# Pareto Distribution
# +++++++++++++++++++
#
# A distribution with a long tail.

values = np.random.pareto(1, (4096,))
print(values)

mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
mean["name"] = "pareto"
print(mean)


ln = compute(values, layer_norm)
ln["name"] = "pareto"
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)

# %%
# Summary
# +++++++
#
# We consider the maximum difference obtained for any sample size.

print(DATA)
df = pandas.DataFrame(DATA).set_index("name")
print(df)

# %%
# Visually.

ax = df.plot.bar(logy=True)
fig = ax.get_figure()
fig.savefig("plot_parallelized_reduction.png")

# %%
# In a deep neural network
# ++++++++++++++++++++++++
#
# Some of the tensors hold about 500 million values, 16x32x1024x1024 for example.
# A layer normalization over the last dimension then performs 16x32x1024, about
# half a million, reductions, and this repeats over 20 layers or more.
# When a deep neural network is computed with a different code,
# using a different parallelization (GPU/CPU for example),
# the order of the reductions may change and therefore
# some discrepancies will appear and propagate.
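#
# The small sketch below is only an illustration, not the original benchmark:
# it emulates a parallel reduction by summing fixed-size chunks first and then
# summing the partial results, and compares it with a plain sequential sum.
# The sample size and the chunk size are arbitrary choices.

sample = np.random.pareto(1, (2**16,)).astype(np.float32)
sequential = np.float32(0.0)
for v in sample:
    # accumulate one element at a time, mimicking a single-threaded reduction
    sequential += v
# sum 1024-element chunks first, then the partial sums, mimicking a parallel split
chunked = sample.reshape(-1, 1024).sum(axis=1).sum()
print(sequential, chunked, abs(float(sequential) - float(chunked)))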