1 change: 1 addition & 0 deletions .github/workflows/check-release.yml
@@ -74,6 +74,7 @@ jobs:
echo _unittests/ >> .git/info/sparse-checkout
echo _doc/examples/ >> .git/info/sparse-checkout
echo _doc/recipes/ >> .git/info/sparse-checkout
echo _doc/technical/ >> .git/info/sparse-checkout
echo pyproject.toml >> .git/info/sparse-checkout
echo requirements-dev.txt >> .git/info/sparse-checkout
git pull origin main
1 change: 1 addition & 0 deletions .gitignore
@@ -57,6 +57,7 @@ prof
plot_*.txt
_doc/auto_examples/*
_doc/auto_recipes/*
_doc/auto_technical/*
_doc/sg_execution_times.rst
_doc/examples/_cache/*
_doc/examples/dump_models/*
2 changes: 2 additions & 0 deletions _doc/conf.py
@@ -156,11 +156,13 @@ def linkcode_resolve(domain, info):
"examples_dirs": [
os.path.join(os.path.dirname(__file__), "examples"),
os.path.join(os.path.dirname(__file__), "recipes"),
os.path.join(os.path.dirname(__file__), "technical"),
],
# path where to save gallery generated examples
"gallery_dirs": [
"auto_examples",
"auto_recipes",
"auto_technical",
],
# no parallelization to avoid conflict with environment variables
"parallel": 1,
5 changes: 3 additions & 2 deletions _doc/index.rst
@@ -39,6 +39,7 @@ It also implements tools to investigate, validate exported models (ExportedProgr
cmds/index
auto_examples/index
auto_recipes/index
auto_technical/index

.. toctree::
:maxdepth: 1
@@ -116,12 +117,12 @@ See :func:`onnx_diagnostic.helpers.string_type`.
onnx_dtype_name
+++++++++++++++

-See :func:`onnx_diagnostic.helpers.onnx_dtype_name`.
+See :func:`onnx_diagnostic.helpers.onnx_helper.onnx_dtype_name`.

.. code-block:: python

import onnx
-    from onnx_diagnostic.helpers import onnx_dtype_name
+    from onnx_diagnostic.helpers.onnx_helper import onnx_dtype_name

itype = onnx.TensorProto.BFLOAT16
print(onnx_dtype_name(itype))
2 changes: 2 additions & 0 deletions _doc/technical/README.txt
@@ -0,0 +1,2 @@
Technical Details
=================
201 changes: 201 additions & 0 deletions _doc/technical/plot_parallelized_reduction.py
@@ -0,0 +1,201 @@
"""
Reproducible Parallelized Reduction is difficult
================================================

A reduction is a frequent operation in neural networks. It appears in layer
normalization, softmax... Because of the finite float precision, the result of the
computation changes with the order of the elements. The following examples show this
variation under different hypotheses on the vector distribution; a small sketch right
after the imports illustrates why the order matters.
We consider a vector :math:`X = (x_1, ..., x_n)` and compute its average:

.. math::

    mean(X) = \\frac{\\sum_{i=1}^n x_i}{n}

Or the normalization of the vector:

.. math::

    norm(X)_i = \\frac{x_i - \\mathbb{E}X}{\\sqrt{\\mathbb{V}X}}

with :math:`\\mathbb{E}X = mean(X)` and
:math:`\\mathbb{V}X = mean\\left(\\left(X - mean(X)\\right)^2\\right)`.
We draw 128 random permutations of :math:`X`. The mean should not change and the
normalized vector should contain the same values. For the mean, we report the
difference between the highest and the lowest values obtained over all permutations.
For the normalization, we report the maximum difference between the original
normalized vector and the permuted one, both sorted.

The computation code
++++++++++++++++++++
"""

import itertools
from tqdm import tqdm
import numpy as np
import pandas

DATA = []
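
# %%
# Why the order matters
# +++++++++++++++++++++
#
# A minimal sketch, independent of the experiment below: float addition is not
# associative, so two different summation orders can produce two different results.

a, b, c = np.float32(1e8), np.float32(-1e8), np.float32(1e-3)
print((a + b) + c)  # 0.001: the exact cancellation happens first
print(a + (b + c))  # 0.0: 1e-3 is absorbed by -1e8 before the cancellation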


def str_dtype(dtype):
"""Displays numpy dtype in a nicer way."""
if dtype == np.float64:
return "fp64"
if dtype == np.float32:
return "fp32"
if dtype == np.float16:
return "fp16"
raise ValueError(f"Unexpected value {dtype}")


def layer_norm(a, eps=1e-6):
    """
    Normalizes the vector ``a``.
    The computation is done in float32 or float64, never in float16.
    """
    # float16 inputs are upcast to float32 before computing the statistics
    ctype = np.float32 if a.dtype == np.float16 else a.dtype
    a32 = a.astype(ctype)
    m = a32.mean(axis=-1, keepdims=True)
    c = a32 - m
    # standard deviation, with eps added after the square root in this version
    va = np.sqrt((c * c).mean(axis=-1, keepdims=True))
    va += eps
    return (c / va).astype(a.dtype)


def compute(values, fct):
    """
    Compares the results of function ``fct`` on permutations of a sample.
    Loops over multiple sizes and dtypes, drawing 128 permutations each time.
    """

    def make_value(base, value):
        # For a vector output, compares the sorted values; for a scalar output
        # (the mean), keeps the value itself.
        if value.size > 1:
            return np.abs(np.sort(base) - np.sort(value)).max()
        return value

sizes = [2, 4, 8, 16, 512, 1024, 2048, 4096, 8192]
dtypes = [np.float64, np.float32, np.float16]
N = list(range(128))
exps = list(itertools.product(sizes, dtypes, N))
data = []
ech = None
for size, dtype, n in tqdm(exps):
if n == 0:
ech = values[:size].astype(dtype)
base = fct(ech)
assert base.dtype == ech.dtype
obs = dict(
n=n, size=size, dtype=str_dtype(ech.dtype), value=make_value(base, fct(ech))
)
data.append(obs)

        if n == 1:
            new_ech = np.sort(ech)  # ascending order
        elif n == 2:
            new_ech = np.sort(ech)[::-1]  # descending order
        else:
            new_ech = np.random.permutation(ech)  # random order
assert new_ech.dtype == ech.dtype
assert new_ech.shape == ech.shape
obs = dict(
n=n + 1,
size=size,
dtype=str_dtype(new_ech.dtype),
value=make_value(base, fct(new_ech)),
)
data.append(obs)

    # Aggregates the draws: for every (dtype, size), the spread between the
    # highest and the lowest observed values measures the non-reproducibility.
    df = pandas.DataFrame(data)
    agg = df.drop("n", axis=1).groupby(["dtype", "size"], as_index=False).agg(["min", "max"])
    agg["value", "delta"] = agg["value", "max"] - agg["value", "min"]
    piv = agg.pivot(index="size", columns="dtype", values=("value", "delta"))
    return piv


# %%
# Normal Distribution
# +++++++++++++++++++
#
# Let's see what it returns on a random sample drawn from a normal distribution.
# First, the average.

values = np.random.randn(4096)
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
mean["name"] = "normal"
print(mean)

# %%
# Then the layer normalization.

ln = compute(values, layer_norm)
ln["name"] = "normal"
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)

# %%
# Fixed values
# ++++++++++++
#
# We try a fixed vector where every 128th value is very high and all the others
# are small.

values[:] = -1e-4
values[::128] = 100
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
mean["name"] = "fixed"
print(mean)

# %%
# And the normalized vector.
ln = compute(values, layer_norm)
ln["name"] = "fixed"
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)

# %%
# Pareto Distribution
# +++++++++++++++++++
#
# A heavy-tailed distribution.

values = np.random.pareto(1, (4096,))
print(values)

mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
mean["name"] = "normal"
print(mean)

# %%
# And the normalized vector.
ln = compute(values, layer_norm)
ln["name"] = "pareto"
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)

# %%
# Summary
# +++++++
#
# We consider the maximum difference obtained across all sample sizes.

df = pandas.DataFrame(DATA).set_index("name")
print(df)

# %%
# Visually.

ax = df.plot.bar(logy=True)
fig = ax.get_figure()
fig.savefig("plot_parallelized_reduction.png")

# %%
# In a deep neural network
# ++++++++++++++++++++++++
#
# Some tensors hold about 500 million values, for instance a shape of
# 16x32x1024x1024. A layer normalization over the last dimension then performs
# 16x32x1024, roughly half a million reductions, repeated over 20 layers.
# When a deep neural network is computed with a different code
# doing a different parallelization (GPU/CPU for example),
# the order of the reductions may change; therefore,
# small errors appear and propagate. The sketch below shows the effect with
# two summation strategies.
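
# %%
# A minimal sketch of that effect (the chunking is hypothetical, not how any
# particular library parallelizes): summing the same float32 vector sequentially
# and in 8 chunks, as a parallel reduction would, typically gives slightly
# different results.

x = np.random.randn(2**16).astype(np.float32)
seq = np.float32(0)
for v in x:
    seq += v  # sequential accumulation, one element at a time
par = np.float32(x.reshape(8, -1).sum(axis=1).sum())  # chunked, "parallel" order
print(seq, par, abs(seq - par))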
4 changes: 2 additions & 2 deletions _unittests/ut_helpers/test_bench_run.py
@@ -105,14 +105,14 @@ def test_make_configs_replace(self):
def test_max_diff(self):
self.assertEqual(
max_diff(torch.Tensor([1, 2]), torch.Tensor([1, 2])),
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0},
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0, "argm": (0,)},
)
self.assertEqual(
max_diff(
(torch.Tensor([1, 2]),),
(torch.Tensor([1, 2])),
),
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0},
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0, "argm": (0,)},
)
self.assertEqual(
max_diff(
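Note: these updated assertions show that ``max_diff`` now also returns an ``argm``
entry, presumably the index of the element where the maximum absolute difference
occurs. A minimal sketch of how such an index can be computed (illustrative only,
not the library's internals):

    import numpy as np

    def max_abs_diff_with_index(a, b):
        # Locate the largest absolute difference between two arrays and return
        # both its value and its index as a tuple, like the "argm" entry above.
        d = np.abs(np.asarray(a, dtype=float) - np.asarray(b, dtype=float))
        idx = np.unravel_index(int(np.argmax(d)), d.shape)
        return float(d[idx]), idx

    print(max_abs_diff_with_index([1, 2], [1, 2]))  # prints (0.0, (0,))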
2 changes: 1 addition & 1 deletion _unittests/ut_helpers/test_helper.py
@@ -245,7 +245,7 @@ def test_max_diff_hist_array_string_diff(self):
diff = max_diff(x, y, hist=True)
s = string_diff(diff)
self.assertEndsWith(
"/#8>0.0-#8>0.0001-#6>0.001-#5>0.01-#5>0.1-#3>1.0-#2>10.0-#1>100.0", s
"/#8>0.0-#8>0.0001-#6>0.001-#5>0.01-#5>0.1-#3>1.0-#2>10.0-#1>100.0,amax=2,1", s
)

def test_max_diff_hist_tensor(self):
97 changes: 97 additions & 0 deletions _unittests/ut_xrun_doc/test_documentation_technical.py
@@ -0,0 +1,97 @@
import unittest
import os
import sys
import importlib.util
import subprocess
import time
from onnx_diagnostic import __file__ as onnx_diagnostic_file
from onnx_diagnostic.ext_test_case import ExtTestCase, is_windows, ignore_errors


VERBOSE = 0
ROOT = os.path.realpath(os.path.abspath(os.path.join(onnx_diagnostic_file, "..", "..")))


def import_source(module_file_path, module_name):
if not os.path.exists(module_file_path):
raise FileNotFoundError(module_file_path)
module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
if module_spec is None:
raise FileNotFoundError(
"Unable to find '{}' in '{}'.".format(module_name, module_file_path)
)
module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module


class TestDocumentationTechnical(ExtTestCase):
def run_test(self, fold: str, name: str, verbose=0) -> int:
ppath = os.environ.get("PYTHONPATH", "")
if not ppath:
os.environ["PYTHONPATH"] = ROOT
elif ROOT not in ppath:
sep = ";" if is_windows() else ":"
os.environ["PYTHONPATH"] = ppath + sep + ROOT
perf = time.perf_counter()
try:
            mod = import_source(os.path.join(fold, name), os.path.splitext(name)[0])
assert mod is not None
except FileNotFoundError:
# try another way
cmds = [sys.executable, "-u", os.path.join(fold, name)]
p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
res = p.communicate()
out, err = res
st = err.decode("ascii", errors="ignore")
if st and "Traceback" in st:
if '"dot" not found in path.' in st:
# dot not installed, this part
# is tested in onnx framework
raise unittest.SkipTest(f"failed: {name!r} due to missing dot.")
if (
"We couldn't connect to 'https://huggingface.co'" in st
or "Cannot access content at: https://huggingface.co/" in st
):
raise unittest.SkipTest(f"Connectivity issues due to\n{err}")
raise AssertionError( # noqa: B904
"Example '{}' (cmd: {} - exec_prefix='{}') "
"failed due to\n{}"
"".format(name, cmds, sys.exec_prefix, st)
)
dt = time.perf_counter() - perf
if verbose:
print(f"{dt:.3f}: run {name!r}")
return 1

@classmethod
def add_test_methods(cls):
this = os.path.abspath(os.path.dirname(__file__))
fold = os.path.normpath(os.path.join(this, "..", "..", "_doc", "technical"))
found = os.listdir(fold)
for name in found:
if not name.endswith(".py") or not name.startswith("plot_"):
continue
reason = None

if reason:

@unittest.skip(reason)
def _test_(self, name=name):
res = self.run_test(fold, name, verbose=VERBOSE)
self.assertTrue(res)

else:

@ignore_errors(OSError) # connectivity issues
def _test_(self, name=name):
res = self.run_test(fold, name, verbose=VERBOSE)
self.assertTrue(res)

short_name = os.path.split(os.path.splitext(name)[0])[-1]
setattr(cls, f"test_{short_name}", _test_)


TestDocumentationTechnical.add_test_methods()

if __name__ == "__main__":
unittest.main(verbosity=2)
6 changes: 5 additions & 1 deletion _unittests/ut_xrun_doc/test_unit_test.py
@@ -52,7 +52,11 @@ def test_statistics_on_folders(self):

df = pandas.DataFrame(stat)
gr = df.drop("name", axis=1).groupby(["ext", "dir"]).sum().reset_index()
gr = gr[(gr["dir"] != "_doc/auto_examples") & (gr["dir"] != "_doc/auto_recipes")]
gr = gr[
(gr["dir"] != "_doc/auto_examples")
& (gr["dir"] != "_doc/auto_recipes")
& (gr["dir"] != "_doc/auto_technical")
]
total = (
gr[gr["dir"].str.contains("onnx_diagnostic/")]
.drop(["ext", "dir"], axis=1)