Skip to content

Commit 1ede022

Browse files
authored
add argmax in max_diff (#99)
* add argmax * ut * fix git * fix issues * mypy * add one gallery * docu * spell * spell
1 parent 7936215 commit 1ede022

File tree

18 files changed

+378
-22
lines changed

18 files changed

+378
-22
lines changed

.github/workflows/check-release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ jobs:
7474
echo _unittests/ >> .git/info/sparse-checkout
7575
echo _doc/examples/ >> .git/info/sparse-checkout
7676
echo _doc/recipes/ >> .git/info/sparse-checkout
77+
echo _doc/technical/ >> .git/info/sparse-checkout
7778
echo pyproject.toml >> .git/info/sparse-checkout
7879
echo requirements-dev.txt >> .git/info/sparse-checkout
7980
git pull origin main

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ prof
5757
plot_*.txt
5858
_doc/auto_examples/*
5959
_doc/auto_recipes/*
60+
_doc/auto_technical/*
6061
_doc/sg_execution_times.rst
6162
_doc/examples/_cache/*
6263
_doc/examples/dump_models/*

_doc/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,13 @@ def linkcode_resolve(domain, info):
156156
"examples_dirs": [
157157
os.path.join(os.path.dirname(__file__), "examples"),
158158
os.path.join(os.path.dirname(__file__), "recipes"),
159+
os.path.join(os.path.dirname(__file__), "technical"),
159160
],
160161
# path where to save gallery generated examples
161162
"gallery_dirs": [
162163
"auto_examples",
163164
"auto_recipes",
165+
"auto_technical",
164166
],
165167
# no parallelization to avoid conflict with environment variables
166168
"parallel": 1,

_doc/index.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ It also implements tools to investigate, validate exported models (ExportedProgr
3939
cmds/index
4040
auto_examples/index
4141
auto_recipes/index
42+
auto_technical/index
4243

4344
.. toctree::
4445
:maxdepth: 1
@@ -116,12 +117,12 @@ See :func:`onnx_diagnostic.helpers.string_type`.
116117
onnx_dtype_name
117118
+++++++++++++++
118119

119-
See :func:`onnx_diagnostic.helpers.onnx_dtype_name`.
120+
See :func:`onnx_diagnostic.helpers.onnx_helper.onnx_dtype_name`.
120121

121122
.. code-block:: python
122123
123124
import onnx
124-
from onnx_diagnostic.helpers import onnx_dtype_name
125+
from onnx_diagnostic.helpers.onnx_helper import onnx_dtype_name
125126
126127
itype = onnx.TensorProto.BFLOAT16
127128
print(onnx_dtype_name(itype))

_doc/technical/README.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Technical Details
2+
=================
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
"""
2+
Reproducible Parallelized Reduction is difficult
3+
================================================
4+
5+
A reduction is a frequent operation with neural networks. It appears in layer normalization,
6+
softmax... Because of the float precision, the result of the computation
7+
changes based on the order of the elements. The following examples show the variation
8+
based on different hypotheses about the vector distribution.
9+
We consider a vector :math:`X = (x_1, ..., x_n)`.
10+
It computes the average:
11+
12+
.. math::
13+
14+
mean(X) = \\frac{\\sum_{i=1}^n x_i}{n}
15+
16+
Or the normalization of the vector:
17+
18+
.. math::
19+
20+
norm(X)_i = \\frac{ X_i - \\mathbb{E}X}{ \\sqrt{ \\mathbb{V}X}}
21+
22+
With :math:`\\mathbb{E}X = mean(X)`,
23+
:math:`\\mathbb{V}X = mean\\left(\\left(X - mean(X)\\right)^2\\right)`.
24+
We draw 128 random permutations of X. The average or mean should not change.
25+
And the normalized vector should have the same values. In the first case, we compute
26+
the difference between the highest and the lowest values obtained for the average.
27+
In the second case, we look for the maximum difference between the original normalized
28+
vector and the permuted one, both sorted.
29+
30+
The computation code
31+
++++++++++++++++++++
32+
"""
33+
34+
import itertools
35+
from tqdm import tqdm
36+
import numpy as np
37+
import pandas
38+
39+
DATA = []
40+
41+
42+
def str_dtype(dtype):
    """
    Returns a short label for a numpy floating point dtype.

    :param dtype: numpy dtype (or scalar type) to name
    :return: ``"fp64"``, ``"fp32"`` or ``"fp16"``
    :raises ValueError: if ``dtype`` is none of the three supported types
    """
    known_labels = (
        (np.float64, "fp64"),
        (np.float32, "fp32"),
        (np.float16, "fp16"),
    )
    for candidate, label in known_labels:
        if dtype == candidate:
            return label
    raise ValueError(f"Unexpected value {dtype}")
51+
52+
53+
def layer_norm(a, eps=1e-6):
    """
    Normalizes the vector ``a`` along its last axis.

    The values are centered on their mean and divided by
    ``sqrt(mean(centered ** 2)) + eps``. A float16 input is computed
    in float32, any other dtype is computed in its own precision.
    The result is cast back to the dtype of ``a``.

    :param a: numpy array to normalize
    :param eps: value added to the standard deviation before dividing
    :return: normalized array, same shape and dtype as ``a``
    """
    compute_dtype = np.float32 if a.dtype == np.float16 else a.dtype
    x = a.astype(compute_dtype)
    centered = x - np.mean(x, axis=-1, keepdims=True)
    std = np.sqrt(np.mean(centered * centered, axis=-1, keepdims=True))
    # in-place addition keeps the computation dtype of ``std``
    std += eps
    normalized = centered / std
    return normalized.astype(a.dtype)
65+
66+
67+
def compute(values, fct):
    """
    Compare the results of function ``fct`` on a sample.
    Loops over multiple sizes, dtypes. Tries 128 times.

    For every pair (size, dtype), the sample is ``values[:size]`` cast to
    ``dtype``. Function ``fct`` is applied to this sample, then to the same
    sample sorted in increasing order, sorted in decreasing order and
    randomly permuted.

    :param values: numpy array, sliced with ``values[:size]`` for every size
    :param fct: function applied to every sample, the result computed on the
        reference sample must have the same dtype as the sample
    :return: dataframe indexed by size with one column per dtype,
        every cell is the difference between the highest and the lowest
        recorded values for this size and dtype
    """

    def make_value(base, value):
        # A vector result is summarized by the maximum absolute difference
        # with the reference result, both sorted. A result holding
        # a single element is returned unchanged.
        if value.size > 1:
            return np.abs(np.sort(base) - np.sort(value)).max()
        return value

    sizes = [2, 4, 8, 16, 512, 1024, 2048, 4096, 8192]
    dtypes = [np.float64, np.float32, np.float16]
    N = list(range(128))
    # n is the last factor of the product, it goes through 0..127
    # for every pair (size, dtype) before the pair changes.
    exps = list(itertools.product(sizes, dtypes, N))
    data = []
    ech = None
    for size, dtype, n in tqdm(exps):
        if n == 0:
            # first iteration for this (size, dtype):
            # builds the reference sample and the reference result
            ech = values[:size].astype(dtype)
            base = fct(ech)
            assert base.dtype == ech.dtype
            obs = dict(
                n=n, size=size, dtype=str_dtype(ech.dtype), value=make_value(base, fct(ech))
            )
            data.append(obs)

        # n == 0 also goes through the following test and records
        # a random permutation in addition to the reference row.
        if n == 1:
            new_ech = np.sort(ech)
        elif n == 2:
            new_ech = np.sort(ech)[::-1]
        else:
            new_ech = np.random.permutation(ech)
        assert new_ech.dtype == ech.dtype
        assert new_ech.shape == ech.shape
        obs = dict(
            # shifted by one, n=0 is taken by the reference row
            n=n + 1,
            size=size,
            dtype=str_dtype(new_ech.dtype),
            value=make_value(base, fct(new_ech)),
        )
        data.append(obs)

    df = pandas.DataFrame(data)
    # min and max of the recorded values for every (dtype, size)
    agg = df.drop("n", axis=1).groupby(["dtype", "size"], as_index=False).agg(["min", "max"])
    agg["value", "delta"] = agg["value", "max"] - agg["value", "min"]
    # one row per size, one column per dtype
    piv = agg.pivot(index="size", columns="dtype", values=("value", "delta"))
    return piv
115+
116+
117+
# %%
118+
# Normal Law
119+
# ++++++++++
120+
#
121+
# Let's see what it returns on a random sample following a normal law.
122+
# First the average.
123+
124+
values = np.random.randn(4096)
125+
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
126+
mean["name"] = "normal"
127+
print(mean)
128+
129+
# %%
130+
# Then the layer normalization.
131+
132+
ln = compute(values, layer_norm)
133+
ln["name"] = "normal"
134+
DATA.append(ln.reset_index(drop=True).max(axis=0))
135+
print(ln)
136+
137+
# %%
138+
# Fixed values
139+
# ++++++++++++
140+
#
141+
# We try a fixed vector with one very high value and all the others are small.
142+
143+
values[:] = -1e-4
144+
values[::128] = 100
145+
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
146+
mean["name"] = "fixed"
147+
print(mean)
148+
149+
# %%
150+
# And the normalized vector.
151+
ln = compute(values, layer_norm)
152+
ln["name"] = "fixed"
153+
DATA.append(ln.reset_index(drop=True).max(axis=0))
154+
print(ln)
155+
156+
# %%
157+
# Pareto Distribution
158+
# +++++++++++++++++++
159+
#
160+
# A law with a long tail.
161+
162+
values = np.random.pareto(1, (4096,))
163+
print(values)
164+
165+
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
# The sample above is drawn from a Pareto distribution,
# the table is labelled accordingly.
mean["name"] = "pareto"
print(mean)
168+
169+
# %%
170+
# And the normalized vector.
171+
ln = compute(values, layer_norm)
172+
ln["name"] = "pareto"
173+
DATA.append(ln.reset_index(drop=True).max(axis=0))
174+
print(ln)
175+
176+
# %%
177+
# Summary
178+
# +++++++
179+
#
180+
# We consider the maximum difference obtained for any sample size.
181+
182+
df = pandas.DataFrame(DATA).set_index("name")
183+
print(df)
184+
185+
# %%
186+
# Visually.
187+
188+
ax = df.plot.bar(logy=True)
189+
fig = ax.get_figure()
190+
fig.savefig("plot_parallelized_reduction.png")
191+
192+
# %%
193+
# In a deep neural network
194+
# ++++++++++++++++++++++++
195+
#
196+
# Some of the tensors have about 500M values, 16x32x1024x1024. A layer normalization
197+
# does 16x32x1024 ~ 0.5M reductions, over 20 layers.
198+
# When a deep neural network is computed with a different code
199+
# doing a different parallelization (GPU/CPU for example),
200+
# the order of the reduction may change and therefore,
201+
# some errors will appear and propagate.

_unittests/ut_helpers/test_bench_run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,14 @@ def test_make_configs_replace(self):
105105
def test_max_diff(self):
106106
self.assertEqual(
107107
max_diff(torch.Tensor([1, 2]), torch.Tensor([1, 2])),
108-
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0},
108+
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0, "argm": (0,)},
109109
)
110110
self.assertEqual(
111111
max_diff(
112112
(torch.Tensor([1, 2]),),
113113
(torch.Tensor([1, 2])),
114114
),
115-
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0},
115+
{"abs": 0.0, "rel": 0.0, "sum": 0.0, "n": 2.0, "dnan": 0.0, "argm": (0,)},
116116
)
117117
self.assertEqual(
118118
max_diff(

_unittests/ut_helpers/test_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def test_max_diff_hist_array_string_diff(self):
245245
diff = max_diff(x, y, hist=True)
246246
s = string_diff(diff)
247247
self.assertEndsWith(
248-
"/#8>0.0-#8>0.0001-#6>0.001-#5>0.01-#5>0.1-#3>1.0-#2>10.0-#1>100.0", s
248+
"/#8>0.0-#8>0.0001-#6>0.001-#5>0.01-#5>0.1-#3>1.0-#2>10.0-#1>100.0,amax=2,1", s
249249
)
250250

251251
def test_max_diff_hist_tensor(self):
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import unittest
2+
import os
3+
import sys
4+
import importlib.util
5+
import subprocess
6+
import time
7+
from onnx_diagnostic import __file__ as onnx_diagnostic_file
8+
from onnx_diagnostic.ext_test_case import ExtTestCase, is_windows, ignore_errors
9+
10+
11+
VERBOSE = 0
12+
ROOT = os.path.realpath(os.path.abspath(os.path.join(onnx_diagnostic_file, "..", "..")))
13+
14+
15+
def import_source(module_file_path, module_name):
    """
    Imports a python module from a file and executes it.

    :param module_file_path: path of the file to import
    :param module_name: name given to the imported module
    :return: the executed module
    :raises FileNotFoundError: if the path does not exist or if no import
        specification can be built for it (a folder for example)
    """
    if not os.path.exists(module_file_path):
        raise FileNotFoundError(module_file_path)
    module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
    if module_spec is None or module_spec.loader is None:
        raise FileNotFoundError(
            "Unable to find '{}' in '{}'.".format(module_name, module_file_path)
        )
    module = importlib.util.module_from_spec(module_spec)
    # exec_module returns None, the module itself must be returned
    module_spec.loader.exec_module(module)
    return module
25+
26+
27+
class TestDocumentationTechnical(ExtTestCase):
    """
    Runs the scripts ``plot_*.py`` stored in ``_doc/technical`` as unit tests.
    The test methods are added to the class by :meth:`add_test_methods`.
    """

    def run_test(self, fold: str, name: str, verbose=0) -> int:
        """
        Runs one script and fails if its standard error contains a traceback.

        :param fold: folder containing the script
        :param name: file name of the script
        :param verbose: prints the execution time if not null
        :return: 1 if no failure was detected
        :raises unittest.SkipTest: if the traceback is about a missing ``dot``
            or about the connection to huggingface.co
        :raises AssertionError: for any other traceback
        """
        # Adds ROOT to PYTHONPATH, the subprocess started below inherits it.
        ppath = os.environ.get("PYTHONPATH", "")
        if not ppath:
            os.environ["PYTHONPATH"] = ROOT
        elif ROOT not in ppath:
            sep = ";" if is_windows() else ":"
            os.environ["PYTHONPATH"] = ppath + sep + ROOT
        perf = time.perf_counter()
        try:
            # NOTE(review): import_source receives the folder, not the path of
            # the script; presumably it raises FileNotFoundError and the script
            # is then run in a subprocess -- confirm this is intended.
            mod = import_source(fold, os.path.splitext(name)[0])
            assert mod is not None
        except FileNotFoundError:
            # try another way
            cmds = [sys.executable, "-u", os.path.join(fold, name)]
            p = subprocess.Popen(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            _, err = p.communicate()
            st = err.decode("ascii", errors="ignore")
            if st and "Traceback" in st:
                if '"dot" not found in path.' in st:
                    # dot not installed, this part
                    # is tested in onnx framework
                    raise unittest.SkipTest(f"failed: {name!r} due to missing dot.")
                if (
                    "We couldn't connect to 'https://huggingface.co'" in st
                    or "Cannot access content at: https://huggingface.co/" in st
                ):
                    # decoded text, formatting the raw bytes shows b'...'
                    raise unittest.SkipTest(f"Connectivity issues due to\n{st}")
                raise AssertionError(  # noqa: B904
                    "Example '{}' (cmd: {} - exec_prefix='{}') "
                    "failed due to\n{}"
                    "".format(name, cmds, sys.exec_prefix, st)
                )
        dt = time.perf_counter() - perf
        if verbose:
            print(f"{dt:.3f}: run {name!r}")
        return 1

    @classmethod
    def add_test_methods(cls):
        """
        Adds one method ``test_<script name>`` to the class for every file
        ``plot_*.py`` found in folder ``_doc/technical``.
        """
        this = os.path.abspath(os.path.dirname(__file__))
        fold = os.path.normpath(os.path.join(this, "..", "..", "_doc", "technical"))
        found = os.listdir(fold)
        for name in found:
            if not name.endswith(".py") or not name.startswith("plot_"):
                continue
            # no script is skipped for the time being,
            # a reason (a string) disables the corresponding test
            reason = None

            if reason:

                @unittest.skip(reason)
                def _test_(self, name=name):
                    res = self.run_test(fold, name, verbose=VERBOSE)
                    self.assertTrue(res)

            else:

                # name=name binds the current file name to the method
                @ignore_errors(OSError)  # connectivity issues
                def _test_(self, name=name):
                    res = self.run_test(fold, name, verbose=VERBOSE)
                    self.assertTrue(res)

            short_name = os.path.split(os.path.splitext(name)[0])[-1]
            setattr(cls, f"test_{short_name}", _test_)
92+
93+
94+
TestDocumentationTechnical.add_test_methods()
95+
96+
if __name__ == "__main__":
97+
unittest.main(verbosity=2)

_unittests/ut_xrun_doc/test_unit_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@ def test_statistics_on_folders(self):
5252

5353
df = pandas.DataFrame(stat)
5454
gr = df.drop("name", axis=1).groupby(["ext", "dir"]).sum().reset_index()
55-
gr = gr[(gr["dir"] != "_doc/auto_examples") & (gr["dir"] != "_doc/auto_recipes")]
55+
gr = gr[
56+
(gr["dir"] != "_doc/auto_examples")
57+
& (gr["dir"] != "_doc/auto_recipes")
58+
& (gr["dir"] != "_doc/auto_technical")
59+
]
5660
total = (
5761
gr[gr["dir"].str.contains("onnx_diagnostic/")]
5862
.drop(["ext", "dir"], axis=1)

0 commit comments

Comments
 (0)