Skip to content

Commit a41b3d1

Browse files
committed
add one gallery
1 parent 0e5e11d commit a41b3d1

File tree

12 files changed

+339
-9
lines changed

12 files changed

+339
-9
lines changed

.github/workflows/check-release.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ jobs:
7474
echo _unittests/ >> .git/info/sparse-checkout
7575
echo _doc/examples/ >> .git/info/sparse-checkout
7676
echo _doc/recipes/ >> .git/info/sparse-checkout
77+
echo _doc/technical/ >> .git/info/sparse-checkout
7778
echo pyproject.toml >> .git/info/sparse-checkout
7879
echo requirements-dev.txt >> .git/info/sparse-checkout
7980
git pull origin main

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ prof
5757
plot_*.txt
5858
_doc/auto_examples/*
5959
_doc/auto_recipes/*
60+
_doc/auto_technical/*
6061
_doc/sg_execution_times.rst
6162
_doc/examples/_cache/*
6263
_doc/examples/dump_models/*

_doc/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,13 @@ def linkcode_resolve(domain, info):
156156
"examples_dirs": [
157157
os.path.join(os.path.dirname(__file__), "examples"),
158158
os.path.join(os.path.dirname(__file__), "recipes"),
159+
os.path.join(os.path.dirname(__file__), "technical"),
159160
],
160161
# path where to save gallery generated examples
161162
"gallery_dirs": [
162163
"auto_examples",
163164
"auto_recipes",
165+
"auto_technical",
164166
],
165167
# no parallelization to avoid conflict with environment variables
166168
"parallel": 1,

_doc/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ It also implements tools to investigate, validate exported models (ExportedProgr
3939
cmds/index
4040
auto_examples/index
4141
auto_recipes/index
42+
auto_technical/index
4243

4344
.. toctree::
4445
:maxdepth: 1

_doc/technical/README.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Technical Details
2+
=================
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
"""
2+
Reproducible Parallelized Reduction is difficult
3+
================================================
4+
5+
A reduction is a frequent operation in neural networks. It appears in layer normalization,
6+
softmax. Because of the float precision, the result of the computation
7+
changes based on the order of the elements. The following examples show the variation
8+
based on different hypotheses about the vector distribution.
9+
We consider a vector :math:`X = (x_1, ..., x_n)`.
10+
It computes the average:
11+
12+
.. math::
13+
14+
mean(X) = \\frac{\\sum_{i=1}^n x_i}{n}
15+
16+
Or the normalization of the vector:
17+
18+
.. math::
19+
20+
norm(X)_i = \\frac{ X_i - \\mathbb{E}X}{ \\sqrt{ \\mathbb{V}X}}
21+
22+
We draw 128 random permutations of X. The average or mean should not change.
23+
And the normalized vector should have the same value. In the first case, we compute
24+
the difference between the highest and the lowest values obtained for the average.
25+
In the second case, we look for the maximum difference between the original normalized
26+
vector and the permuted one (both sorted).
27+
28+
The computation code
29+
++++++++++++++++++++
30+
"""
31+
32+
import itertools
33+
from tqdm import tqdm
34+
import numpy as np
35+
import pandas
36+
37+
DATA = []
38+
39+
40+
def str_dtype(dtype):
    """
    Returns a short label for a numpy float dtype.

    :param dtype: a numpy dtype or scalar type, compared with ``==``
        against ``np.float64``, ``np.float32`` and ``np.float16``
    :return: ``"fp64"``, ``"fp32"`` or ``"fp16"``
    :raises ValueError: if the dtype is none of the three above
    """
    known = (
        (np.float64, "fp64"),
        (np.float32, "fp32"),
        (np.float16, "fp16"),
    )
    for candidate, label in known:
        if dtype == candidate:
            return label
    raise ValueError(f"Unexpected value {dtype}")
49+
50+
51+
def layer_norm(a, eps=1e-6):
    """
    Normalizes the vector ``a`` along its last axis.

    The computation is done in float32 when ``a`` is float16,
    otherwise in the dtype of ``a``. The result is cast back
    to the dtype of ``a``.

    :param a: numpy array
    :param eps: value added to the standard deviation before dividing
    :return: the centered vector divided by ``std + eps``
    """
    compute_dtype = a.dtype if a.dtype != np.float16 else np.float32
    x = a.astype(compute_dtype)
    centered = x - x.mean(axis=-1, keepdims=True)
    scale = np.sqrt(np.mean(centered * centered, axis=-1, keepdims=True))
    # eps is added to the standard deviation itself, not to the variance
    scale += eps
    normalized = centered / scale
    return normalized.astype(a.dtype)
63+
64+
65+
def compute(values, fct):
    """
    Measures how much the result of ``fct`` changes when its input is reordered.

    For every size and every dtype (fp64, fp32, fp16), the function takes
    the first ``size`` elements of ``values`` cast to that dtype, evaluates
    ``fct`` on them as a reference, then evaluates ``fct`` on the sorted
    sample, the reverse-sorted sample and on random permutations of it.

    :param values: one-dimensional numpy array the samples are taken from
    :param fct: function applied to a sample, it must return an array or
        scalar with the same dtype as its input
    :return: a table indexed by size with one column per dtype holding
        ``max - min`` of the values observed for that configuration
    """

    def make_value(base, value):
        # scalar results are kept as they are, vector results are reduced
        # to the maximum absolute difference with the reference (both sorted)
        if value.size <= 1:
            return value
        return np.abs(np.sort(base) - np.sort(value)).max()

    sizes = [2, 4, 8, 16, 512, 1024, 2048, 4096, 8192]
    dtypes = [np.float64, np.float32, np.float16]
    trials = list(range(128))
    experiments = list(itertools.product(sizes, dtypes, trials))
    rows = []
    sample = None
    for size, dtype, n in tqdm(experiments):
        if n == 0:
            # first trial of a (size, dtype) pair: builds the reference
            sample = values[:size].astype(dtype)
            base = fct(sample)
            assert base.dtype == sample.dtype
            rows.append(
                dict(
                    n=n,
                    size=size,
                    dtype=str_dtype(sample.dtype),
                    value=make_value(base, fct(sample)),
                )
            )

        if n == 1:
            shuffled = np.sort(sample)
        elif n == 2:
            shuffled = np.sort(sample)[::-1]
        else:
            shuffled = np.random.permutation(sample)
        assert shuffled.dtype == sample.dtype
        assert shuffled.shape == sample.shape
        rows.append(
            dict(
                n=n + 1,
                size=size,
                dtype=str_dtype(shuffled.dtype),
                value=make_value(base, fct(shuffled)),
            )
        )

    table = pandas.DataFrame(rows)
    grouped = table.drop("n", axis=1).groupby(["dtype", "size"], as_index=False)
    agg = grouped.agg(["min", "max"])
    agg["value", "delta"] = agg["value", "max"] - agg["value", "min"]
    return agg.pivot(index="size", columns="dtype", values=("value", "delta"))
113+
114+
115+
# %%
116+
# Normal Law
117+
# ++++++++++
118+
#
119+
# Let's see what it returns on a random sample following a normal law.
120+
# First the average.
121+
122+
values = np.random.randn(4096)
123+
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
124+
mean["name"] = "normal"
125+
print(mean)
126+
127+
# %%
128+
# Then the layer normalization.
129+
130+
ln = compute(values, layer_norm)
131+
ln["name"] = "normal"
132+
DATA.append(ln.reset_index(drop=True).max(axis=0))
133+
print(ln)
134+
135+
# %%
136+
# Fixed values
137+
# ++++++++++++
138+
#
139+
# We try a fixed vector where every 128th value is very high and all the others are small.
140+
141+
values[:] = -1e-4
142+
values[::128] = 100
143+
mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
144+
mean["name"] = "fixed"
145+
print(mean)
146+
147+
148+
ln = compute(values, layer_norm)
149+
ln["name"] = "fixed"
150+
DATA.append(ln.reset_index(drop=True).max(axis=0))
151+
print(ln)
152+
153+
# %%
154+
# Pareto Distribution
155+
# +++++++++++++++++++
156+
#
157+
# A law with a long tail.
158+
159+
values = np.random.pareto(1, (4096,))
print(values)

mean = compute(values, lambda x: np.mean(x).astype(x.dtype))
# the label names the distribution of this section
mean["name"] = "pareto"
print(mean)


ln = compute(values, layer_norm)
ln["name"] = "pareto"
# keeps the maximum difference over all sizes for the summary
DATA.append(ln.reset_index(drop=True).max(axis=0))
print(ln)
171+
172+
# %%
173+
# Summary
174+
# +++++++
175+
#
176+
# We consider the maximum difference obtained for any sample size.
177+
178+
print(DATA)
179+
df = pandas.DataFrame(DATA).set_index("name")
180+
print(df)
181+
182+
# %%
183+
# Visually.
184+
185+
ax = df.plot.bar(logy=True)
186+
fig = ax.get_figure()
187+
fig.savefig("plot_parallelized_reduction.png")
188+
189+
# %%
190+
# In a deep neural network
191+
# ++++++++++++++++++++++++
192+
#
193+
# Some of the tensors have about 500M values, 16x32x1024x1024. A layer normalization
194+
# does 16x32x1024 ~ 0.5M reductions, over 20 layers.
195+
# When a deep neural network is computed with a different code,
196+
# doing a different parallelization (GPU/CPU for example),
197+
# the order of the reduction may change and therefore,
198+
# some errors will appear and propagate.
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import unittest
2+
import os
3+
import sys
4+
import importlib.util
5+
import subprocess
6+
import time
7+
from onnx_diagnostic import __file__ as onnx_diagnostic_file
8+
from onnx_diagnostic.ext_test_case import ExtTestCase, is_windows, ignore_errors
9+
10+
11+
VERBOSE = 0
12+
ROOT = os.path.realpath(os.path.abspath(os.path.join(onnx_diagnostic_file, "..", "..")))
13+
14+
15+
def import_source(module_file_path, module_name):
    """
    Imports a module from a file and returns it.

    :param module_file_path: path to the python file to import
    :param module_name: name given to the imported module
    :return: the executed module
    :raises FileNotFoundError: if the path does not exist or if no
        import spec or loader can be built for it
    """
    if not os.path.exists(module_file_path):
        raise FileNotFoundError(module_file_path)
    module_spec = importlib.util.spec_from_file_location(module_name, module_file_path)
    if module_spec is None or module_spec.loader is None:
        raise FileNotFoundError(
            "Unable to find '{}' in '{}'.".format(module_name, module_file_path)
        )
    module = importlib.util.module_from_spec(module_spec)
    # exec_module returns None, the module object itself must be returned
    module_spec.loader.exec_module(module)
    return module
25+
26+
27+
class TestDocumentationTechnical(ExtTestCase):
    """Turns every ``plot_*.py`` script of ``_doc/technical`` into a unit test."""

    def run_test(self, fold: str, name: str, verbose=0) -> int:
        """
        Runs one example script and fails if its execution reports a traceback.

        :param fold: folder containing the script
        :param name: file name of the script
        :param verbose: if not null, prints the execution time
        :return: 1 if no failure was detected
        :raises unittest.SkipTest: if the failure is due to a missing ``dot``
            or to connectivity issues with huggingface
        :raises AssertionError: if the standard error contains a traceback
        """
        # adds the root of the repository to PYTHONPATH, the subprocess
        # started below inherits this environment
        ppath = os.environ.get("PYTHONPATH", "")
        if not ppath:
            os.environ["PYTHONPATH"] = ROOT
        elif ROOT not in ppath:
            sep = ";" if is_windows() else ":"
            os.environ["PYTHONPATH"] = ppath + sep + ROOT
        perf = time.perf_counter()
        try:
            mod = import_source(fold, os.path.splitext(name)[0])
            assert mod is not None
        except FileNotFoundError:
            # try another way: runs the script in a separate process
            cmds = [sys.executable, "-u", os.path.join(fold, name)]
            with subprocess.Popen(
                cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            ) as p:
                _out, err = p.communicate()
            st = err.decode("ascii", errors="ignore")
            if st and "Traceback" in st:
                if '"dot" not found in path.' in st:
                    # dot not installed, this part
                    # is tested in onnx framework
                    raise unittest.SkipTest(f"failed: {name!r} due to missing dot.")
                if (
                    "We couldn't connect to 'https://huggingface.co'" in st
                    or "Cannot access content at: https://huggingface.co/" in st
                ):
                    # reports the decoded text, not the raw bytes
                    raise unittest.SkipTest(f"Connectivity issues due to\n{st}")
                raise AssertionError(  # noqa: B904
                    "Example '{}' (cmd: {} - exec_prefix='{}') "
                    "failed due to\n{}"
                    "".format(name, cmds, sys.exec_prefix, st)
                )
        dt = time.perf_counter() - perf
        if verbose:
            print(f"{dt:.3f}: run {name!r}")
        return 1

    @classmethod
    def add_test_methods(cls):
        """Adds one method ``test_<script>`` to the class for every ``plot_*.py`` script."""
        this = os.path.abspath(os.path.dirname(__file__))
        fold = os.path.normpath(os.path.join(this, "..", "..", "_doc", "technical"))
        found = os.listdir(fold)
        for name in found:
            if not name.endswith(".py") or not name.startswith("plot_"):
                continue
            # no script is skipped for now, a non-empty reason would skip it
            reason = None

            if reason:

                @unittest.skip(reason)
                def _test_(self, name=name):
                    res = self.run_test(fold, name, verbose=VERBOSE)
                    self.assertTrue(res)

            else:

                @ignore_errors(OSError)  # connectivity issues
                def _test_(self, name=name):
                    res = self.run_test(fold, name, verbose=VERBOSE)
                    self.assertTrue(res)

            short_name = os.path.split(os.path.splitext(name)[0])[-1]
            setattr(cls, f"test_{short_name}", _test_)
92+
93+
94+
TestDocumentationTechnical.add_test_methods()
95+
96+
if __name__ == "__main__":
97+
unittest.main(verbosity=2)

_unittests/ut_xrun_doc/test_unit_test.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@ def test_statistics_on_folders(self):
5252

5353
df = pandas.DataFrame(stat)
5454
gr = df.drop("name", axis=1).groupby(["ext", "dir"]).sum().reset_index()
55-
gr = gr[(gr["dir"] != "_doc/auto_examples") & (gr["dir"] != "_doc/auto_recipes")]
55+
gr = gr[
56+
(gr["dir"] != "_doc/auto_examples")
57+
& (gr["dir"] != "_doc/auto_recipes")
58+
& (gr["dir"] != "_doc/auto_technical")
59+
]
5660
total = (
5761
gr[gr["dir"].str.contains("onnx_diagnostic/")]
5862
.drop(["ext", "dir"], axis=1)

clean_onnx.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,25 @@ rm _doc/recipes/*.script.onnx
6565
rm _doc/recipes/dump_models -rf
6666
rm _doc/recipes/dump_onx_*
6767

68+
rm _doc/technical/plot*.onnx
69+
rm _doc/technical/plot*.onnx.weight
70+
rm _doc/technical/plot*.onnx.data
71+
rm _doc/technical/plot*.txt
72+
rm _doc/technical/ort*.onnx
73+
rm _doc/technical/*.sarif
74+
rm _doc/technical/*.json
75+
rm _doc/technical/*.png
76+
rm _doc/technical/*.csv
77+
rm _doc/technical/*.pte
78+
rm _doc/technical/*.xlsx
79+
rm _doc/technical/dummy*.onnx
80+
rm _doc/technical/evaluation*-script.onnx
81+
rm _doc/technical/*.opt.onnx
82+
rm _doc/technical/*.dynamo.onnx
83+
rm _doc/technical/*.script.onnx
84+
rm _doc/technical/dump_models -rf
85+
rm _doc/technical/dump_onx_*
86+
6887
rm _tools/bin -rf
6988
rm _tools/mambaroot -rf
7089
rm _tools/repos -rf

onnx_diagnostic/export/validate.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,10 @@ def _get(a):
9696
)
9797
print(f"[compare_modules] discrepancies={string_diff(diff)}")
9898
assert not exc or (
99-
diff["abs"] <= atol and diff["rel"] <= rtol
99+
isinstance(diff["abs"], float)
100+
and isinstance(diff["rel"], float)
101+
and diff["abs"] <= atol
102+
and diff["rel"] <= rtol
100103
), f"Discrepancies={string_diff(diff)} higher than expected."
101104
return dict(args=args, kwargs=kwargs, expected=expected, got=got, diff=diff)
102105
return dict(args=args, kwargs=kwargs, got=got)

0 commit comments

Comments
 (0)