
Commit fc664f8

example
1 parent 8a78172 commit fc664f8

File tree

5 files changed: +99, -47 lines


_doc/technical/plot_layer_norm_discrepancies.py

Lines changed: 41 additions & 13 deletions
@@ -6,22 +6,26 @@
 :ref:`l-plot-parallelized-reduction`, reduction operations
 are sensitive to parallelization.
 
-We consider a small model including a layer normalization
-followed by a matrix multiplication and we show that replacing
-a kernel by another one may significantly impact the output.
+Methodology
++++++++++++
+
+We consider a simple model with a LayerNormalization followed by a MatMul.
+Each operator can be run with :epkg:`onnxruntime` or :epkg:`pytorch`.
+We compare the four combinations.
 
 The model
 +++++++++
 """
 
 import itertools
+import numpy as np
 import pandas
 import onnx
 import onnx.helper as oh
 import onnxruntime
 import torch
 from onnx_array_api.plotting.graphviz_helper import plot_dot
-from onnx_diagnostic.doc import rotate_align, save_fig
+from onnx_diagnostic.doc import rotate_align, save_fig, plot_histogram, title
 from onnx_diagnostic.ext_test_case import unit_test_going
 from onnx_diagnostic.helpers import max_diff, string_diff, string_type
 from onnx_diagnostic.helpers.onnx_helper import onnx_dtype_name, onnx_dtype_to_np_dtype
@@ -80,6 +84,8 @@ def make_feeds(last_dim: int):
 
 
 def cast_feeds(itype, provider, feeds):
+    ttype = onnx_dtype_to_torch_dtype(itype)
+    np_dtype = onnx_dtype_to_np_dtype(itype)
     np_feeds = {k: v.detach().numpy() for k, v in feeds.items()}
     if provider == "CUDA":
         if not torch.cuda.is_available():
@@ -102,8 +108,6 @@ def cast_feeds(itype, provider, feeds):
 baseline = {}
 
 for provider, itype in itertools.product(["CPU", "CUDA"], [TFLOAT, TFLOAT16]):
-    ttype = onnx_dtype_to_torch_dtype(itype)
-    np_dtype = onnx_dtype_to_np_dtype(itype)
     tch_feeds, ort_feeds = cast_feeds(itype, provider, feeds)
     if tch_feeds is None:
         continue
@@ -156,6 +160,22 @@ def cast_feeds(itype, provider, feeds):
 # Let's see which operator is responsible for them,
 # *LayerNormalization* or *MatMul*.
 
+# %%
+# Distribution of the results
+# +++++++++++++++++++++++++++
+
+tensor = baseline[TFLOAT16, "CPU", "ort"][0].ravel().astype(np.float32)
+print(pandas.DataFrame({"expected": tensor}).describe())
+
+# %%
+# Histogram.
+
+save_fig(
+    title(plot_histogram(tensor), "Distribution of the computed results"),
+    "plot_layer_norm_discrepancies_hist.png",
+)
+
+
 # %%
 # The discrepancies come from?
 # ++++++++++++++++++++++++++++
@@ -165,19 +185,18 @@ def cast_feeds(itype, provider, feeds):
 data = []
 
 for mod, provider, itype in itertools.product(
-    ["ORT-TORCH", "TORCH-ORT"], ["CPU", "CUDA"], [TFLOAT, TFLOAT16]
+    ["ORT-ORT", "ORT-TORCH", "TORCH-ORT", "TORCH-TORCH"], ["CPU", "CUDA"], [TFLOAT, TFLOAT16]
 ):
     ttype = onnx_dtype_to_torch_dtype(itype)
     np_dtype = onnx_dtype_to_np_dtype(itype)
     tch_feeds, _ = cast_feeds(itype, provider, feeds)
     if tch_feeds is None:
         continue
 
+    ker1, ker2 = mod.split("-")
     custom_kernels = (
-        {("", "LayerNormalization"): LayerNormalizationOrt}
-        if mod == "ORT-TORCH"
-        else {("", "MatMul"): MatMulOrt}
-    )
+        {("", "LayerNormalization"): LayerNormalizationOrt} if ker1 == "ORT" else {}
+    ) | ({("", "MatMul"): MatMulOrt} if ker2 == "ORT" else {})
 
     model = get_model(itype)
     print()
@@ -206,7 +225,7 @@ def cast_feeds(itype, provider, feeds):
 )
 
 # %%
-df = pandas.DataFrame(data).set_index(["model", "provider", "dtype"])
+df = pandas.DataFrame(data).set_index(["dtype", "provider", "model"])
 df = df.sort_index()
 print(df)
 
@@ -216,8 +235,17 @@ def cast_feeds(itype, provider, feeds):
 save_fig(
     rotate_align(
         df[["diff_ort", "diff_torch"]].plot.bar(
-            title="ORT/Torch or Torch/ORT for LayerNorm(X) @ W + B"
+            title="ORT/Torch or Torch/ORT for LayerNorm(X) @ W + B",
+            figsize=(10, 4),
         )
     ),
     "plot_layer_norm_discrepancies_2.png",
 )
+
+# %%
+# Conclusion
+# ++++++++++
+#
+# :epkg:`torch` seems able to reproduce the same results when the same
+# computation is run multiple times. :epkg:`onnxruntime` only achieves that on
+# CUDA. With float16 and CUDA, LayerNormalization seems to introduce some discrepancies.
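
Note on the kernel-selection rewrite in this diff: the two conditional dicts are merged with the dict-union operator `|` (Python 3.9+), so each half of the `mod` name independently decides whether an onnxruntime kernel replaces the default torch one. A minimal standalone sketch of that logic, with placeholder classes standing in for the real kernels:

# Placeholder kernel classes; the real ones are LayerNormalizationOrt
# and MatMulOrt from onnx_diagnostic.helpers.doc_helper.
class LayerNormalizationOrt: ...
class MatMulOrt: ...

for mod in ["ORT-ORT", "ORT-TORCH", "TORCH-ORT", "TORCH-TORCH"]:
    ker1, ker2 = mod.split("-")
    # An empty dict on either side leaves the default torch kernel
    # in place for that operator; TORCH-TORCH yields an empty mapping.
    custom_kernels = (
        {("", "LayerNormalization"): LayerNormalizationOrt} if ker1 == "ORT" else {}
    ) | ({("", "MatMul"): MatMulOrt} if ker2 == "ORT" else {})
    print(mod, "->", [k[1] for k in custom_kernels])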

_doc/technical/plot_parallelized_reduction.py

Lines changed: 6 additions & 0 deletions
@@ -23,6 +23,12 @@
 
 With :math:`\\mathbb{E}X = mean(X)`,
 :math:`\\mathbb{V}X = mean\\left(\\left(X - mean(X)\\right)^2\\right)`.
+
+Methodology
++++++++++++
+
+**Permutation should not change the average.**
+
 We draw 128 random permutations of X. The average or mean should not change.
 And the normalized vector should have the same values. In the first case, we compute
 the difference between the highest and the lowest values obtained for the average.
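
A quick way to see why this methodology exposes the effect: permuting X cannot change its mean mathematically, but it changes the floating-point summation order. A minimal numpy sketch (names and sizes are illustrative, not the script's):

import numpy as np

# Same values, different summation order: the float32 mean is not
# bit-identical across permutations.
rng = np.random.default_rng(0)
x = rng.standard_normal(2**20).astype(np.float32)

means = [float(x[rng.permutation(x.size)].mean()) for _ in range(128)]
print("max - min over 128 permutations:", max(means) - min(means))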

k.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

onnx_diagnostic/doc.py

Lines changed: 28 additions & 0 deletions
@@ -1,3 +1,7 @@
+from typing import Optional
+import numpy as np
+
+
 def reset_torch_transformers(gallery_conf, fname):
     "Resets torch dynamo for :epkg:`sphinx-gallery`."
     import matplotlib.pyplot as plt
@@ -48,3 +52,27 @@ def save_fig(ax, name: str):
     fig = ax.get_figure()
     fig.savefig(name)
     return ax
+
+
+def title(ax: "plt.axes", title: str) -> "plt.axes": # noqa: F821
+    "Adds a title to axes and returns them."
+    ax.set_title(title)
+    return ax
+
+
+def plot_histogram(
+    tensor: np.ndarray,
+    ax: Optional["plt.axes"] = None, # noqa: F821
+    bins: int = 30,
+    color: str = "orange",
+    alpha: float = 0.7,
+) -> "plt.axes": # noqa: F821
+    "Plots the histogram of a tensor's values, with a log scale on the y-axis."
+    if ax is None:
+        import matplotlib.pyplot as plt
+
+        ax = plt.gca()
+    ax.cla()
+    ax.hist(tensor, bins=bins, color=color, alpha=alpha)
+    ax.set_yscale("log")
+    return ax
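
The two new helpers are written to chain with `save_fig`, since each returns the axes it drew on. A short usage sketch (the data and file name are illustrative):

import numpy as np
from onnx_diagnostic.doc import plot_histogram, save_fig, title

# plot_histogram draws on the current axes and returns them, title
# labels them, and save_fig writes the figure to disk.
values = np.random.default_rng(0).standard_normal(10_000).astype(np.float32)
save_fig(title(plot_histogram(values), "Distribution"), "histogram.png")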

onnx_diagnostic/helpers/doc_helper.py

Lines changed: 24 additions & 4 deletions
@@ -1,3 +1,4 @@
+import os
 from typing import Dict, Optional, Tuple
 import onnx
 import onnx.helper as oh
@@ -6,14 +7,25 @@
 from .torch_helper import onnx_dtype_to_torch_dtype, torch_dtype_to_onnx_dtype
 from .ort_session import InferenceSessionForTorch
 
+_SAVED = []
+_SAVE_OPTIMIZED_MODEL_ = int(os.environ.get("DUMP_ONNX", "0"))
+
+
+def _get_model_name(op_name: str, provider: str) -> Optional[str]:
+    if _SAVE_OPTIMIZED_MODEL_:
+        name = f"dump_doc_{op_name}_{provider}_{len(_SAVED)}.onnx"
+        _SAVED.append(name)
+        return name
+    return None
+
 
 class LayerNormalizationOrt(OpRunKernel):
     "LayerNormalization with onnxruntime"
 
     @classmethod
     def device_dependent(cls) -> bool:
         "Needs device."
-        return False
+        return True
 
     def __init__(
         self,
@@ -70,7 +82,11 @@ def _make_model(self, itype: int, rank: int, has_bias: bool) -> onnx.ModelProto:
         )
         provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
         self._provider = provider
-        return InferenceSessionForTorch(layer_model, providers=[provider])
+        return InferenceSessionForTorch(
+            layer_model,
+            optimized_model_filepath=_get_model_name("layer_norm", provider),
+            providers=[provider],
+        )
 
     def run(self, x, scale, bias=None):
         itype = torch_dtype_to_onnx_dtype(x.dtype)
@@ -94,7 +110,7 @@ class MatMulOrt(OpRunKernel):
     @classmethod
     def device_dependent(cls) -> bool:
         "Needs device."
-        return False
+        return True
 
     def __init__(
         self,
@@ -127,7 +143,11 @@ def _make_model(self, itype: int, ranka: int, rankb: int) -> onnx.ModelProto:
         )
         provider = "CPUExecutionProvider" if self.is_cpu else "CUDAExecutionProvider"
         self._provider = provider
-        return InferenceSessionForTorch(model, providers=[provider])
+        return InferenceSessionForTorch(
+            model,
+            optimized_model_filepath=_get_model_name("matmul", provider),
+            providers=[provider],
+        )
 
     def run(self, a, b):
         itype = torch_dtype_to_onnx_dtype(a.dtype)
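
The new `DUMP_ONNX` switch is read once at module level, so it must be set before `doc_helper` is imported. A sketch of how one would enable the dump (the resulting file name follows `_get_model_name`):

import os

# _SAVE_OPTIMIZED_MODEL_ is evaluated when doc_helper is imported,
# so the variable must be set before the import below.
os.environ["DUMP_ONNX"] = "1"

from onnx_diagnostic.helpers.doc_helper import LayerNormalizationOrt, MatMulOrt

# Every InferenceSessionForTorch built by these kernels now also writes
# its optimized model, e.g. dump_doc_layer_norm_CPUExecutionProvider_0.onnx.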
