
Commit e2f9930

Merge pull request #4 from sdpython/bench
Adds a benchmark to the documentation (TreeEnsemble)
2 parents e2f8c14 + d669884

File tree

6 files changed: +331 -6 lines changed


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -3,11 +3,13 @@
 *.dylib
 *.so
 coverage.html/*
+_cache/*
 .coverage
 dist/*
 build/*
 .eggs/*
 *egg-info/*
+_doc/examples/_cache/*
 _doc/auto_examples/*
 _doc/examples/plot_*.png
 _doc/_static/require.js

_doc/examples/plot_benchmark_rf.py

Lines changed: 306 additions & 0 deletions
"""
.. _l-example-benchmark-tree-implementation:

Benchmark of TreeEnsemble implementation
========================================

The following example compares the inference time between
:epkg:`onnxruntime` and :class:`sklearn.ensemble.RandomForestRegressor`
for different numbers of estimators, max depths, and parallelization
settings, with a fixed number of rows and features.

Import and registration of necessary converters
++++++++++++++++++++++++++++++++++++++++++++++++
"""
import pickle
import os
import time
from itertools import product

import matplotlib.pyplot as plt
import numpy
import pandas
from lightgbm import LGBMRegressor
from onnxmltools.convert.lightgbm.operator_converters.LightGbm import convert_lightgbm
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxruntime import InferenceSession, SessionOptions
from psutil import cpu_count
from pyquickhelper.loghelper import run_cmd
from skl2onnx import to_onnx, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_regressor_output_shapes
from sklearn import set_config
from sklearn.ensemble import RandomForestRegressor
from tqdm import tqdm
from xgboost import XGBRegressor

def skl2onnx_convert_lightgbm(scope, operator, container):
    # forwards the 'split' option, if any, to the onnxmltools converter
    options = scope.get_options(operator.raw_operator)
    if "split" in options:
        operator.split = options["split"]
    else:
        operator.split = None
    convert_lightgbm(scope, operator, container)

update_registered_converter(
    LGBMRegressor,
    "LightGbmLGBMRegressor",
    calculate_linear_regressor_output_shapes,
    skl2onnx_convert_lightgbm,
    options={"split": None},
)
update_registered_converter(
    XGBRegressor,
    "XGBoostXGBRegressor",
    calculate_linear_regressor_output_shapes,
    convert_xgboost,
)

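###############################################
# A quick sanity check that the registration works. This is only a
# sketch added for illustration (the tiny model and the random data
# are not part of the benchmark): it converts a small LGBMRegressor
# with the freshly registered converter.

mini_X = numpy.random.randn(100, 4).astype(numpy.float32)
mini_y = numpy.random.randn(100).astype(numpy.float32)
mini_model = LGBMRegressor(n_estimators=2, max_depth=2).fit(mini_X, mini_y)
mini_onx = to_onnx(mini_model, mini_X[:1])
print(f"LGBMRegressor converted, {len(mini_onx.SerializeToString())} bytes")
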
# The following instruction reduces the time spent by scikit-learn
# to validate the data.
set_config(assume_finite=True)

##########################################
# Machine details
# +++++++++++++++


print(f"Number of cores: {cpu_count()}")

###############################################
# But this information is usually not enough.
# Let's extract the cache information.

try:
    out, err = run_cmd("lscpu")
    print(out)
except Exception as e:
    print(f"lscpu not available: {e}")

###############################################
# Or with the following command.
out, err = run_cmd("cat /proc/cpuinfo")
print(out)

###############################################
# Function to measure inference time
# ++++++++++++++++++++++++++++++++++


def measure_inference(fct, X, repeat, max_time=5, quantile=1):
    """
    Runs *repeat* times the same function on data *X*.

    :param fct: function to run
    :param X: data
    :param repeat: number of times to run
    :param max_time: maximum time to use to measure the inference
    :param quantile: number of extreme measures to drop on each end
        before computing the average
    :return: number of runs, sum of the times, average, median
    """
    times = []
    for n in range(repeat):
        perf = time.perf_counter()
        fct(X)
        delta = time.perf_counter() - perf
        times.append(delta)
        if len(times) < 3:
            continue
        if max_time is not None and sum(times) >= max_time:
            break
    times.sort()
    # do not drop extreme values if too few measures were collected
    quantile = 0 if (len(times) - quantile * 2) < 3 else quantile
    if quantile == 0:
        tt = times
    else:
        tt = times[quantile:-quantile]
    return (len(times), sum(times), sum(tt) / len(tt), times[len(times) // 2])

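###############################################
# A short usage sketch, not part of the benchmark itself:
# ``numpy.mean`` stands in for a model's predict function.

n_runs, total, avg, med = measure_inference(numpy.mean, numpy.zeros(1000), repeat=5)
print(f"runs={n_runs} total={total:.6f} avg={avg:.6f} med={med:.6f}")
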
###############################################
# Benchmark
# +++++++++
#
# The following script benchmarks the inference of the same
# random forest with scikit-learn and with onnxruntime, once the
# model is converted into ONNX, for the following configurations.

legend = "parallel-batch-4096-block"

small = cpu_count() < 12
if small:
    N = 1000
    n_features = 10
    n_jobs = [1, cpu_count() // 2, cpu_count()]
    n_ests = [10, 20, 30]
    depth = [4, 6, 8, 10]
    Regressor = RandomForestRegressor
else:
    N = 100000
    n_features = 50
    n_jobs = [cpu_count(), cpu_count() // 2, 1]
    n_ests = [100, 200, 400]
    depth = [6, 8, 10, 12, 14]
    Regressor = RandomForestRegressor

# avoid duplicates on machines with 1 or 2 cores
n_jobs = list(sorted(set(n_jobs), reverse=True))

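##############################################
# A small check, added for illustration: the size of the grid the
# loop below will visit (some configurations are skipped later).

print(f"{len(n_jobs) * len(depth) * len(n_ests)} configurations")
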
##############################################
# Benchmark parameters
repeat = 7  # repeat n times the same inference
quantile = 1  # exclude extreme times
max_time = 5  # maximum number of seconds to spend on one configuration

##############################################
# Data


X = numpy.random.randn(N, n_features).astype(numpy.float32)
noise = (numpy.random.randn(X.shape[0]) / (n_features // 5)).astype(numpy.float32)
y = X.mean(axis=1) + noise
n_train = min(N, N // 3)

data = []
couples = list(product(n_jobs, depth, n_ests))
bar = tqdm(couples)
cache_dir = "_cache"
if not os.path.exists(cache_dir):
    os.mkdir(cache_dir)

for n_j, max_depth, n_estimators in bar:
    if n_j == 1 and n_estimators > n_ests[0]:
        # skipping, single-thread inference with many trees takes too long
        continue

    # parallelization
    cache_name = os.path.join(
        cache_dir, f"rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.pkl"
    )
    if os.path.exists(cache_name):
        with open(cache_name, "rb") as f:
            rf = pickle.load(f)
    else:
        bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} train rf")
        if n_j == 1 and issubclass(Regressor, RandomForestRegressor):
            # train with all cores, then force single-threaded inference
            rf = Regressor(max_depth=max_depth, n_estimators=n_estimators, n_jobs=-1)
            rf.fit(X[:n_train], y[:n_train])
            rf.n_jobs = 1
        else:
            rf = Regressor(max_depth=max_depth, n_estimators=n_estimators, n_jobs=n_j)
            rf.fit(X[:n_train], y[:n_train])
        with open(cache_name, "wb") as f:
            pickle.dump(rf, f)

    bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} ISession")
    so = SessionOptions()
    so.intra_op_num_threads = n_j
    cache_name = os.path.join(
        cache_dir, f"rf-J-{n_j}-E-{n_estimators}-D-{max_depth}.onnx"
    )
    if os.path.exists(cache_name):
        sess = InferenceSession(cache_name, so)
    else:
        bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} cvt onnx")
        onx = to_onnx(rf, X[:1])
        with open(cache_name, "wb") as f:
            f.write(onx.SerializeToString())
        sess = InferenceSession(cache_name, so)
    onx_size = os.stat(cache_name).st_size

    # run once to avoid counting the first run
    bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predict1")
    rf.predict(X)
    sess.run(None, {"X": X})

    # fixed data
    obs = dict(
        n_jobs=n_j,
        max_depth=max_depth,
        n_estimators=n_estimators,
        repeat=repeat,
        max_time=max_time,
        name=rf.__class__.__name__,
        n_rows=X.shape[0],
        n_features=X.shape[1],
        onnx_size=onx_size,
    )

    # baseline: scikit-learn
    bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictB")
    r, t, mean, med = measure_inference(rf.predict, X, repeat=repeat, max_time=max_time)
    o1 = obs.copy()
    o1.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="base"))
    data.append(o1)

    # onnxruntime
    bar.set_description(f"J={n_j} E={n_estimators} D={max_depth} predictO")
    r, t, mean, med = measure_inference(
        lambda x: sess.run(None, {"X": x}), X, repeat=repeat, max_time=max_time
    )
    o2 = obs.copy()
    o2.update(dict(avg=mean, med=med, n_runs=r, ttime=t, name="ort_"))
    data.append(o2)

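###################################################
# A quick aggregate, added as an illustration only: every iteration
# appends a baseline record then an onnxruntime record, so the pairs
# below share the same configuration.

speedups = [b["avg"] / o["avg"] for b, o in zip(data[::2], data[1::2])]
if speedups:
    print(f"average speedup over {len(speedups)} configurations: "
          f"{sum(speedups) / len(speedups):.2f}")
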
###################################################
# Saving data
# +++++++++++

name = os.path.join(cache_dir, "plot_benchmark_rf")
print(f"Saving data into {name!r}")

df = pandas.DataFrame(data)
df2 = df.copy()
df2["legend"] = legend
df2.to_csv(f"{name}-{legend}.csv", index=False)

#######################################################
# Printing the data
print(df)

#####################################################
# Plot
# ++++

n_rows = len(n_jobs)
n_cols = len(n_ests)


fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows))
fig.suptitle(f"{rf.__class__.__name__}")

for n_j, n_estimators in tqdm(product(n_jobs, n_ests)):
    i = n_jobs.index(n_j)
    j = n_ests.index(n_estimators)
    ax = axes[i, j]

    subdf = df[(df.n_estimators == n_estimators) & (df.n_jobs == n_j)]
    if subdf.shape[0] == 0:
        continue
    piv = subdf.pivot(index="max_depth", columns="name", values=["avg", "med"])
    piv.plot(ax=ax, title=f"jobs={n_j}, trees={n_estimators}")
    ax.set_ylabel(f"n_jobs={n_j}", fontsize="small")
    ax.set_xlabel("max_depth", fontsize="small")

    # ratio
    ax2 = ax.twinx()
    piv1 = subdf.pivot(index="max_depth", columns="name", values="avg")
    piv1["speedup"] = piv1.base / piv1.ort_
    ax2.plot(piv1.index, piv1.speedup, "b--", label="speedup avg")

    piv1 = subdf.pivot(index="max_depth", columns="name", values="med")
    piv1["speedup"] = piv1.base / piv1.ort_
    ax2.plot(piv1.index, piv1.speedup, "y--", label="speedup med")

    # horizontal line at 1 (no speedup), plotted before the legend
    # so that it appears in it
    ax2.plot(piv1.index, [1 for _ in piv1.index], "k--", label="no speedup")
    ax2.legend(fontsize="x-small")

for i in range(axes.shape[0]):
    for j in range(axes.shape[1]):
        axes[i, j].legend(fontsize="small")

fig.tight_layout()
fig.savefig(f"{name}-{legend}.png")
# plt.show()

_doc/tutorial/benchmarks.rst

Lines changed: 12 additions & 0 deletions
.. _l-benchmarks:

==========
Benchmarks
==========

A list of benchmarks used to improve the performance of
ONNX components (onnx, onnxruntime, onnx-array-api, ...).

.. toctree::

    ../auto_examples/plot_benchmark_rf

_doc/tutorial/index.rst

Lines changed: 1 addition & 1 deletion
@@ -7,4 +7,4 @@ Tutorial
     :maxdepth: 1
 
     overview
-
+    benchmarks

azure-pipelines.yml

Lines changed: 7 additions & 5 deletions
@@ -27,6 +27,8 @@ jobs:
     displayName: 'Install Requirements'
   - script: pip install -r requirements-dev.txt
     displayName: 'Install Requirements dev'
+  - script: pip install onnxmltools --no-deps
+    displayName: 'Install onnxmltools'
   - script: |
       ruff .
     displayName: 'Ruff'
@@ -52,7 +54,7 @@ jobs:
     vmImage: 'windows-latest'
   strategy:
     matrix:
-      Python310-Linux:
+      Python310-Windows:
         python.version: '3.10'
   maxParallel: 3
@@ -67,6 +69,8 @@ jobs:
     displayName: 'Install Requirements'
   - script: pip install -r requirements-dev.txt
     displayName: 'Install Requirements dev'
+  - script: pip install onnxmltools --no-deps
+    displayName: 'Install onnxmltools'
   - script: |
       python -m pytest -v
     displayName: 'Runs Unit Tests'
@@ -107,10 +111,6 @@ jobs:
     displayName: 'Install omp'
   - script: brew install p7zip
     displayName: 'Install p7zip'
-  # - script: brew install pandoc
-  #   displayName: 'Install Pandoc'
-  # - script: brew install graphviz
-  #   displayName: 'Install Graphviz'
   - script: python -m pip install --upgrade pip setuptools wheel
     displayName: 'Install tools'
   - script: brew install pybind11
@@ -119,6 +119,8 @@ jobs:
     displayName: 'Install Requirements'
   - script: pip install -r requirements-dev.txt
     displayName: 'Install Requirements dev'
+  - script: pip install onnxmltools --no-deps
+    displayName: 'Install onnxmltools'
   - script: |
       python -m pytest -v -v
     displayName: 'Runs Unit Tests'
