Skip to content

Commit 34ea69e

Browse files
committed
Add numba-mlir benchmarks
1 parent 61dcea6 commit 34ea69e

File tree

17 files changed

+396
-2
lines changed

17 files changed

+396
-2
lines changed

.github/workflows/build_and_run.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
conda install numpy numba cython cmake ninja scikit-build pandas
4848
conda install scipy scikit-learn pybind11 tomli
4949
conda install -c pkgs/main libgcc-ng">=11.2.0" libstdcxx-ng">=11.2.0" libgomp">=11.2.0"
50-
conda install -c dppy/label/dev -c intel -c main dpctl numba-dpex dpnp
50+
conda install -c dppy/label/dev -c intel -c main dpctl numba-dpex dpnp numba-mlir
5151
pip install alembic
5252
conda list
5353
@@ -61,6 +61,8 @@ jobs:
6161
run: |
6262
export OCL_ICD_FILENAMES=libintelocl.so
6363
64+
export NUMBA_MLIR_GPU_RUNTIME=sycl
65+
6466
# Turn off numba-dpex autofall back
6567
export NUMBA_DPEX_FALLBACK_ON_CPU=0
6668
# Make sure numba-dpex is using native atomics in github CI

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,26 @@ SPDX-License-Identifier: Apache-2.0
7474
```bash
7575
$ python -c "import dpbench; dpbench.run_benchmark(\"black_scholes\", \"<absolute path to json file>\")"
7676
```
77+
78+
## Running numba-mlir benchmarks
79+
1. Setting up conda environment and installing dependencies:
80+
81+
Use the same instructions as for the usual dpbench setup, but do not install numba-dpex.
82+
83+
Install the latest `numba-mlir` dev package:
84+
85+
$ conda install numba-mlir -c dppy/label/dev -c intel
86+
87+
2. Build and run DPBench
88+
89+
Use the same commands to set up and run dpbench:
90+
91+
$ python -c "import dpbench; dpbench.run_benchmark(\"black_scholes\")" 2> /dev/null
92+
93+
or, to run a specific implementation:
94+
95+
$ python -c "import dpbench; dpbench.run_benchmark(\"black_scholes\",implementation_postfix=\"numba_mlir_k\")" 2> /dev/null
96+
97+
to run all `numba-mlir` benchmarks:
98+
99+
$ python -c "import dpbench; dpbench.run_benchmarks(implementations=[\"numba_mlir_n\",\"numba_mlir_p\",\"numba_mlir_k\"])" 2> /dev/null
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from math import erf, exp, log, sqrt
6+
7+
import numba_mlir.kernel as nb
8+
9+
10+
@nb.kernel
def _black_scholes_kernel(nopt, price, strike, t, rate, volatility, call, put):
    """Compute one call/put pair per work-item.

    The work-item with global id ``gid`` reads element ``gid`` of ``price``,
    ``strike`` and ``t`` and writes element ``gid`` of ``call`` and ``put``.
    ``rate`` and ``volatility`` are used as scalars (they are never indexed).
    ``nopt`` is accepted but not read inside the kernel body.
    """
    gid = nb.get_global_id(0)

    # Terms that do not depend on the element being processed.
    neg_rate = -rate
    two_sigma_sq = volatility * volatility * 2

    price_i = price[gid]
    strike_i = strike[gid]
    t_i = t[gid]

    log_ratio = log(price_i / strike_i)
    rate_term = t_i * neg_rate

    scaled_var = t_i * two_sigma_sq
    quarter_var = 0.25 * scaled_var
    inv_root = 1.0 / sqrt(scaled_var)

    w_plus = (log_ratio - rate_term + quarter_var) * inv_root
    w_minus = (log_ratio - rate_term - quarter_var) * inv_root

    cdf_plus = 0.5 + 0.5 * erf(w_plus)
    cdf_minus = 0.5 + 0.5 * erf(w_minus)

    discounted_strike = exp(rate_term) * strike_i

    # The call value is kept in a local so the put can be derived from it.
    call_value = price_i * cdf_plus - discounted_strike * cdf_minus
    call[gid] = call_value
    put[gid] = call_value - price_i + discounted_strike
39+
40+
41+
def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
    """Launch ``_black_scholes_kernel`` with a global size of ``nopt``.

    The local size is ``nb.DEFAULT_LOCAL_SIZE``. Results are written into
    ``call`` and ``put`` by the kernel; nothing is returned.
    """
    configured_kernel = _black_scholes_kernel[nopt, nb.DEFAULT_LOCAL_SIZE]
    configured_kernel(nopt, price, strike, t, rate, volatility, call, put)
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from math import erf
6+
7+
import numba_mlir as nb
8+
from numpy import exp, log, sqrt
9+
10+
11+
@nb.vectorize(nopython=True)
def _nberf(x):
    """Return ``math.erf(x)``; wrapped by ``nb.vectorize`` so it can be applied to arrays."""
    result = erf(x)
    return result
14+
15+
16+
@nb.njit(parallel=True, fastmath=True)
def _black_scholes(price, strike, t, rate, volatility, call, put):
    """Evaluate call and put values with whole-array expressions.

    ``price``, ``strike`` and ``t`` are combined with ``rate`` and
    ``volatility`` using NumPy ``log``/``sqrt``/``exp`` and the vectorized
    ``_nberf``. The results are stored into ``call`` and ``put`` through
    slice assignment; nothing is returned.
    """
    neg_rate = -rate
    two_sigma_sq = volatility * volatility * 2

    log_ratio = log(price / strike)
    rate_term = t * neg_rate

    scaled_var = t * two_sigma_sq
    quarter_var = 0.25 * scaled_var
    inv_root = 1.0 / sqrt(scaled_var)

    w_plus = (log_ratio - rate_term + quarter_var) * inv_root
    w_minus = (log_ratio - rate_term - quarter_var) * inv_root

    cdf_plus = 0.5 + 0.5 * _nberf(w_plus)
    cdf_minus = 0.5 + 0.5 * _nberf(w_minus)

    discounted_strike = exp(rate_term) * strike

    call_value = price * cdf_plus - discounted_strike * cdf_minus
    # The temporary `call_value` is necessary for faster `put` computation.
    call[:] = call_value
    put[:] = call_value - price + discounted_strike
43+
44+
45+
def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
    """Run the array-style ``_black_scholes``; ``nopt`` is accepted but not forwarded.

    Results are written into ``call`` and ``put``; nothing is returned.
    """
    _black_scholes(price, strike, t, rate, volatility, call, put)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
from math import erf, exp, log, sqrt
6+
7+
import numba
8+
import numba_mlir as nb
9+
10+
11+
# blackscholes implemented as a parallel loop using numba.prange
12+
@nb.njit(parallel=True, fastmath=True)
def _black_scholes(nopt, price, strike, t, rate, volatility, call, put):
    """Evaluate call and put values in a ``numba.prange`` loop over ``nopt`` items.

    Iteration ``idx`` reads element ``idx`` of ``price``, ``strike`` and ``t``
    and writes element ``idx`` of ``call`` and ``put``. ``rate`` and
    ``volatility`` are used as scalars (they are never indexed).
    """
    # Loop-invariant terms, computed once before the parallel loop.
    neg_rate = -rate
    two_sigma_sq = volatility * volatility * 2

    for idx in numba.prange(nopt):
        price_i = price[idx]
        strike_i = strike[idx]
        t_i = t[idx]

        log_ratio = log(price_i / strike_i)
        rate_term = t_i * neg_rate

        scaled_var = t_i * two_sigma_sq
        quarter_var = 0.25 * scaled_var
        inv_root = 1.0 / sqrt(scaled_var)

        w_plus = (log_ratio - rate_term + quarter_var) * inv_root
        w_minus = (log_ratio - rate_term - quarter_var) * inv_root

        cdf_plus = 0.5 + 0.5 * erf(w_plus)
        cdf_minus = 0.5 + 0.5 * erf(w_minus)

        discounted_strike = exp(rate_term) * strike_i

        call_value = price_i * cdf_plus - discounted_strike * cdf_minus
        call[idx] = call_value
        put[idx] = call_value - price_i + discounted_strike
40+
41+
42+
def black_scholes(nopt, price, strike, t, rate, volatility, call, put):
    """Run the ``prange``-based ``_black_scholes`` with all arguments forwarded unchanged.

    Results are written into ``call`` and ``put``; nothing is returned.
    """
    _black_scholes(nopt, price, strike, t, rate, volatility, call, put)
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import numba_mlir.kernel as nb
6+
import numpy as np
7+
8+
9+
@nb.kernel
def l2_norm_kernel(a, d):
    """Store in ``d[row]`` the square root of the sum of squares of ``a[row, :]``.

    ``row`` is the work-item's global id along dimension 0. ``d[row]`` is
    reset to zero before accumulation, so its previous contents are ignored.
    """
    row = nb.get_global_id(0)
    n_cols = a.shape[1]

    d[row] = 0.0
    for col in range(n_cols):
        d[row] += a[row, col] * a[row, col]
    d[row] = np.sqrt(d[row])
17+
18+
19+
def l2_norm(a, d):
    """Launch ``l2_norm_kernel`` with one work-item per row of ``a``.

    The global size is ``a.shape[0]`` and the local size is
    ``nb.DEFAULT_LOCAL_SIZE``. Results are written into ``d``.
    """
    configured_kernel = l2_norm_kernel[a.shape[0], nb.DEFAULT_LOCAL_SIZE]
    configured_kernel(a, d)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import numba_mlir as nb
6+
import numpy as np
7+
8+
9+
@nb.njit(parallel=True, fastmath=True)
def _l2_norm(a, d):
    """Write the per-row L2 norm of ``a`` into ``d`` using array expressions.

    The squares are summed along axis 1 and the square root of each row
    total is assigned into ``d`` through a slice.
    """
    squared = np.square(a)
    row_totals = squared.sum(axis=1)
    d[:] = np.sqrt(row_totals)
14+
15+
16+
def l2_norm(a, d):
    """Run the array-style ``_l2_norm``; results are written into ``d``."""
    _l2_norm(a, d)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import numba
6+
import numba_mlir as nb
7+
import numpy as np
8+
9+
10+
@nb.njit(parallel=True, fastmath=True)
def _l2_norm(a, d):
    """Write the per-row L2 norm of ``a`` into ``d`` using a ``numba.prange`` loop.

    Iteration ``i`` accumulates the squares of ``a[i, :]`` in ``d[i]`` and
    then replaces the total with its square root. Nothing is returned.
    """
    for i in numba.prange(a.shape[0]):
        # Reset the output slot first so the result does not depend on what
        # `d` held before the call (e.g. when the same buffer is reused
        # across repeated runs).
        d[i] = 0.0
        for k in range(a.shape[1]):
            d[i] += np.square(a[i, k])
        d[i] = np.sqrt(d[i])
16+
17+
18+
def l2_norm(a, d):
    """Run the ``prange``-based ``_l2_norm``; results are written into ``d``."""
    _l2_norm(a, d)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import numba_mlir.kernel as nb
6+
import numpy as np
7+
8+
9+
@nb.kernel
def _pairwise_distance_kernel(X1, X2, D):
    """Fill row ``row`` of ``D`` with distances from ``X1[row]`` to every row of ``X2``.

    ``row`` is the work-item's global id along dimension 0. For each row
    ``other`` of ``X2`` the squared differences over the ``X1.shape[1]``
    columns are summed, and the square root is stored in ``D[row, other]``.
    """
    row = nb.get_global_id(0)

    n_targets = X2.shape[0]
    n_features = X1.shape[1]
    for other in range(n_targets):
        sq_dist = 0.0
        for feat in range(n_features):
            diff = X1[row, feat] - X2[other, feat]
            sq_dist += diff * diff
        D[row, other] = np.sqrt(sq_dist)
21+
22+
23+
def pairwise_distance(X1, X2, D):
    """Launch ``_pairwise_distance_kernel`` with one work-item per row of ``X1``.

    The global size is ``X1.shape[0]`` and the local size is
    ``nb.DEFAULT_LOCAL_SIZE``. Results are written into ``D``.
    """
    configured_kernel = _pairwise_distance_kernel[
        X1.shape[0], nb.DEFAULT_LOCAL_SIZE
    ]
    configured_kernel(X1, X2, D)
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
import numba_mlir as nb
6+
import numpy as np
7+
8+
9+
@nb.njit(parallel=True, fastmath=True)
def _pairwise_distance(X1, X2, D):
    """Compute pairwise distances between rows of ``X1`` and ``X2`` into ``D``.

    ``D`` is built in place as ``-2 * X1 @ X2.T`` plus the per-row squared
    norms of ``X1`` (as a column) and of ``X2`` (as a row); the square root
    is then taken in place. Nothing is returned.
    """
    sq_norms_1 = np.sum(np.square(X1), axis=1)
    sq_norms_2 = np.sum(np.square(X2), axis=1)

    # Cross term, written directly into the output buffer.
    np.dot(X1, X2.T, D)
    # TODO: use `D *= -2` once in-place ops work as intended; until then
    # assign through a slice.
    D[:] = D * -2

    # Column view of the X1 norms so they are added along rows of D.
    sq_norms_1_col = sq_norms_1.reshape(sq_norms_1.size, 1)
    np.add(D, sq_norms_1_col, D)
    np.add(D, sq_norms_2, D)
    np.sqrt(D, D)
20+
21+
22+
def pairwise_distance(X1, X2, D):
    """Run the array-style ``_pairwise_distance``; results are written into ``D``."""
    _pairwise_distance(X1, X2, D)

0 commit comments

Comments
 (0)