Commit 374d153

Merge pull request #248 from IntelPython/feature/add_numba_dpex_p_implementations
Add numba_dpex_p implementation for poly/np-bench
2 parents 8866480 + 5f39673 commit 374d153

File tree: 19 files changed, +723 -0 lines changed

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
# SPDX-FileCopyrightText: 2014 Jérôme Kieffer et al.
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

"""
Jérôme Kieffer and Giannis Ashiotis. Pyfai: a python library for
high performance azimuthal integration on gpu, 2014. In Proceedings of the
7th European Conference on Python in Science (EuroSciPy 2014).
"""

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def get_bin_edges_prange(a, bins):
    bin_edges = np.zeros((bins + 1,), dtype=np.float64)
    a_min = a.min()
    a_max = a.max()
    delta = (a_max - a_min) / bins
    for i in nb.prange(bin_edges.shape[0]):
        bin_edges[i] = a_min + i * delta

    bin_edges[-1] = a_max  # Avoid roundoff error on last point
    return bin_edges


@dpjit
def compute_bin(x, bin_edges):
    # assuming uniform bins for now
    n = bin_edges.shape[0] - 1
    a_min = bin_edges[0]
    a_max = bin_edges[-1]

    # special case to mirror NumPy behavior for last bin
    if x == a_max:
        return n - 1  # a_max always in last bin

    return int(n * (x - a_min) / (a_max - a_min))


@dpjit
def histogram_prange(a, bins, weights):
    hist = np.zeros((bins,), dtype=a.dtype)
    bin_edges = get_bin_edges_prange(a, bins)

    for i in nb.prange(a.shape[0]):
        bin = compute_bin(a[i], bin_edges)
        hist[bin] += weights[i]

    return hist, bin_edges


@dpjit
def azimint_hist(data, radius, npt):
    histu = np.histogram(radius, npt)[0]
    # histw = np.histogram(radius, npt, weights=data)[0]
    histw = histogram_prange(radius, npt, weights=data)[0]
    return histw / histu
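
A minimal smoke-test sketch for this file (not part of the PR; the sizes and random inputs below are illustrative assumptions, and a default SYCL device is assumed to be available):

import dpnp as np

N, npt = 40000, 100
data = np.random.rand(N)    # per-pixel intensities
radius = np.random.rand(N)  # radial coordinate of each pixel
res = azimint_hist(data, radius, npt)
print(res.shape)  # (npt,): mean intensity per radial bin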
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
# SPDX-FileCopyrightText: 2014 Jérôme Kieffer et al.
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

"""
Jérôme Kieffer and Giannis Ashiotis. Pyfai: a python library for
high performance azimuthal integration on gpu, 2014. In Proceedings of the
7th European Conference on Python in Science (EuroSciPy 2014).
"""

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def azimint_naive(data, radius, npt):
    rmax = radius.max()
    res = np.zeros(npt, dtype=np.float64)
    for i in nb.prange(npt):
        r1 = rmax * i / npt
        r2 = rmax * (i + 1) / npt
        mask_r12 = np.logical_and((r1 <= radius), (radius < r2))
        values_r12 = data[mask_r12]
        res[i] = values_r12.mean()
    return res
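
This variant computes the per-bin mean directly from a boolean mask rather than via a weighted histogram; note its bins span [0, rmax] while the histogram version's span [radius.min(), radius.max()], so the two only coincide when radius.min() is 0. A hedged sketch with the same illustrative inputs as above:

import dpnp as np

N, npt = 40000, 100
data = np.random.rand(N)
radius = np.random.rand(N)
res = azimint_naive(data, radius, npt)  # shape (npt,)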
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def contour_integral(NR, NM, slab_per_bc, Ham, int_pts, Y):
    P0 = np.zeros((NR, NM), dtype=np.complex128)
    P1 = np.zeros((NR, NM), dtype=np.complex128)
    # for z in int_pts:
    for i in nb.prange(len(int_pts)):
        z = int_pts[i]
        Tz = np.zeros((NR, NR), dtype=np.complex128)
        for n in nb.prange(slab_per_bc + 1):
            zz = np.power(z, slab_per_bc / 2 - n)
            Tz += zz * Ham[n]
        if NR == NM:
            X = np.linalg.inv(Tz)
        else:
            X = np.linalg.solve(Tz, Y)
        if abs(z) < 1.0:
            X = -X
        P0 += X
        P1 += z * X

    return P0, P1
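
A hedged driver sketch for the kernel above (all sizes, the random Hamiltonian blocks, and the unit-circle quadrature points are assumptions for illustration, not from the PR):

import dpnp as np

NR = NM = 64
slab_per_bc = 2
# Random complex Hamiltonian blocks, one per slab
Ham = np.random.rand(slab_per_bc + 1, NR, NR) + 1j * np.random.rand(slab_per_bc + 1, NR, NR)
# Integration points on the unit circle
int_pts = np.exp(2j * np.pi * np.arange(32) / 32)
Y = np.random.rand(NR, NM) + 1j * np.random.rand(NR, NM)
P0, P1 = contour_integral(NR, NM, slab_per_bc, Ham, int_pts, Y)  # both (NR, NM)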
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause


import dpnp as np
import numba as nb
from numba_dpex import dpjit


# Deep learning convolutional operator (stride = 1)
@dpjit
def conv2d(input, weights):
    K = weights.shape[0]  # Assuming square kernel
    N = input.shape[0]
    H_out = input.shape[1] - K + 1
    W_out = input.shape[2] - K + 1
    C_in = input.shape[3]
    C_out = weights.shape[3]
    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)

    # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
    for i in nb.prange(H_out):
        for j in nb.prange(W_out):
            # output[:, i, j, :] = np.sum(
            #     input[:, i:i + K, j:j + K, :, np.newaxis] *
            #     weights[np.newaxis, :, :, :],
            #     axis=(1, 2, 3),
            # )
            # Reshape supported only on contiguous arrays
            inp = input[:, i : i + K, j : j + K, :].copy()
            # Tuple of ints not supported in axis keyword
            output[:, i, j, :] = np.sum(
                np.sum(
                    np.sum(
                        np.reshape(inp, (N, K, K, C_in, 1))
                        * np.reshape(weights, (1, K, K, C_in, C_out)),
                        axis=1,
                    ),
                    axis=1,
                ),
                axis=1,
            )

    return output


@dpjit
def conv2d_bias(input, weights, bias):
    return conv2d(input, weights) + bias
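
The operator expects NHWC input layout with a K x K x C_in x C_out filter bank; a minimal smoke test under those assumptions (the concrete sizes are illustrative, not from the PR):

import dpnp as np

N, H, W, C_in, C_out, K = 4, 16, 16, 3, 8, 5
input = np.random.rand(N, H, W, C_in).astype(np.float32)
weights = np.random.rand(K, K, C_in, C_out).astype(np.float32)
bias = np.random.rand(C_out).astype(np.float32)
out = conv2d_bias(input, weights, bias)
print(out.shape)  # (4, 12, 12, 8): valid convolution, H_out = H - K + 1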
Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause


import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def relu(x):
    return np.maximum(x, 0)


# Deep learning convolutional operator (stride = 1)
@dpjit
def conv2d(input, weights):
    K = weights.shape[0]  # Assuming square kernel
    N = input.shape[0]
    H_out = input.shape[1] - K + 1
    W_out = input.shape[2] - K + 1
    C_in = input.shape[3]
    C_out = weights.shape[3]
    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)

    # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
    for i in nb.prange(H_out):
        for j in nb.prange(W_out):
            # output[:, i, j, :] = np.sum(
            #     input[:, i:i + K, j:j + K, :, np.newaxis] *
            #     weights[np.newaxis, :, :, :],
            #     axis=(1, 2, 3),
            # )
            # Reshape supported only on contiguous arrays
            inp = input[:, i : i + K, j : j + K, :].copy()
            # Tuple of ints not supported in axis keyword
            output[:, i, j, :] = np.sum(
                np.sum(
                    np.sum(
                        np.reshape(inp, (N, K, K, C_in, 1))
                        * np.reshape(weights, (1, K, K, C_in, C_out)),
                        axis=1,
                    ),
                    axis=1,
                ),
                axis=1,
            )

    return output


# 2x2 maxpool operator, as used in LeNet-5
@dpjit
def maxpool2d(x):
    # output = np.empty(
    #     [x.shape[0], x.shape[1] // 2, x.shape[2] // 2, x.shape[3]],
    #     dtype=x.dtype)
    output = np.empty(
        (x.shape[0], x.shape[1] // 2, x.shape[2] // 2, x.shape[3]),
        dtype=x.dtype,
    )
    for i in nb.prange(x.shape[1] // 2):
        for j in nb.prange(x.shape[2] // 2):
            # output[:, i, j, :] = np.max(x[:, 2 * i:2 * i + 2,
            #                               2 * j:2 * j + 2, :],
            #                             axis=(1, 2))
            for k in nb.prange(x.shape[0]):
                for l in nb.prange(x.shape[3]):  # noqa: E741 math variable
                    output[k, i, j, l] = np.max(
                        x[k, 2 * i : 2 * i + 2, 2 * j : 2 * j + 2, l]
                    )
    return output


# LeNet-5 Convolutional Neural Network (inference mode)
@dpjit
def lenet5(
    input,
    conv1,
    conv1bias,
    conv2,
    conv2bias,
    fc1w,
    fc1b,
    fc2w,
    fc2b,
    fc3w,
    fc3b,
    N,
    C_before_fc1,
):
    x = relu(conv2d(input, conv1) + conv1bias)
    x = maxpool2d(x)
    x = relu(conv2d(x, conv2) + conv2bias)
    x = maxpool2d(x)
    x = np.reshape(x, (N, C_before_fc1))
    x = relu(x @ fc1w + fc1b)
    x = relu(x @ fc2w + fc2b)
    return x @ fc3w + fc3b
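
A hedged end-to-end sketch with classic LeNet-5 sizes (28x28 single-channel NHWC input, 5x5 kernels, 6/16 feature maps, 120/84/10 fully connected units); these concrete shapes are assumptions for illustration, not from the PR:

import dpnp as np

N = 4
input = np.random.rand(N, 28, 28, 1).astype(np.float32)
conv1 = np.random.rand(5, 5, 1, 6).astype(np.float32)
conv1bias = np.random.rand(6).astype(np.float32)
conv2 = np.random.rand(5, 5, 6, 16).astype(np.float32)
conv2bias = np.random.rand(16).astype(np.float32)
# Spatial size: 28 -> conv 24 -> pool 12 -> conv 8 -> pool 4, with 16 channels
C_before_fc1 = 4 * 4 * 16
fc1w = np.random.rand(C_before_fc1, 120).astype(np.float32)
fc1b = np.random.rand(120).astype(np.float32)
fc2w = np.random.rand(120, 84).astype(np.float32)
fc2b = np.random.rand(84).astype(np.float32)
fc3w = np.random.rand(84, 10).astype(np.float32)
fc3b = np.random.rand(10).astype(np.float32)
out = lenet5(input, conv1, conv1bias, conv2, conv2bias,
             fc1w, fc1b, fc2w, fc2b, fc3w, fc3b, N, C_before_fc1)
print(out.shape)  # (4, 10): class scores per batch element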
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def relu(x):
    return np.maximum(x, 0)


# Numerically-stable version of softmax
@dpjit
def softmax(x):
    new_shape = (x.shape[0], 1)
    # tmp_max = np.max(x, axis=-1, keepdims=True)
    tmp_max = np.empty(new_shape, dtype=x.dtype)
    for i in nb.prange(x.shape[0]):
        tmp_max[i, 0] = np.max(x[i])
    tmp_out = np.exp(x - tmp_max)
    # tmp_sum = np.sum(tmp_out, axis=-1, keepdims=True)
    tmp_sum = np.reshape(np.sum(tmp_out, axis=-1), new_shape)
    return tmp_out / tmp_sum


# 3-layer MLP
@dpjit
def mlp(input, w1, b1, w2, b2, w3, b3):
    x = relu(input @ w1 + b1)
    x = relu(x @ w2 + b2)
    x = softmax(x @ w3 + b3)  # Softmax call can be omitted if necessary
    return x
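
A smoke-test sketch (the layer widths are illustrative assumptions); since the last layer is a softmax, each output row should sum to 1:

import dpnp as np

N, C_in, H1, H2, C_out = 8, 10, 32, 16, 4
input = np.random.rand(N, C_in)
w1, b1 = np.random.rand(C_in, H1), np.random.rand(H1)
w2, b2 = np.random.rand(H1, H2), np.random.rand(H2)
w3, b3 = np.random.rand(H2, C_out), np.random.rand(C_out)
out = mlp(input, w1, b1, w2, b2, w3, b3)
print(np.sum(out, axis=-1))  # ~1.0 per row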
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
#
# SPDX-License-Identifier: BSD-3-Clause

import dpnp as np
import numba as nb
from numba_dpex import dpjit


@dpjit
def relu(x):
    return np.maximum(x, 0)


# Deep learning convolutional operator (stride = 1)
@dpjit
def conv2d(input, weights):
    K = weights.shape[0]  # Assuming square kernel
    N = input.shape[0]
    H_out = input.shape[1] - K + 1
    W_out = input.shape[2] - K + 1
    C_in = input.shape[3]
    C_out = weights.shape[3]
    output = np.empty((N, H_out, W_out, C_out), dtype=np.float32)

    # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88
    for i in nb.prange(H_out):
        for j in nb.prange(W_out):
            # output[:, i, j, :] = np.sum(
            #     input[:, i:i + K, j:j + K, :, np.newaxis] *
            #     weights[np.newaxis, :, :, :],
            #     axis=(1, 2, 3),
            # )
            # Reshape supported only on contiguous arrays
            inp = input[:, i : i + K, j : j + K, :].copy()
            # Tuple of ints not supported in axis keyword
            output[:, i, j, :] = np.sum(
                np.sum(
                    np.sum(
                        np.reshape(inp, (N, K, K, C_in, 1))
                        * np.reshape(weights, (1, K, K, C_in, C_out)),
                        axis=1,
                    ),
                    axis=1,
                ),
                axis=1,
            )

    return output


# Batch normalization operator, as used in ResNet
@dpjit
def batchnorm2d(x, eps=1e-5):
    # mean = np.mean(x, axis=0, keepdims=True)
    mean = np.empty(x.shape, dtype=x.dtype)
    mean[:] = np.sum(x, axis=0) / x.shape[0]
    # std = np.std(x, axis=0, keepdims=True)
    std = np.empty(x.shape, dtype=x.dtype)
    std[:] = np.sqrt(np.sum((x - mean) ** 2, axis=0) / x.shape[0])
    return (x - mean) / np.sqrt(std + eps)


# Bottleneck residual block (after initial convolution, without downsampling)
# in the ResNet-50 CNN (inference)
@dpjit
def resnet_basicblock(input, conv1, conv2, conv3):
    # Pad output of first convolution for second convolution
    padded = np.zeros(
        (input.shape[0], input.shape[1] + 2, input.shape[2] + 2, conv1.shape[3])
    )

    padded[:, 1:-1, 1:-1, :] = conv2d(input, conv1)
    x = batchnorm2d(padded)
    x = relu(x)

    x = conv2d(x, conv2)
    x = batchnorm2d(x)
    x = relu(x)
    x = conv2d(x, conv3)
    x = batchnorm2d(x)
    return relu(x + input)
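
A hedged sketch of driving the block (the shapes below are assumptions following the 1x1 / 3x3 / 1x1 bottleneck pattern; channel counts are illustrative). The zero-padding before the 3x3 convolution keeps the spatial size fixed, so the output shape matches the input, as the residual addition requires:

import dpnp as np

N, H, W, C, C_mid = 2, 14, 14, 8, 4
input = np.random.rand(N, H, W, C).astype(np.float32)
conv1 = np.random.rand(1, 1, C, C_mid).astype(np.float32)      # 1x1 reduce
conv2 = np.random.rand(3, 3, C_mid, C_mid).astype(np.float32)  # 3x3
conv3 = np.random.rand(1, 1, C_mid, C).astype(np.float32)      # 1x1 expand
out = resnet_basicblock(input, conv1, conv2, conv3)
print(out.shape)  # (2, 14, 14, 8), same as input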
