Skip to content

Commit c5ff998

Browse files
authored
Merge pull request #242 from IntelPython/feature/add_numba_dpex_n_implementations
Feature/add numba dpex n implementations
2 parents 5300f94 + 49ee54a commit c5ff998

36 files changed

+1271
-7
lines changed
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# SPDX-FileCopyrightText: 2014 Jérôme Kieffer et al.
2+
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
3+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
7+
"""
8+
Jérôme Kieffer and Giannis Ashiotis. Pyfai: a python library for
9+
high performance azimuthal integration on gpu, 2014. In Proceedings of the
10+
7th European Conference on Python in Science (EuroSciPy 2014).
11+
"""
12+
13+
import dpnp as np
14+
from numba_dpex import dpjit
15+
16+
17+
@dpjit
18+
def get_bin_edges_parallel(a, bins):
19+
bin_edges = np.zeros((bins + 1,), dtype=np.float64)
20+
a_min = a.min()
21+
a_max = a.max()
22+
delta = (a_max - a_min) / bins
23+
for i in range(bin_edges.shape[0]):
24+
bin_edges[i] = a_min + i * delta
25+
26+
bin_edges[-1] = a_max # Avoid roundoff error on last point
27+
return bin_edges
28+
29+
30+
@dpjit
31+
def compute_bin(x, bin_edges):
32+
# assuming uniform bins for now
33+
n = bin_edges.shape[0] - 1
34+
a_min = bin_edges[0]
35+
a_max = bin_edges[-1]
36+
37+
# special case to mirror NumPy behavior for last bin
38+
if x == a_max:
39+
return n - 1 # a_max always in last bin
40+
41+
return int(n * (x - a_min) / (a_max - a_min))
42+
43+
44+
@dpjit
45+
def histogram_parallel(a, bins, weights):
46+
hist = np.zeros((bins,), dtype=a.dtype)
47+
bin_edges = get_bin_edges_parallel(a, bins)
48+
49+
for i in range(a.shape[0]):
50+
bin = compute_bin(a[i], bin_edges)
51+
hist[bin] += weights[i]
52+
53+
return hist, bin_edges
54+
55+
56+
@dpjit
57+
def azimint_hist(data, radius, npt):
58+
histu = np.histogram(radius, npt)[0]
59+
# histw = np.histogram(radius, npt, weights=data)[0]
60+
histw = histogram_parallel(radius, npt, weights=data)[0]
61+
return histw / histu
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# SPDX-FileCopyrightText: 2014 Jérôme Kieffer et al.
2+
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
3+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
4+
#
5+
# SPDX-License-Identifier: BSD-3-Clause
6+
7+
"""
8+
Jérôme Kieffer and Giannis Ashiotis. Pyfai: a python library for
9+
high performance azimuthal integration on gpu, 2014. In Proceedings of the
10+
7th European Conference on Python in Science (EuroSciPy 2014).
11+
"""
12+
13+
import dpnp as np
14+
from numba_dpex import dpjit
15+
16+
17+
@dpjit
18+
def azimint_naive(data, radius, npt):
19+
rmax = radius.max()
20+
res = np.zeros(npt, dtype=np.float64)
21+
for i in range(npt):
22+
r1 = rmax * i / npt
23+
r2 = rmax * (i + 1) / npt
24+
mask_r12 = np.logical_and((r1 <= radius), (radius < r2))
25+
values_r12 = data[mask_r12]
26+
res[i] = values_r12.mean()
27+
return res
Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
# SPDX-FileCopyrightText: 2017 Lorena A. Barba, Gilbert F. Forsyth.
2+
# SPDX-FileCopyrightText: 2018 Barba, Lorena A., and Forsyth, Gilbert F.
3+
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
4+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
5+
#
6+
# SPDX-License-Identifier: BSD-3-Clause
7+
8+
"""
9+
CFD Python: the 12 steps to Navier-Stokes equations.
10+
Journal of Open Source Education, 1(9), 21,
11+
https://doi.org/10.21105/jose.00021
12+
"""
13+
14+
import dpnp as np
15+
from numba_dpex import dpjit
16+
17+
18+
@dpjit
19+
def build_up_b(rho, dt, dx, dy, u, v):
20+
b = np.zeros_like(u)
21+
b[1:-1, 1:-1] = rho * (
22+
1
23+
/ dt
24+
* (
25+
(u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)
26+
+ (v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)
27+
)
28+
- ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)) ** 2
29+
- 2
30+
* (
31+
(u[2:, 1:-1] - u[0:-2, 1:-1])
32+
/ (2 * dy)
33+
* (v[1:-1, 2:] - v[1:-1, 0:-2])
34+
/ (2 * dx)
35+
)
36+
- ((v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)) ** 2
37+
)
38+
39+
# Periodic BC Pressure @ x = 2
40+
b[1:-1, -1] = rho * (
41+
1
42+
/ dt
43+
* (
44+
(u[1:-1, 0] - u[1:-1, -2]) / (2 * dx)
45+
+ (v[2:, -1] - v[0:-2, -1]) / (2 * dy)
46+
)
47+
- ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx)) ** 2
48+
- 2
49+
* (
50+
(u[2:, -1] - u[0:-2, -1])
51+
/ (2 * dy)
52+
* (v[1:-1, 0] - v[1:-1, -2])
53+
/ (2 * dx)
54+
)
55+
- ((v[2:, -1] - v[0:-2, -1]) / (2 * dy)) ** 2
56+
)
57+
58+
# Periodic BC Pressure @ x = 0
59+
b[1:-1, 0] = rho * (
60+
1
61+
/ dt
62+
* (
63+
(u[1:-1, 1] - u[1:-1, -1]) / (2 * dx)
64+
+ (v[2:, 0] - v[0:-2, 0]) / (2 * dy)
65+
)
66+
- ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx)) ** 2
67+
- 2
68+
* (
69+
(u[2:, 0] - u[0:-2, 0])
70+
/ (2 * dy)
71+
* (v[1:-1, 1] - v[1:-1, -1])
72+
/ (2 * dx)
73+
)
74+
- ((v[2:, 0] - v[0:-2, 0]) / (2 * dy)) ** 2
75+
)
76+
77+
return b
78+
79+
80+
@dpjit
81+
def pressure_poisson_periodic(nit, p, dx, dy, b):
82+
pn = np.empty_like(p)
83+
84+
for q in range(nit):
85+
pn = p.copy()
86+
p[1:-1, 1:-1] = (
87+
(pn[1:-1, 2:] + pn[1:-1, 0:-2]) * dy**2
88+
+ (pn[2:, 1:-1] + pn[0:-2, 1:-1]) * dx**2
89+
) / (2 * (dx**2 + dy**2)) - dx**2 * dy**2 / (
90+
2 * (dx**2 + dy**2)
91+
) * b[
92+
1:-1, 1:-1
93+
]
94+
95+
# Periodic BC Pressure @ x = 2
96+
p[1:-1, -1] = (
97+
(pn[1:-1, 0] + pn[1:-1, -2]) * dy**2
98+
+ (pn[2:, -1] + pn[0:-2, -1]) * dx**2
99+
) / (2 * (dx**2 + dy**2)) - dx**2 * dy**2 / (
100+
2 * (dx**2 + dy**2)
101+
) * b[
102+
1:-1, -1
103+
]
104+
105+
# Periodic BC Pressure @ x = 0
106+
p[1:-1, 0] = (
107+
(pn[1:-1, 1] + pn[1:-1, -1]) * dy**2
108+
+ (pn[2:, 0] + pn[0:-2, 0]) * dx**2
109+
) / (2 * (dx**2 + dy**2)) - dx**2 * dy**2 / (
110+
2 * (dx**2 + dy**2)
111+
) * b[
112+
1:-1, 0
113+
]
114+
115+
# Wall boundary conditions, pressure
116+
p[-1, :] = p[-2, :] # dp/dy = 0 at y = 2
117+
p[0, :] = p[1, :] # dp/dy = 0 at y = 0
118+
119+
120+
@dpjit
121+
def channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F):
122+
udiff = 1
123+
stepcount = 0
124+
125+
while udiff > 0.001:
126+
un = u.copy()
127+
vn = v.copy()
128+
129+
b = build_up_b(rho, dt, dx, dy, u, v)
130+
pressure_poisson_periodic(nit, p, dx, dy, b)
131+
132+
u[1:-1, 1:-1] = (
133+
un[1:-1, 1:-1]
134+
- un[1:-1, 1:-1] * dt / dx * (un[1:-1, 1:-1] - un[1:-1, 0:-2])
135+
- vn[1:-1, 1:-1] * dt / dy * (un[1:-1, 1:-1] - un[0:-2, 1:-1])
136+
- dt / (2 * rho * dx) * (p[1:-1, 2:] - p[1:-1, 0:-2])
137+
+ nu
138+
* (
139+
dt
140+
/ dx**2
141+
* (un[1:-1, 2:] - 2 * un[1:-1, 1:-1] + un[1:-1, 0:-2])
142+
+ dt
143+
/ dy**2
144+
* (un[2:, 1:-1] - 2 * un[1:-1, 1:-1] + un[0:-2, 1:-1])
145+
)
146+
+ F * dt
147+
)
148+
149+
v[1:-1, 1:-1] = (
150+
vn[1:-1, 1:-1]
151+
- un[1:-1, 1:-1] * dt / dx * (vn[1:-1, 1:-1] - vn[1:-1, 0:-2])
152+
- vn[1:-1, 1:-1] * dt / dy * (vn[1:-1, 1:-1] - vn[0:-2, 1:-1])
153+
- dt / (2 * rho * dy) * (p[2:, 1:-1] - p[0:-2, 1:-1])
154+
+ nu
155+
* (
156+
dt
157+
/ dx**2
158+
* (vn[1:-1, 2:] - 2 * vn[1:-1, 1:-1] + vn[1:-1, 0:-2])
159+
+ dt
160+
/ dy**2
161+
* (vn[2:, 1:-1] - 2 * vn[1:-1, 1:-1] + vn[0:-2, 1:-1])
162+
)
163+
)
164+
165+
# Periodic BC u @ x = 2
166+
u[1:-1, -1] = (
167+
un[1:-1, -1]
168+
- un[1:-1, -1] * dt / dx * (un[1:-1, -1] - un[1:-1, -2])
169+
- vn[1:-1, -1] * dt / dy * (un[1:-1, -1] - un[0:-2, -1])
170+
- dt / (2 * rho * dx) * (p[1:-1, 0] - p[1:-1, -2])
171+
+ nu
172+
* (
173+
dt / dx**2 * (un[1:-1, 0] - 2 * un[1:-1, -1] + un[1:-1, -2])
174+
+ dt / dy**2 * (un[2:, -1] - 2 * un[1:-1, -1] + un[0:-2, -1])
175+
)
176+
+ F * dt
177+
)
178+
179+
# Periodic BC u @ x = 0
180+
u[1:-1, 0] = (
181+
un[1:-1, 0]
182+
- un[1:-1, 0] * dt / dx * (un[1:-1, 0] - un[1:-1, -1])
183+
- vn[1:-1, 0] * dt / dy * (un[1:-1, 0] - un[0:-2, 0])
184+
- dt / (2 * rho * dx) * (p[1:-1, 1] - p[1:-1, -1])
185+
+ nu
186+
* (
187+
dt / dx**2 * (un[1:-1, 1] - 2 * un[1:-1, 0] + un[1:-1, -1])
188+
+ dt / dy**2 * (un[2:, 0] - 2 * un[1:-1, 0] + un[0:-2, 0])
189+
)
190+
+ F * dt
191+
)
192+
193+
# Periodic BC v @ x = 2
194+
v[1:-1, -1] = (
195+
vn[1:-1, -1]
196+
- un[1:-1, -1] * dt / dx * (vn[1:-1, -1] - vn[1:-1, -2])
197+
- vn[1:-1, -1] * dt / dy * (vn[1:-1, -1] - vn[0:-2, -1])
198+
- dt / (2 * rho * dy) * (p[2:, -1] - p[0:-2, -1])
199+
+ nu
200+
* (
201+
dt / dx**2 * (vn[1:-1, 0] - 2 * vn[1:-1, -1] + vn[1:-1, -2])
202+
+ dt / dy**2 * (vn[2:, -1] - 2 * vn[1:-1, -1] + vn[0:-2, -1])
203+
)
204+
)
205+
206+
# Periodic BC v @ x = 0
207+
v[1:-1, 0] = (
208+
vn[1:-1, 0]
209+
- un[1:-1, 0] * dt / dx * (vn[1:-1, 0] - vn[1:-1, -1])
210+
- vn[1:-1, 0] * dt / dy * (vn[1:-1, 0] - vn[0:-2, 0])
211+
- dt / (2 * rho * dy) * (p[2:, 0] - p[0:-2, 0])
212+
+ nu
213+
* (
214+
dt / dx**2 * (vn[1:-1, 1] - 2 * vn[1:-1, 0] + vn[1:-1, -1])
215+
+ dt / dy**2 * (vn[2:, 0] - 2 * vn[1:-1, 0] + vn[0:-2, 0])
216+
)
217+
)
218+
219+
# Wall BC: u,v = 0 @ y = 0,2
220+
u[0, :] = 0
221+
u[-1, :] = 0
222+
v[0, :] = 0
223+
v[-1, :] = 0
224+
225+
udiff = (np.sum(u) - np.sum(un)) / np.sum(u)
226+
stepcount += 1
227+
228+
return stepcount
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: 2023 Stefan Behnel, Robert Bradshaw,
2+
# Dag Sverre Seljebotn, Greg Ewing, William Stein, Gabriel Gellner, et al.
3+
# SPDX-FileCopyrightText: 2021 ETH Zurich and the NPBench authors
4+
# SPDX-FileCopyrightText: 2022 - 2023 Intel Corporation
5+
#
6+
# SPDX-License-Identifier: BSD-3-Clause
7+
8+
# https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html
9+
10+
import dpnp as np
11+
from numba_dpex import dpjit
12+
13+
14+
@dpjit
15+
def compute(array_1, array_2, a, b, c):
16+
# return np.clip(array_1, 2, 10) * a + array_2 * b + c
17+
return np.minimum(np.maximum(array_1, 2), 10) * a + array_2 * b + c
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# SPDX-FileCopyrightText: 2018 Øystein Sture
2+
#
3+
# SPDX-License-Identifier: BSD-3-Clause
4+
5+
import dpnp as np
6+
from numba_dpex import dpjit
7+
8+
9+
# Adapted from https://gist.github.com/oysstu/68072c44c02879a2abf94ef350d1c7c6
10+
@dpjit
11+
def crc16(data, poly=0x8408):
12+
"""
13+
CRC-16-CCITT Algorithm
14+
"""
15+
crc = 0xFFFF
16+
for b in data:
17+
cur_byte = 0xFF & b
18+
for _ in range(0, 8):
19+
if (crc & 0x0001) ^ (cur_byte & 0x0001):
20+
crc = (crc >> 1) ^ poly
21+
else:
22+
crc >>= 1
23+
cur_byte >>= 1
24+
crc = ~crc & 0xFFFF
25+
crc = (crc << 8) | ((crc >> 8) & 0xFF)
26+
27+
return crc & 0xFFFF

0 commit comments

Comments
 (0)