Skip to content

Commit 050c1fb

Browse files
Replaced use of numba-dpex in example with use of dpnp
This is a temporary measure until GEMM is implemented in dpctl.
1 parent ceaef3d commit 050c1fb

File tree

2 files changed

+32
-31
lines changed

2 files changed

+32
-31
lines changed

.github/workflows/conda-package.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -380,7 +380,7 @@ jobs:
380380
source $CONDA/etc/profile.d/conda.sh
381381
conda activate
382382
CHANNELS="-c $GITHUB_WORKSPACE/channel -c dppy/label/dev -c intel --override-channels"
383-
conda install -n examples -y $CHANNELS numba-dppy numpy dpctl || exit 1
383+
conda install -n examples -y $CHANNELS numpy dpctl dpnp || exit 1
384384
- name: Build and run examples with native extensions
385385
shell: bash -l {0}
386386
run: |

examples/python/dppy_kernel.py renamed to examples/python/sycl_timer.py

Lines changed: 31 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -15,49 +15,50 @@
1515
# limitations under the License.
1616

1717

18-
import numba_dppy
18+
import dpnp
1919
import numpy as np
2020

2121
import dpctl
22+
import dpctl.tensor as dpt
2223
from dpctl import SyclTimer
2324

24-
25-
@numba_dppy.kernel
26-
def dppy_gemm(a, b, c):
27-
i = numba_dppy.get_global_id(0)
28-
j = numba_dppy.get_global_id(1)
29-
if i >= c.shape[0] or j >= c.shape[1]:
30-
return
31-
c[i, j] = 0
32-
for k in range(c.shape[0]):
33-
c[i, j] += a[i, k] * b[k, j]
34-
35-
36-
X = 1024
37-
Y = 16
38-
global_size = X, X
39-
40-
griddim = X, X
41-
blockdim = Y, Y
42-
43-
a = np.arange(X * X, dtype=np.float32).reshape(X, X)
44-
b = np.array(np.random.random(X * X), dtype=np.float32).reshape(X, X)
45-
c = np.ones_like(a).reshape(X, X)
25+
n = 4000
4626

4727
try:
48-
q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
28+
q = dpctl.SyclQueue(property="enable_profiling")
4929
except dpctl.SyclQueueCreationError:
5030
print(
5131
"Skipping the example, as dpctl.SyclQueue targeting "
52-
"opencl:gpu device could not be created"
32+
"default device could not be created"
5333
)
5434
exit(0)
35+
36+
a = dpt.reshape(dpt.arange(n * n, dtype=np.float32, sycl_queue=q), (n, n))
37+
b = dpt.reshape(
38+
dpt.asarray(np.random.random(n * n), dtype=np.float32, sycl_queue=q), (n, n)
39+
)
40+
5541
timer = SyclTimer(time_scale=1)
56-
with dpctl.device_context(q):
42+
43+
wall_times = []
44+
device_times = []
45+
print(
46+
f"Performing matrix multiplication of two {n} by {n} matrices "
47+
f"on {q.sycl_device.name}, repeating 5 times."
48+
)
49+
for _ in range(5):
5750
with timer(q):
58-
dppy_gemm[griddim, blockdim](a, b, c)
59-
cc = np.dot(a, b)
51+
a_matmul_b = dpnp.matmul(a, b)
6052
host_time, device_time = timer.dt
53+
wall_times.append(host_time)
54+
device_times.append(device_time)
55+
56+
c = dpnp.asnumpy(a_matmul_b)
57+
cc = np.dot(dpnp.asnumpy(a), dpnp.asnumpy(b))
6158

62-
print("Wall time: ", host_time, "\nDevice time: ", device_time)
63-
print(np.allclose(c, cc))
59+
print("Wall time: ", wall_times, "\nDevice time: ", device_times)
60+
print(
61+
"Accuracy test: passed."
62+
if np.allclose(c, cc)
63+
else (f"Accuracy test: failed. Discrepancy {np.max(np.abs(c-cc))}")
64+
)

0 commit comments

Comments
 (0)