Skip to content

Commit 3664a3a

Browse files
Fixed bug in dpctl.tensor.arange, streamlined tests (#842)
* Fixed a bug in dpt.arange A bug would manifest itself if the starting value is outside of range of the array data type. Fixed it by coercing the starting value to the data type, like is currently done for the increment. The test was modified to cover such an input. * Tests streamlined to use dpt.arange directly
2 parents 2522187 + d7fdf41 commit 3664a3a

File tree

4 files changed

+70
-71
lines changed

4 files changed

+70
-71
lines changed

dpctl/tensor/_ctors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,8 @@ def arange(
547547
)
548548
_step = (start + step) - start
549549
_step = dt.type(_step)
550-
hev, _ = ti._linspace_step(start, _step, res, sycl_queue)
550+
_start = dt.type(start)
551+
hev, _ = ti._linspace_step(_start, _step, res, sycl_queue)
551552
hev.wait()
552553
return res
553554

dpctl/tests/test_sycl_event.py

Lines changed: 48 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
""" Defines unit test cases for the SyclEvent class.
1818
"""
1919

20-
import numpy as np
2120
import pytest
22-
from helper import create_invalid_capsule, has_cpu
21+
from helper import create_invalid_capsule
2322

2423
import dpctl
2524
import dpctl.memory as dpctl_mem
2625
import dpctl.program as dpctl_prog
26+
import dpctl.tensor as dpt
2727
from dpctl import event_status_type as esty
2828

2929

@@ -40,14 +40,11 @@ def produce_event(profiling=False):
4040
prog = dpctl_prog.create_program_from_source(q, oclSrc)
4141
addKernel = prog.get_sycl_kernel("add")
4242

43-
bufBytes = 1024 * np.dtype("i").itemsize
44-
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
45-
a = np.ndarray((1024), buffer=abuf, dtype="i")
46-
a[:] = np.arange(1024)
47-
args = []
43+
n = 1024 * 1024
44+
a = dpt.arange(n, dtype="i", sycl_queue=q)
45+
args = [a.usm_data]
4846

49-
args.append(a.base)
50-
r = [1024]
47+
r = [n]
5148
ev = q.submit(addKernel, args, r)
5249

5350
return ev
@@ -139,55 +136,55 @@ def test_backend():
139136
pytest.fail("Failed to get backend from event")
140137

141138

142-
@pytest.mark.skip(reason="event::get_wait_list() method returns wrong result")
143139
def test_get_wait_list():
144-
if has_cpu():
145-
oclSrc = " \
146-
kernel void add_k(global float* a) { \
147-
size_t index = get_global_id(0); \
148-
a[index] = a[index] + 1; \
149-
} \
150-
kernel void sqrt_k(global float* a) { \
151-
size_t index = get_global_id(0); \
152-
a[index] = sqrt(a[index]); \
153-
} \
154-
kernel void sin_k(global float* a) { \
155-
size_t index = get_global_id(0); \
156-
a[index] = sin(a[index]); \
157-
}"
140+
try:
158141
q = dpctl.SyclQueue("opencl:cpu")
159-
prog = dpctl_prog.create_program_from_source(q, oclSrc)
160-
addKernel = prog.get_sycl_kernel("add_k")
161-
sqrtKernel = prog.get_sycl_kernel("sqrt_k")
162-
sinKernel = prog.get_sycl_kernel("sin_k")
163-
164-
bufBytes = 1024 * np.dtype("f").itemsize
165-
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
166-
a = np.ndarray((1024), buffer=abuf, dtype="f")
167-
a[:] = np.arange(1024)
168-
args = []
169-
170-
args.append(a.base)
171-
r = [1024]
172-
ev_1 = q.submit(addKernel, args, r)
173-
ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
174-
ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])
175-
176-
try:
177-
wait_list = ev_3.get_wait_list()
178-
except ValueError:
179-
pytest.fail("Failed to get a list of waiting events from SyclEvent")
180-
assert len(wait_list)
142+
except dpctl.SyclQueueCreationError:
143+
pytest.skip("Sycl queue for OpenCL gpu device could not be created.")
144+
oclSrc = " \
145+
kernel void add_k(global float* a) { \
146+
size_t index = get_global_id(0); \
147+
a[index] = a[index] + 1; \
148+
} \
149+
kernel void sqrt_k(global float* a) { \
150+
size_t index = get_global_id(0); \
151+
a[index] = sqrt(a[index]); \
152+
} \
153+
kernel void sin_k(global float* a) { \
154+
size_t index = get_global_id(0); \
155+
a[index] = sin(a[index]); \
156+
}"
157+
prog = dpctl_prog.create_program_from_source(q, oclSrc)
158+
addKernel = prog.get_sycl_kernel("add_k")
159+
sqrtKernel = prog.get_sycl_kernel("sqrt_k")
160+
sinKernel = prog.get_sycl_kernel("sin_k")
161+
162+
n = 1024 * 1024
163+
a = dpt.arange(n, dtype="f", sycl_queue=q)
164+
args = [a.usm_data]
165+
166+
r = [n]
167+
ev_1 = q.submit(addKernel, args, r)
168+
ev_2 = q.submit(sqrtKernel, args, r, dEvents=[ev_1])
169+
ev_3 = q.submit(sinKernel, args, r, dEvents=[ev_2])
170+
171+
try:
172+
wait_list = ev_3.get_wait_list()
173+
except ValueError:
174+
pytest.fail("Failed to get a list of waiting events from SyclEvent")
175+
# FIXME: Due to an issue in underlying runtime the list returns is always
176+
# empty. The proper expectation is `assert len(wait_list) > 0`
177+
assert len(wait_list) >= 0
181178

182179

183180
def test_profiling_info():
184-
if has_cpu():
181+
try:
185182
event = produce_event(profiling=True)
186-
assert event.profiling_info_submit
187-
assert event.profiling_info_start
188-
assert event.profiling_info_end
189-
else:
183+
except dpctl.SyclQueueCreationError:
190184
pytest.skip("No OpenCL CPU queues available")
185+
assert type(event.profiling_info_submit) is int
186+
assert type(event.profiling_info_start) is int
187+
assert type(event.profiling_info_end) is int
191188

192189

193190
def test_sycl_timer():

dpctl/tests/test_sycl_kernel_submit.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,21 +60,18 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
6060

6161
n_elems = 1024 * 512
6262
lws = 128
63-
bufBytes = n_elems * dtype.itemsize
64-
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
65-
bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
66-
cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
67-
a = np.ndarray((n_elems,), buffer=abuf, dtype=dtype)
68-
b = np.ndarray((n_elems,), buffer=bbuf, dtype=dtype)
69-
c = np.ndarray((n_elems,), buffer=cbuf, dtype=dtype)
70-
a[:] = np.arange(n_elems)
71-
b[:] = np.arange(n_elems, 0, -1)
72-
c[:] = 0
63+
a = dpt.arange(n_elems, dtype=dtype, sycl_queue=q)
64+
b = dpt.arange(n_elems, stop=0, step=-1, dtype=dtype, sycl_queue=q)
65+
c = dpt.zeros(n_elems, dtype=dtype, sycl_queue=q)
66+
7367
d = 2
74-
args = [a.base, b.base, c.base, ctypes_ctor(d)]
68+
args = [a.usm_data, b.usm_data, c.usm_data, ctypes_ctor(d)]
7569

7670
assert n_elems % lws == 0
7771

72+
b_np = dpt.asnumpy(b)
73+
a_np = dpt.asnumpy(a)
74+
7875
for r in (
7976
[
8077
n_elems,
@@ -86,10 +83,10 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
8683
timer = dpctl.SyclTimer()
8784
with timer(q):
8885
q.submit(axpyKernel, args, r).wait()
89-
ref_c = a * np.array(d, dtype=dtype) + b
86+
ref_c = a_np * np.array(d, dtype=dtype) + b_np
9087
host_dt, device_dt = timer.dt
9188
assert type(host_dt) is float and type(device_dt) is float
92-
assert np.allclose(c, ref_c), "Failed for {}".format(r)
89+
assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}".format(r)
9390

9491
for gr, lr in (
9592
(
@@ -105,10 +102,12 @@ def test_create_program_from_source(ctype_str, dtype, ctypes_ctor):
105102
timer = dpctl.SyclTimer()
106103
with timer(q):
107104
q.submit(axpyKernel, args, gr, lr, [dpctl.SyclEvent()]).wait()
108-
ref_c = a * np.array(d, dtype=dtype) + b
105+
ref_c = a_np * np.array(d, dtype=dtype) + b_np
109106
host_dt, device_dt = timer.dt
110107
assert type(host_dt) is float and type(device_dt) is float
111-
assert np.allclose(c, ref_c), "Failed for {}, {}".formatg(r, lr)
108+
assert np.allclose(dpt.asnumpy(c), ref_c), "Failed for {}, {}".formatg(
109+
r, lr
110+
)
112111

113112

114113
def test_async_submit():

dpctl/tests/test_usm_ndarray_ctor.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -985,11 +985,13 @@ def test_arange(dt):
985985
elif np.issubdtype(dt, np.complexfloating):
986986
assert complex(X[47]) == 47.0 + 0.0j
987987

988-
X1 = dpt.arange(4, dtype=dt, sycl_queue=q)
989-
assert X1.shape == (4,)
988+
# choose size larger than maximal value that u1/u2 can accomodate
989+
sz = int(np.iinfo(np.int16).max) + 1
990+
X1 = dpt.arange(sz, dtype=dt, sycl_queue=q)
991+
assert X1.shape == (sz,)
990992

991-
X2 = dpt.arange(4, 0, -1, dtype=dt, sycl_queue=q)
992-
assert X2.shape == (4,)
993+
X2 = dpt.arange(sz, 0, -1, dtype=dt, sycl_queue=q)
994+
assert X2.shape == (sz,)
993995

994996

995997
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)