Skip to content

Commit 33031a2

Browse files
Merge pull request #555 from IntelPython/dpctl-sycl-timer
Moved sycl timer into dpctl.SyclTimer
2 parents f55a730 + 90ebdb7 commit 33031a2

File tree

5 files changed

+97
-25
lines changed

5 files changed

+97
-25
lines changed

dpctl/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
set_global_queue,
6565
)
6666

67+
from ._sycl_timer import SyclTimer
6768
from ._version import get_versions
6869
from .enum_types import backend_type, device_type, event_status_type
6970

@@ -88,7 +89,7 @@
8889
]
8990
__all__ += [
9091
"SyclEvent",
91-
"SyclEventRaw",
92+
"SyclTimer",
9293
]
9394
__all__ += [
9495
"get_platforms",

examples/python/sycl_timer.py renamed to dpctl/_sycl_timer.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,27 +17,65 @@
1717

1818
import timeit
1919

20-
import dpctl
20+
from . import SyclQueue
2121

2222

2323
class SyclTimer:
24-
def __init__(self, host_time=timeit.default_timer, time_scale=1):
25-
self.timer = host_time
24+
"""
25+
SyclTimer(host_timer=timeit.default_timer, time_scale=1)
26+
Python class to measure device time of execution of commands submitted to
27+
:class:`dpctl.SyclQueue` as well as the wall-time.
28+
29+
:Example:
30+
.. code-block:: python
31+
32+
import dpctl
33+
34+
# Create a default SyclQueue
35+
q = dpctl.SyclQueue(property='enable_profiling')
36+
37+
# create the timer
38+
milliseconds_sc = 1e-3
39+
timer = dpctl.SyclTimer(time_scale = milliseconds_sc)
40+
41+
# use the timer
42+
with timer(queue=q):
43+
code_block
44+
45+
# retrieve elapsed times in milliseconds
46+
wall_dt, sycl_dt = timer.dt
47+
48+
Remark:
49+
The timer synchronizes the queue at the entrance and the
50+
exit of the context.
51+
52+
Args:
53+
host_timer (callable): A callable such that host_timer() returns current
54+
host time in seconds.
55+
time_scale (int, float): Ratio of the unit of time of interest and
56+
one second.
57+
"""
58+
59+
def __init__(self, host_timer=timeit.default_timer, time_scale=1):
60+
self.timer = host_timer
2661
self.time_scale = time_scale
62+
self.queue = None
2763

2864
def __call__(self, queue=None):
29-
if isinstance(queue, dpctl.SyclQueue):
65+
if isinstance(queue, SyclQueue):
3066
if queue.has_enable_profiling:
3167
self.queue = queue
3268
else:
3369
raise ValueError(
34-
"The queue does not contain the enable_profiling property"
70+
"The given queue was not created with the "
71+
"enable_profiling property"
3572
)
3673
else:
37-
raise ValueError(
38-
"The passed queue must be <class 'dpctl._sycl_queue.SyclQueue'>"
74+
raise TypeError(
75+
"The passed queue must have type dpctl.SyclQueue, "
76+
"got {}".format(type(queue))
3977
)
40-
return self.__enter__()
78+
return self
4179

4280
def __enter__(self):
4381
self.event_start = self.queue.submit_barrier()
@@ -48,6 +86,7 @@ def __exit__(self, *args):
4886
self.event_finish = self.queue.submit_barrier()
4987
self.host_finish = self.timer()
5088

89+
@property
5190
def dt(self):
5291
self.event_start.wait()
5392
self.event_finish.wait()

dpctl/tests/test_sycl_event.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,26 @@ def test_profiling_info():
157157
assert event.profiling_info_end
158158
else:
159159
pytest.skip("No OpenCL CPU queues available")
160+
161+
162+
def test_sycl_timer():
163+
try:
164+
q = dpctl.SyclQueue(property="enable_profiling")
165+
except dpctl.SyclQueueCreationError:
166+
pytest.skip("Queue creation of default device failed")
167+
timer = dpctl.SyclTimer()
168+
m1 = dpctl_mem.MemoryUSMDevice(256 * 1024, queue=q)
169+
m2 = dpctl_mem.MemoryUSMDevice(256 * 1024, queue=q)
170+
with timer(q):
171+
# device task
172+
m1.copy_from_device(m2)
173+
# host task
174+
[x ** 2 for x in range(1024)]
175+
host_dt, device_dt = timer.dt
176+
assert host_dt > device_dt
177+
q_no_profiling = dpctl.SyclQueue()
178+
assert q_no_profiling.has_enable_profiling is False
179+
with pytest.raises(ValueError):
180+
timer(queue=q_no_profiling)
181+
with pytest.raises(TypeError):
182+
timer(queue=None)

dpctl/tests/test_sycl_kernel_submit.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,20 @@ def test_create_program_from_source(self):
3737
size_t index = get_global_id(0); \
3838
c[index] = d*a[index] + b[index]; \
3939
}"
40-
q = dpctl.SyclQueue("opencl:gpu")
40+
q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
4141
prog = dpctl_prog.create_program_from_source(q, oclSrc)
4242
axpyKernel = prog.get_sycl_kernel("axpy")
4343

44-
bufBytes = 1024 * np.dtype("i").itemsize
44+
n_elems = 1024 * 512
45+
bufBytes = n_elems * np.dtype("i").itemsize
4546
abuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
4647
bbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
4748
cbuf = dpctl_mem.MemoryUSMShared(bufBytes, queue=q)
48-
a = np.ndarray((1024), buffer=abuf, dtype="i")
49-
b = np.ndarray((1024), buffer=bbuf, dtype="i")
50-
c = np.ndarray((1024), buffer=cbuf, dtype="i")
51-
a[:] = np.arange(1024)
52-
b[:] = np.arange(1024, 0, -1)
49+
a = np.ndarray((n_elems,), buffer=abuf, dtype="i")
50+
b = np.ndarray((n_elems,), buffer=bbuf, dtype="i")
51+
c = np.ndarray((n_elems,), buffer=cbuf, dtype="i")
52+
a[:] = np.arange(n_elems)
53+
b[:] = np.arange(n_elems, 0, -1)
5354
c[:] = 0
5455
d = 2
5556
args = []
@@ -59,10 +60,17 @@ def test_create_program_from_source(self):
5960
args.append(c.base)
6061
args.append(ctypes.c_int(d))
6162

62-
r = [1024]
63+
r = [
64+
n_elems,
65+
]
6366

64-
q.submit(axpyKernel, args, r)
65-
self.assertTrue(np.allclose(c, a * d + b))
67+
timer = dpctl.SyclTimer()
68+
with timer(q):
69+
q.submit(axpyKernel, args, r)
70+
ref_c = a * d + b
71+
host_dt, device_dt = timer.dt
72+
self.assertTrue(host_dt > device_dt)
73+
self.assertTrue(np.allclose(c, ref_c))
6674

6775

6876
if __name__ == "__main__":

examples/python/dppy_kernel.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717

1818
import numba_dppy
1919
import numpy as np
20-
from sycl_timer import SyclTimer
2120

2221
import dpctl
22+
from dpctl import SyclTimer
2323

2424

2525
@numba_dppy.kernel
@@ -45,11 +45,12 @@ def dppy_gemm(a, b, c):
4545
c = np.ones_like(a).reshape(X, X)
4646

4747
q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling")
48+
timer = SyclTimer(time_scale=1)
4849
with dpctl.device_context(q):
49-
timers = SyclTimer(time_scale=1)
50-
with timers(q):
50+
with timer(q):
5151
dppy_gemm[griddim, blockdim](a, b, c)
5252
cc = np.dot(a, b)
53-
host_time, device_time = timers.dt()
54-
print("Wall time: ", host_time, "\n", "Device time: ", device_time)
55-
print(np.allclose(c, cc))
53+
host_time, device_time = timer.dt
54+
55+
print("Wall time: ", host_time, "\nDevice time: ", device_time)
56+
print(np.allclose(c, cc))

0 commit comments

Comments
 (0)