Skip to content

Commit d1b097e

Browse files
changes blackscholes example to adapt to change in memory buffer interpretation of queue=None
1 parent edd57cc commit d1b097e

File tree

3 files changed

+45
-30
lines changed

3 files changed

+45
-30
lines changed

examples/cython/sycl_direct_linkage/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ To illustrate the queue creation overhead in each call, compare execution of def
2626
which is Intel Gen9 GPU on OpenCL backend:
2727

2828
```
29-
(idp) [11:24:38 ansatnuc04 sycl_direct_linkage]$ SYCL_BE=PI_OPENCL python bench.py
29+
(idp) [11:24:38 ansatnuc04 sycl_direct_linkage]$ SYCL_DEVICE_FILTER=opencl:gpu python bench.py
3030
========== Executing warm-up ==========
3131
NumPy result: [1. 1. 1. ... 1. 1. 1.]
3232
SYCL(default_device) result: [1. 1. 1. ... 1. 1. 1.]
@@ -37,7 +37,7 @@ Times for NumPy
3737
[3.5394036192446947, 3.498957809060812, 3.4925728561356664, 3.5036555202677846, 3.493739523924887]
3838
```
3939

40-
vs. timing when `dpctl`'s current queue is being reused:
40+
vs. timing when `dpctl`'s queue is being reused:
4141

4242
```
4343
(idp) [11:29:14 ansatnuc04 sycl_buffer]$ python bench.py

examples/cython/usm_memory/blackscholes.pyx

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,18 @@ cdef extern from "sycl_blackscholes.hpp":
2929
cdef void cpp_blackscholes[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T* callput) except +
3030
cdef void cpp_populate_params[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) except +
3131

32-
def black_scholes_price(floating[:, ::1] option_params):
32+
cdef c_dpctl.SyclQueue from_queue_keyword(queue):
    """Turn the ``queue`` keyword argument into a ``c_dpctl.SyclQueue``.

    - ``None``: a default-constructed ``c_dpctl.SyclQueue()`` is returned.
    - an instance of ``dpctl.SyclQueue``: the same object is returned,
      only cast to the C-level extension type.
    - anything else: the value is forwarded to the ``c_dpctl.SyclQueue``
      constructor, which decides whether it is acceptable.
    """
    if queue is None:
        return c_dpctl.SyclQueue()
    if isinstance(queue, dpctl.SyclQueue):
        return <c_dpctl.SyclQueue> queue
    # Every case returns above or here; the former trailing
    # "use default" return was unreachable and has been dropped.
    return c_dpctl.SyclQueue(queue)
41+
42+
43+
def black_scholes_price(floating[:, ::1] option_params, queue=None):
3344
cdef size_t n_opts = option_params.shape[0]
3445
cdef size_t n_params = option_params.shape[1]
3546
cdef size_t n_bytes = 0
@@ -49,19 +60,19 @@ def black_scholes_price(floating[:, ::1] option_params):
4960
"Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)."
5061
).format(n_params))
5162

52-
q = c_dpctl.get_current_queue()
63+
q = from_queue_keyword(queue)
5364
q_ptr = q.get_queue_ref()
5465
if (floating is double):
5566
n_bytes = 2*n_opts * sizeof(double)
56-
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes)
67+
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes, queue=q)
5768
callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='d')
5869
call_put_prices = callput_arr
5970
dp1 = &option_params[0,0]
6071
dp2 = &call_put_prices[0,0];
6172
cpp_blackscholes[double](q_ptr, n_opts, dp1, dp2)
6273
elif (floating is float):
6374
n_bytes = 2*n_opts * sizeof(float)
64-
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes)
75+
mobj = c_dpctl_mem.MemoryUSMShared(n_bytes, queue=q)
6576
callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='f')
6677
call_put_prices = callput_arr
6778
fp1 = &option_params[0,0]
@@ -70,7 +81,7 @@ def black_scholes_price(floating[:, ::1] option_params):
7081

7182
return callput_arr
7283

73-
def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, int seed):
84+
def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, int seed, queue=None):
7485
cdef size_t n_opts = option_params.shape[0]
7586
cdef size_t n_params = option_params.shape[1]
7687

@@ -85,7 +96,7 @@ def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl,
8596
"Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)."
8697
).format(n_params))
8798

88-
q = c_dpctl.get_current_queue()
99+
q = from_queue_keyword(queue)
89100
q_ptr = q.get_queue_ref()
90101
if (floating is double):
91102
dp = &option_params[0,0]

examples/cython/usm_memory/run.py

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121
from reference_black_scholes import ref_python_black_scholes
2222

2323

24-
def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype):
25-
usm_mem = dpctl_mem.MemoryUSMShared(n_opts * 5 * np.dtype(dtype).itemsize)
26-
# usm_mem2 = dpctl_mem.MemoryUSMDevice(n_opts * 5 * np.dtype(dtype).itemsize)
24+
def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype, queue=None):
25+
nbytes = n_opts * 5 * np.dtype(dtype).itemsize
26+
usm_mem = dpctl_mem.MemoryUSMShared(nbytes, queue=queue)
2727
params = np.ndarray(shape=(n_opts, 5), buffer=usm_mem, dtype=dtype)
2828
seed = 1234
29-
bs.populate_params(params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed)
29+
bs.populate_params(params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed, queue=queue)
3030
return params
3131

3232

@@ -47,38 +47,42 @@ def gen_option_params(n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype):
4747
# compute prices in CPython
4848
X_ref = np.array([ref_python_black_scholes(*opt) for opt in opts], dtype="d")
4949

50-
print(np.allclose(Xgpu, X_ref, atol=1e-5))
50+
print("Correctness check: allclose(Xgpu, Xref) == ", np.allclose(Xgpu, X_ref, atol=1e-5))
5151

5252
n_opts = 3 * 10 ** 6
5353

5454
# compute on CPU sycl device
5555
import timeit
5656

57-
for _ in range(3):
57+
cpu_q = dpctl.SyclQueue("opencl:cpu:0")
58+
opts1 = gen_option_params(
59+
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d", queue=cpu_q
60+
)
61+
62+
gpu_q = dpctl.SyclQueue("level_zero:gpu:0")
63+
opts2 = gen_option_params(
64+
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d", queue=gpu_q
65+
)
5866

59-
dpctl.set_global_queue("opencl:cpu:0")
60-
print("Using : {}".format(dpctl.get_current_queue().sycl_device.name))
67+
cpu_times = []
68+
gpu_times = []
69+
for _ in range(5):
6170

6271
t0 = timeit.default_timer()
63-
opts1 = gen_option_params(
64-
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d"
65-
)
66-
X1 = bs.black_scholes_price(opts1)
72+
X1 = bs.black_scholes_price(opts1, queue=cpu_q)
6773
t1 = timeit.default_timer()
6874

69-
print("Elapsed: {}".format(t1 - t0))
75+
cpu_times.append(t1-t0)
7076

7177
# compute on GPU sycl device
72-
dpctl.set_global_queue("level_zero:gpu:0")
73-
print("Using : {}".format(dpctl.get_current_queue().sycl_device.name))
7478

7579
t0 = timeit.default_timer()
76-
opts2 = gen_option_params(
77-
n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d"
78-
)
79-
X2 = bs.black_scholes_price(opts2)
80+
X2 = bs.black_scholes_price(opts2, queue=gpu_q)
8081
t1 = timeit.default_timer()
81-
print("Elapsed: {}".format(t1 - t0))
82+
gpu_times.append(t1-t0)
83+
84+
print("Using : {}".format(cpu_q.sycl_device.name))
85+
print("Wall times : {}".format(cpu_times))
8286

83-
print(np.abs(opts1 - opts2).max())
84-
print(np.abs(X2 - X1).max())
87+
print("Using : {}".format(gpu_q.sycl_device.name))
88+
print("Wall times : {}".format(gpu_times))

0 commit comments

Comments
 (0)