@@ -37,19 +37,20 @@ def test_create_program_from_source(self):
37
37
size_t index = get_global_id(0); \
38
38
c[index] = d*a[index] + b[index]; \
39
39
}"
40
- q = dpctl .SyclQueue ("opencl:gpu" )
40
+ q = dpctl .SyclQueue ("opencl:gpu" , property = "enable_profiling" )
41
41
prog = dpctl_prog .create_program_from_source (q , oclSrc )
42
42
axpyKernel = prog .get_sycl_kernel ("axpy" )
43
43
44
- bufBytes = 1024 * np .dtype ("i" ).itemsize
44
+ n_elems = 1024 * 512
45
+ bufBytes = n_elems * np .dtype ("i" ).itemsize
45
46
abuf = dpctl_mem .MemoryUSMShared (bufBytes , queue = q )
46
47
bbuf = dpctl_mem .MemoryUSMShared (bufBytes , queue = q )
47
48
cbuf = dpctl_mem .MemoryUSMShared (bufBytes , queue = q )
48
- a = np .ndarray ((1024 ), buffer = abuf , dtype = "i" )
49
- b = np .ndarray ((1024 ), buffer = bbuf , dtype = "i" )
50
- c = np .ndarray ((1024 ), buffer = cbuf , dtype = "i" )
51
- a [:] = np .arange (1024 )
52
- b [:] = np .arange (1024 , 0 , - 1 )
49
+ a = np .ndarray ((n_elems , ), buffer = abuf , dtype = "i" )
50
+ b = np .ndarray ((n_elems , ), buffer = bbuf , dtype = "i" )
51
+ c = np .ndarray ((n_elems , ), buffer = cbuf , dtype = "i" )
52
+ a [:] = np .arange (n_elems )
53
+ b [:] = np .arange (n_elems , 0 , - 1 )
53
54
c [:] = 0
54
55
d = 2
55
56
args = []
@@ -59,10 +60,17 @@ def test_create_program_from_source(self):
59
60
args .append (c .base )
60
61
args .append (ctypes .c_int (d ))
61
62
62
- r = [1024 ]
63
+ r = [
64
+ n_elems ,
65
+ ]
63
66
64
- q .submit (axpyKernel , args , r )
65
- self .assertTrue (np .allclose (c , a * d + b ))
67
+ timer = dpctl .SyclTimer ()
68
+ with timer (q ):
69
+ q .submit (axpyKernel , args , r )
70
+ ref_c = a * d + b
71
+ host_dt , device_dt = timer .dt
72
+ self .assertTrue (host_dt > device_dt )
73
+ self .assertTrue (np .allclose (c , ref_c ))
66
74
67
75
68
76
if __name__ == "__main__" :
0 commit comments