|
15 | 15 | # limitations under the License.
|
16 | 16 |
|
17 | 17 |
|
18 |
| -import numba_dppy |
| 18 | +import dpnp |
19 | 19 | import numpy as np
|
20 | 20 |
|
21 | 21 | import dpctl
|
| 22 | +import dpctl.tensor as dpt |
22 | 23 | from dpctl import SyclTimer
|
23 | 24 |
|
24 |
| - |
25 |
| -@numba_dppy.kernel |
26 |
| -def dppy_gemm(a, b, c): |
27 |
| - i = numba_dppy.get_global_id(0) |
28 |
| - j = numba_dppy.get_global_id(1) |
29 |
| - if i >= c.shape[0] or j >= c.shape[1]: |
30 |
| - return |
31 |
| - c[i, j] = 0 |
32 |
| - for k in range(c.shape[0]): |
33 |
| - c[i, j] += a[i, k] * b[k, j] |
34 |
| - |
35 |
| - |
36 |
| -X = 1024 |
37 |
| -Y = 16 |
38 |
| -global_size = X, X |
39 |
| - |
40 |
| -griddim = X, X |
41 |
| -blockdim = Y, Y |
42 |
| - |
43 |
| -a = np.arange(X * X, dtype=np.float32).reshape(X, X) |
44 |
| -b = np.array(np.random.random(X * X), dtype=np.float32).reshape(X, X) |
45 |
| -c = np.ones_like(a).reshape(X, X) |
| 25 | +n = 4000 |
46 | 26 |
|
47 | 27 | try:
|
48 |
| - q = dpctl.SyclQueue("opencl:gpu", property="enable_profiling") |
| 28 | + q = dpctl.SyclQueue(property="enable_profiling") |
49 | 29 | except dpctl.SyclQueueCreationError:
|
50 | 30 | print(
|
51 | 31 | "Skipping the example, as dpctl.SyclQueue targeting "
|
52 |
| - "opencl:gpu device could not be created" |
| 32 | + "default device could not be created" |
53 | 33 | )
|
54 | 34 | exit(0)
|
| 35 | + |
| 36 | +a = dpt.reshape(dpt.arange(n * n, dtype=np.float32, sycl_queue=q), (n, n)) |
| 37 | +b = dpt.reshape( |
| 38 | + dpt.asarray(np.random.random(n * n), dtype=np.float32, sycl_queue=q), (n, n) |
| 39 | +) |
| 40 | + |
55 | 41 | timer = SyclTimer(time_scale=1)
|
56 |
| -with dpctl.device_context(q): |
| 42 | + |
| 43 | +wall_times = [] |
| 44 | +device_times = [] |
| 45 | +print( |
| 46 | + f"Performing matrix multiplication of two {n} by {n} matrices " |
| 47 | + f"on {q.sycl_device.name}, repeating 5 times." |
| 48 | +) |
| 49 | +for _ in range(5): |
57 | 50 | with timer(q):
|
58 |
| - dppy_gemm[griddim, blockdim](a, b, c) |
59 |
| - cc = np.dot(a, b) |
| 51 | + a_matmul_b = dpnp.matmul(a, b) |
60 | 52 | host_time, device_time = timer.dt
|
| 53 | + wall_times.append(host_time) |
| 54 | + device_times.append(device_time) |
| 55 | + |
| 56 | +c = dpnp.asnumpy(a_matmul_b) |
| 57 | +cc = np.dot(dpnp.asnumpy(a), dpnp.asnumpy(b)) |
61 | 58 |
|
62 |
| -print("Wall time: ", host_time, "\nDevice time: ", device_time) |
63 |
| -print(np.allclose(c, cc)) |
| 59 | +print("Wall time: ", wall_times, "\nDevice time: ", device_times) |
| 60 | +print( |
| 61 | + "Accuracy test: passed." |
| 62 | + if np.allclose(c, cc) |
| 63 | + else (f"Accuracy test: failed. Discrepancy {np.max(np.abs(c-cc))}") |
| 64 | +) |
0 commit comments