@@ -13,8 +13,8 @@ oneMKL interfaces are an open-source implementation of the oneMKL Data Parallel
13
13
</thead>
14
14
<tbody>
15
15
<tr>
16
- <td rowspan=4 align="center">oneMKL interface</td>
17
- <td rowspan=4 align="center">oneMKL selector</td>
16
+ <td rowspan=5 align="center">oneMKL interface</td>
17
+ <td rowspan=5 align="center">oneMKL selector</td>
18
18
<td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library</a> for Intel CPU</td>
19
19
<td align="center">Intel CPU</td>
20
20
</tr>
@@ -26,6 +26,10 @@ oneMKL interfaces are an open-source implementation of the oneMKL Data Parallel
26
26
<td align="center"><a href="https://developer.nvidia.com/cublas"> NVIDIA cuBLAS</a> for NVIDIA GPU </td>
27
27
<td align="center">NVIDIA GPU</td>
28
28
</tr>
29
+ <tr>
30
+ <td align="center"><a href="https://developer.nvidia.com/curand"> NVIDIA cuRAND</a> for NVIDIA GPU </td>
31
+ <td align="center">NVIDIA GPU</td>
32
+ </tr>
29
33
<tr>
30
34
<td align="center"><a href="https://www.netlib.org"> NETLIB LAPACK</a> for INTEL CPU </td>
31
35
<td align="center">INTEL CPU</td>
@@ -59,14 +63,14 @@ Example of app.cpp with run-time dispatching:
59
63
#include "oneapi/mkl.hpp"
60
64
61
65
...
62
- cpu_dev = cl::sycl::device(cl::sycl::cpu_selector());
63
- gpu_dev = cl::sycl::device(cl::sycl::gpu_selector());
66
+ cpu_dev = sycl::device(sycl::cpu_selector());
67
+ gpu_dev = sycl::device(sycl::gpu_selector());
64
68
65
- cl::sycl::queue cpu_queue(cpu_dev);
66
- cl::sycl::queue gpu_queue(gpu_dev);
69
+ sycl::queue cpu_queue(cpu_dev);
70
+ sycl::queue gpu_queue(gpu_dev);
67
71
68
- oneapi::mkl::blas::gemm(cpu_queue, transA, transB, m, ...);
69
- oneapi::mkl::blas::gemm(gpu_queue, transA, transB, m, ...);
72
+ oneapi::mkl::blas::column_major::gemm(cpu_queue, transA, transB, m, ...);
73
+ oneapi::mkl::blas::column_major::gemm(gpu_queue, transA, transB, m, ...);
70
74
```
71
75
How to build an application with run-time dispatching:
72
76
@@ -83,16 +87,16 @@ Example of app.cpp with compile-time dispatching:
83
87
#include "oneapi/mkl.hpp"
84
88
85
89
...
86
- cpu_dev = cl::sycl::device(cl::sycl::cpu_selector());
87
- gpu_dev = cl::sycl::device(cl::sycl::gpu_selector());
90
+ cpu_dev = sycl::device(sycl::cpu_selector());
91
+ gpu_dev = sycl::device(sycl::gpu_selector());
88
92
89
- cl::sycl::queue cpu_queue(cpu_dev);
90
- cl::sycl::queue gpu_queue(gpu_dev);
93
+ sycl::queue cpu_queue(cpu_dev);
94
+ sycl::queue gpu_queue(gpu_dev);
91
95
92
96
oneapi::mkl::backend_selector<oneapi::mkl::backend::mklcpu> cpu_selector(cpu_queue);
93
97
94
- oneapi::mkl::blas::gemm(cpu_selector, transA, transB, m, ...);
95
- oneapi::mkl::blas::gemm(oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas> {gpu_queue}, transA, transB, m, ...);
98
+ oneapi::mkl::blas::column_major::gemm(cpu_selector, transA, transB, m, ...);
99
+ oneapi::mkl::blas::column_major::gemm(oneapi::mkl::backend_selector<oneapi::mkl::backend::cublas> {gpu_queue}, transA, transB, m, ...);
96
100
```
97
101
How to build an application with compile-time dispatching:
98
102
@@ -138,7 +142,7 @@ Supported domains: BLAS, RNG
138
142
<td align="center">Dynamic, Static</td>
139
143
</tr>
140
144
<tr>
141
- <td rowspan=2 align="center">RNG</td>
145
+ <td rowspan=3 align="center">RNG</td>
142
146
<td align="center">Intel CPU</td>
143
147
<td rowspan=2 align="center">Intel(R) oneAPI Math Kernel Library</td>
144
148
<td align="center">Dynamic, Static</td>
@@ -147,6 +151,11 @@ Supported domains: BLAS, RNG
147
151
<td align="center">Intel GPU</td>
148
152
<td align="center">Dynamic, Static</td>
149
153
</tr>
154
+ <tr>
155
+ <td align="center">NVIDIA GPU</td>
156
+ <td align="center">NVIDIA cuRAND</td>
157
+ <td align="center">Dynamic, Static</td>
158
+ </tr>
150
159
</tbody>
151
160
</table>
152
161
@@ -512,6 +521,7 @@ build_shared_libs | BUILD_SHARED_LIBS | True, False | True
512
521
enable_mklcpu_backend | ENABLE_MKLCPU_BACKEND | True, False | True
513
522
enable_mklgpu_backend | ENABLE_MKLGPU_BACKEND | True, False | True
514
523
*Not Supported* | ENABLE_CUBLAS_BACKEND | True, False | False
524
+ *Not Supported* | ENABLE_CURAND_BACKEND | True, False | False
515
525
*Not Supported* | ENABLE_NETLIB_BACKEND | True, False | False
516
526
enable_mklcpu_thread_tbb | ENABLE_MKLCPU_THREAD_TBB | True, False | True
517
527
build_functional_tests | BUILD_FUNCTIONAL_TESTS | True, False | True
0 commit comments