|
32 | 32 | #include "queue_sycl.hpp"
|
33 | 33 |
|
34 | 34 | namespace mkl_blas = oneapi::mkl::blas;
|
| 35 | +namespace mkl_lapack = oneapi::mkl::lapack; |
35 | 36 |
|
36 | 37 | template <typename _KernelNameSpecialization>
|
37 | 38 | class custom_blas_gemm_c_kernel;
|
@@ -182,60 +183,69 @@ template void custom_blas_dot_c<long>(void* array1_in, void* array2_in, void* re
|
182 | 183 | template void custom_blas_dot_c<float>(void* array1_in, void* array2_in, void* result1, size_t size);
|
183 | 184 | template void custom_blas_dot_c<double>(void* array1_in, void* array2_in, void* result1, size_t size);
|
184 | 185 |
|
185 |
| -#if 0 // Example for OpenCL kernel |
186 |
| -#include <map> |
187 |
| -#include <typeindex> |
188 |
| - |
189 |
| -static std::map<std::type_index, std::string> types_map = {{typeid(long), "long"}, {typeid(int), "int"}}; |
190 |
| - |
191 |
| -static const char* blas_gemm_naive = |
192 |
| - "//#define __KERNEL_TYPE__ long \n" |
193 |
| - "#define __KERNEL_TYPE_ZERO__ 0 \n" |
194 |
| - "__kernel void blas_gemm_naive(__global __KERNEL_TYPE__* array_1, \n" |
195 |
| - " __global __KERNEL_TYPE__* array_2, \n" |
196 |
| - " __global __KERNEL_TYPE__* result, \n" |
197 |
| - " unsigned long size) \n" |
198 |
| - "{ \n" |
199 |
| - " size_t i = get_global_id(0); //for (size_t i = 0; i < size; ++i) \n" |
200 |
| - " { \n" |
201 |
| - " size_t j = get_global_id(1); //for (size_t j = 0; j < size; ++j) \n" |
202 |
| - " { \n" |
203 |
| - " __KERNEL_TYPE__ temp = __KERNEL_TYPE_ZERO__; \n" |
204 |
| - " for (size_t k = 0; k < size; ++k) \n" |
205 |
| - " { \n" |
206 |
| - " const size_t index_1 = i * size + k; \n" |
207 |
| - " const size_t index_2 = k * size + j; \n" |
208 |
| - " temp += array_1[index_1] * array_2[index_2]; \n" |
209 |
| - " } \n" |
210 |
| - " \n" |
211 |
| - " const size_t index_result = i * size + j; \n" |
212 |
| - " result[index_result] = temp; \n" |
213 |
| - " } \n" |
214 |
| - " } \n" |
215 |
| - "} \n"; |
216 |
| - |
217 |
| -template <typename _DataType> |
218 |
| -void custom_dgemm_c_opencl(void* array_1_in, void* array_2_in, void* result_1, size_t size) |
| 186 | +template <typename _DataType, typename _ResultType> |
| 187 | +void custom_lapack_eig_c(const void* array_in, void* result1, void* result2, size_t size) |
219 | 188 | {
|
220 |
| - _DataType* array_1 = reinterpret_cast<_DataType*>(array_1_in); |
221 |
| - _DataType* array_2 = reinterpret_cast<_DataType*>(array_2_in); |
222 |
| - _DataType* result = reinterpret_cast<_DataType*>(result_1); |
| 189 | + // TODO this kernel works with square 2-D array only |
223 | 190 |
|
224 |
| - std::string compile_time_options("-cl-std=CL1.2"); |
225 |
| - compile_time_options += " -D__KERNEL_TYPE__=" + types_map.at(typeid(_DataType)); |
| 191 | + // Kernel Type for calculation is double type |
| 192 | + // because interface requires float type but calculations are expected in double type |
226 | 193 |
|
227 |
| - cl::sycl::program program_src(DPNP_QUEUE.get_context()); |
228 |
| - program_src.build_with_source(blas_gemm_naive, compile_time_options); |
| 194 | + if (!size) |
| 195 | + { |
| 196 | + return; |
| 197 | + } |
229 | 198 |
|
230 |
| - cl::sycl::range<2> kernel_work_ids(size, size); // dimensions are: "i" and "j" |
231 |
| - DPNP_QUEUE.submit([&](cl::sycl::handler& cgh) { |
232 |
| - cgh.set_args(array_1, array_2, result, size); |
233 |
| - cgh.parallel_for(kernel_work_ids, program_src.get_kernel("blas_gemm_naive")); |
234 |
| - }); |
| 199 | + cl::sycl::event event; |
235 | 200 |
|
236 |
| - DPNP_QUEUE.wait(); |
237 |
| -} |
| 201 | + const _DataType* array = reinterpret_cast<const _DataType*>(array_in); |
| 202 | + _ResultType* result_val = reinterpret_cast<_ResultType*>(result1); |
| 203 | + _ResultType* result_vec = reinterpret_cast<_ResultType*>(result2); |
| 204 | + |
| 205 | + double* result_val_kern = reinterpret_cast<double*>(dpnp_memory_alloc_c(size * sizeof(double))); |
| 206 | + double* result_vec_kern = reinterpret_cast<double*>(dpnp_memory_alloc_c(size * size * sizeof(double))); |
| 207 | + |
| 208 | + // type conversion. Also, math library requires copy memory because override |
| 209 | + for (size_t it = 0; it < (size * size); ++it) |
| 210 | + { |
| 211 | + result_vec_kern[it] = array[it]; |
| 212 | + } |
| 213 | + |
| 214 | + const std::int64_t lda = std::max<size_t>(1UL, size); |
| 215 | + |
| 216 | + const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size<double>( |
| 217 | + DPNP_QUEUE, oneapi::mkl::job::vec, oneapi::mkl::uplo::upper, size, lda); |
| 218 | + |
| 219 | + double* scratchpad = reinterpret_cast<double*>(dpnp_memory_alloc_c(scratchpad_size * sizeof(double))); |
| 220 | + |
| 221 | + event = mkl_lapack::syevd(DPNP_QUEUE, // queue |
| 222 | + oneapi::mkl::job::vec, // jobz |
| 223 | + oneapi::mkl::uplo::upper, // uplo |
| 224 | + size, // The order of the matrix A (0 <= n) |
| 225 | + result_vec_kern, // will be overwritten with eigenvectors |
| 226 | + lda, |
| 227 | + result_val_kern, |
| 228 | + scratchpad, |
| 229 | + scratchpad_size); |
| 230 | + event.wait(); |
238 | 231 |
|
239 |
| -template void custom_dgemm_c_opencl<long>(void* array_1_in, void* array_2_in, void* result_1, size_t size); |
| 232 | + dpnp_memory_free_c(scratchpad); |
| 233 | + |
| 234 | + for (size_t it1 = 0; it1 < size; ++it1) |
| 235 | + { |
| 236 | + result_val[it1] = result_val_kern[it1]; |
| 237 | + for (size_t it2 = 0; it2 < size; ++it2) |
| 238 | + { |
| 239 | + // copy + transpose |
| 240 | + result_vec[it2 * size + it1] = result_vec_kern[it1 * size + it2]; |
| 241 | + } |
| 242 | + } |
| 243 | + |
| 244 | + dpnp_memory_free_c(result_val_kern); |
| 245 | + dpnp_memory_free_c(result_vec_kern); |
| 246 | +} |
240 | 247 |
|
241 |
| -#endif |
| 248 | +template void custom_lapack_eig_c<int, double>(const void* array_in, void* result1, void* result2, size_t size); |
| 249 | +template void custom_lapack_eig_c<long, double>(const void* array_in, void* result1, void* result2, size_t size); |
| 250 | +template void custom_lapack_eig_c<float, float>(const void* array_in, void* result1, void* result2, size_t size); |
| 251 | +template void custom_lapack_eig_c<double, double>(const void* array_in, void* result1, void* result2, size_t size); |
0 commit comments