EIG combine existing MKL and non-existing SYCL to one kernel (#112)

densmirn · web-flow · commit b26d624a8841 · 2020-10-11T09:23:09.000-05:00
diff --git a/dpnp/backend/backend_iface.hpp b/dpnp/backend/backend_iface.hpp
@@ -173,17 +173,19 @@ INP_DLLEXPORT void custom_prod_c(void* array, void* result, size_t size);
 
 /**
  * @ingroup BACKEND_API
- * @brief math library implementation of eig function
+ * @brief Compute the eigenvalues and right eigenvectors of a square array.
  *
- * @param [in]  array1  Input array.
+ * @param [in]  array_in  Input array[size][size]
  *
- * @param [out] result1 Output array.
+ * @param [out] result1   The eigenvalues, each repeated according to its multiplicity
  *
- * @param [in]  size    Number of elements in input arrays.
+ * @param [out] result2   The normalized (unit "length") eigenvectors
+ *
+ * @param [in]  size      One dimension of square [size][size] array
  *
  */
-template <typename _DataType>
-INP_DLLEXPORT void mkl_lapack_syevd_c(void* array1, void* result1, size_t size);
+template <typename _DataType, typename _ResultType>
+INP_DLLEXPORT void custom_lapack_eig_c(const void* array_in, void* result1, void* result2, size_t size);
 
 /**
  * @ingroup BACKEND_API
diff --git a/dpnp/backend/backend_iface_fptr.cpp b/dpnp/backend/backend_iface_fptr.cpp
@@ -309,10 +309,10 @@ static func_map_t func_map_init()
     fmap[DPNPFuncName::DPNP_FN_DOT][eft_FLT][eft_FLT] = {eft_FLT, (void*)custom_blas_dot_c<float>};
     fmap[DPNPFuncName::DPNP_FN_DOT][eft_DBL][eft_DBL] = {eft_DBL, (void*)custom_blas_dot_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_EIG][eft_INT][eft_INT] = {eft_DBL, (void*)mkl_lapack_syevd_c<double>};
-    fmap[DPNPFuncName::DPNP_FN_EIG][eft_LNG][eft_LNG] = {eft_DBL, (void*)mkl_lapack_syevd_c<double>};
-    fmap[DPNPFuncName::DPNP_FN_EIG][eft_FLT][eft_FLT] = {eft_FLT, (void*)mkl_lapack_syevd_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_EIG][eft_DBL][eft_DBL] = {eft_DBL, (void*)mkl_lapack_syevd_c<double>};
+    fmap[DPNPFuncName::DPNP_FN_EIG][eft_INT][eft_INT] = {eft_DBL, (void*)custom_lapack_eig_c<int, double>};
+    fmap[DPNPFuncName::DPNP_FN_EIG][eft_LNG][eft_LNG] = {eft_DBL, (void*)custom_lapack_eig_c<long, double>};
+    fmap[DPNPFuncName::DPNP_FN_EIG][eft_FLT][eft_FLT] = {eft_FLT, (void*)custom_lapack_eig_c<float, float>};
+    fmap[DPNPFuncName::DPNP_FN_EIG][eft_DBL][eft_DBL] = {eft_DBL, (void*)custom_lapack_eig_c<double, double>};
 
     fmap[DPNPFuncName::DPNP_FN_EXP][eft_INT][eft_INT] = {eft_DBL, (void*)custom_elemwise_exp_c<int, double>};
     fmap[DPNPFuncName::DPNP_FN_EXP][eft_LNG][eft_LNG] = {eft_DBL, (void*)custom_elemwise_exp_c<long, double>};
diff --git a/dpnp/backend/custom_kernels.cpp b/dpnp/backend/custom_kernels.cpp
@@ -32,6 +32,7 @@
 #include "queue_sycl.hpp"
 
 namespace mkl_blas = oneapi::mkl::blas;
+namespace mkl_lapack = oneapi::mkl::lapack;
 
 template <typename _KernelNameSpecialization>
 class custom_blas_gemm_c_kernel;
@@ -182,60 +183,69 @@ template void custom_blas_dot_c<long>(void* array1_in, void* array2_in, void* re
 template void custom_blas_dot_c<float>(void* array1_in, void* array2_in, void* result1, size_t size);
 template void custom_blas_dot_c<double>(void* array1_in, void* array2_in, void* result1, size_t size);
 
-#if 0 // Example for OpenCL kernel
-#include <map>
-#include <typeindex>
-
-static std::map<std::type_index, std::string> types_map = {{typeid(long), "long"}, {typeid(int), "int"}};
-
-static const char* blas_gemm_naive =
-    "//#define __KERNEL_TYPE__ long                                                \n"
-    "#define __KERNEL_TYPE_ZERO__ 0                                                \n"
-    "__kernel void blas_gemm_naive(__global __KERNEL_TYPE__* array_1,              \n"
-    "                              __global __KERNEL_TYPE__* array_2,              \n"
-    "                              __global __KERNEL_TYPE__* result,               \n"
-    "                              unsigned long size)                             \n"
-    "{                                                                             \n"
-    "    size_t i = get_global_id(0); //for (size_t i = 0; i < size; ++i)          \n"
-    "    {                                                                         \n"
-    "        size_t j = get_global_id(1); //for (size_t j = 0; j < size; ++j)      \n"
-    "        {                                                                     \n"
-    "            __KERNEL_TYPE__ temp = __KERNEL_TYPE_ZERO__;                      \n"
-    "            for (size_t k = 0; k < size; ++k)                                 \n"
-    "            {                                                                 \n"
-    "                const size_t index_1 = i * size + k;                          \n"
-    "                const size_t index_2 = k * size + j;                          \n"
-    "                temp += array_1[index_1] * array_2[index_2];                  \n"
-    "            }                                                                 \n"
-    "                                                                              \n"
-    "            const size_t index_result = i * size + j;                         \n"
-    "            result[index_result] = temp;                                      \n"
-    "        }                                                                     \n"
-    "    }                                                                         \n"
-    "}                                                                             \n";
-
-template <typename _DataType>
-void custom_dgemm_c_opencl(void* array_1_in, void* array_2_in, void* result_1, size_t size)
+template <typename _DataType, typename _ResultType>
+void custom_lapack_eig_c(const void* array_in, void* result1, void* result2, size_t size)
 {
-    _DataType* array_1 = reinterpret_cast<_DataType*>(array_1_in);
-    _DataType* array_2 = reinterpret_cast<_DataType*>(array_2_in);
-    _DataType* result = reinterpret_cast<_DataType*>(result_1);
+    // TODO: this kernel works with square 2-D arrays only
 
-    std::string compile_time_options("-cl-std=CL1.2");
-    compile_time_options += " -D__KERNEL_TYPE__=" + types_map.at(typeid(_DataType));
+    // The kernel always computes in double precision: the interface may
+    // request float results, but the calculations are expected in double type
 
-    cl::sycl::program program_src(DPNP_QUEUE.get_context());
-    program_src.build_with_source(blas_gemm_naive, compile_time_options);
+    if (!size)
+    {
+        return;
+    }
 
-    cl::sycl::range<2> kernel_work_ids(size, size); // dimensions are: "i" and "j"
-    DPNP_QUEUE.submit([&](cl::sycl::handler& cgh) {
-        cgh.set_args(array_1, array_2, result, size);
-        cgh.parallel_for(kernel_work_ids, program_src.get_kernel("blas_gemm_naive"));
-    });
+    cl::sycl::event event;
 
-    DPNP_QUEUE.wait();
-}
+    const _DataType* array = reinterpret_cast<const _DataType*>(array_in);
+    _ResultType* result_val = reinterpret_cast<_ResultType*>(result1);
+    _ResultType* result_vec = reinterpret_cast<_ResultType*>(result2);
+
+    double* result_val_kern = reinterpret_cast<double*>(dpnp_memory_alloc_c(size * sizeof(double)));
+    double* result_vec_kern = reinterpret_cast<double*>(dpnp_memory_alloc_c(size * size * sizeof(double)));
+
+    // Type conversion. A copy is also needed because the math library overwrites its input
+    for (size_t it = 0; it < (size * size); ++it)
+    {
+        result_vec_kern[it] = array[it];
+    }
+
+    const std::int64_t lda = std::max<size_t>(1UL, size);
+
+    const std::int64_t scratchpad_size = mkl_lapack::syevd_scratchpad_size<double>(
+        DPNP_QUEUE, oneapi::mkl::job::vec, oneapi::mkl::uplo::upper, size, lda);
+
+    double* scratchpad = reinterpret_cast<double*>(dpnp_memory_alloc_c(scratchpad_size * sizeof(double)));
+
+    event = mkl_lapack::syevd(DPNP_QUEUE,               // queue
+                              oneapi::mkl::job::vec,    // jobz
+                              oneapi::mkl::uplo::upper, // uplo
+                              size,                     // The order of the matrix A (0 <= n)
+                              result_vec_kern,          // will be overwritten with eigenvectors
+                              lda,
+                              result_val_kern,
+                              scratchpad,
+                              scratchpad_size);
+    event.wait();
 
-template void custom_dgemm_c_opencl<long>(void* array_1_in, void* array_2_in, void* result_1, size_t size);
+    dpnp_memory_free_c(scratchpad);
+
+    for (size_t it1 = 0; it1 < size; ++it1)
+    {
+        result_val[it1] = result_val_kern[it1];
+        for (size_t it2 = 0; it2 < size; ++it2)
+        {
+            // copy + transpose
+            result_vec[it2 * size + it1] = result_vec_kern[it1 * size + it2];
+        }
+    }
+
+    dpnp_memory_free_c(result_val_kern);
+    dpnp_memory_free_c(result_vec_kern);
+}
 
-#endif
+template void custom_lapack_eig_c<int, double>(const void* array_in, void* result1, void* result2, size_t size);
+template void custom_lapack_eig_c<long, double>(const void* array_in, void* result1, void* result2, size_t size);
+template void custom_lapack_eig_c<float, float>(const void* array_in, void* result1, void* result2, size_t size);
+template void custom_lapack_eig_c<double, double>(const void* array_in, void* result1, void* result2, size_t size);
diff --git a/dpnp/backend/mkl_wrap_lapack.cpp b/dpnp/backend/mkl_wrap_lapack.cpp
diff --git a/dpnp/linalg/linalg.pyx b/dpnp/linalg/linalg.pyx
@@ -44,27 +44,21 @@ __all__ = [
 ]
 
 
-cpdef tuple dpnp_eig(dparray in_array1):
-    cdef dparray_shape_type shape1 = in_array1.shape
+cpdef tuple dpnp_eig(dparray x1):
+    cdef dparray_shape_type x1_shape = x1.shape
 
-    cdef size_t size1 = 0
-    if not shape1.empty():
-        size1 = shape1.front()
+    cdef size_t size = 0 if x1_shape.empty() else x1_shape.front()
 
-    # convert string type names (dparray.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
-
-    # get the FPTR data structure
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype)
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EIG, param1_type, param1_type)
 
     result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
-    # this array is used as input for math library and will be overwritten with eigen vectors
-    res_vec = in_array1.astype(result_type)
-    # ceate result array with type given by FPTR data
-    cdef dparray res_val = dparray((size1,), dtype=result_type)
 
-    cdef fptr_1in_1out_t func = <fptr_1in_1out_t > kernel_data.ptr
+    cdef dparray res_val = dparray((size,), dtype=result_type)
+    cdef dparray res_vec = dparray(x1_shape, dtype=result_type)
+
+    cdef fptr_2in_1out_t func = <fptr_2in_1out_t > kernel_data.ptr
     # call FPTR function
-    func(res_vec.get_data(), res_val.get_data(), size1)
+    func(x1.get_data(), res_val.get_data(), res_vec.get_data(), size)
 
-    return res_val, res_vec
+    return (res_val, res_vec)
diff --git a/dpnp/linalg/linalg_iface.py b/dpnp/linalg/linalg_iface.py
@@ -55,33 +55,21 @@
 ]
 
 
-def eig(in_array1):
+def eig(x1):
     """
     Compute the eigenvalues and right eigenvectors of a square array.
-    Parameters
-    ----------
-    a : (..., M, M) array
-        Matrices for which the eigenvalues and right eigenvectors will
-        be computed
-    Returns
-    -------
-    w : (..., M) array
-        The eigenvalues, each repeated according to its multiplicity.
-        The eigenvalues are not necessarily ordered. The resulting
-        array will be of complex type, unless the imaginary part is
-        zero in which case it will be cast to a real type. When `a`
-        is real the resulting eigenvalues will be real (0 imaginary
-        part) or occur in conjugate pairs
-    v : (..., M, M) array
-        The normalized (unit "length") eigenvectors, such that the
-        column ``v[:,i]`` is the eigenvector corresponding to the
-        eigenvalue ``w[i]``.
+
+    .. seealso:: :func:`numpy.linalg.eig`
+
     """
 
-    if (use_origin_backend()):
-        return numpy.linalg.eig(in_array1)
+    is_x1_dparray = isinstance(x1, dparray)
+
+    if (not use_origin_backend(x1) and is_x1_dparray):
+        if (x1.size > 0):
+            return dpnp_eig(x1)
 
-    return dpnp_eig(in_array1)
+    return call_origin(numpy.linalg.eig, x1)
 
 
 def matrix_power(input, count):
diff --git a/examples/example7.cpp b/examples/example7.cpp
@@ -45,8 +45,9 @@ int main(int, char**)
 
     dpnp_queue_initialize_c(QueueOptions::CPU_SELECTOR);
 
-    int* array = (int*)dpnp_memory_alloc_c(len * sizeof(int));
-    double* result = (double*)dpnp_memory_alloc_c(size * sizeof(double));
+    float* array = (float*)dpnp_memory_alloc_c(len * sizeof(float));
+    float* result1 = (float*)dpnp_memory_alloc_c(size * sizeof(float));
+    float* result2 = (float*)dpnp_memory_alloc_c(len * sizeof(float));
 
     /* init input diagonal array like:
     1, 0, 0,
@@ -62,16 +63,17 @@ int main(int, char**)
         array[size * i + i] = i + 1;
     }
 
-    mkl_lapack_syevd_c<double>(array, result, size);
+    custom_lapack_eig_c<float, float>(array, result1, result2, size);
 
     std::cout << "eigen values" << std::endl;
     for (size_t i = 0; i < size; ++i)
     {
-        std::cout << result[i] << ", ";
+        std::cout << result1[i] << ", ";
     }
     std::cout << std::endl;
 
-    dpnp_memory_free_c(result);
+    dpnp_memory_free_c(result2);
+    dpnp_memory_free_c(result1);
     dpnp_memory_free_c(array);
 
     return 0;
diff --git a/setup.py b/setup.py
@@ -312,7 +312,6 @@
                 "dpnp/backend/custom_kernels_sorting.cpp",
                 "dpnp/backend/custom_kernels_statistics.cpp",
                 "dpnp/backend/memory_sycl.cpp",
-                "dpnp/backend/mkl_wrap_lapack.cpp",
                 "dpnp/backend/mkl_wrap_rng.cpp",
                 "dpnp/backend/queue_sycl.cpp"
             ],
diff --git a/tests/test_linalg.py b/tests/test_linalg.py
@@ -29,10 +29,12 @@ def vvsort(val, vec, size):
                          [2, 4, 8, 16, 300])
 def test_eig_arange(type, size):
     a = numpy.arange(size * size, dtype=type).reshape((size, size))
-    symm = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=type))
-    isymm = inp.array(symm)
+    symm_orig = numpy.tril(a) + numpy.tril(a, -1).T + numpy.diag(numpy.full((size,), size * size, dtype=type))
+    symm = symm_orig
+    dpnp_symm_orig = inp.array(symm)
+    dpnp_symm = dpnp_symm_orig
 
-    dpnp_val, dpnp_vec = inp.linalg.eig(isymm)
+    dpnp_val, dpnp_vec = inp.linalg.eig(dpnp_symm)
     np_val, np_vec = numpy.linalg.eig(symm)
 
     # DPNP sort val/vec by abs value
@@ -46,5 +48,13 @@ def test_eig_arange(type, size):
         if np_vec[0, i] * dpnp_vec[0, i] < 0:
             np_vec[:, i] = -np_vec[:, i]
 
+    numpy.testing.assert_array_equal(symm_orig, symm)
+    numpy.testing.assert_array_equal(dpnp_symm_orig, dpnp_symm)
+
+    assert (dpnp_val.dtype == np_val.dtype)
+    assert (dpnp_vec.dtype == np_vec.dtype)
+    assert (dpnp_val.shape == np_val.shape)
+    assert (dpnp_vec.shape == np_vec.shape)
+
     numpy.testing.assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05)
     numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05)

Original file line number	Diff line number	Diff line change
`@@ -173,17 +173,19 @@ INP_DLLEXPORT void custom_prod_c(void* array, void* result, size_t size);`
`173`	`173`
`174`	`174`	`/**`
`175`	`175`	`* @ingroup BACKEND_API`
`176`		`- * @brief math library implementation of eig function`
	`176`	`+ * @brief Compute the eigenvalues and right eigenvectors of a square array.`
`177`	`177`	`*`
`178`		`- * @param [in] array1 Input array.`
	`178`	`+ * @param [in] array_in Input array[size][size]`
`179`	`179`	`*`
`180`		`- * @param [out] result1 Output array.`
	`180`	`+ * @param [out] result1 The eigenvalues, each repeated according to its multiplicity`
`181`	`181`	`*`
`182`		`- * @param [in] size Number of elements in input arrays.`
	`182`	`+ * @param [out] result2 The normalized (unit “length”) eigenvectors`
	`183`	`+ *`
	`184`	`+ * @param [in] size One dimension of square [size][size] array`
`183`	`185`	`*`
`184`	`186`	`*/`
`185`		`-template <typename _DataType>`
`186`		`-INP_DLLEXPORT void mkl_lapack_syevd_c(void* array1, void* result1, size_t size);`
	`187`	`+template <typename _DataType, typename _ResultType>`
	`188`	`+INP_DLLEXPORT void custom_lapack_eig_c(const void* array_in, void* result1, void* result2, size_t size);`
`187`	`189`
`188`	`190`	`/**`
`189`	`191`	`* @ingroup BACKEND_API`