Linalg kron func (#507)

Alexander-Makaryev · web-flow · commit b0f6461bc5eb · 2021-01-21T10:07:47.000-06:00
* kron func impl
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -98,8 +98,9 @@ enum class DPNPFuncName : size_t
     DPNP_FN_FLOOR_DIVIDE,             /**< Used in numpy.floor_divide() implementation  */
     DPNP_FN_FMOD,                     /**< Used in numpy.fmod() implementation  */
     DPNP_FN_HYPOT,                    /**< Used in numpy.hypot() implementation  */
-    DPNP_FN_INVERT,                   /**< Used in numpy.invert() implementation  */
     DPNP_FN_INV,                      /**< Used in numpy.linalg.inv() implementation  */
+    DPNP_FN_INVERT,                   /**< Used in numpy.invert() implementation  */
+    DPNP_FN_KRON,                     /**< Used in numpy.kron() implementation  */
     DPNP_FN_LEFT_SHIFT,               /**< Used in numpy.left_shift() implementation  */
     DPNP_FN_LOG,                      /**< Used in numpy.log() implementation  */
     DPNP_FN_LOG10,                    /**< Used in numpy.log10() implementation  */
diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp
@@ -275,6 +275,73 @@ void dpnp_inv_c(void* array1_in, void* result1, size_t* shape, size_t ndim)
     return;
 }
 
+template <typename _DataType1, typename _DataType2, typename _ResultType>
+class dpnp_kron_c_kernel; // kernel name tag used by dpnp_kron_c's parallel_for
+
+/**
+ * Kronecker product of two arrays of equal rank ndim (each shape array has ndim entries).
+ * Along every axis: result index = (array1 index) * in2_shape[axis] + (array2 index).
+ */
+template <typename _DataType1, typename _DataType2, typename _ResultType>
+void dpnp_kron_c(void* array1_in,
+                 void* array2_in,
+                 void* result1,
+                 size_t* in1_shape,
+                 size_t* in2_shape,
+                 size_t* res_shape,
+                 size_t ndim)
+{
+    _DataType1* array1 = reinterpret_cast<_DataType1*>(array1_in);
+    _DataType2* array2 = reinterpret_cast<_DataType2*>(array2_in);
+    _ResultType* result = reinterpret_cast<_ResultType*>(result1);
+
+    size_t size = 1;
+    for (size_t i = 0; i < ndim; ++i)
+    {
+        size *= res_shape[i];
+    }
+    if (size == 0)
+    {
+        return; // empty result: no work-items to launch, nothing allocated yet
+    }
+
+    // Scratch buffers read by the kernel; every one of them is freed before returning.
+    size_t* _in2_shape = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
+    size_t* in1_offsets = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
+    size_t* in2_offsets = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
+    size_t* res_offsets = reinterpret_cast<size_t*>(dpnp_memory_alloc_c(ndim * sizeof(size_t)));
+    dpnp_memory_memcpy_c(_in2_shape, in2_shape, ndim * sizeof(size_t));
+    get_shape_offsets_inkernel<size_t>(in1_shape, ndim, in1_offsets);
+    get_shape_offsets_inkernel<size_t>(in2_shape, ndim, in2_offsets);
+    get_shape_offsets_inkernel<size_t>(res_shape, ndim, res_offsets);
+
+    cl::sycl::range<1> gws(size);
+    auto kernel_parallel_for_func = [=](cl::sycl::id<1> global_id) {
+        const size_t idx = global_id[0];
+        size_t idx1 = 0;
+        size_t idx2 = 0;
+        size_t remainder = idx; // part of the flat result index not yet decomposed
+        for (size_t axis = 0; axis < ndim; ++axis)
+        {
+            const size_t res_axis = remainder / res_offsets[axis];
+            remainder -= res_axis * res_offsets[axis];
+            const size_t in1_axis = res_axis / _in2_shape[axis];
+            const size_t in2_axis = res_axis - in1_axis * _in2_shape[axis];
+            idx1 += in1_axis * in1_offsets[axis];
+            idx2 += in2_axis * in2_offsets[axis];
+        }
+        result[idx] = array1[idx1] * array2[idx2];
+    };
+    auto kernel_func = [&](cl::sycl::handler& cgh) {
+        cgh.parallel_for<class dpnp_kron_c_kernel<_DataType1, _DataType2, _ResultType>>(gws, kernel_parallel_for_func);
+    };
+    DPNP_QUEUE.submit(kernel_func).wait(); // block until the kernel has finished
+    dpnp_memory_free_c(_in2_shape);
+    dpnp_memory_free_c(in1_offsets);
+    dpnp_memory_free_c(in2_offsets);
+    dpnp_memory_free_c(res_offsets);
+}
+
 template <typename _DataType>
 class dpnp_matrix_rank_c_kernel;
 
@@ -379,6 +446,41 @@ void func_map_init_linalg_func(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_INV][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_inv_c<float>};
     fmap[DPNPFuncName::DPNP_FN_INV][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_inv_c<double>};
 
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_kron_c<int, int, int>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_LNG] = {eft_LNG, (void*)dpnp_kron_c<int, long, long>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_FLT] = {eft_FLT, (void*)dpnp_kron_c<int, float, float>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_DBL] = {eft_DBL, (void*)dpnp_kron_c<int, double, double>};
+    // fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_C128] = {
+    // eft_C128, (void*)dpnp_kron_c<int, std::complex<double>, std::complex<double>>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_INT] = {eft_LNG, (void*)dpnp_kron_c<long, int, long>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_kron_c<long, long, long>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_FLT] = {eft_FLT, (void*)dpnp_kron_c<long, float, float>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_DBL] = {eft_DBL, (void*)dpnp_kron_c<long, double, double>};
+    // fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_C128] = {
+    // eft_C128, (void*)dpnp_kron_c<long, std::complex<double>, std::complex<double>>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_INT] = {eft_FLT, (void*)dpnp_kron_c<float, int, float>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_LNG] = {eft_FLT, (void*)dpnp_kron_c<float, long, float>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_kron_c<float, float, float>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_DBL] = {eft_DBL, (void*)dpnp_kron_c<float, double, double>};
+    // fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_C128] = {
+    // eft_C128, (void*)dpnp_kron_c<float, std::complex<double>, std::complex<double>>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_INT] = {eft_DBL, (void*)dpnp_kron_c<double, int, double>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_LNG] = {eft_DBL, (void*)dpnp_kron_c<double, long, double>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_FLT] = {eft_DBL, (void*)dpnp_kron_c<double, float, double>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_kron_c<double, double, double>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_C128] = {
+        eft_C128, (void*)dpnp_kron_c<double, std::complex<double>, std::complex<double>>};
+    // fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_INT] = {
+    // eft_C128, (void*)dpnp_kron_c<std::complex<double>, int, std::complex<double>>};
+    // fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_LNG] = {
+    // eft_C128, (void*)dpnp_kron_c<std::complex<double>, long, std::complex<double>>};
+    // fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_FLT] = {
+    // eft_C128, (void*)dpnp_kron_c<std::complex<double>, float, std::complex<double>>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_DBL] = {
+        eft_C128, (void*)dpnp_kron_c<std::complex<double>, double, std::complex<double>>};
+    fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_C128] = {
+        eft_C128, (void*)dpnp_kron_c<std::complex<double>, std::complex<double>, std::complex<double>>};
+
     fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_matrix_rank_c<int>};
     fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_matrix_rank_c<long>};
     fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_matrix_rank_c<float>};
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -73,6 +73,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_HYPOT
         DPNP_FN_INV
         DPNP_FN_INVERT
+        DPNP_FN_KRON
         DPNP_FN_LEFT_SHIFT
         DPNP_FN_LOG
         DPNP_FN_LOG10
diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -98,6 +98,10 @@ cpdef dparray dpnp_array(obj, dtype=None):
     cdef elem_dtype
     cdef dparray_shape_type obj_shape
 
+    # convert scalar to tuple
+    if dpnp.isscalar(obj):
+        obj = (obj, )
+
     if not cpython.PySequence_Check(obj):
         raise TypeError(f"DPNP array(): Unsupported non-sequence obj={type(obj)}")
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
@@ -39,10 +39,15 @@ cimport numpy
 __all__ += [
     "dpnp_dot",
     "dpnp_inner",
+    "dpnp_kron",
     "dpnp_outer"
 ]
 
 
+# C function pointer to the C library template functions
+ctypedef void(*fptr_2in_1out_shapes_t)(void * , void * , void * , size_t * , size_t * , size_t * , size_t)
+
+
 cpdef dparray dpnp_dot(dparray in_array1, dparray in_array2):
     cdef vector[Py_ssize_t] shape1 = in_array1.shape
     cdef vector[Py_ssize_t] shape2 = in_array2.shape
@@ -148,6 +153,45 @@ cpdef dparray dpnp_inner(dparray array1, dparray array2):
     return result
 
 
+cpdef dparray dpnp_kron(dparray in_array1, dparray in_array2):
+    """Kronecker product of two dparrays, dispatched to the DPNP_FN_KRON kernel."""
+    cdef size_t ndim = max(in_array1.ndim, in_array2.ndim)
+    cdef dparray_shape_type in_array1_shape
+    cdef dparray_shape_type in_array2_shape
+    cdef dparray_shape_type result_shape
+
+    # left-pad the shape of the lower-rank operand with ones up to the common rank
+    for _ in range(ndim - in_array1.ndim):
+        in_array1_shape.push_back(1)
+    for axis in range(in_array1.ndim):
+        in_array1_shape.push_back(in_array1.shape[axis])
+    for _ in range(ndim - in_array2.ndim):
+        in_array2_shape.push_back(1)
+    for axis in range(in_array2.ndim):
+        in_array2_shape.push_back(in_array2.shape[axis])
+
+    # every result extent is the product of the matching operand extents
+    for axis in range(ndim):
+        result_shape.push_back(in_array1_shape[axis] * in_array2_shape[axis])
+
+    # map the operand dtypes to the C enum DPNPFuncType and look up the kernel for that pair
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
+    cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype)
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_KRON, param1_type, param2_type)
+
+    # create the result array with the return type recorded in the FPTR data
+    result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type)
+    cdef dparray result = dparray(result_shape, dtype=result_type)
+
+    # call the kernel with raw data pointers, the padded shapes and the common rank
+    cdef fptr_2in_1out_shapes_t func = <fptr_2in_1out_shapes_t > kernel_data.ptr
+    func(in_array1.get_data(), in_array2.get_data(), result.get_data(),
+         < size_t * > in_array1_shape.data(), < size_t * > in_array2_shape.data(),
+         < size_t * > result_shape.data(), ndim)
+
+    return result
+
+
 cpdef dparray dpnp_outer(dparray array1, dparray array2):
     cdef dparray_shape_type result_shape = (array1.size, array2.size)
     result_type = numpy.promote_types(array1.dtype, array1.dtype)
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
@@ -194,21 +194,30 @@ def inner(x1, x2, **kwargs):
     return call_origin(numpy.inner, x1, x2, **kwargs)
 
 
-def kron(x1, x2, **kwargs):
+def kron(a, b):
     """
     Returns the kronecker product of two arrays.
 
     For full documentation refer to :obj:`numpy.kron`.
 
-    Limitations
-    -----------
-    Function is executed sequentially on CPU.
-
     .. seealso:: :obj:`dpnp.outer` returns the outer product of two arrays.
 
     """
 
-    return call_origin(numpy.kron, x1, x2, **kwargs)
+    if not use_origin_backend(a):
+        # wrap scalar operands with dpnp.array before checking their types
+        if dpnp.isscalar(a):
+            a = dpnp.array(a)
+        if dpnp.isscalar(b):
+            b = dpnp.array(b)
+
+        # dpnp_kron is used only when both operands are dparray instances
+        both_dparrays = isinstance(a, dparray) and isinstance(b, dparray)
+        if both_dparrays:
+            return dpnp_kron(a, b)
+
+    # every other combination is delegated to numpy.kron through call_origin
+    return call_origin(numpy.kron, a, b)
 
 
 def outer(x1, x2, **kwargs):
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
@@ -835,13 +835,11 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matri
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_dot_vec2
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_inner
-tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_kron
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_outer
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot
-tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_zerodim_kron
 tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_array_scalar
 tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_finite
 tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
@@ -1034,13 +1034,11 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matri
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_dot_vec2
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_multidim_vdot
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_inner
-tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_kron
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_outer
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot
-tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_zerodim_kron
 tests/third_party/cupy/linalg_tests/test_product.py::TestProductZeroLength_param_0_{params=((0, 0), 2)}::test_tensordot_zero_length
 tests/third_party/cupy/linalg_tests/test_product.py::TestProductZeroLength_param_1_{params=((0, 0), (1, 0))}::test_tensordot_zero_length
 tests/third_party/cupy/linalg_tests/test_product.py::TestProductZeroLength_param_2_{params=((0, 0, 0), 2)}::test_tensordot_zero_length