diff --git a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp deleted file mode 100644 index 018a625a606a..000000000000 --- a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp +++ /dev/null @@ -1,92 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -/* - * This header file contains single argument element wise functions definitions - * - * Macro `MACRO_1ARG_2TYPES_OP` must be defined before usage - * - * Parameters: - * - public name of the function and kernel name - * - operation used to calculate the result - * - mkl operation used to calculate the result - * - */ - -#ifndef MACRO_1ARG_2TYPES_OP -#error "MACRO_1ARG_2TYPES_OP is not defined" -#endif - -#ifdef _SECTION_DOCUMENTATION_GENERATION_ - -#define MACRO_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \ - /** @ingroup BACKEND_API */ \ - /** @brief Per element operation function __name__ */ \ - /** */ \ - /** Function "__name__" executes operator "__operation1__" over each \ - * element of the array */ \ - /** */ \ - /** @param[in] q_ref Reference to SYCL queue. */ \ - /** @param[out] result_out Output array. */ \ - /** @param[in] result_size Output array size. */ \ - /** @param[in] result_ndim Number of output array dimensions. \ - */ \ - /** @param[in] result_shape Output array shape. */ \ - /** @param[in] result_strides Output array strides. */ \ - /** @param[in] input1_in Input array 1. */ \ - /** @param[in] input1_size Input array 1 size. */ \ - /** @param[in] input1_ndim Number of input array 1 dimensions. \ - */ \ - /** @param[in] input1_shape Input array 1 shape. */ \ - /** @param[in] input1_strides Input array 1 strides. */ \ - /** @param[in] where Where condition. */ \ - /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. \ - */ \ - template \ - DPCTLSyclEventRef __name__( \ - DPCTLSyclQueueRef q_ref, void *result_out, const size_t result_size, \ - const size_t result_ndim, const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const size_t *where, \ - const DPCTLEventVectorRef dep_event_vec_ref); \ - \ - template \ - void __name__( \ - void *result_out, const size_t result_size, const size_t result_ndim, \ - const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const size_t *where); - -#endif - -MACRO_1ARG_2TYPES_OP(dpnp_sqrt_c, - sycl::sqrt(input_elem), - oneapi::mkl::vm::sqrt(q, input1_size, input1_data, result)) - -#undef MACRO_1ARG_2TYPES_OP diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 6ceddf889629..029a4a6d903e 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -100,35 +100,6 @@ INP_DLLEXPORT void dpnp_memory_memcpy_c(DPCTLSyclQueueRef q_ref, INP_DLLEXPORT void dpnp_memory_memcpy_c(void *dst, const void *src, size_t size_in_bytes); -/** - * @ingroup BACKEND_API - * @brief Compute the variance along the specified axis, while ignoring NaNs. - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array Input array. - * @param [in] mask_arr Input mask array when elem is nan. - * @param [out] result Output array. - * @param [in] result_size Output array size. - * @param [in] size Number of elements in input arrays. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_nanvar_c(DPCTLSyclQueueRef q_ref, - void *array, - void *mask_arr, - void *result, - const size_t result_size, - size_t size, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_nanvar_c(void *array, - void *mask_arr, - void *result, - const size_t result_size, - size_t size); - /** * @ingroup BACKEND_API * @brief Custom implementation of dot function @@ -238,29 +209,6 @@ INP_DLLEXPORT void dpnp_sum_c(void *result_out, const void *initial, const long *where); -/** - * @ingroup BACKEND_API - * @brief Custom implementation of count_nonzero function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array1_in Input array. - * @param [out] result1_out Output array. - * @param [in] size Number of elements in input arrays. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - * - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_count_nonzero_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1_out, - size_t size, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void - dpnp_count_nonzero_c(void *array1_in, void *result1_out, size_t size); - /** * @ingroup BACKEND_API * @brief Return a partitioned copy of an array. @@ -463,30 +411,6 @@ INP_DLLEXPORT void dpnp_correlate_c(void *result_out, const size_t input2_shape_ndim, const size_t *where); -/** - * @ingroup BACKEND_API - * @brief Custom implementation of cov function with math library and PSTL - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array Input array. - * @param [out] result Output array. - * @param [in] nrows Number of rows in input array. - * @param [in] ncols Number of columns in input array. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_cov_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - size_t nrows, - size_t ncols, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void - dpnp_cov_c(void *array1_in, void *result1, size_t nrows, size_t ncols); - /** * @ingroup BACKEND_API * @brief Construct an array from an index array and a list of arrays to choose @@ -541,73 +465,6 @@ INP_DLLEXPORT DPCTLSyclEventRef template INP_DLLEXPORT void dpnp_initval_c(void *result1, void *value, size_t size); -/** - * @ingroup BACKEND_API - * @brief math library implementation of max function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array1_in Input array with data. - * @param [out] result1 Output array. - * @param [in] result_size Output array size. - * @param [in] shape Shape of input array. - * @param [in] ndim Number of elements in shape. - * @param [in] axis Axis. - * @param [in] naxis Number of elements in axis. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_max_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_max_c(void *array1_in, - void *result1, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis); - -/** - * @ingroup BACKEND_API - * @brief math library implementation of mean function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array Input array with data. - * @param [out] result Output array. - * @param [in] shape Shape of input array. - * @param [in] ndim Number of elements in shape. - * @param [in] axis Axis. - * @param [in] naxis Number of elements in axis. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_mean_c(DPCTLSyclQueueRef q_ref, - void *array, - void *result, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_mean_c(void *array, - void *result, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis); - /** * @ingroup BACKEND_API * @brief math library implementation of median function @@ -640,41 +497,6 @@ INP_DLLEXPORT void dpnp_median_c(void *array, const shape_elem_type *axis, size_t naxis); -/** - * @ingroup BACKEND_API - * @brief math library implementation of min function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array Input array with data. - * @param [out] result Output array. - * @param [in] result_size Output array size. - * @param [in] shape Shape of input array. - * @param [in] ndim Number of elements in shape. - * @param [in] axis Axis. - * @param [in] naxis Number of elements in axis. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_min_c(DPCTLSyclQueueRef q_ref, - void *array, - void *result, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_min_c(void *array, - void *result, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis); - /** * @ingroup BACKEND_API * @brief math library implementation of argmax function @@ -717,76 +539,6 @@ INP_DLLEXPORT DPCTLSyclEventRef template INP_DLLEXPORT void dpnp_argmin_c(void *array, void *result, size_t size); -/** - * @ingroup BACKEND_API - * @brief math library implementation of std function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array Input array with data. - * @param [out] result Output array with indices. - * @param [in] shape Shape of input array. - * @param [in] ndim Number of elements in shape. - * @param [in] axis Axis. - * @param [in] naxis Number of elements in axis. - * @param [in] ddof Delta degrees of freedom. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_std_c(DPCTLSyclQueueRef q_ref, - void *array, - void *result, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_std_c(void *array, - void *result, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof); - -/** - * @ingroup BACKEND_API - * @brief math library implementation of var function - * - * @param [in] q_ref Reference to SYCL queue. - * @param [in] array Input array with data. - * @param [out] result Output array with indices. - * @param [in] shape Shape of input array. - * @param [in] ndim Number of elements in shape. - * @param [in] axis Axis. - * @param [in] naxis Number of elements in axis. - * @param [in] ddof Delta degrees of freedom. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. - */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_var_c(DPCTLSyclQueueRef q_ref, - void *array, - void *result, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_var_c(void *array, - void *result, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof); - #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ template \ INP_DLLEXPORT DPCTLSyclEventRef __name__( \ @@ -809,28 +561,6 @@ INP_DLLEXPORT void dpnp_var_c(void *array, #include -#define MACRO_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \ - template \ - INP_DLLEXPORT DPCTLSyclEventRef __name__( \ - DPCTLSyclQueueRef q_ref, void *result_out, const size_t result_size, \ - const size_t result_ndim, const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const size_t *where, \ - const DPCTLEventVectorRef dep_event_vec_ref); \ - \ - template \ - INP_DLLEXPORT void __name__( \ - void *result_out, const size_t result_size, const size_t result_ndim, \ - const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const size_t *where); - -#include - #define MACRO_2ARG_3TYPES_OP(__name__, __operation__, __vec_operation__, \ __vec_types__, __mkl_operation__, __mkl_types__) \ template \ - class __name__##_kernel; \ - \ - template \ - class __name__##_strides_kernel; \ - \ - template \ - DPCTLSyclEventRef __name__( \ - DPCTLSyclQueueRef q_ref, void *result_out, const size_t result_size, \ - const size_t result_ndim, const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const size_t *where, \ - const DPCTLEventVectorRef dep_event_vec_ref) \ - { \ - /* avoid warning unused variable*/ \ - (void)result_shape; \ - (void)where; \ - (void)dep_event_vec_ref; \ - \ - DPCTLSyclEventRef event_ref = nullptr; \ - \ - if (!input1_size) { \ - return event_ref; \ - } \ - \ - sycl::queue q = *(reinterpret_cast(q_ref)); \ - \ - _DataType_input *input1_data = \ - static_cast<_DataType_input *>(const_cast(input1_in)); \ - _DataType_output *result = \ - static_cast<_DataType_output *>(result_out); \ - \ - shape_elem_type *input1_shape_offsets = \ - new shape_elem_type[input1_ndim]; \ - \ - get_shape_offsets_inkernel(input1_shape, input1_ndim, \ - input1_shape_offsets); \ - bool use_strides = !array_equal(input1_strides, input1_ndim, \ - input1_shape_offsets, input1_ndim); \ - delete[] input1_shape_offsets; \ - \ - sycl::event event; \ - sycl::range<1> gws(result_size); \ - \ - if (use_strides) { \ - if (result_ndim != input1_ndim) { \ - throw std::runtime_error( \ - "Result ndim=" + std::to_string(result_ndim) + \ - " mismatches with input1 ndim=" + \ - std::to_string(input1_ndim)); \ - } \ - \ - /* memory transfer optimization, use USM-host for temporary speeds \ - * up transfer to device */ \ - using usm_host_allocatorT = \ - sycl::usm_allocator; \ - \ - size_t strides_size = 2 * result_ndim; \ - shape_elem_type *dev_strides_data = \ - sycl::malloc_device(strides_size, q); \ - \ - /* create host temporary for packed strides managed by shared \ - * pointer */ \ - auto strides_host_packed = \ - std::vector( \ - strides_size, usm_host_allocatorT(q)); \ - \ - /* packed vector is concatenation of result_strides, \ - * input1_strides and input2_strides */ \ - std::copy(result_strides, result_strides + result_ndim, \ - strides_host_packed.begin()); \ - std::copy(input1_strides, input1_strides + result_ndim, \ - strides_host_packed.begin() + result_ndim); \ - \ - auto copy_strides_ev = q.copy( \ - strides_host_packed.data(), dev_strides_data, \ - strides_host_packed.size()); \ - \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - size_t output_id = global_id[0]; /* for (size_t i = 0; i < \ - result_size; ++i) */ \ - { \ - const shape_elem_type *result_strides_data = \ - &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = \ - &dev_strides_data[result_ndim]; \ - \ - size_t input_id = 0; \ - for (size_t i = 0; i < input1_ndim; ++i) { \ - const size_t output_xyz_id = \ - get_xyz_id_by_id_inkernel(output_id, \ - result_strides_data, \ - result_ndim, i); \ - input_id += output_xyz_id * input1_strides_data[i]; \ - } \ - \ - const _DataType_output input_elem = input1_data[input_id]; \ - result[output_id] = __operation1__; \ - } \ - }; \ - auto kernel_func = [&](sycl::handler &cgh) { \ - cgh.parallel_for>( \ - gws, kernel_parallel_for_func); \ - }; \ - \ - q.submit(kernel_func).wait(); \ - \ - sycl::free(dev_strides_data, q); \ - return event_ref; \ - } \ - else { \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - size_t output_id = global_id[0]; /* for (size_t i = 0; i < \ - result_size; ++i) */ \ - { \ - const _DataType_output input_elem = \ - input1_data[output_id]; \ - result[output_id] = __operation1__; \ - } \ - }; \ - auto kernel_func = [&](sycl::handler &cgh) { \ - cgh.parallel_for>( \ - gws, kernel_parallel_for_func); \ - }; \ - \ - if constexpr (both_types_are_same<_DataType_input, \ - _DataType_output, float, \ - double>) \ - { \ - if (q.get_device().has(sycl::aspect::fp64)) { \ - event = __operation2__; \ - \ - event_ref = reinterpret_cast(&event); \ - return DPCTLEvent_Copy(event_ref); \ - } \ - } \ - event = q.submit(kernel_func); \ - } \ - \ - event_ref = reinterpret_cast(&event); \ - return DPCTLEvent_Copy(event_ref); \ - } \ - \ - template \ - void __name__( \ - void *result_out, const size_t result_size, const size_t result_ndim, \ - const shape_elem_type *result_shape, \ - const shape_elem_type *result_strides, const void *input1_in, \ - const size_t input1_size, const size_t input1_ndim, \ - const shape_elem_type *input1_shape, \ - const shape_elem_type *input1_strides, const size_t *where) \ - { \ - DPCTLSyclQueueRef q_ref = \ - reinterpret_cast(&DPNP_QUEUE); \ - DPCTLEventVectorRef dep_event_vec_ref = nullptr; \ - DPCTLSyclEventRef event_ref = \ - __name__<_DataType_input, _DataType_output>( \ - q_ref, result_out, result_size, result_ndim, result_shape, \ - result_strides, input1_in, input1_size, input1_ndim, \ - input1_shape, input1_strides, where, dep_event_vec_ref); \ - DPCTLEvent_WaitAndThrow(event_ref); \ - DPCTLEvent_Delete(event_ref); \ - } \ - \ - template \ - void (*__name__##_default)( \ - void *, const size_t, const size_t, const shape_elem_type *, \ - const shape_elem_type *, const void *, const size_t, const size_t, \ - const shape_elem_type *, const shape_elem_type *, const size_t *) = \ - __name__<_DataType_input, _DataType_output>; \ - \ - template \ - DPCTLSyclEventRef (*__name__##_ext)( \ - DPCTLSyclQueueRef, void *, const size_t, const size_t, \ - const shape_elem_type *, const shape_elem_type *, const void *, \ - const size_t, const size_t, const shape_elem_type *, \ - const shape_elem_type *, const size_t *, const DPCTLEventVectorRef) = \ - __name__<_DataType_input, _DataType_output>; - -#include - -static void func_map_init_elemwise_1arg_2type(func_map_t &fmap) -{ - - fmap[DPNPFuncName::DPNP_FN_SQRT][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_sqrt_c_default}; - fmap[DPNPFuncName::DPNP_FN_SQRT][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_sqrt_c_default}; - fmap[DPNPFuncName::DPNP_FN_SQRT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_sqrt_c_default}; - fmap[DPNPFuncName::DPNP_FN_SQRT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_sqrt_c_default}; - - // Used in dpnp_std_c - fmap[DPNPFuncName::DPNP_FN_SQRT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_sqrt_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SQRT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_sqrt_c_ext}; - - return; -} - template constexpr T dispatch_erf_op(T elem) { @@ -261,36 +52,6 @@ constexpr T dispatch_erf_op(T elem) } } -template -constexpr auto dispatch_fmod_op(T elem1, T elem2) -{ - if constexpr (sycl::detail::is_integral::value) { - if constexpr (sycl::detail::is_vec::value) { - T rem; - using ElemT = typename T::element_type; -#pragma unroll - for (size_t i = 0; i < rem.size(); i++) { - if (elem2[i] == ElemT(0)) { - rem[i] = ElemT(0); - } - else { - rem[i] = elem1[i] % elem2[i]; - } - } - return rem; - } - else { - if (elem2 == T(0)) { - return T(0); - } - return elem1 % elem2; - } - } - else { - return sycl::fmod(elem1, elem2); - } -} - #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ template \ class __name__##_kernel; \ @@ -1001,7 +762,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t &fmap) void func_map_init_elemwise(func_map_t &fmap) { func_map_init_elemwise_1arg_1type(fmap); - func_map_init_elemwise_1arg_2type(fmap); func_map_init_elemwise_2arg_3type(fmap); return; diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index a108b5200913..66a4881d7f2a 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -128,503 +128,6 @@ DPCTLSyclEventRef (*dpnp_correlate_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_correlate_c<_DataType_output, _DataType_input1, _DataType_input2>; -template -class dpnp_cov_c_kernel1; - -template -class dpnp_cov_c_kernel2; - -template -DPCTLSyclEventRef dpnp_cov_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - size_t nrows, - size_t ncols, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - if (!nrows || !ncols) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, nrows * ncols); - _DataType *array_1 = input1_ptr.get_ptr(); - _DataType *result = reinterpret_cast<_DataType *>(result1); - - auto policy = oneapi::dpl::execution::make_device_policy< - class dpnp_cov_c_kernel1<_DataType>>(q); - - _DataType *mean = reinterpret_cast<_DataType *>( - sycl::malloc_shared(nrows * sizeof(_DataType), q)); - for (size_t i = 0; i < nrows; ++i) { - _DataType *row_start = array_1 + ncols * i; - mean[i] = std::reduce(policy, row_start, row_start + ncols, - _DataType(0), std::plus<_DataType>()) / - ncols; - } - policy.queue().wait(); - - _DataType *temp = reinterpret_cast<_DataType *>( - sycl::malloc_shared(nrows * ncols * sizeof(_DataType), q)); - for (size_t i = 0; i < nrows; ++i) { - size_t offset = ncols * i; - _DataType *row_start = array_1 + offset; - std::transform(policy, row_start, row_start + ncols, temp + offset, - [=](_DataType x) { return x - mean[i]; }); - } - policy.queue().wait(); - - sycl::event event_syrk; - - const _DataType alpha = _DataType(1) / (ncols - 1); - const _DataType beta = _DataType(0); - - event_syrk = - mkl_blas::syrk(q, // queue &exec_queue, - oneapi::mkl::uplo::upper, // uplo upper_lower, - oneapi::mkl::transpose::nontrans, // transpose trans, - nrows, // std::int64_t n, - ncols, // std::int64_t k, - alpha, // T alpha, - temp, // const T* a, - ncols, // std::int64_t lda, - beta, // T beta, - result, // T* c, - nrows); // std::int64_t ldc); - event_syrk.wait(); - - // fill lower elements - sycl::event event; - sycl::range<1> gws(nrows * nrows); - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t idx = global_id[0]; - const size_t row_idx = idx / nrows; - const size_t col_idx = idx - row_idx * nrows; - if (col_idx < row_idx) { - result[idx] = result[col_idx * nrows + row_idx]; - } - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - event = q.submit(kernel_func); - - event.wait(); - - sycl::free(mean, q); - sycl::free(temp, q); - - return event_ref; -} - -template -void dpnp_cov_c(void *array1_in, void *result1, size_t nrows, size_t ncols) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_cov_c<_DataType>( - q_ref, array1_in, result1, nrows, ncols, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); -} - -template -void (*dpnp_cov_default_c)(void *, void *, size_t, size_t) = - dpnp_cov_c<_DataType>; - -template -DPCTLSyclEventRef - dpnp_count_nonzero_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1_out, - size_t size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - if (array1_in == nullptr) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, array1_in, size, true); - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result1_out, 1, true, - true); - _DataType_input *array1 = input1_ptr.get_ptr(); - _DataType_output *result1 = result_ptr.get_ptr(); - - result1[0] = 0; - - for (size_t i = 0; i < size; ++i) { - if (array1[i] != 0) { - result1[0] += 1; - } - } - - return event_ref; -} - -template -void dpnp_count_nonzero_c(void *array1_in, void *result1_out, size_t size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_count_nonzero_c<_DataType_input, _DataType_output>( - q_ref, array1_in, result1_out, size, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_count_nonzero_default_c)(void *, void *, size_t) = - dpnp_count_nonzero_c<_DataType_input, _DataType_output>; - -template -class dpnp_max_c_kernel; - -template -DPCTLSyclEventRef dpnp_max_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t size_input = std::accumulate( - shape, shape + ndim, 1, std::multiplies()); - if (!size_input) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size_input, true); - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result1, result_size, true, - true); - _DataType *array_1 = input1_ptr.get_ptr(); - _DataType *result = result_ptr.get_ptr(); - - if (naxis == 0) { - __attribute__((unused)) void *tmp = (void *)(axis + naxis); - - size_t size = 1; - for (size_t i = 0; i < ndim; ++i) { - size *= shape[i]; - } - - if constexpr (std::is_same<_DataType, double>::value || - std::is_same<_DataType, float>::value) - { - // Required initializing the result before call the function - result[0] = array_1[0]; - - auto dataset = - mkl_stats::make_dataset(1, size, - array_1); - - sycl::event event = mkl_stats::max(q, dataset, result); - - event.wait(); - } - else { - auto policy = oneapi::dpl::execution::make_device_policy< - class dpnp_max_c_kernel<_DataType>>(q); - - _DataType *res = std::max_element(policy, array_1, array_1 + size); - policy.queue().wait(); - - result[0] = *res; - } - } - else { - std::vector res_shape; - for (size_t i = 0; i < ndim; ++i) { - bool found = false; - for (size_t j = 0; j < naxis; ++j) { - if (static_cast(axis[j]) == i) { - found = true; - break; - } - } - if (!found) { - res_shape.push_back(shape[i]); - } - } - const size_t res_ndim = res_shape.size(); - - size_t acc = 1; - std::vector input_shape_offsets{acc}; - for (size_t i = ndim - 2; i > 0; --i) { - acc *= shape[i]; - input_shape_offsets.insert(input_shape_offsets.begin(), acc); - } - - acc = 1; - std::vector output_shape_offsets{acc}; - for (size_t i = res_ndim - 2; i > 0; --i) { - acc *= res_shape[i]; - output_shape_offsets.insert(output_shape_offsets.begin(), acc); - } - - size_t size_result = 1; - for (size_t i = 0; i < res_ndim; ++i) { - size_result *= res_shape[i]; - } - - // init result array - size_t *xyz = new size_t[res_ndim]; - size_t *source_axis = new size_t[ndim]; - size_t *result_axis = new size_t[res_ndim]; - for (size_t result_idx = 0; result_idx < size_result; ++result_idx) { - size_t remainder = result_idx; - for (size_t i = 0; i < res_ndim; ++i) { - xyz[i] = remainder / output_shape_offsets[i]; - remainder = remainder - xyz[i] * output_shape_offsets[i]; - } - - size_t result_axis_idx = 0; - for (size_t idx = 0; idx < ndim; ++idx) { - bool found = false; - for (size_t i = 0; i < naxis; ++i) { - if (static_cast(axis[i]) == idx) { - found = true; - break; - } - } - if (found) { - source_axis[idx] = 0; - } - else { - source_axis[idx] = xyz[result_axis_idx]; - result_axis_idx++; - } - } - - size_t source_idx = 0; - for (size_t i = 0; i < ndim; ++i) { - source_idx += input_shape_offsets[i] * source_axis[i]; - } - - result[result_idx] = array_1[source_idx]; - } - - for (size_t source_idx = 0; source_idx < size_input; ++source_idx) { - // reconstruct x,y,z from linear source_idx - size_t remainder = source_idx; - for (size_t i = 0; i < ndim; ++i) { - xyz[i] = remainder / input_shape_offsets[i]; - remainder = remainder - xyz[i] * input_shape_offsets[i]; - } - - // extract result axis - size_t result_idx = 0; - for (size_t idx = 0; idx < ndim; ++idx) { - // try to find current idx in axis array - bool found = false; - for (size_t i = 0; i < naxis; ++i) { - if (static_cast(axis[i]) == idx) { - found = true; - break; - } - } - if (!found) { - result_axis[result_idx] = xyz[idx]; - result_idx++; - } - } - - // Construct result offset - size_t result_offset = 0; - for (size_t i = 0; i < res_ndim; ++i) { - result_offset += output_shape_offsets[i] * result_axis[i]; - } - - if (result[result_offset] < array_1[source_idx]) { - result[result_offset] = array_1[source_idx]; - } - } - - delete[] xyz; - delete[] source_axis; - delete[] result_axis; - } - - return event_ref; -} - -// Explicit instantiation of the function, since dpnp_max_c() is used by -// other template functions, but implicit instantiation is not applied anymore. -template DPCTLSyclEventRef dpnp_max_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template DPCTLSyclEventRef dpnp_max_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template DPCTLSyclEventRef dpnp_max_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template DPCTLSyclEventRef dpnp_max_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template -void dpnp_max_c(void *array1_in, - void *result1, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_max_c<_DataType>(q_ref, array1_in, result1, result_size, shape, - ndim, axis, naxis, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_max_default_c)(void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t) = dpnp_max_c<_DataType>; - -template -DPCTLSyclEventRef dpnp_mean_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - __attribute__((unused)) void *tmp = (void *)(axis + naxis); - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t size = std::accumulate(shape, shape + ndim, 1, - std::multiplies()); - if (!size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true); - DPNPC_ptr_adapter<_ResultType> result_ptr(q_ref, result1, 1, true, true); - _DataType *array = input1_ptr.get_ptr(); - _ResultType *result = result_ptr.get_ptr(); - - if constexpr (std::is_same<_DataType, double>::value || - std::is_same<_DataType, float>::value) - { - auto dataset = mkl_stats::make_dataset< - mkl_stats::layout::row_major /*, _ResultType*/>(1, size, array); - - sycl::event event = mkl_stats::mean(q, dataset, result); - - event.wait(); - - event_ref = reinterpret_cast(&event); - - return DPCTLEvent_Copy(event_ref); - } - else { - _ResultType *sum = reinterpret_cast<_ResultType *>( - sycl::malloc_shared(1 * sizeof(_ResultType), q)); - - dpnp_sum_c<_ResultType, _DataType>(sum, array, shape, ndim, axis, naxis, - nullptr, nullptr); - - result[0] = sum[0] / static_cast<_ResultType>(size); - - sycl::free(sum, q); - - return event_ref; - } -} - -template -void dpnp_mean_c(void *array1_in, - void *result1, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_mean_c<_DataType, _ResultType>( - q_ref, array1_in, result1, shape, ndim, axis, naxis, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_mean_default_c)(void *, - void *, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t) = dpnp_mean_c<_DataType, _ResultType>; - template DPCTLSyclEventRef dpnp_median_c(DPCTLSyclQueueRef q_ref, void *array1_in, @@ -707,505 +210,6 @@ DPCTLSyclEventRef (*dpnp_median_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_median_c<_DataType, _ResultType>; -template -class dpnp_min_c_kernel; - -template -DPCTLSyclEventRef dpnp_min_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - __attribute__((unused)) void *tmp = (void *)(axis + naxis); - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t size_input = std::accumulate( - shape, shape + ndim, 1, std::multiplies()); - if (!size_input) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size_input, true); - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result1, result_size, true, - true); - _DataType *array_1 = input1_ptr.get_ptr(); - _DataType *result = result_ptr.get_ptr(); - - if (naxis == 0) { - if constexpr (std::is_same<_DataType, double>::value || - std::is_same<_DataType, float>::value) - { - // Required initializing the result before call the function - result[0] = array_1[0]; - - auto dataset = - mkl_stats::make_dataset( - 1, size_input, array_1); - - sycl::event event = mkl_stats::min(q, dataset, result); - - event.wait(); - } - else { - auto policy = oneapi::dpl::execution::make_device_policy< - class dpnp_min_c_kernel<_DataType>>(q); - - _DataType *res = - std::min_element(policy, array_1, array_1 + size_input); - policy.queue().wait(); - - result[0] = *res; - } - } - else { - std::vector res_shape; - for (size_t i = 0; i < ndim; i++) { - bool found = false; - for (size_t j = 0; j < naxis; j++) { - if (static_cast(axis[j]) == i) { - found = true; - break; - } - } - if (!found) { - res_shape.push_back(shape[i]); - } - } - const size_t res_ndim = res_shape.size(); - - size_t acc = 1; - std::vector input_shape_offsets{acc}; - for (size_t i = ndim - 2; i > 0; --i) { - acc *= shape[i]; - input_shape_offsets.insert(input_shape_offsets.begin(), acc); - } - - acc = 1; - std::vector output_shape_offsets{acc}; - for (size_t i = res_ndim - 2; i > 0; --i) { - acc *= res_shape[i]; - output_shape_offsets.insert(output_shape_offsets.begin(), acc); - } - - size_t size_result = 1; - for (size_t i = 0; i < res_ndim; ++i) { - size_result *= res_shape[i]; - } - - // init result array - size_t *xyz = new size_t[res_ndim]; - size_t *source_axis = new size_t[ndim]; - size_t *result_axis = new size_t[res_ndim]; - for (size_t result_idx = 0; result_idx < size_result; ++result_idx) { - size_t remainder = result_idx; - for (size_t i = 0; i < res_ndim; ++i) { - xyz[i] = remainder / output_shape_offsets[i]; - remainder = remainder - xyz[i] * output_shape_offsets[i]; - } - - size_t result_axis_idx = 0; - for (size_t idx = 0; idx < ndim; ++idx) { - bool found = false; - for (size_t i = 0; i < naxis; ++i) { - if (static_cast(axis[i]) == idx) { - found = true; - break; - } - } - if (found) { - source_axis[idx] = 0; - } - else { - source_axis[idx] = xyz[result_axis_idx]; - result_axis_idx++; - } - } - - size_t source_idx = 0; - for (size_t i = 0; i < ndim; ++i) { - source_idx += input_shape_offsets[i] * source_axis[i]; - } - - result[result_idx] = array_1[source_idx]; - } - - for (size_t source_idx = 0; source_idx < size_input; ++source_idx) { - // reconstruct x,y,z from linear source_idx - size_t remainder = source_idx; - for (size_t i = 0; i < ndim; ++i) { - xyz[i] = remainder / input_shape_offsets[i]; - remainder = remainder - xyz[i] * input_shape_offsets[i]; - } - - // extract result axis - size_t result_idx = 0; - for (size_t idx = 0; idx < ndim; ++idx) { - // try to find current idx in axis array - bool found = false; - for (size_t i = 0; i < naxis; ++i) { - if (static_cast(axis[i]) == idx) { - found = true; - break; - } - } - if (!found) { - result_axis[result_idx] = xyz[idx]; - result_idx++; - } - } - - // Construct result offset - size_t result_offset = 0; - for (size_t i = 0; i < res_ndim; ++i) { - result_offset += output_shape_offsets[i] * result_axis[i]; - } - - if (result[result_offset] > array_1[source_idx]) { - result[result_offset] = array_1[source_idx]; - } - } - - delete[] xyz; - delete[] source_axis; - delete[] result_axis; - } - - return event_ref; -} - -// Explicit instantiation of the function, since dpnp_min_c() is used by -// other template functions, but implicit instantiation is not applied anymore. -template DPCTLSyclEventRef dpnp_min_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template DPCTLSyclEventRef dpnp_min_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template DPCTLSyclEventRef dpnp_min_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template DPCTLSyclEventRef dpnp_min_c(DPCTLSyclQueueRef q_ref, - void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - const DPCTLEventVectorRef); - -template -void dpnp_min_c(void *array1_in, - void *result1, - const size_t result_size, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_min_c<_DataType>(q_ref, array1_in, result1, result_size, shape, - ndim, axis, naxis, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_min_default_c)(void *, - void *, - const size_t, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t) = dpnp_min_c<_DataType>; - -template -DPCTLSyclEventRef dpnp_nanvar_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *mask_arr1, - void *result1, - const size_t result_size, - size_t arr_size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - if ((array1_in == nullptr) || (mask_arr1 == nullptr) || - (result1 == nullptr)) { - return event_ref; - } - - if (arr_size == 0) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, arr_size, true); - DPNPC_ptr_adapter input2_ptr(q_ref, mask_arr1, arr_size, true); - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result1, result_size, true, - true); - _DataType *array1 = input1_ptr.get_ptr(); - bool *mask_arr = input2_ptr.get_ptr(); - _DataType *result = result_ptr.get_ptr(); - - size_t ind = 0; - for (size_t i = 0; i < arr_size; ++i) { - if (!mask_arr[i]) { - result[ind] = array1[i]; - ind += 1; - } - } - - return event_ref; -} - -template -void dpnp_nanvar_c(void *array1_in, - void *mask_arr1, - void *result1, - const size_t result_size, - size_t arr_size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_nanvar_c<_DataType>(q_ref, array1_in, mask_arr1, result1, - result_size, arr_size, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_nanvar_default_c)(void *, void *, void *, const size_t, size_t) = - dpnp_nanvar_c<_DataType>; - -template -DPCTLSyclEventRef dpnp_std_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - sycl::queue q = *(reinterpret_cast(q_ref)); - - _ResultType *var = reinterpret_cast<_ResultType *>( - sycl::malloc_shared(1 * sizeof(_ResultType), q)); - - dpnp_var_c<_DataType, _ResultType>(array1_in, var, shape, ndim, axis, naxis, - ddof); - - const size_t result1_size = 1; - const size_t result1_ndim = 1; - const size_t result1_shape_size_in_bytes = - result1_ndim * sizeof(shape_elem_type); - const size_t result1_strides_size_in_bytes = - result1_ndim * sizeof(shape_elem_type); - shape_elem_type *result1_shape = reinterpret_cast( - sycl::malloc_shared(result1_shape_size_in_bytes, q)); - *result1_shape = 1; - shape_elem_type *result1_strides = reinterpret_cast( - sycl::malloc_shared(result1_strides_size_in_bytes, q)); - *result1_strides = 1; - - const size_t var_size = 1; - const size_t var_ndim = 1; - const size_t var_shape_size_in_bytes = var_ndim * sizeof(shape_elem_type); - const size_t var_strides_size_in_bytes = var_ndim * sizeof(shape_elem_type); - shape_elem_type *var_shape = reinterpret_cast( - sycl::malloc_shared(var_shape_size_in_bytes, q)); - *var_shape = 1; - shape_elem_type *var_strides = reinterpret_cast( - sycl::malloc_shared(var_strides_size_in_bytes, q)); - *var_strides = 1; - - DPCTLSyclEventRef e_sqrt_ref = dpnp_sqrt_c<_ResultType, _ResultType>( - q_ref, result1, result1_size, result1_ndim, result1_shape, - result1_strides, var, var_size, var_ndim, var_shape, var_strides, NULL, - NULL); - DPCTLEvent_WaitAndThrow(e_sqrt_ref); - DPCTLEvent_Delete(e_sqrt_ref); - - sycl::free(var, q); - sycl::free(result1_shape, q); - sycl::free(result1_strides, q); - sycl::free(var_shape, q); - sycl::free(var_strides, q); - - return event_ref; -} - -template -void dpnp_std_c(void *array1_in, - void *result1, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_std_c<_DataType, _ResultType>( - q_ref, array1_in, result1, shape, ndim, axis, naxis, ddof, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_std_default_c)(void *, - void *, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - size_t) = dpnp_std_c<_DataType, _ResultType>; - -template -class dpnp_var_c_kernel; - -template -DPCTLSyclEventRef dpnp_var_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t size = std::accumulate(shape, shape + ndim, 1, - std::multiplies()); - if (!size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - sycl::event event; - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size); - DPNPC_ptr_adapter<_ResultType> result_ptr(q_ref, result1, 1, true, true); - _DataType *array1 = input1_ptr.get_ptr(); - _ResultType *result = result_ptr.get_ptr(); - - _ResultType *mean = reinterpret_cast<_ResultType *>( - sycl::malloc_shared(1 * sizeof(_ResultType), q)); - dpnp_mean_c<_DataType, _ResultType>(array1, mean, shape, ndim, axis, naxis); - _ResultType mean_val = mean[0]; - - _ResultType *squared_deviations = reinterpret_cast<_ResultType *>( - sycl::malloc_shared(size * sizeof(_ResultType), q)); - - sycl::range<1> gws(size); - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/ - { - _ResultType deviation = - static_cast<_ResultType>(array1[i]) - mean_val; - squared_deviations[i] = deviation * deviation; - } - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - event = q.submit(kernel_func); - - event.wait(); - - dpnp_mean_c<_ResultType, _ResultType>(squared_deviations, mean, shape, ndim, - axis, naxis); - mean_val = mean[0]; - - result[0] = mean_val * size / static_cast<_ResultType>(size - ddof); - - sycl::free(mean, q); - sycl::free(squared_deviations, q); - - return event_ref; -} - -template -void dpnp_var_c(void *array1_in, - void *result1, - const shape_elem_type *shape, - size_t ndim, - const shape_elem_type *axis, - size_t naxis, - size_t ddof) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_var_c<_DataType, _ResultType>( - q_ref, array1_in, result1, shape, ndim, axis, naxis, ddof, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_var_default_c)(void *, - void *, - const shape_elem_type *, - size_t, - const shape_elem_type *, - size_t, - size_t) = dpnp_var_c<_DataType, _ResultType>; - void func_map_init_statistics(func_map_t &fmap) { fmap[DPNPFuncName::DPNP_FN_CORRELATE][eft_INT][eft_INT] = { @@ -1274,44 +278,6 @@ void func_map_init_statistics(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_CORRELATE_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_correlate_ext_c}; - fmap[DPNPFuncName::DPNP_FN_COUNT_NONZERO][eft_BLN][eft_BLN] = { - eft_LNG, (void *)dpnp_count_nonzero_default_c}; - fmap[DPNPFuncName::DPNP_FN_COUNT_NONZERO][eft_INT][eft_INT] = { - eft_LNG, (void *)dpnp_count_nonzero_default_c}; - fmap[DPNPFuncName::DPNP_FN_COUNT_NONZERO][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_count_nonzero_default_c}; - fmap[DPNPFuncName::DPNP_FN_COUNT_NONZERO][eft_FLT][eft_FLT] = { - eft_LNG, (void *)dpnp_count_nonzero_default_c}; - fmap[DPNPFuncName::DPNP_FN_COUNT_NONZERO][eft_DBL][eft_DBL] = { - eft_LNG, (void *)dpnp_count_nonzero_default_c}; - - fmap[DPNPFuncName::DPNP_FN_COV][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_cov_default_c}; - fmap[DPNPFuncName::DPNP_FN_COV][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_cov_default_c}; - fmap[DPNPFuncName::DPNP_FN_COV][eft_FLT][eft_FLT] = { - eft_DBL, (void *)dpnp_cov_default_c}; - fmap[DPNPFuncName::DPNP_FN_COV][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_cov_default_c}; - - fmap[DPNPFuncName::DPNP_FN_MAX][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_max_default_c}; - fmap[DPNPFuncName::DPNP_FN_MAX][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_max_default_c}; - fmap[DPNPFuncName::DPNP_FN_MAX][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_max_default_c}; - fmap[DPNPFuncName::DPNP_FN_MAX][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_max_default_c}; - - fmap[DPNPFuncName::DPNP_FN_MEAN][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_mean_default_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_mean_default_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_mean_default_c}; - fmap[DPNPFuncName::DPNP_FN_MEAN][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_mean_default_c}; - fmap[DPNPFuncName::DPNP_FN_MEDIAN][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_median_default_c}; fmap[DPNPFuncName::DPNP_FN_MEDIAN][eft_LNG][eft_LNG] = { @@ -1342,41 +308,5 @@ void func_map_init_statistics(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_MEDIAN_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_median_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MIN][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_min_default_c}; - fmap[DPNPFuncName::DPNP_FN_MIN][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_min_default_c}; - fmap[DPNPFuncName::DPNP_FN_MIN][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_min_default_c}; - fmap[DPNPFuncName::DPNP_FN_MIN][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_min_default_c}; - - fmap[DPNPFuncName::DPNP_FN_NANVAR][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_nanvar_default_c}; - fmap[DPNPFuncName::DPNP_FN_NANVAR][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_nanvar_default_c}; - fmap[DPNPFuncName::DPNP_FN_NANVAR][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_nanvar_default_c}; - fmap[DPNPFuncName::DPNP_FN_NANVAR][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_nanvar_default_c}; - - fmap[DPNPFuncName::DPNP_FN_STD][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_std_default_c}; - fmap[DPNPFuncName::DPNP_FN_STD][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_std_default_c}; - fmap[DPNPFuncName::DPNP_FN_STD][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_std_default_c}; - fmap[DPNPFuncName::DPNP_FN_STD][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_std_default_c}; - - fmap[DPNPFuncName::DPNP_FN_VAR][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_var_default_c}; - fmap[DPNPFuncName::DPNP_FN_VAR][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_var_default_c}; - fmap[DPNPFuncName::DPNP_FN_VAR][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_var_default_c}; - fmap[DPNPFuncName::DPNP_FN_VAR][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_var_default_c}; - return; } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 489a20f064cb..1667e9d413a2 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -129,23 +129,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_t)(c_dpctl.DPCTLSyclQueueRef, const size_t, const long * , const c_dpctl.DPCTLEventVectorRef) -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQueueRef, - void *, - const size_t, - const size_t, - const shape_elem_type * , - const shape_elem_type * , - void *, - const size_t, - const size_t, - const shape_elem_type * , - const shape_elem_type * , - void *, - const size_t, const size_t, - const shape_elem_type * , - const shape_elem_type * , - const long * , - const c_dpctl.DPCTLEventVectorRef) except + """ diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index f19dcc952230..994f489b6c38 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -65,15 +65,6 @@ include "dpnp_algo_special.pxi" include "dpnp_algo_statistics.pxi" -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_flatten_t)(c_dpctl.DPCTLSyclQueueRef, - void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - void *, const size_t, const size_t, - const shape_elem_type * , const shape_elem_type * , - const long * , - const c_dpctl.DPCTLEventVectorRef) - - cpdef dpnp_queue_initialize(): """ Initialize SYCL queue which will be used for any library operations.