diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index d49b0a90a919..298648378256 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -62,6 +62,7 @@ add_subdirectory(backend/extensions/lapack) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/ufunc) add_subdirectory(backend/extensions/statistics) +add_subdirectory(backend/extensions/indexing) add_subdirectory(dpnp_algo) add_subdirectory(dpnp_utils) diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index da11cc5f0266..4a6b44d50f0d 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -27,7 +27,6 @@ set(DPNP_SRC kernels/dpnp_krnl_arraycreation.cpp kernels/dpnp_krnl_common.cpp kernels/dpnp_krnl_elemwise.cpp - kernels/dpnp_krnl_indexing.cpp kernels/dpnp_krnl_mathematical.cpp kernels/dpnp_krnl_random.cpp kernels/dpnp_krnl_sorting.cpp diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt new file mode 100644 index 000000000000..34f4fe380748 --- /dev/null +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -0,0 +1,90 @@ +# ***************************************************************************** +# Copyright (c) 2016-2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _indexing_impl) +set(_module_src + ${CMAKE_CURRENT_SOURCE_DIR}/choose.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/indexing_py.cpp +) + +pybind11_add_module(${python_module_name} MODULE ${_module_src}) +add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) + +if(_dpnp_sycl_targets) + # make fat binary + target_compile_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpnp_sycl_targets} + ) + target_link_options( + ${python_module_name} + PRIVATE + -fsycl-targets=${_dpnp_sycl_targets} + ) +endif() + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, cause + # linker to ignore it. 
+ set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +if (DPNP_WITH_REDIST) + set_target_properties(${python_module_name} PROPERTIES INSTALL_RPATH "$ORIGIN/../../../../../../") +endif() + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/indexing" +) diff --git a/dpnp/backend/extensions/indexing/choose.cpp b/dpnp/backend/extensions/indexing/choose.cpp new file mode 100644 index 000000000000..83008a6795ad --- /dev/null +++ b/dpnp/backend/extensions/indexing/choose.cpp @@ -0,0 +1,460 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "choose_kernel.hpp" +#include "dpctl4pybind11.hpp" +#include "utils/indexing_utils.hpp" +#include "utils/memory_overlap.hpp" +#include "utils/output_validation.hpp" +#include "utils/sycl_alloc_utils.hpp" +#include "utils/type_dispatch.hpp" + +namespace dpnp::extensions::indexing +{ + +namespace td_ns = dpctl::tensor::type_dispatch; + +static kernels::choose_fn_ptr_t choose_clip_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static kernels::choose_fn_ptr_t choose_wrap_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +namespace py = pybind11; + +namespace detail +{ + +using host_ptrs_allocator_t = + dpctl::tensor::alloc_utils::usm_host_allocator; +using ptrs_t = std::vector; +using host_ptrs_shp_t = std::shared_ptr; + +host_ptrs_shp_t make_host_ptrs(sycl::queue &exec_q, + const std::vector &ptrs) +{ + host_ptrs_allocator_t ptrs_allocator(exec_q); + host_ptrs_shp_t host_ptrs_shp = + std::make_shared(ptrs.size(), ptrs_allocator); + + std::copy(ptrs.begin(), ptrs.end(), host_ptrs_shp->begin()); + + return host_ptrs_shp; +} + +using host_sz_allocator_t = + dpctl::tensor::alloc_utils::usm_host_allocator; +using sz_t = std::vector; +using host_sz_shp_t = std::shared_ptr; + +host_sz_shp_t make_host_offsets(sycl::queue &exec_q, + const std::vector &offsets) +{ + host_sz_allocator_t offsets_allocator(exec_q); + host_sz_shp_t host_offsets_shp = + std::make_shared(offsets.size(), offsets_allocator); + + std::copy(offsets.begin(), offsets.end(), host_offsets_shp->begin()); + + return host_offsets_shp; +} + +host_sz_shp_t make_host_shape_strides(sycl::queue &exec_q, + py::ssize_t n_chcs, + std::vector &shape, + std::vector &inp_strides, + std::vector &dst_strides, + std::vector &chc_strides) +{ + auto nelems = shape.size(); + host_sz_allocator_t shape_strides_allocator(exec_q); + host_sz_shp_t host_shape_strides_shp = + std::make_shared(nelems * (3 + n_chcs), shape_strides_allocator); + + std::copy(shape.begin(), shape.end(), host_shape_strides_shp->begin()); + std::copy(inp_strides.begin(), inp_strides.end(), + host_shape_strides_shp->begin() + nelems); + std::copy(dst_strides.begin(), dst_strides.end(), + host_shape_strides_shp->begin() + 2 * nelems); + std::copy(chc_strides.begin(), chc_strides.end(), + host_shape_strides_shp->begin() + 3 * nelems); + + return host_shape_strides_shp; +} + +/* This function expects a queue and a non-trivial number of + std::pairs of raw device pointers and host shared pointers + (structured as ), + then enqueues a copy of the host shared pointer data into + the device pointer. + + Assumes the device pointer addresses sufficient memory for + the size of the host memory. +*/ +template +std::vector batched_copy(sycl::queue &exec_q, + DevHostPairs &&...dev_host_pairs) +{ + constexpr std::size_t n = sizeof...(DevHostPairs); + static_assert(n > 0, "batched_copy requires at least one argument"); + + std::vector copy_evs; + copy_evs.reserve(n); + (copy_evs.emplace_back(exec_q.copy(dev_host_pairs.second->data(), + dev_host_pairs.first, + dev_host_pairs.second->size())), + ...); + + return copy_evs; +} + +/* This function takes as input a queue, sycl::event dependencies, + and a non-trivial number of shared_ptrs and moves them into + a host_task lambda capture, ensuring their lifetime until the + host_task executes. 
+*/ +template +sycl::event async_shp_free(sycl::queue &exec_q, + const std::vector &depends, + Shps &&...shps) +{ + constexpr std::size_t n = sizeof...(Shps); + static_assert(n > 0, "async_shp_free requires at least one argument"); + + const sycl::event &shared_ptr_cleanup_ev = + exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.host_task([capture = std::tuple(std::move(shps)...)]() {}); + }); + + return shared_ptr_cleanup_ev; +} + +// copied from dpctl, remove if a similar utility is ever exposed +std::vector parse_py_chcs(const sycl::queue &q, + const py::object &py_chcs) +{ + py::ssize_t chc_count = py::len(py_chcs); + std::vector res; + res.reserve(chc_count); + + for (py::ssize_t i = 0; i < chc_count; ++i) { + py::object el_i = py_chcs[py::cast(i)]; + dpctl::tensor::usm_ndarray arr_i = + py::cast(el_i); + if (!dpctl::utils::queues_are_compatible(q, {arr_i})) { + throw py::value_error("Choice allocation queue is not compatible " + "with execution queue"); + } + res.push_back(arr_i); + } + + return res; +} + +} // namespace detail + +std::pair + py_choose(const dpctl::tensor::usm_ndarray &src, + const py::object &py_chcs, + const dpctl::tensor::usm_ndarray &dst, + uint8_t mode, + sycl::queue &exec_q, + const std::vector &depends) +{ + std::vector chcs = + detail::parse_py_chcs(exec_q, py_chcs); + + // Python list max size must fit into py_ssize_t + py::ssize_t n_chcs = chcs.size(); + + if (n_chcs == 0) { + throw py::value_error("List of choices is empty."); + } + + if (mode != 0 && mode != 1) { + throw py::value_error("Mode must be 0 or 1."); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst); + + const dpctl::tensor::usm_ndarray chc_rep = chcs[0]; + + int nd = src.get_ndim(); + int dst_nd = dst.get_ndim(); + int chc_nd = chc_rep.get_ndim(); + + if (nd != dst_nd || nd != chc_nd) { + throw py::value_error("Array shapes are not consistent"); + } + + const py::ssize_t *src_shape = src.get_shape_raw(); + const py::ssize_t *dst_shape = dst.get_shape_raw(); + const py::ssize_t *chc_shape = chc_rep.get_shape_raw(); + + size_t nelems = src.get_size(); + bool shapes_equal = std::equal(src_shape, src_shape + nd, dst_shape); + shapes_equal &= std::equal(src_shape, src_shape + nd, chc_shape); + + if (!shapes_equal) { + throw py::value_error("Array shapes don't match."); + } + + if (nelems == 0) { + return std::make_pair(sycl::event{}, sycl::event{}); + } + + char *src_data = src.get_data(); + char *dst_data = dst.get_data(); + + if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(src, dst)) { + throw py::value_error("Array memory overlap."); + } + + // trivial offsets as choose does not apply stride + // simplification, but may in the future + constexpr py::ssize_t src_offset = py::ssize_t(0); + constexpr py::ssize_t dst_offset = py::ssize_t(0); + + int src_typenum = src.get_typenum(); + int dst_typenum = dst.get_typenum(); + int chc_typenum = chc_rep.get_typenum(); + + auto array_types = td_ns::usm_ndarray_types(); + int src_type_id = array_types.typenum_to_lookup_id(src_typenum); + int dst_type_id = array_types.typenum_to_lookup_id(dst_typenum); + int chc_type_id = array_types.typenum_to_lookup_id(chc_typenum); + + if (chc_type_id != dst_type_id) { + throw py::type_error("Output and choice data types are not the same."); + } + + 
dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, nelems); + + std::vector chc_ptrs; + chc_ptrs.reserve(n_chcs); + + std::vector chc_offsets; + chc_offsets.reserve(n_chcs); + + auto sh_nelems = std::max(nd, 1); + std::vector chc_strides(n_chcs * sh_nelems, 0); + + for (auto i = 0; i < n_chcs; ++i) { + dpctl::tensor::usm_ndarray chc_ = chcs[i]; + + // ndim, type, and shape are checked against the first array + if (i > 0) { + if (!(chc_.get_ndim() == nd)) { + throw py::value_error( + "Choice array dimensions are not the same"); + } + + if (!(chc_type_id == + array_types.typenum_to_lookup_id(chc_.get_typenum()))) { + throw py::type_error( + "Choice array data types are not all the same."); + } + + const py::ssize_t *chc_shape_ = chc_.get_shape_raw(); + if (!std::equal(chc_shape_, chc_shape_ + nd, chc_shape)) { + throw py::value_error("Choice shapes are not all equal."); + } + } + + // check for overlap with destination + if (overlap(dst, chc_)) { + throw py::value_error( + "Arrays index overlapping segments of memory"); + } + + char *chc_data = chc_.get_data(); + + if (nd > 0) { + auto chc_strides_ = chc_.get_strides_vector(); + std::copy(chc_strides_.begin(), chc_strides_.end(), + chc_strides.begin() + i * nd); + } + + chc_ptrs.push_back(chc_data); + chc_offsets.push_back(py::ssize_t(0)); + } + + auto fn = mode ? choose_clip_dispatch_table[src_type_id][chc_type_id] + : choose_wrap_dispatch_table[src_type_id][chc_type_id]; + + if (fn == nullptr) { + throw std::runtime_error("Indices must be integer type, got " + + std::to_string(src_type_id)); + } + + auto packed_chc_ptrs = + dpctl::tensor::alloc_utils::smart_malloc_device(n_chcs, exec_q); + + // packed_shapes_strides = [common shape, + // src.strides, + // dst.strides, + // chcs[0].strides, + // ..., + // chcs[n_chcs].strides] + auto packed_shapes_strides = + dpctl::tensor::alloc_utils::smart_malloc_device( + (3 + n_chcs) * sh_nelems, exec_q); + + auto packed_chc_offsets = + dpctl::tensor::alloc_utils::smart_malloc_device(n_chcs, + exec_q); + + std::vector host_task_events; + host_task_events.reserve(2); + + std::vector pack_deps; + std::vector common_shape; + std::vector src_strides; + std::vector dst_strides; + if (nd == 0) { + // special case where all inputs are scalars + // need to pass src, dst shape=1 and strides=0 + // chc_strides already initialized to 0 so ignore + common_shape = {1}; + src_strides = {0}; + dst_strides = {0}; + } + else { + common_shape = src.get_shape_vector(); + src_strides = src.get_strides_vector(); + dst_strides = dst.get_strides_vector(); + } + + auto host_chc_ptrs = detail::make_host_ptrs(exec_q, chc_ptrs); + auto host_chc_offsets = detail::make_host_offsets(exec_q, chc_offsets); + auto host_shape_strides = detail::make_host_shape_strides( + exec_q, n_chcs, common_shape, src_strides, dst_strides, chc_strides); + + pack_deps = detail::batched_copy( + exec_q, std::make_pair(packed_chc_ptrs.get(), host_chc_ptrs), + std::make_pair(packed_chc_offsets.get(), host_chc_offsets), + std::make_pair(packed_shapes_strides.get(), host_shape_strides)); + + host_task_events.push_back( + detail::async_shp_free(exec_q, pack_deps, host_chc_ptrs, + host_chc_offsets, host_shape_strides)); + + std::vector all_deps; + all_deps.reserve(depends.size() + pack_deps.size()); + all_deps.insert(std::end(all_deps), std::begin(pack_deps), + std::end(pack_deps)); + all_deps.insert(std::end(all_deps), std::begin(depends), std::end(depends)); + + sycl::event choose_generic_ev = + fn(exec_q, nelems, n_chcs, sh_nelems, 
packed_shapes_strides.get(), + src_data, dst_data, packed_chc_ptrs.get(), src_offset, dst_offset, + packed_chc_offsets.get(), all_deps); + + // async_smart_free releases owners + sycl::event temporaries_cleanup_ev = + dpctl::tensor::alloc_utils::async_smart_free( + exec_q, {choose_generic_ev}, packed_chc_ptrs, packed_shapes_strides, + packed_chc_offsets); + + host_task_events.push_back(temporaries_cleanup_ev); + + using dpctl::utils::keep_args_alive; + sycl::event arg_cleanup_ev = + keep_args_alive(exec_q, {src, py_chcs, dst}, host_task_events); + + return std::make_pair(arg_cleanup_ev, choose_generic_ev); +} + +template +struct ChooseFactory +{ + fnT get() + { + if constexpr (std::is_integral::value && + !std::is_same::value) { + fnT fn = kernels::choose_impl; + return fn; + } + else { + fnT fn = nullptr; + return fn; + } + } +}; + +using dpctl::tensor::indexing_utils::ClipIndex; +using dpctl::tensor::indexing_utils::WrapIndex; + +template +using ChooseWrapFactory = ChooseFactory>; + +template +using ChooseClipFactory = ChooseFactory>; + +void init_choose_dispatch_tables(void) +{ + using namespace td_ns; + using kernels::choose_fn_ptr_t; + + DispatchTableBuilder + dtb_choose_clip; + dtb_choose_clip.populate_dispatch_table(choose_clip_dispatch_table); + + DispatchTableBuilder + dtb_choose_wrap; + dtb_choose_wrap.populate_dispatch_table(choose_wrap_dispatch_table); + + return; +} + +void init_choose(py::module_ m) +{ + dpnp::extensions::indexing::init_choose_dispatch_tables(); + + m.def("_choose", &py_choose, "", py::arg("src"), py::arg("chcs"), + py::arg("dst"), py::arg("mode"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + return; +} + +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/choose.hpp b/dpnp/backend/extensions/indexing/choose.hpp new file mode 100644 index 000000000000..63b155ec6463 --- /dev/null +++ b/dpnp/backend/extensions/indexing/choose.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include <pybind11/pybind11.h> + +namespace py = pybind11; + +namespace dpnp::extensions::indexing +{ +void init_choose(py::module_ m); +} // namespace dpnp::extensions::indexing diff --git a/dpnp/backend/extensions/indexing/choose_kernel.hpp b/dpnp/backend/extensions/indexing/choose_kernel.hpp new file mode 100644 index 000000000000..0798239eac14 --- /dev/null +++ b/dpnp/backend/extensions/indexing/choose_kernel.hpp @@ -0,0 +1,188 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE.
+//***************************************************************************** + +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include "kernels/dpctl_tensor_types.hpp" +#include "utils/indexing_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/strided_iters.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::extensions::indexing::strides_detail +{ + +struct NthStrideOffsetUnpacked +{ + NthStrideOffsetUnpacked(int common_nd, + dpctl::tensor::ssize_t const *_offsets, + dpctl::tensor::ssize_t const *_shape, + dpctl::tensor::ssize_t const *_strides) + : _ind(common_nd), nd(common_nd), offsets(_offsets), shape(_shape), + strides(_strides) + { + } + + template + size_t operator()(dpctl::tensor::ssize_t gid, nT n) const + { + dpctl::tensor::ssize_t relative_offset(0); + _ind.get_displacement( + gid, shape, strides + (n * nd), relative_offset); + + return relative_offset + offsets[n]; + } + +private: + dpctl::tensor::strides::CIndexer_vector _ind; + + int nd; + dpctl::tensor::ssize_t const *offsets; + dpctl::tensor::ssize_t const *shape; + dpctl::tensor::ssize_t const *strides; +}; + +static_assert(sycl::is_device_copyable_v); + +} // namespace dpnp::extensions::indexing::strides_detail + +namespace dpnp::extensions::indexing::kernels +{ + +template +class ChooseFunctor +{ +private: + const IndT *ind = nullptr; + T *dst = nullptr; + char **chcs = nullptr; + dpctl::tensor::ssize_t n_chcs; + const IndOutIndexerT ind_out_indexer; + const ChoicesIndexerT chcs_indexer; + +public: + ChooseFunctor(const IndT *ind_, + T *dst_, + char **chcs_, + dpctl::tensor::ssize_t n_chcs_, + const IndOutIndexerT &ind_out_indexer_, + const ChoicesIndexerT &chcs_indexer_) + : ind(ind_), dst(dst_), chcs(chcs_), n_chcs(n_chcs_), + ind_out_indexer(ind_out_indexer_), chcs_indexer(chcs_indexer_) + { + } + + void operator()(sycl::id<1> id) const + { + const ProjectorT proj{}; + + dpctl::tensor::ssize_t i = id[0]; + + auto ind_dst_offsets = ind_out_indexer(i); + dpctl::tensor::ssize_t ind_offset = ind_dst_offsets.get_first_offset(); + dpctl::tensor::ssize_t dst_offset = ind_dst_offsets.get_second_offset(); + + IndT chc_idx = ind[ind_offset]; + // proj produces an index in the range of n_chcs + dpctl::tensor::ssize_t projected_idx = proj(n_chcs, chc_idx); + + dpctl::tensor::ssize_t chc_offset = chcs_indexer(i, projected_idx); + + T *chc = reinterpret_cast(chcs[projected_idx]); + + dst[dst_offset] = chc[chc_offset]; + } +}; + +typedef sycl::event (*choose_fn_ptr_t)(sycl::queue &, + size_t, + dpctl::tensor::ssize_t, + int, + const dpctl::tensor::ssize_t *, + const char *, + char *, + char **, + dpctl::tensor::ssize_t, + dpctl::tensor::ssize_t, + const dpctl::tensor::ssize_t *, + const std::vector &); + +template +sycl::event choose_impl(sycl::queue &q, + size_t nelems, + dpctl::tensor::ssize_t n_chcs, + int nd, + const dpctl::tensor::ssize_t *shape_and_strides, + const char *ind_cp, + char *dst_cp, + char **chcs_cp, + dpctl::tensor::ssize_t ind_offset, + dpctl::tensor::ssize_t dst_offset, + const dpctl::tensor::ssize_t *chc_offsets, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(q); + + const indTy *ind_tp = reinterpret_cast(ind_cp); + Ty *dst_tp = reinterpret_cast(dst_cp); + + sycl::event choose_ev = q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using InOutIndexerT = + dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + const InOutIndexerT ind_out_indexer{nd, ind_offset, dst_offset, + shape_and_strides}; + + 
using NthChoiceIndexerT = strides_detail::NthStrideOffsetUnpacked; + const NthChoiceIndexerT choices_indexer{ + nd, chc_offsets, shape_and_strides, shape_and_strides + 3 * nd}; + + using ChooseFunc = ChooseFunctor; + + cgh.parallel_for(sycl::range<1>(nelems), + ChooseFunc(ind_tp, dst_tp, chcs_cp, n_chcs, + ind_out_indexer, + choices_indexer)); + }); + + return choose_ev; +} + +} // namespace dpnp::extensions::indexing::kernels diff --git a/dpnp/backend/extensions/indexing/indexing_py.cpp b/dpnp/backend/extensions/indexing/indexing_py.cpp new file mode 100644 index 000000000000..143202477751 --- /dev/null +++ b/dpnp/backend/extensions/indexing/indexing_py.cpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._lapack_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "choose.hpp" + +PYBIND11_MODULE(_indexing_impl, m) +{ + dpnp::extensions::indexing::init_choose(m); +} diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 1269ac77095c..5a0698a978af 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -132,39 +132,6 @@ INP_DLLEXPORT void dpnp_partition_c(void *array, const shape_elem_type *shape, const size_t ndim); -/** - * @ingroup BACKEND_API - * @brief Construct an array from an index array and a list of arrays to choose - * from. - * - * @param [in] q_ref Reference to SYCL queue. - * @param [out] result1 Output array. - * @param [in] array1_in Input array with data. - * @param [in] choices Choice arrays. - * @param [in] size Input array size. - * @param [in] choices_size Choices size. - * @param [in] choice_size Choice size. - * @param [in] dep_event_vec_ref Reference to vector of SYCL events. 
- */ -template -INP_DLLEXPORT DPCTLSyclEventRef - dpnp_choose_c(DPCTLSyclQueueRef q_ref, - void *result1, - void *array1_in, - void **choices, - size_t size, - size_t choices_size, - size_t choice_size, - const DPCTLEventVectorRef dep_event_vec_ref); - -template -INP_DLLEXPORT void dpnp_choose_c(void *result1, - void *array1_in, - void **choices, - size_t size, - size_t choices_size, - size_t choice_size); - /** * @ingroup BACKEND_API * @brief implementation of creating filled with value array function diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 21df2c35dc13..ea94d4a43b56 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -58,13 +58,10 @@ */ enum class DPNPFuncName : size_t { - DPNP_FN_NONE, /**< Very first element of the enumeration */ - DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */ - DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra - parameters */ - DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ - DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra - parameters */ + DPNP_FN_NONE, /**< Very first element of the enumeration */ + DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ + DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra + parameters */ DPNP_FN_INITVAL, /**< Used in numpy ones, ones_like, zeros, zeros_like impls */ DPNP_FN_INITVAL_EXT, /**< Used in numpy ones, ones_like, zeros, zeros_like diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp deleted file mode 100644 index 94bb07f5ef25..000000000000 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ /dev/null @@ -1,168 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2025, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. 
-//***************************************************************************** - -#include -#include -#include - -#include "dpnp_fptr.hpp" -#include "dpnpc_memory_adapter.hpp" -#include "queue_sycl.hpp" -#include - -template -class dpnp_choose_c_kernel; - -template -DPCTLSyclEventRef dpnp_choose_c(DPCTLSyclQueueRef q_ref, - void *result1, - void *array1_in, - void **choices1, - size_t size, - size_t choices_size, - size_t choice_size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - if ((array1_in == nullptr) || (result1 == nullptr) || (choices1 == nullptr)) - { - return event_ref; - } - if (!size || !choices_size || !choice_size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType1> input1_ptr(q_ref, array1_in, size); - _DataType1 *array_in = input1_ptr.get_ptr(); - - // choices1 is a list of pointers to device memory, - // which is allocating on the host, so memcpy to device memory is required - DPNPC_ptr_adapter<_DataType2 *> choices_ptr(q_ref, choices1, choices_size, - true); - _DataType2 **choices = choices_ptr.get_ptr(); - - for (size_t i = 0; i < choices_size; ++i) { - DPNPC_ptr_adapter<_DataType2> choice_ptr(q_ref, choices[i], - choice_size); - choices[i] = choice_ptr.get_ptr(); - } - - DPNPC_ptr_adapter<_DataType2> result1_ptr(q_ref, result1, size, false, - true); - _DataType2 *result = result1_ptr.get_ptr(); - - sycl::range<1> gws(size); - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t idx = global_id[0]; - result[idx] = choices[array_in[idx]][idx]; - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - sycl::event event = q.submit(kernel_func); - choices_ptr.depends_on(event); - - event_ref = reinterpret_cast(&event); - - return DPCTLEvent_Copy(event_ref); -} - -template -void dpnp_choose_c(void *result1, - void *array1_in, - void **choices1, - size_t size, - size_t choices_size, - size_t choice_size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_choose_c<_DataType1, _DataType2>( - q_ref, result1, array1_in, choices1, size, choices_size, choice_size, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); -} - -template -void (*dpnp_choose_default_c)(void *, void *, void **, size_t, size_t, size_t) = - dpnp_choose_c<_DataType1, _DataType2>; - -template -DPCTLSyclEventRef (*dpnp_choose_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - void **, - size_t, - size_t, - size_t, - const DPCTLEventVectorRef) = - dpnp_choose_c<_DataType1, _DataType2>; - -void func_map_init_indexing_func(func_map_t &fmap) -{ - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_choose_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_choose_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_INT][eft_FLT] = { - eft_FLT, (void *)dpnp_choose_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_choose_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_LNG][eft_INT] = { - eft_INT, (void *)dpnp_choose_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_choose_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_LNG][eft_FLT] = { - eft_FLT, (void *)dpnp_choose_default_c}; - 
fmap[DPNPFuncName::DPNP_FN_CHOOSE][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_choose_default_c}; - - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_INT][eft_FLT] = { - eft_FLT, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_LNG][eft_INT] = { - eft_INT, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_LNG][eft_FLT] = { - eft_FLT, (void *)dpnp_choose_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOOSE_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_choose_ext_c}; - return; -} diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index cc509d6d8f31..8a7d50597c12 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -158,7 +158,6 @@ class dpnp_less_comp */ void func_map_init_arraycreation(func_map_t &fmap); void func_map_init_elemwise(func_map_t &fmap); -void func_map_init_indexing_func(func_map_t &fmap); void func_map_init_linalg(func_map_t &fmap); void func_map_init_mathematical(func_map_t &fmap); void func_map_init_random(func_map_t &fmap); diff --git a/dpnp/backend/src/dpnp_iface_fptr.cpp b/dpnp/backend/src/dpnp_iface_fptr.cpp index 0b64df1f95c4..c921f5e9bd27 100644 --- a/dpnp/backend/src/dpnp_iface_fptr.cpp +++ b/dpnp/backend/src/dpnp_iface_fptr.cpp @@ -97,7 +97,6 @@ static func_map_t func_map_init() func_map_init_arraycreation(fmap); func_map_init_elemwise(fmap); - func_map_init_indexing_func(fmap); func_map_init_linalg(fmap); func_map_init_mathematical(fmap); func_map_init_random(fmap); diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 072ba2ae03ec..0dcc519d198f 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -33,7 +33,6 @@ from dpnp.dpnp_utils.dpnp_algo_utils cimport dpnp_descriptor cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this namespace for Enum import cdef enum DPNPFuncName "DPNPFuncName": - DPNP_FN_CHOOSE_EXT DPNP_FN_ERF_EXT DPNP_FN_MODF_EXT DPNP_FN_PARTITION_EXT diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi index 50067c4b8c89..3240ac9499f3 100644 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi @@ -36,61 +36,9 @@ and the rest of the library # NO IMPORTs here. 
All imports must be placed into main "dpnp_algo.pyx" file __all__ += [ - "dpnp_choose", "dpnp_putmask", ] -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_choose_t)(c_dpctl.DPCTLSyclQueueRef, - void *, void * , void ** , size_t, size_t, size_t, - const c_dpctl.DPCTLEventVectorRef) - -cpdef utils.dpnp_descriptor dpnp_choose(utils.dpnp_descriptor x1, list choices1): - cdef vector[void * ] choices - cdef utils.dpnp_descriptor choice - for desc in choices1: - choice = desc - choices.push_back(choice.get_data()) - - cdef shape_type_c x1_shape = x1.shape - cdef size_t choice_size = choices1[0].size - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - - cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(choices1[0].dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOOSE_EXT, param1_type, param2_type) - - x1_obj = x1.get_array() - - cdef utils.dpnp_descriptor res_array = utils.create_output_descriptor(x1_shape, - kernel_data.return_type, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - - result_sycl_queue = res_array.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_choose_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - res_array.get_data(), - x1.get_data(), - choices.data(), - x1_shape[0], - choices.size(), - choice_size, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return res_array - - cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values): cdef int values_size = values.size diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 10bad7a18d58..4f5d885da15c 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -857,15 +857,15 @@ def astype( # 'base', # 'byteswap', - def choose(input, choices, out=None, mode="raise"): + def choose(self, /, choices, out=None, mode="wrap"): """ - Construct an array from an index array and a set of arrays to choose from. + Use an array as index array to construct a new array from a set of choices. Refer to :obj:`dpnp.choose` for full documentation. """ - return dpnp.choose(input, choices, out, mode) + return dpnp.choose(self, choices, out, mode) def clip(self, min=None, max=None, out=None, **kwargs): """ diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index 2bc7f1223205..84ae86de5170 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -40,6 +40,7 @@ # pylint: disable=protected-access import operator +from collections.abc import Iterable import dpctl.tensor as dpt import dpctl.tensor._tensor_impl as ti @@ -51,9 +52,11 @@ import dpnp +# pylint: disable=no-name-in-module +import dpnp.backend.extensions.indexing._indexing_impl as indexing_ext + # pylint: disable=no-name-in-module from .dpnp_algo import ( - dpnp_choose, dpnp_putmask, ) from .dpnp_array import dpnp_array @@ -117,56 +120,176 @@ def _ravel_multi_index_checks(multi_index, dims, order): ) -def choose(x1, choices, out=None, mode="raise"): +def _build_choices_list(choices): + """ + Gather queues and USM types for the input, expected to be an array or + list of arrays. If a single array of dimension greater than one, the array + will be unstacked. + + Returns a list of :class:`dpctl.tensor.usm_ndarray`s. 
+ """ + + if dpnp.is_supported_array_type(choices): + choices = dpnp.unstack(choices) + elif not isinstance(choices, Iterable): + raise TypeError("`choices` must be an array or sequence of arrays") + return [dpnp.get_usm_ndarray(chc) for chc in choices] + + +def _choose_run(inds, chcs, q, usm_type, out=None, mode=0): + # arg validation, broadcasting, type coercion assumed done by caller + if out is not None: + out = dpnp.get_usm_ndarray(out) + + if not out.flags.writable: + raise ValueError("provided `out` array is read-only") + + if out.shape != inds.shape: + raise ValueError( + "The shape of input and output arrays are inconsistent. " + f"Expected output shape is {inds.shape}, got {out.shape}" + ) + + if chcs[0].dtype != out.dtype: + raise TypeError( + f"Output array of type {chcs[0].dtype} is needed, " + f"got {out.dtype}" + ) + + if dpu.get_execution_queue((q, out.sycl_queue)) is None: + raise dpu.ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + if ti._array_overlap(inds, out) or any( + ti._array_overlap(out, chc) for chc in chcs + ): + # Allocate a temporary buffer to avoid memory overlapping. + out = dpt.empty_like(out) + else: + out = dpt.empty( + inds.shape, dtype=chcs[0].dtype, usm_type=usm_type, sycl_queue=q + ) + + _manager = dpu.SequentialOrderManager[q] + dep_evs = _manager.submitted_events + + h_ev, choose_ev = indexing_ext._choose(inds, chcs, out, mode, q, dep_evs) + _manager.add_event_pair(h_ev, choose_ev) + + return out + + +def choose(a, choices, out=None, mode="wrap"): """ Construct an array from an index array and a set of arrays to choose from. For full documentation refer to :obj:`numpy.choose`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + An integer array of indices indicating the position of the array + in `choices` to choose from. Behavior of out-of-bounds integers (i.e., + integers outside of `[0, n-1]` where `n` is the number of choices) is + determined by the `mode` keyword. + choices : {dpnp.ndarray, usm_ndarray, \ + sequence of dpnp.ndarrays and usm_ndarrays} + Choice arrays. `a` and choice arrays must be broadcast-compatible. + If `choices` is an array, the array is unstacked into a sequence of + arrays. + out : {None, dpnp.ndarray, usm_ndarray}, optional + If provided, the result will be placed in this array. It should + be of the appropriate shape and dtype. + Default: ``None``. + mode : {"wrap", "clip"}, optional + Specifies how out-of-bounds indices will be handled. Possible values + are: + + - ``"wrap"``: clamps indices to (``-n <= i < n``), then wraps + negative indices. + - ``"clip"``: clips indices to (``0 <= i < n``). + + Default: ``"wrap"``. + + Returns + ------- + out : dpnp.ndarray + The merged result. + See also -------- - :obj:`dpnp.take_along_axis` : Preferable if choices is an array. + :obj:`dpnp.ndarray.choose` : Equivalent method. + :obj:`dpnp.take_along_axis` : Preferable if `choices` is an array. + Examples + -------- + >>> import dpnp as np + >>> choices = np.array([[0, 1, 2, 3], [10, 11, 12, 13], + ... [20, 21, 22, 23], [30, 31, 32, 33]]) + >>> np.choose(np.array([2, 3, 1, 0]), choices + ... # the first element of the result will be the first element of the + ... # third (2+1) "array" in choices, namely, 20; the second element + ... # will be the second element of the fourth (3+1) choice array, i.e., + ... # 31, etc. + ... ) + array([20, 31, 12, 3]) + >>> np.choose(np.array([2, 4, 1, 0]), choices, mode='clip' + ... # 4 goes to 3 (4-1) + ... 
) + array([20, 31, 12, 3]) + >>> # because there are 4 choice arrays + >>> np.choose(np.array([2, 4, 1, 0]), choices, mode='wrap' + ... # 4 is clipped to 3 + ... ) + array([20, 31, 12, 3]) + >>> np.choose(np.array([2, -1, 1, 0]), choices, mode='wrap' + ... # -1 goes to 3 (-1+4) + ... ) + array([20, 31, 12, 3]) + + An example using broadcasting: + + >>> a = np.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]]) + >>> choices = np.array([-10, 10]) + >>> np.choose(a, choices) + array([[ 10, -10, 10], + [-10, 10, -10], + [ 10, -10, 10]]) """ + mode = _get_indexing_mode(mode) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - - choices_list = [] - for choice in choices: - choices_list.append( - dpnp.get_dpnp_descriptor(choice, copy_when_nondefault_queue=False) + inds = dpnp.get_usm_ndarray(a) + ind_dt = inds.dtype + if not dpnp.issubdtype(ind_dt, dpnp.integer): + # NumPy will cast up to int64 in general but + # int32 is more than safe for bool + if ind_dt == dpnp.bool: + inds = dpt.astype(inds, dpt.int32) + else: + raise TypeError("input index array must be of integer data type") + + choices = _build_choices_list(choices) + + res_usm_type, exec_q = get_usm_allocations(choices + [inds]) + # apply type promotion to input choices + res_dt = dpt.result_type(*choices) + if len(choices) > 1: + choices = tuple( + map( + lambda chc: ( + chc if chc.dtype == res_dt else dpt.astype(chc, res_dt) + ), + choices, + ) ) + arrs_broadcast = dpt.broadcast_arrays(inds, *choices) + inds = arrs_broadcast[0] + choices = tuple(arrs_broadcast[1:]) - if x1_desc: - if dpnp.is_cuda_backend(x1_desc.get_array()): # pragma: no cover - raise NotImplementedError( - "Running on CUDA is currently not supported" - ) + res = _choose_run(inds, choices, exec_q, res_usm_type, out=out, mode=mode) - if any(not desc for desc in choices_list): - pass - elif out is not None: - pass - elif mode != "raise": - pass - elif any(not choices[0].dtype == choice.dtype for choice in choices): - pass - elif not choices_list: - pass - else: - size = x1_desc.size - choices_size = choices_list[0].size - if any( - choice.size != choices_size or choice.size != size - for choice in choices - ): - pass - elif any(x >= choices_size for x in dpnp.asnumpy(x1)): - pass - else: - return dpnp_choose(x1_desc, choices_list).get_pyobj() - - return call_origin(numpy.choose, x1, choices, out, mode) + return dpnp.get_result_array(res, out=out) def _take_index(x, inds, axis, q, usm_type, out=None, mode=0): diff --git a/dpnp/tests/skipped_tests_cuda.tbl b/dpnp/tests/skipped_tests_cuda.tbl index 563a1fb69d67..2e3afa2e1f8a 100644 --- a/dpnp/tests/skipped_tests_cuda.tbl +++ b/dpnp/tests/skipped_tests_cuda.tbl @@ -818,13 +818,3 @@ tests/test_strides.py::test_strides_erf[(10,)-int64] tests/test_strides.py::test_strides_erf[(10,)-float32] tests/test_strides.py::test_strides_erf[(10,)-float64] tests/test_strides.py::test_strides_erf[(10,)-None] - -# choose -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_choose -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_choose_broadcast -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_choose_broadcast2 -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_choose_broadcast_fail -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_choose_clip -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_choose_wrap -tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_raise 
-tests/third_party/cupy/indexing_tests/test_indexing.py::TestChoose::test_unknown_clip diff --git a/dpnp/tests/test_indexing.py b/dpnp/tests/test_indexing.py index 20a43a09ffa9..5a91ad3e07ff 100644 --- a/dpnp/tests/test_indexing.py +++ b/dpnp/tests/test_indexing.py @@ -5,6 +5,7 @@ import numpy import pytest from dpctl.tensor._numpy_helper import AxisError +from dpctl.tensor._type_utils import _to_device_supported_dtype from dpctl.utils import ExecutionPlacementError from numpy.testing import ( assert_, @@ -879,20 +880,6 @@ def test_mode_clip(self): assert (result == dpnp.array([-2, 0, -2, 2])).all() -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -def test_choose(): - a = numpy.r_[:4] - ia = dpnp.array(a) - b = numpy.r_[-4:0] - ib = dpnp.array(b) - c = numpy.r_[100:500:100] - ic = dpnp.array(c) - - expected = numpy.choose([0, 0, 0, 0], [a, b, c]) - result = dpnp.choose([0, 0, 0, 0], [ia, ib, ic]) - assert_array_equal(expected, result) - - @pytest.mark.parametrize("val", [-1, 0, 1], ids=["-1", "0", "1"]) @pytest.mark.parametrize( "array", @@ -1447,3 +1434,198 @@ def test_compress_strided(self): result = dpnp.compress(cond, a) expected = numpy.compress(cond_np, a_np) assert_array_equal(result, expected) + + +class TestChoose: + def test_choose_basic(self): + indices = [0, 1, 0] + # use a single array for choices + chcs_np = numpy.arange(2 * len(indices)) + chcs = dpnp.arange(2 * len(indices)) + inds_np = numpy.array(indices) + inds = dpnp.array(indices) + expected = numpy.choose(inds_np, chcs_np) + result = dpnp.choose(inds, chcs) + assert_array_equal(expected, result) + + def test_choose_method_basic(self): + indices = [0, 1, 2] + # use a single array for choices + chcs_np = numpy.arange(3 * len(indices)) + chcs = dpnp.arange(3 * len(indices)) + inds_np = numpy.array(indices) + inds = dpnp.array(indices) + expected = inds_np.choose(chcs_np) + result = inds.choose(chcs) + assert_array_equal(expected, result) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)) + def test_choose_inds_all_dtypes(self, dtype): + if not dpnp.issubdtype(dtype, dpnp.integer) and dtype != dpnp.bool: + inds = dpnp.zeros(1, dtype=dtype) + chcs = dpnp.ones(1, dtype=dtype) + with pytest.raises(TypeError): + dpnp.choose(inds, chcs) + else: + inds_np = numpy.array([1, 0, 1], dtype=dtype) + inds = dpnp.array(inds_np) + chcs_np = numpy.array([1, 2, 3], dtype=dtype) + chcs = dpnp.array(chcs_np) + expected = numpy.choose(inds_np, chcs_np) + result = dpnp.choose(inds, chcs) + assert_array_equal(expected, result) + + def test_choose_invalid_out_errors(self): + q1 = dpctl.SyclQueue() + q2 = dpctl.SyclQueue() + chcs = dpnp.ones(10, dtype="i4", sycl_queue=q1) + inds = dpnp.zeros(10, dtype="i4", sycl_queue=q1) + out_bad_shape = dpnp.empty(11, dtype=chcs.dtype, sycl_queue=q1) + with pytest.raises(ValueError): + dpnp.choose(inds, [chcs], out=out_bad_shape) + out_bad_queue = dpnp.empty(chcs.shape, dtype=chcs.dtype, sycl_queue=q2) + with pytest.raises(ExecutionPlacementError): + dpnp.choose(inds, [chcs], out=out_bad_queue) + out_bad_dt = dpnp.empty(chcs.shape, dtype="i8", sycl_queue=q1) + with pytest.raises(TypeError): + dpnp.choose(inds, [chcs], out=out_bad_dt) + out_read_only = dpnp.empty(chcs.shape, dtype=chcs.dtype, sycl_queue=q1) + out_read_only.flags.writable = False + with pytest.raises(ValueError): + dpnp.choose(inds, [chcs], out=out_read_only) + + def test_choose_empty(self): + sh = (10, 0, 5) + inds = dpnp.ones(sh, dtype="i4") + chcs = dpnp.ones(sh) + r = dpnp.choose(inds, chcs) + assert r.shape == sh + 
r = dpnp.choose(inds, (chcs,) * 2) + assert r.shape == sh + inds = dpnp.unstack(inds)[0] + r = dpnp.choose(inds, chcs) + assert r.shape == sh[1:] + r = dpnp.choose(inds, [chcs]) + assert r.shape == sh + + def test_choose_0d_inputs(self): + sh = () + inds = dpnp.zeros(sh, dtype="i4") + chc = dpnp.ones(sh, dtype="i4") + r = dpnp.choose(inds, [chc]) + assert r == chc + + def test_choose_out_keyword(self): + inds = dpnp.tile(dpnp.array([0, 1, 2], dtype="i4"), (5, 3)) + inds_np = dpnp.asnumpy(inds) + chc1 = dpnp.zeros(9, dtype="f4") + chc2 = dpnp.ones(9, dtype="f4") + chc3 = dpnp.full(9, 2, dtype="f4") + chcs = [chc1, chc2, chc3] + chcs_np = [dpnp.asnumpy(chc) for chc in chcs] + out = dpnp.empty_like(inds, dtype="f4") + dpnp.choose(inds, chcs, out=out) + expected = numpy.choose(inds_np, chcs_np) + assert_array_equal(out, expected) + + def test_choose_in_overlaps_out(self): + # overlap with inds + inds = dpnp.zeros(6, dtype="i4") + inds_np = dpnp.asnumpy(inds) + chc_np = numpy.arange(6, dtype="i4") + chc = dpnp.arange(6, dtype="i4") + out = inds + expected = numpy.choose(inds_np, chc_np) + result = dpnp.choose(inds, chc, out=out) + assert_array_equal(expected, result) + assert result is out + assert (inds == out).all() + # overlap with chc + inds = dpnp.zeros(6, dtype="i4") + out = chc + expected = numpy.choose(inds_np, chc_np) + result = dpnp.choose(inds, chc, out=out) + assert_array_equal(expected, result) + assert result is out + assert (inds == out).all() + + def test_choose_strided(self): + # inds strided + inds = dpnp.tile(dpnp.array([0, 1], dtype="i4"), 5) + inds_np = dpnp.asnumpy(inds) + c1 = dpnp.arange(5, dtype="i4") + c2 = dpnp.full(5, -1, dtype="i4") + chcs = [c1, c2] + chcs_np = [dpnp.asnumpy(chc) for chc in chcs] + result = dpnp.choose(inds[::-2], chcs) + expected = numpy.choose(inds_np[::-2], chcs_np) + assert_array_equal(result, expected) + # choices strided + c3 = dpnp.arange(20, dtype="i4") + c4 = dpnp.full(20, -1, dtype="i4") + chcs = [c3[::-2], c4[::-2]] + chcs_np = [dpnp.asnumpy(c3)[::-2], dpnp.asnumpy(c4)[::-2]] + result = dpnp.choose(inds, chcs) + expected = numpy.choose(inds_np, chcs_np) + assert_array_equal(result, expected) + # all strided + result = dpnp.choose(inds[::-1], chcs) + expected = numpy.choose(inds_np[::-1], chcs_np) + assert_array_equal(result, expected) + + @pytest.mark.parametrize( + "indices", [[0, 2], [-5, 4]], ids=["[0, 2]", "[-5, 4]"] + ) + @pytest.mark.parametrize("mode", ["clip", "wrap"]) + def test_choose_modes(self, indices, mode): + chc = dpnp.array([-2, -1, 0, 1, 2], dtype="i4") + chc_np = dpnp.asnumpy(chc) + inds = dpnp.array(indices, dtype="i4") + inds_np = dpnp.asnumpy(inds) + expected = numpy.choose(inds_np, chc_np, mode=mode) + result = dpnp.choose(inds, chc, mode=mode) + assert_array_equal(expected, result) + + def test_choose_arg_validation(self): + # invalid choices + with pytest.raises(TypeError): + dpnp.choose(dpnp.zeros((), dtype="i4"), 1) + # invalid mode keyword + with pytest.raises(ValueError): + dpnp.choose(dpnp.zeros(()), dpnp.ones(()), mode="err") + + # based on examples from NumPy + def test_choose_broadcasting(self): + inds = dpnp.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]], dtype="i4") + inds_np = dpnp.asnumpy(inds) + chcs = dpnp.array([-10, 10]) + chcs_np = dpnp.asnumpy(chcs) + result = dpnp.choose(inds, chcs) + expected = numpy.choose(inds_np, chcs_np) + assert_array_equal(result, expected) + + inds = dpnp.array([0, 1]).reshape((2, 1, 1)) + inds_np = dpnp.asnumpy(inds) + chc1 = dpnp.array([1, 2, 3]).reshape((1, 3, 1)) + chc2 = 
dpnp.array([-1, -2, -3, -4, -5]).reshape(1, 1, 5) + chcs = [chc1, chc2] + chcs_np = [dpnp.asnumpy(chc) for chc in chcs] + result = dpnp.choose(inds, chcs) + expected = numpy.choose(inds_np, chcs_np) + assert_array_equal(result, expected) + + @pytest.mark.parametrize("chc1_dt", get_all_dtypes(no_none=True)) + @pytest.mark.parametrize("chc2_dt", get_all_dtypes(no_none=True)) + def test_choose_promote_choices(self, chc1_dt, chc2_dt): + inds = dpnp.array([0, 1], dtype="i4") + inds_np = dpnp.asnumpy(inds) + chc1 = dpnp.zeros(1, dtype=chc1_dt) + chc2 = dpnp.ones(1, dtype=chc2_dt) + chcs = [chc1, chc2] + chcs_np = [dpnp.asnumpy(chc) for chc in chcs] + result = dpnp.choose(inds, chcs) + expected = numpy.choose(inds_np, chcs_np) + assert ( + _to_device_supported_dtype(expected.dtype, inds.sycl_device) + == result.dtype + ) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 6484699b26d1..08b7c3668126 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -2957,3 +2957,24 @@ def test_ix(device_0, device_1): ixgrid = dpnp.ix_(x0, x1) assert_sycl_queue_equal(ixgrid[0].sycl_queue, x0.sycl_queue) assert_sycl_queue_equal(ixgrid[1].sycl_queue, x1.sycl_queue) + + +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_choose(device): + chc = dpnp.arange(5, dtype="i4", device=device) + chc_np = dpnp.asnumpy(chc) + + inds = dpnp.array([0, 1, 3], dtype="i4", device=device) + inds_np = dpnp.asnumpy(inds) + + result = dpnp.choose(inds, chc) + expected = numpy.choose(inds_np, chc_np) + assert_allclose(expected, result) + + expected_queue = chc.sycl_queue + result_queue = result.sycl_queue + assert_sycl_queue_equal(result_queue, expected_queue) diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 1b5c8970d6d9..800cbc519537 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -1791,3 +1791,17 @@ def test_ix(usm_type_0, usm_type_1): ixgrid = dp.ix_(x0, x1) assert ixgrid[0].usm_type == x0.usm_type assert ixgrid[1].usm_type == x1.usm_type + + +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "usm_type_ind", list_of_usm_types, ids=list_of_usm_types +) +def test_choose(usm_type_x, usm_type_ind): + chc = dp.arange(5, usm_type=usm_type_x) + ind = dp.array([0, 2, 4], usm_type=usm_type_ind) + z = dp.choose(ind, chc) + + assert chc.usm_type == usm_type_x + assert ind.usm_type == usm_type_ind + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) diff --git a/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py b/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py index a977b44bbdbd..bec8f4204b33 100644 --- a/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py +++ b/dpnp/tests/third_party/cupy/indexing_tests/test_indexing.py @@ -208,7 +208,6 @@ def test_choose(self, xp, dtype): c = testing.shaped_arange((3, 4), xp, dtype) return a.choose(c) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_choose_broadcast(self, xp, dtype): @@ -216,7 +215,6 @@ def test_choose_broadcast(self, xp, dtype): c = xp.array([-10, 10]).astype(dtype) return a.choose(c) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_choose_broadcast2(self, xp, dtype): @@ -224,7 +222,7 @@ def test_choose_broadcast2(self, xp, dtype): c = 
testing.shaped_arange((3, 5, 2), xp, dtype) return a.choose(c) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.skip("Different implementation of `wrap` keyword") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_choose_wrap(self, xp, dtype): @@ -232,7 +230,6 @@ def test_choose_wrap(self, xp, dtype): c = testing.shaped_arange((3, 4), xp, dtype) return a.choose(c, mode="wrap") - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_choose_clip(self, xp, dtype): @@ -240,7 +237,6 @@ def test_choose_clip(self, xp, dtype): c = testing.shaped_arange((3, 4), xp, dtype) return a.choose(c, mode="clip") - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.with_requires("numpy>=1.19") def test_unknown_clip(self): for xp in (numpy, cupy): @@ -249,14 +245,13 @@ def test_unknown_clip(self): with pytest.raises(ValueError): a.choose(c, mode="unknown") - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.skip("`raise` keyword not implemented") def test_raise(self): a = cupy.array([2]) c = cupy.array([[0, 1]]) with self.assertRaises(ValueError): a.choose(c) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() def test_choose_broadcast_fail(self, dtype): for xp in (numpy, cupy):
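
A brief usage sketch of the dpnp.choose API introduced by this patch, assembled from the docstring and tests above. This is illustrative only and not part of the diff; the inputs mirror the docstring examples.

import dpnp as np

# Choices given as a single 2-D array are unstacked into a sequence of rows.
choices = np.array([[0, 1, 2, 3],
                    [10, 11, 12, 13],
                    [20, 21, 22, 23],
                    [30, 31, 32, 33]])

# Element i of the result is taken from choices[inds[i]][i].
inds = np.array([2, 3, 1, 0])
print(np.choose(inds, choices))                                  # [20 31 12  3]

# Out-of-bounds indices are handled by `mode` (default is "wrap"):
# "clip" clips indices into [0, n-1]; "wrap" clamps them to [-n, n) and then
# wraps negative indices around.
print(np.choose(np.array([2, 4, 1, 0]), choices, mode="clip"))   # 4 -> 3
print(np.choose(np.array([2, -1, 1, 0]), choices, mode="wrap"))  # -1 -> 3

# Index and choice arrays are broadcast against each other.
a = np.array([[1, 0, 1], [0, 1, 0], [1, 0, 1]])
print(np.choose(a, np.array([-10, 10])))

# A writable `out` array of the broadcast shape and the (promoted) choice
# dtype may be supplied.
c1 = np.zeros(4, dtype="f4")
c2 = np.ones(4, dtype="f4")
out = np.empty(4, dtype="f4")
np.choose(np.array([0, 1, 0, 1], dtype="i4"), [c1, c2], out=out)
print(out)                                                       # [0. 1. 0. 1.]

Note that, unlike NumPy, "raise" is not an accepted mode and the "wrap" mode clamps out-of-range indices before wrapping negatives, which is why the corresponding CuPy tests are skipped in the hunks above.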