diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3d3995a85e7c..2e55c9eb8c03 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,7 +52,6 @@ repos:
     rev: 24.4.2
     hooks:
       - id: black
-        args: ["--check", "--diff", "--color"]
   - repo: https://github.com/pycqa/isort
     rev: 5.13.2
     hooks:
diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt
index da46cf99e3f5..d4ac718954a6 100644
--- a/dpnp/CMakeLists.txt
+++ b/dpnp/CMakeLists.txt
@@ -58,6 +58,7 @@ add_subdirectory(backend/extensions/fft)
 add_subdirectory(backend/extensions/lapack)
 add_subdirectory(backend/extensions/vm)
 add_subdirectory(backend/extensions/ufunc)
+add_subdirectory(backend/extensions/statistics)
 add_subdirectory(dpnp_algo)
 add_subdirectory(dpnp_utils)
diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt
new file mode 100644
index 000000000000..20c868066576
--- /dev/null
+++ b/dpnp/backend/extensions/statistics/CMakeLists.txt
@@ -0,0 +1,88 @@
+# *****************************************************************************
+# Copyright (c) 2016-2024, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+set(python_module_name _statistics_impl)
+set(_module_src
+    ${CMAKE_CURRENT_SOURCE_DIR}/common.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/histogram.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/histogram_common.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/statistics_py.cpp
+)
+
+pybind11_add_module(${python_module_name} MODULE ${_module_src})
+add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+
+if(_dpnp_sycl_targets)
+    # make fat binary
+    target_compile_options(
+        ${python_module_name}
+        PRIVATE
+        -fsycl-targets=${_dpnp_sycl_targets}
+    )
+    target_link_options(
+        ${python_module_name}
+        PRIVATE
+        -fsycl-targets=${_dpnp_sycl_targets}
+    )
+endif()
+
+if (WIN32)
+    if (${CMAKE_VERSION} VERSION_LESS "3.27")
+        # this is a workaround for target_link_options inserting the option
+        # after the -link option, which causes the linker to ignore it
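+        # (the ordering issue appears to be fixed in CMake 3.27, hence the
+        # version guard above)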
+ set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/statistics" +) diff --git a/dpnp/backend/extensions/statistics/common.cpp b/dpnp/backend/extensions/statistics/common.cpp new file mode 100644 index 000000000000..961205293725 --- /dev/null +++ b/dpnp/backend/extensions/statistics/common.cpp @@ -0,0 +1,124 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include "common.hpp" +#include "utils/type_dispatch.hpp" +#include + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace statistics +{ +namespace common +{ + +size_t get_max_local_size(const sycl::device &device) +{ + constexpr const int default_max_cpu_local_size = 256; + constexpr const int default_max_gpu_local_size = 0; + + return get_max_local_size(device, default_max_cpu_local_size, + default_max_gpu_local_size); +} + +size_t get_max_local_size(const sycl::device &device, + int cpu_local_size_limit, + int gpu_local_size_limit) +{ + int max_work_group_size = + device.get_info(); + if (device.is_cpu() && cpu_local_size_limit > 0) { + return std::min(cpu_local_size_limit, max_work_group_size); + } + else if (device.is_gpu() && gpu_local_size_limit > 0) { + return std::min(gpu_local_size_limit, max_work_group_size); + } + + return max_work_group_size; +} + +sycl::nd_range<1> + make_ndrange(size_t global_size, size_t local_range, size_t work_per_item) +{ + return make_ndrange(sycl::range<1>(global_size), + sycl::range<1>(local_range), + sycl::range<1>(work_per_item)); +} + +size_t get_local_mem_size_in_bytes(const sycl::device &device) +{ + // Reserving 1kb for runtime needs + constexpr const size_t reserve = 1024; + + return get_local_mem_size_in_bytes(device, reserve); +} + +size_t get_local_mem_size_in_bytes(const sycl::device &device, size_t reserve) +{ + size_t local_mem_size = + device.get_info(); + return local_mem_size - reserve; +} + +pybind11::dtype dtype_from_typenum(int dst_typenum) +{ + dpctl_td_ns::typenum_t dst_typenum_t = + static_cast(dst_typenum); + switch (dst_typenum_t) { + case dpctl_td_ns::typenum_t::BOOL: + return py::dtype("?"); + case dpctl_td_ns::typenum_t::INT8: + return py::dtype("i1"); + case dpctl_td_ns::typenum_t::UINT8: + return py::dtype("u1"); + case dpctl_td_ns::typenum_t::INT16: + return py::dtype("i2"); + case dpctl_td_ns::typenum_t::UINT16: + return py::dtype("u2"); + case dpctl_td_ns::typenum_t::INT32: + return py::dtype("i4"); + case dpctl_td_ns::typenum_t::UINT32: + return py::dtype("u4"); + case dpctl_td_ns::typenum_t::INT64: + return py::dtype("i8"); + case dpctl_td_ns::typenum_t::UINT64: + return py::dtype("u8"); + case dpctl_td_ns::typenum_t::HALF: + return py::dtype("f2"); + case dpctl_td_ns::typenum_t::FLOAT: + return py::dtype("f4"); + case dpctl_td_ns::typenum_t::DOUBLE: + return py::dtype("f8"); + case dpctl_td_ns::typenum_t::CFLOAT: + return py::dtype("c8"); + case dpctl_td_ns::typenum_t::CDOUBLE: + return py::dtype("c16"); + default: + throw py::value_error("Unrecognized dst_typeid"); + } +} + +} // namespace common +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/common.hpp b/dpnp/backend/extensions/statistics/common.hpp new file mode 100644 index 000000000000..518a3cf86a9e --- /dev/null +++ b/dpnp/backend/extensions/statistics/common.hpp @@ -0,0 +1,191 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include +#include + +// clang-format off +// math_utils.hpp doesn't include sycl header but uses sycl types +// so sycl.hpp must be included before math_utils.hpp +#include +#include "utils/math_utils.hpp" +// clang-format on + +namespace statistics +{ +namespace common +{ + +template +constexpr auto CeilDiv(N n, D d) +{ + return (n + d - 1) / d; +} + +template +constexpr auto Align(N n, D d) +{ + return CeilDiv(n, d) * d; +} + +template +struct AtomicOp +{ + static void add(T &lhs, const T value) + { + sycl::atomic_ref lh(lhs); + lh += value; + } +}; + +template +struct AtomicOp, Order, Scope> +{ + static void add(std::complex &lhs, const std::complex value) + { + T *_lhs = reinterpret_cast(lhs); + const T *_val = reinterpret_cast(value); + sycl::atomic_ref lh0(_lhs[0]); + lh0 += _val[0]; + sycl::atomic_ref lh1(_lhs[1]); + lh1 += _val[1]; + } +}; + +template +struct Less +{ + bool operator()(const T &lhs, const T &rhs) const + { + return std::less{}(lhs, rhs); + } +}; + +template +struct Less> +{ + bool operator()(const std::complex &lhs, + const std::complex &rhs) const + { + return dpctl::tensor::math_utils::less_complex(lhs, rhs); + } +}; + +template +struct IsNan +{ + static bool isnan(const T &v) + { + if constexpr (std::is_floating_point_v || + std::is_same_v) { + return sycl::isnan(v); + } + + return false; + } +}; + +template +struct IsNan> +{ + static bool isnan(const std::complex &v) + { + T real1 = std::real(v); + T imag1 = std::imag(v); + return sycl::isnan(real1) || sycl::isnan(imag1); + } +}; + +size_t get_max_local_size(const sycl::device &device); +size_t get_max_local_size(const sycl::device &device, + int cpu_local_size_limit, + int gpu_local_size_limit); + +inline size_t get_max_local_size(const sycl::queue &queue) +{ + return get_max_local_size(queue.get_device()); +} + +inline size_t get_max_local_size(const sycl::queue &queue, + int cpu_local_size_limit, + int gpu_local_size_limit) +{ + return get_max_local_size(queue.get_device(), cpu_local_size_limit, + gpu_local_size_limit); +} + +size_t get_local_mem_size_in_bytes(const sycl::device &device); +size_t get_local_mem_size_in_bytes(const sycl::device &device, size_t reserve); + +inline size_t get_local_mem_size_in_bytes(const sycl::queue &queue) +{ + return get_local_mem_size_in_bytes(queue.get_device()); +} + +inline size_t get_local_mem_size_in_bytes(const sycl::queue &queue, + size_t reserve) +{ + return 
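+        // delegates to the device overload, which subtracts `reserve`
+        // bytes from the device's reported local memory size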
get_local_mem_size_in_bytes(queue.get_device(), reserve); +} + +template +size_t get_local_mem_size_in_items(const sycl::device &device) +{ + return get_local_mem_size_in_bytes(device) / sizeof(T); +} + +template +size_t get_local_mem_size_in_items(const sycl::device &device, size_t reserve) +{ + return get_local_mem_size_in_bytes(device, sizeof(T) * reserve) / sizeof(T); +} + +template +sycl::nd_range make_ndrange(const sycl::range &global_range, + const sycl::range &local_range, + const sycl::range &work_per_item) +{ + sycl::range aligned_global_range; + + for (int i = 0; i < Dims; ++i) { + aligned_global_range[i] = + Align(CeilDiv(global_range[i], work_per_item[i]), local_range[i]); + } + + return sycl::nd_range(aligned_global_range, local_range); +} + +sycl::nd_range<1> + make_ndrange(size_t global_size, size_t local_range, size_t work_per_item); + +// This function is a copy from dpctl because it is not available in the public +// headers of dpctl. +pybind11::dtype dtype_from_typenum(int dst_typenum); + +} // namespace common +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/dispatch_table.hpp b/dpnp/backend/extensions/statistics/dispatch_table.hpp new file mode 100644 index 000000000000..ce11de891318 --- /dev/null +++ b/dpnp/backend/extensions/statistics/dispatch_table.hpp @@ -0,0 +1,292 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include "utils/type_dispatch.hpp" +#include +#include +#include +#include + +#include "common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +namespace py = pybind11; + +namespace statistics +{ +namespace common +{ + +template +struct one_of +{ + static_assert(std::is_same_v>, + "one_of: second parameter cannot be empty std::tuple"); + static_assert(false, "one_of: second parameter must be std::tuple"); +}; + +template +struct one_of> +{ + static constexpr bool value = + std::is_same_v || one_of>::value; +}; + +template +struct one_of> +{ + static constexpr bool value = std::is_same_v; +}; + +template +constexpr bool one_of_v = one_of::value; + +template +using Table = FnT[dpctl_td_ns::num_types]; +template +using Table2 = Table[dpctl_td_ns::num_types]; + +using TypeId = int32_t; +using TypesPair = std::pair; + +struct int_pair_hash +{ + inline size_t operator()(const TypesPair &p) const + { + std::hash hasher; + return hasher(size_t(p.first) << (8 * sizeof(TypeId)) | + size_t(p.second)); + } +}; + +using SupportedTypesList = std::vector; +using SupportedTypesList2 = std::vector; +using SupportedTypesSet = std::unordered_set; +using SupportedTypesSet2 = std::unordered_set; + +using DType = py::dtype; +using DTypePair = std::pair; + +using SupportedDTypeList = std::vector; +using SupportedDTypeList2 = std::vector; + +template + typename Func> +struct TableBuilder2 +{ + template + struct impl + { + static constexpr bool is_defined = + one_of_v, SupportedTypes>; + + _FnT get() + { + if constexpr (is_defined) { + return Func::impl; + } + else { + return nullptr; + } + } + }; + + using type = + dpctl_td_ns::DispatchTableBuilder; +}; + +template +class DispatchTable2 +{ +public: + DispatchTable2(std::string first_name, std::string second_name) + : first_name(first_name), second_name(second_name) + { + } + + template + typename Func> + void populate_dispatch_table() + { + using TBulder = typename TableBuilder2::type; + TBulder builder; + + builder.populate_dispatch_table(table); + populate_supported_types(); + } + + FnT get_unsafe(int first_typenum, int second_typenum) const + { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int first_type_id = + array_types.typenum_to_lookup_id(first_typenum); + const int second_type_id = + array_types.typenum_to_lookup_id(second_typenum); + + return table[first_type_id][second_type_id]; + } + + FnT get(int first_typenum, int second_typenum) const + { + auto fn = get_unsafe(first_typenum, second_typenum); + + if (fn == nullptr) { + auto array_types = dpctl_td_ns::usm_ndarray_types(); + const int first_type_id = + array_types.typenum_to_lookup_id(first_typenum); + const int second_type_id = + array_types.typenum_to_lookup_id(second_typenum); + + py::dtype first_dtype = dtype_from_typenum(first_type_id); + auto first_type_pos = + std::find(supported_first_type.begin(), + supported_first_type.end(), first_dtype); + if (first_type_pos == supported_first_type.end()) { + py::str types = py::str(py::cast(supported_first_type)); + py::str dtype = py::str(first_dtype); + + py::str err_msg = + py::str("'" + first_name + "' has unsupported type '") + + dtype + + py::str("'." 
+ " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + py::dtype second_dtype = dtype_from_typenum(second_type_id); + auto second_type_pos = + std::find(supported_second_type.begin(), + supported_second_type.end(), second_dtype); + if (second_type_pos == supported_second_type.end()) { + py::str types = py::str(py::cast(supported_second_type)); + py::str dtype = py::str(second_dtype); + + py::str err_msg = + py::str("'" + second_name + "' has unsupported type '") + + dtype + + py::str("'." + " Supported types are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + py::str first_dtype_str = py::str(first_dtype); + py::str second_dtype_str = py::str(second_dtype); + py::str types = py::str(py::cast(all_supported_types)); + + py::str err_msg = + py::str("'" + first_name + "' and '" + second_name + + "' has unsupported types combination: ('") + + first_dtype_str + py::str("', '") + second_dtype_str + + py::str("')." + " Supported types combinations are: ") + + types; + + throw py::value_error(static_cast(err_msg)); + } + + return fn; + } + + const SupportedDTypeList &get_supported_first_type() const + { + return supported_first_type; + } + + const SupportedDTypeList &get_supported_second_type() const + { + return supported_second_type; + } + + const SupportedDTypeList2 &get_all_supported_types() const + { + return all_supported_types; + } + +private: + void populate_supported_types() + { + SupportedTypesSet first_supported_types_set; + SupportedTypesSet second_supported_types_set; + SupportedTypesSet2 all_supported_types_set; + + for (int i = 0; i < dpctl_td_ns::num_types; ++i) { + for (int j = 0; j < dpctl_td_ns::num_types; ++j) { + if (table[i][j] != nullptr) { + all_supported_types_set.emplace(i, j); + first_supported_types_set.emplace(i); + second_supported_types_set.emplace(j); + } + } + } + + auto to_supported_dtype_list = [](const auto &supported_set, + auto &supported_list) { + SupportedTypesList lst(supported_set.begin(), supported_set.end()); + std::sort(lst.begin(), lst.end()); + supported_list.resize(supported_set.size()); + std::transform(lst.begin(), lst.end(), supported_list.begin(), + [](TypeId i) { return dtype_from_typenum(i); }); + }; + + to_supported_dtype_list(first_supported_types_set, + supported_first_type); + to_supported_dtype_list(second_supported_types_set, + supported_second_type); + + SupportedTypesList2 lst(all_supported_types_set.begin(), + all_supported_types_set.end()); + std::sort(lst.begin(), lst.end()); + all_supported_types.resize(all_supported_types_set.size()); + std::transform(lst.begin(), lst.end(), all_supported_types.begin(), + [](TypesPair p) { + return DTypePair(dtype_from_typenum(p.first), + dtype_from_typenum(p.second)); + }); + } + + std::string first_name; + std::string second_name; + + SupportedDTypeList supported_first_type; + SupportedDTypeList supported_second_type; + SupportedDTypeList2 all_supported_types; + + Table2 table; +}; + +} // namespace common +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/histogram.cpp b/dpnp/backend/extensions/statistics/histogram.cpp new file mode 100644 index 000000000000..4317280b5a8d --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram.cpp @@ -0,0 +1,287 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// dpctl tensor headers +#include "dpctl4pybind11.hpp" +#include "utils/type_dispatch.hpp" + +#include "histogram.hpp" +#include "histogram_common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; + +using namespace statistics::histogram; +using namespace statistics::common; + +namespace +{ + +template +struct HistogramEdges +{ + static constexpr bool const sync_after_init = DataStorage::sync_after_init; + using boundsT = std::tuple; + + HistogramEdges(const T *global_data, size_t size, sycl::handler &cgh) + : data(global_data, sycl::range<1>(size), cgh) + { + } + + template + void init(const sycl::nd_item<_Dims> &item) const + { + data.init(item); + } + + boundsT get_bounds() const + { + auto min = data.get_ptr()[0]; + auto max = data.get_ptr()[data.size() - 1]; + return {min, max}; + } + + template + size_t get_bin(const sycl::nd_item<_Dims> &, + const dT *val, + const boundsT &) const + { + uint32_t edges_count = data.size(); + uint32_t bins_count = edges_count - 1; + const auto *bins = data.get_ptr(); + + uint32_t bin = + std::upper_bound(bins, bins + edges_count, val[0], Less
<T>{}) -
+            bins - 1;
+        bin = std::min(bin, bins_count - 1);
+
+        return bin;
+    }
+
+    template <typename dT>
+    bool in_bounds(const dT *val, const boundsT &bounds) const
+    {
+        Less<T> _less;
+        return !_less(val[0], std::get<0>(bounds)) &&
+               !_less(std::get<1>(bounds), val[0]) && !IsNan<dT>
::isnan(val[0]); + } + +private: + DataStorage data; +}; + +template +using CachedEdges = HistogramEdges>; + +template +using UncachedEdges = HistogramEdges>; + +template +struct histogram_kernel +{ + static sycl::event impl(sycl::queue &exec_q, + const void *vin, + const void *vbins_edges, + const void *vweights, + void *vout, + const size_t bins_count, + const size_t size, + const std::vector &depends) + { + const T *in = static_cast(vin); + const BinsT *bins_edges = static_cast(vbins_edges); + const HistType *weights = static_cast(vweights); + HistType *out = static_cast(vout); + + auto device = exec_q.get_device(); + const auto local_size = get_max_local_size(device); + + constexpr uint32_t WorkPI = 128; // empirically found number + + const auto nd_range = make_ndrange(size, local_size, WorkPI); + + return exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + constexpr uint32_t dims = 1; + + auto dispatch_edges = [&](uint32_t local_mem, const auto &weights, + auto &hist) { + if (device.is_gpu() && (local_mem >= bins_count + 1)) { + auto edges = CachedEdges(bins_edges, bins_count + 1, cgh); + submit_histogram(in, size, dims, WorkPI, hist, edges, + weights, nd_range, cgh); + } + else { + auto edges = UncachedEdges(bins_edges, bins_count + 1, cgh); + submit_histogram(in, size, dims, WorkPI, hist, edges, + weights, nd_range, cgh); + } + }; + + auto dispatch_bins = [&](const auto &weights) { + const auto local_mem_size = + get_local_mem_size_in_items(device); + if (local_mem_size >= bins_count) { + const auto local_hist_count = get_local_hist_copies_count( + local_mem_size, local_size, bins_count); + + auto hist = HistWithLocalCopies( + out, bins_count, local_hist_count, cgh); + const auto free_local_mem = local_mem_size - hist.size(); + + dispatch_edges(free_local_mem, weights, hist); + } + else { + auto hist = HistGlobalMemory(out); + auto edges = UncachedEdges(bins_edges, bins_count + 1, cgh); + submit_histogram(in, size, dims, WorkPI, hist, edges, + weights, nd_range, cgh); + } + }; + + if (weights) { + auto _weights = Weights(weights); + dispatch_bins(_weights); + } + else { + auto _weights = NoWeights(); + dispatch_bins(_weights); + } + }); + } +}; + +template +using histogram_kernel_ = histogram_kernel; + +} // namespace + +using SupportedTypes = std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>, + std::tuple>, + std::tuple>, + std::tuple>, + std::tuple, + std::tuple, + std::tuple, + std::tuple, + std::tuple>, + std::tuple>, + std::tuple, int64_t>, + std::tuple, int64_t>, + std::tuple, float>, + std::tuple, double>>; + +Histogram::Histogram() : dispatch_table("sample", "histogram") +{ + dispatch_table.populate_dispatch_table(); +} + +std::tuple + Histogram::call(const dpctl::tensor::usm_ndarray &sample, + const dpctl::tensor::usm_ndarray &bins, + std::optional &weights, + dpctl::tensor::usm_ndarray &histogram, + const std::vector &depends) +{ + validate(sample, bins, weights, histogram); + + if (sample.get_size() == 0) { + return {sycl::event(), sycl::event()}; + } + + const int sample_typenum = sample.get_typenum(); + const int hist_typenum = histogram.get_typenum(); + + auto histogram_func = dispatch_table.get(sample_typenum, hist_typenum); + + auto exec_q = sample.get_queue(); + + void *weights_ptr = + weights.has_value() ? 
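+        // raw USM pointer to the weights, or nullptr, which makes the
+        // kernel fall back to the NoWeights accessor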
weights.value().get_data() : nullptr; + + auto ev = + histogram_func(exec_q, sample.get_data(), bins.get_data(), weights_ptr, + histogram.get_data(), histogram.get_shape(0), + sample.get_shape(0), depends); + + sycl::event args_ev; + if (weights.has_value()) { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {sample, bins, weights.value(), histogram}, {ev}); + } + else { + args_ev = dpctl::utils::keep_args_alive( + exec_q, {sample, bins, histogram}, {ev}); + } + + return {args_ev, ev}; +} + +std::unique_ptr hist; + +void statistics::histogram::populate_histogram(py::module_ m) +{ + using namespace std::placeholders; + + hist.reset(new Histogram()); + + auto hist_func = + [histp = hist.get()]( + const dpctl::tensor::usm_ndarray &sample, + const dpctl::tensor::usm_ndarray &bins, + std::optional &weights, + dpctl::tensor::usm_ndarray &histogram, + const std::vector &depends) { + return histp->call(sample, bins, weights, histogram, depends); + }; + + m.def("histogram", hist_func, "Compute the histogram of a dataset.", + py::arg("sample"), py::arg("bins"), py::arg("weights"), + py::arg("histogram"), py::arg("depends") = py::list()); + + auto histogram_dtypes = [histp = hist.get()]() { + return histp->dispatch_table.get_all_supported_types(); + }; + + m.def("histogram_dtypes", histogram_dtypes, + "Get the supported data types for histogram."); +} diff --git a/dpnp/backend/extensions/statistics/histogram.hpp b/dpnp/backend/extensions/statistics/histogram.hpp new file mode 100644 index 000000000000..e89737c12946 --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram.hpp @@ -0,0 +1,63 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include + +#include "dispatch_table.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace statistics +{ +namespace histogram +{ +struct Histogram +{ + using FnT = sycl::event (*)(sycl::queue &, + const void *, + const void *, + const void *, + void *, + const size_t, + const size_t, + const std::vector &); + + common::DispatchTable2 dispatch_table; + + Histogram(); + + std::tuple + call(const dpctl::tensor::usm_ndarray &input, + const dpctl::tensor::usm_ndarray &bins_edges, + std::optional &weights, + dpctl::tensor::usm_ndarray &output, + const std::vector &depends); +}; + +void populate_histogram(py::module_ m); +} // namespace histogram +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/histogram_common.cpp b/dpnp/backend/extensions/statistics/histogram_common.cpp new file mode 100644 index 000000000000..30197f74e422 --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram_common.cpp @@ -0,0 +1,244 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include +#include + +#include "dpctl4pybind11.hpp" +#include "utils/memory_overlap.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" + +#include + +#include "histogram_common.hpp" + +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; +using dpctl::tensor::usm_ndarray; +using dpctl_td_ns::typenum_t; + +namespace statistics +{ +using common::CeilDiv; + +namespace histogram +{ + +void validate(const usm_ndarray &sample, + const usm_ndarray &bins, + std::optional &weights, + const usm_ndarray &histogram) +{ + auto exec_q = sample.get_queue(); + using array_ptr = const usm_ndarray *; + + std::vector arrays{&sample, &bins, &histogram}; + std::unordered_map names = { + {arrays[0], "sample"}, {arrays[1], "bins"}, {arrays[2], "histogram"}}; + + array_ptr weights_ptr = nullptr; + + if (weights.has_value()) { + weights_ptr = &weights.value(); + arrays.push_back(weights_ptr); + names.insert({weights_ptr, "weights"}); + } + + auto get_name = [&](const array_ptr &arr) { + auto name_it = names.find(arr); + assert(name_it != names.end()); + + return "'" + name_it->second + "'"; + }; + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(histogram); + + auto unequal_queue = + std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { + return arr->get_queue() != exec_q; + }); + + if (unequal_queue != arrays.cend()) { + throw py::value_error( + get_name(*unequal_queue) + + " parameter has incompatible queue with parameter " + + get_name(&sample)); + } + + auto non_contig_array = + std::find_if(arrays.cbegin(), arrays.cend(), [&](const array_ptr &arr) { + return !arr->is_c_contiguous(); + }); + + if (non_contig_array != arrays.cend()) { + throw py::value_error(get_name(*non_contig_array) + + " parameter is not c-contiguos"); + } + + auto check_overlaping = [&](const array_ptr &first, + const array_ptr &second) { + if (first == nullptr || second == nullptr) { + return; + } + + const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); + + if (overlap(*first, *second)) { + throw py::value_error(get_name(first) + + " has overlapping memory segments with " + + get_name(second)); + } + }; + + check_overlaping(&sample, &histogram); + check_overlaping(&bins, &histogram); + check_overlaping(weights_ptr, &histogram); + + if (bins.get_size() < 2) { + throw py::value_error(get_name(&bins) + + " parameter must have at least 2 elements"); + } + + if (histogram.get_size() < 1) { + throw py::value_error(get_name(&histogram) + + " parameter must have at least 1 element"); + } + + if (histogram.get_ndim() != 1) { + throw py::value_error(get_name(&histogram) + + " parameter must be 1d. Actual " + + std::to_string(histogram.get_ndim()) + "d"); + } + + if (weights_ptr) { + if (weights_ptr->get_ndim() != 1) { + throw py::value_error( + get_name(weights_ptr) + " parameter must be 1d. Actual " + + std::to_string(weights_ptr->get_ndim()) + "d"); + } + + auto sample_size = sample.get_size(); + auto weights_size = weights_ptr->get_size(); + if (sample.get_size() != weights_ptr->get_size()) { + throw py::value_error( + get_name(&sample) + " size (" + std::to_string(sample_size) + + ") and " + get_name(weights_ptr) + " size (" + + std::to_string(weights_size) + ")" + " must match"); + } + } + + if (sample.get_ndim() > 2) { + throw py::value_error( + get_name(&sample) + + " parameter must have no more than 2 dimensions. 
Actual " + + std::to_string(sample.get_ndim()) + "d"); + } + + if (sample.get_ndim() == 1) { + if (bins.get_ndim() != 1) { + throw py::value_error(get_name(&sample) + " parameter is 1d, but " + + get_name(&bins) + " is " + + std::to_string(bins.get_ndim()) + "d"); + } + } + else if (sample.get_ndim() == 2) { + auto sample_count = sample.get_shape(0); + auto expected_dims = sample.get_shape(1); + + if (bins.get_ndim() != expected_dims) { + throw py::value_error(get_name(&sample) + " parameter has shape {" + + std::to_string(sample_count) + "x" + + std::to_string(expected_dims) + "}" + + ", so " + get_name(&bins) + + " parameter expected to be " + + std::to_string(expected_dims) + + "d. " + "Actual " + + std::to_string(bins.get_ndim()) + "d"); + } + } + + py::ssize_t expected_hist_size = 1; + for (int i = 0; i < bins.get_ndim(); ++i) { + expected_hist_size *= (bins.get_shape(i) - 1); + } + + if (histogram.get_size() != expected_hist_size) { + throw py::value_error( + get_name(&histogram) + " and " + get_name(&bins) + + " shape mismatch. " + get_name(&histogram) + + " expected to have size = " + std::to_string(expected_hist_size) + + ". Actual " + std::to_string(histogram.get_size())); + } + + int64_t max_hist_size = std::numeric_limits::max() - 1; + if (histogram.get_size() > max_hist_size) { + throw py::value_error(get_name(&histogram) + + " parameter size expected to be less than " + + std::to_string(max_hist_size) + ". Actual " + + std::to_string(histogram.get_size())); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + auto hist_type = static_cast( + array_types.typenum_to_lookup_id(histogram.get_typenum())); + if (histogram.get_elemsize() == 8 && hist_type != typenum_t::CFLOAT) { + auto device = exec_q.get_device(); + bool _64bit_atomics = device.has(sycl::aspect::atomic64); + + if (!_64bit_atomics) { + auto device_name = device.get_info(); + throw py::value_error( + get_name(&histogram) + + " parameter has 64-bit type, but 64-bit atomics " + + " are not supported for " + device_name); + } + } +} + +uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, + uint32_t local_size, + uint32_t hist_size_in_items) +{ + constexpr uint32_t local_copies_limit = 16; + constexpr uint32_t atomics_per_work_item = 4; + + const uint32_t preferred_local_copies = + CeilDiv(atomics_per_work_item * local_size, hist_size_in_items); + const uint32_t local_copies_fit_memory = + loc_mem_size_in_items / hist_size_in_items; + + uint32_t local_hist_count = + std::min(preferred_local_copies, local_copies_limit); + local_hist_count = std::min(local_hist_count, local_copies_fit_memory); + + return local_hist_count; +} + +} // namespace histogram +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/histogram_common.hpp b/dpnp/backend/extensions/statistics/histogram_common.hpp new file mode 100644 index 000000000000..9d2cbd163024 --- /dev/null +++ b/dpnp/backend/extensions/statistics/histogram_common.hpp @@ -0,0 +1,342 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "common.hpp" + +namespace dpctl +{ +namespace tensor +{ +class usm_ndarray; +} +} // namespace dpctl + +using dpctl::tensor::usm_ndarray; + +namespace statistics +{ +using common::AtomicOp; +using common::IsNan; +using common::Less; + +namespace histogram +{ + +template +struct CachedData +{ + static constexpr bool const sync_after_init = true; + using pointer_type = T *; + + using ncT = typename std::remove_const::type; + using LocalData = sycl::local_accessor; + + CachedData(T *global_data, sycl::range shape, sycl::handler &cgh) + { + this->global_data = global_data; + local_data = LocalData(shape, cgh); + } + + T *get_ptr() const + { + return &local_data[0]; + } + + template + void init(const sycl::nd_item<_Dims> &item) const + { + int32_t llid = item.get_local_linear_id(); + auto local_ptr = &local_data[0]; + int32_t size = local_data.size(); + auto group = item.get_group(); + int32_t local_size = group.get_local_linear_range(); + + for (int32_t i = llid; i < size; i += local_size) { + local_ptr[i] = global_data[i]; + } + } + + size_t size() const + { + return local_data.size(); + } + +private: + LocalData local_data; + T *global_data = nullptr; +}; + +template +struct UncachedData +{ + static constexpr bool const sync_after_init = false; + using Shape = sycl::range; + using pointer_type = T *; + + UncachedData(T *global_data, const Shape &shape, sycl::handler &) + { + this->global_data = global_data; + _shape = shape; + } + + T *get_ptr() const + { + return global_data; + } + + template + void init(const sycl::nd_item<_Dims> &) const + { + } + + size_t size() const + { + return _shape.size(); + } + +private: + T *global_data = nullptr; + Shape _shape; +}; + +template +struct HistLocalType +{ + using type = T; +}; + +template <> +struct HistLocalType +{ + using type = uint32_t; +}; + +template <> +struct HistLocalType +{ + using type = int32_t; +}; + +template ::type> +struct HistWithLocalCopies +{ + static constexpr bool const sync_after_init = true; + static constexpr bool const sync_before_finalize = true; + + using LocalHist = sycl::local_accessor; + + HistWithLocalCopies(T *global_data, + size_t bins_count, + int32_t copies_count, + sycl::handler &cgh) + { + local_hist = LocalHist(sycl::range<2>(copies_count, bins_count), cgh); + global_hist = global_data; + } + + template + void init(const sycl::nd_item<_Dims> &item, localT val = 0) const + { + uint32_t llid = 
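+        // each work-item strides across the flattened set of local
+        // histogram copies, filling every slot with `val` (zero by default)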
item.get_local_linear_id(); + auto *local_ptr = &local_hist[0][0]; + uint32_t size = local_hist.size(); + auto group = item.get_group(); + uint32_t local_size = group.get_local_linear_range(); + + for (uint32_t i = llid; i < size; i += local_size) { + local_ptr[i] = val; + } + } + + template + void add(const sycl::nd_item<_Dims> &item, int32_t bin, localT value) const + { + int32_t llid = item.get_local_linear_id(); + int32_t local_hist_count = local_hist.get_range().get(0); + int32_t local_copy_id = + local_hist_count == 1 ? 0 : llid % local_hist_count; + + AtomicOp::add(local_hist[local_copy_id] + [bin], + value); + } + + template + void finalize(const sycl::nd_item<_Dims> &item) const + { + int32_t llid = item.get_local_linear_id(); + int32_t bins_count = local_hist.get_range().get(1); + int32_t local_hist_count = local_hist.get_range().get(0); + auto group = item.get_group(); + int32_t local_size = group.get_local_linear_range(); + + for (int32_t i = llid; i < bins_count; i += local_size) { + auto value = local_hist[0][i]; + for (int32_t lhc = 1; lhc < local_hist_count; ++lhc) { + value += local_hist[lhc][i]; + } + if (value != T(0)) { + AtomicOp::add(global_hist[i], + value); + } + } + } + + uint32_t size() const + { + return local_hist.size(); + } + +private: + LocalHist local_hist; + T *global_hist = nullptr; +}; + +template +struct HistGlobalMemory +{ + static constexpr bool const sync_after_init = false; + static constexpr bool const sync_before_finalize = false; + + HistGlobalMemory(T *global_data) + { + global_hist = global_data; + } + + template + void init(const sycl::nd_item<_Dims> &) const + { + } + + template + void add(const sycl::nd_item<_Dims> &, int32_t bin, T value) const + { + AtomicOp::add(global_hist[bin], value); + } + + template + void finalize(const sycl::nd_item<_Dims> &) const + { + } + +private: + T *global_hist = nullptr; +}; + +template +struct NoWeights +{ + constexpr T get(size_t) const + { + return 1; + } +}; + +template +struct Weights +{ + Weights(T *weights) + { + data = weights; + } + + T get(size_t id) const + { + return data[id]; + } + +private: + T *data = nullptr; +}; + +template +class histogram_kernel; + +template +void submit_histogram(const T *in, + size_t size, + size_t dims, + uint32_t WorkPI, + const HistImpl &hist, + const Edges &edges, + const Weights &weights, + sycl::nd_range<1> nd_range, + sycl::handler &cgh) +{ + cgh.parallel_for>( + nd_range, [=](sycl::nd_item<1> item) { + auto id = item.get_group_linear_id(); + auto lid = item.get_local_linear_id(); + auto group = item.get_group(); + auto local_size = item.get_local_range(0); + + hist.init(item); + edges.init(item); + + if constexpr (HistImpl::sync_after_init || Edges::sync_after_init) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + auto bounds = edges.get_bounds(); + + for (uint32_t i = 0; i < WorkPI; ++i) { + auto data_idx = id * WorkPI * local_size + i * local_size + lid; + if (data_idx < size) { + auto *d = &in[data_idx * dims]; + + if (edges.in_bounds(d, bounds)) { + auto bin = edges.get_bin(item, d, bounds); + auto weight = weights.get(data_idx); + hist.add(item, bin, weight); + } + } + } + + if constexpr (HistImpl::sync_before_finalize) { + sycl::group_barrier(group, sycl::memory_scope::work_group); + } + + hist.finalize(item); + }); +} + +void validate(const usm_ndarray &sample, + const usm_ndarray &bins, + std::optional &weights, + const usm_ndarray &histogram); + +uint32_t get_local_hist_copies_count(uint32_t loc_mem_size_in_items, + uint32_t 
local_size, + uint32_t hist_size_in_items); + +} // namespace histogram +} // namespace statistics diff --git a/dpnp/backend/extensions/statistics/statistics_py.cpp b/dpnp/backend/extensions/statistics/statistics_py.cpp new file mode 100644 index 000000000000..e4f533cd7a02 --- /dev/null +++ b/dpnp/backend/extensions/statistics/statistics_py.cpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +// This file defines functions of dpnp.backend._lapack_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "histogram.hpp" + +PYBIND11_MODULE(_statistics_impl, m) +{ + statistics::histogram::populate_histogram(m); +} diff --git a/dpnp/dpnp_iface_histograms.py b/dpnp/dpnp_iface_histograms.py index c0e95e588c1a..b5eea9f37bfc 100644 --- a/dpnp/dpnp_iface_histograms.py +++ b/dpnp/dpnp_iface_histograms.py @@ -42,9 +42,16 @@ import dpctl.utils as dpu import numpy +from dpctl.tensor._type_utils import _can_cast import dpnp +# pylint: disable=no-name-in-module +import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext + +# pylint: disable=no-name-in-module +from .dpnp_utils import map_dtype_to_device + __all__ = [ "digitize", "histogram", @@ -201,19 +208,6 @@ def _get_bin_edges(a, bins, range, usm_type): return bin_edges, None -def _search_sorted_inclusive(a, v): - """ - Like :obj:`dpnp.searchsorted`, but where the last item in `v` is placed - on the right. - In the context of a histogram, this makes the last bin edge inclusive - - """ - - return dpnp.concatenate( - (a.searchsorted(v[:-1], "left"), a.searchsorted(v[-1:], "right")) - ) - - def digitize(x, bins, right=False): """ Return the indices of the bins to which each value in input array belongs. 
@@ -306,6 +300,35 @@ def digitize(x, bins, right=False): return bins.size - dpnp.searchsorted(bins[::-1], x, side=side) +def _result_type_for_device(dtype1, dtype2, device): + rt = dpnp.result_type(dtype1, dtype2) + return map_dtype_to_device(rt, device) + + +def _align_dtypes(a_dtype, bins_dtype, ntype, supported_types, device): + has_fp64 = device.has_aspect_fp64 + has_fp16 = device.has_aspect_fp16 + + a_bin_dtype = _result_type_for_device(a_dtype, bins_dtype, device) + + # histogram implementation doesn't support uint64 as histogram type + # we can use int64 instead. Result would be correct even in case of overflow + if ntype == numpy.uint64: + ntype = dpnp.int64 + + if (a_bin_dtype, ntype) in supported_types: + return a_bin_dtype, ntype + + for sample_type, hist_type in supported_types: + if _can_cast( + a_bin_dtype, sample_type, has_fp16, has_fp64 + ) and _can_cast(ntype, hist_type, has_fp16, has_fp64): + return sample_type, hist_type + + # should not happen + return None, None + + def histogram(a, bins=10, range=None, density=None, weights=None): """ Compute the histogram of a data set. @@ -393,7 +416,7 @@ def histogram(a, bins=10, range=None, density=None, weights=None): a, weights, usm_type = _ravel_check_a_and_weights(a, weights) - bin_edges, uniform_bins = _get_bin_edges(a, bins, range, usm_type) + bin_edges, _ = _get_bin_edges(a, bins, range, usm_type) # Histogram is an integer or a float array depending on the weights. if weights is None: @@ -401,41 +424,74 @@ def histogram(a, bins=10, range=None, density=None, weights=None): else: ntype = weights.dtype - # The fast path uses bincount, but that only works for certain types - # of weight - # simple_weights = ( - # weights is None or - # dpnp.can_cast(weights.dtype, dpnp.double) or - # dpnp.can_cast(weights.dtype, complex) - # ) - # TODO: implement a fast path - simple_weights = False - - if uniform_bins is not None and simple_weights: - # TODO: implement fast algorithm for equal bins - pass - else: - # Compute via cumulative histogram - if weights is None: - sa = dpnp.sort(a) - cum_n = _search_sorted_inclusive(sa, bin_edges) - else: - zero = dpnp.zeros( - 1, dtype=ntype, sycl_queue=a.sycl_queue, usm_type=usm_type - ) - sorting_index = dpnp.argsort(a) - sa = a[sorting_index] - sw = weights[sorting_index] - cw = dpnp.concatenate((zero, sw.cumsum(dtype=ntype))) - bin_index = _search_sorted_inclusive(sa, bin_edges) - cum_n = cw[bin_index] + queue = a.sycl_queue + device = queue.sycl_device + + supported_types = statistics_ext.histogram_dtypes() + a_bin_dtype, hist_dtype = _align_dtypes( + a.dtype, bin_edges.dtype, ntype, supported_types, device + ) + + if a_bin_dtype is None or hist_dtype is None: + raise ValueError( + f"function '{histogram}' does not support input types " + f"({a.dtype}, {bin_edges.dtype}, {ntype}), " + "and the inputs could not be coerced to any " + "supported types" + ) - n = dpnp.diff(cum_n) + a_casted = dpnp.astype(a, a_bin_dtype, order="C", copy=False) + bin_edges_casted = dpnp.astype( + bin_edges, a_bin_dtype, order="C", copy=False + ) + weights_casted = ( + dpnp.astype(weights, hist_dtype, order="C", copy=False) + if weights is not None + else None + ) + + # histogram implementation uses atomics, but atomics doesn't work with + # host usm memory + n_usm_type = "device" if usm_type == "host" else usm_type + + # histogram implementation requires output array to be filled with zeros + n_casted = dpnp.zeros( + bin_edges.size - 1, + dtype=hist_dtype, + sycl_queue=a.sycl_queue, + usm_type=n_usm_type, + ) + + 
_manager = dpu.SequentialOrderManager[queue] + + a_usm = dpnp.get_usm_ndarray(a_casted) + bins_usm = dpnp.get_usm_ndarray(bin_edges_casted) + weights_usm = ( + dpnp.get_usm_ndarray(weights_casted) + if weights_casted is not None + else None + ) + n_usm = dpnp.get_usm_ndarray(n_casted) + + mem_ev, ht_ev = statistics_ext.histogram( + a_usm, + bins_usm, + weights_usm, + n_usm, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, ht_ev) + + if usm_type != n_usm_type: + n = dpnp.asarray(n_casted, dtype=ntype, usm_type=usm_type) + else: + n = dpnp.astype(n_casted, ntype, copy=False) if density: - # pylint: disable=possibly-used-before-assignment - db = dpnp.diff(bin_edges).astype(dpnp.default_float_type()) - return n / db / n.sum(), bin_edges + db = dpnp.astype( + dpnp.diff(bin_edges), dpnp.default_float_type(sycl_queue=queue) + ) + return n / db / dpnp.sum(n), bin_edges return n, bin_edges diff --git a/tests/test_histogram.py b/tests/test_histogram.py index 0e6e33fd99c4..c37e5a4316ff 100644 --- a/tests/test_histogram.py +++ b/tests/test_histogram.py @@ -492,6 +492,19 @@ def test_weights_another_sycl_queue(self): with assert_raises(ValueError): dpnp.histogram(v, weights=w) + @pytest.mark.parametrize( + "bins_count", + [10, 10**2, 10**3, 10**4, 10**5, 10**6], + ) + def test_different_bins_amount(self, bins_count): + v = numpy.linspace(0, bins_count, bins_count, dtype=numpy.float32) + iv = dpnp.array(v) + + expected_hist, expected_edges = numpy.histogram(v, bins=bins_count) + result_hist, result_edges = dpnp.histogram(iv, bins=bins_count) + assert_array_equal(result_hist, expected_hist) + assert_allclose(result_edges, expected_edges) + class TestHistogramBinEdges: @pytest.mark.parametrize(
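
For illustration, a minimal sketch of driving the new histogram path from Python, based on the wiring in dpnp_iface_histograms.py above. It assumes a float32 sample with float32 weights, which should map onto one of the (sample, histogram) type pairs registered in SupportedTypes; array contents and bin counts are arbitrary.

    import dpnp
    import numpy

    # sample and weights allocated on the default SYCL device
    a = dpnp.arange(1000, dtype=dpnp.float32)
    w = dpnp.ones_like(a)

    # dpnp.histogram validates the arrays, aligns dtypes via _align_dtypes,
    # zero-fills the output array and submits the SYCL kernel through
    # _statistics_impl.histogram, with event ordering handled by dpctl's
    # SequentialOrderManager
    hist, edges = dpnp.histogram(a, bins=64, weights=w)

    # the result agrees with NumPy's reference implementation
    numpy.testing.assert_allclose(
        dpnp.asnumpy(hist),
        numpy.histogram(dpnp.asnumpy(a), bins=64,
                        weights=dpnp.asnumpy(w))[0],
    )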