From 31f05a0ae12d95c8eb7f13916decfe06438510f8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 18 Jun 2025 06:46:33 -0700 Subject: [PATCH 01/32] Init commit --- dpnp/backend/extensions/ufunc/CMakeLists.txt | 1 + .../ufunc/elementwise_functions/common.cpp | 2 + .../ufunc/elementwise_functions/isclose.cpp | 167 ++++++++ .../ufunc/elementwise_functions/isclose.hpp | 35 ++ .../kernels/elementwise_functions/isclose.hpp | 405 ++++++++++++++++++ dpnp/dpnp_iface_logic.py | 67 ++- 6 files changed, 660 insertions(+), 17 deletions(-) create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp create mode 100644 dpnp/backend/kernels/elementwise_functions/isclose.hpp diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 65d30d49f549..dfd58236ad69 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -37,6 +37,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/heaviside.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/i0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/interpolate.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/isclose.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/lcm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/ldexp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/logaddexp2.cpp diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index c6dd3e038eb1..9d30f16273f0 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -37,6 +37,7 @@ #include "heaviside.hpp" #include "i0.hpp" #include "interpolate.hpp" +#include "isclose.hpp" #include "lcm.hpp" #include "ldexp.hpp" #include "logaddexp2.hpp" @@ -66,6 +67,7 @@ void init_elementwise_functions(py::module_ m) init_heaviside(m); init_i0(m); init_interpolate(m); + init_isclose(m); init_lcm(m); init_ldexp(m); init_logaddexp2(m); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp new file mode 100644 index 000000000000..7f1d44aa49e1 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -0,0 +1,167 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "dpctl4pybind11.hpp" +#include +#include +#include + +#include "kernels/elementwise_functions/isclose.hpp" + +#include "../../elementwise_functions/simplify_iteration_space.hpp" + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" +#include "utils/output_validation.hpp" +#include "utils/sycl_alloc_utils.hpp" +#include "utils/type_dispatch.hpp" +#include "utils/type_utils.hpp" + +#include "ext/common.hpp" + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +using ext::common::value_type_of; + +namespace dpnp::extensions::ufunc +{ +namespace impl +{ + +using dpctl::tensor::usm_ndarray; +using event_vector = std::vector; + +using isclose_contig_fn_ptr_t = sycl::event (*)( + sycl::queue &, std::size_t, + const py::object &, const py::object &, const py::object &, + const char *, const char *, char *, const std::vector &); + +static isclose_contig_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types]; + +template +using value_type_of_t = typename value_type_of::type; + +// Template for checking if T is supported +template +struct IsCloseOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +sycl::event isclose_contig_call(sycl::queue &q, + std::size_t nelems, + const py::object &rtol_, + const py::object &atol_, + const py::object &equal_nan_, + const char *in1_p, + const char *in2_p, + char *out_p, + const event_vector &depends) +{ + using dpctl::tensor::type_utils::is_complex_v; + using scT = std::conditional_t, typename value_type_of::type, T>; + + const scT rtol = py::cast(rtol_); + const scT atol = py::cast(atol_); + const bool equal_nan = py::cast(equal_nan_); + + return dpnp::kernels::isclose::isclose_contig_impl( + q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, depends); +} + +template +struct IsCloseContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, void>) { + return nullptr; + } else { + return isclose_contig_call; + } + } +}; + +void populate_isclose_dispatch_vector() +{ + td_ns::DispatchVectorBuilder + dvb; + dvb.populate_dispatch_vector(isclose_contig_dispatch_vector); +} + +std::pair py_isclose( + const usm_ndarray &a, + const usm_ndarray &b, + const py::object &rtol, + const py::object &atol, + const py::object &equal_nan, + const usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends) +{ + auto types = td_ns::usm_ndarray_types(); + int typeid_ = types.typenum_to_lookup_id(a.get_typenum()); + + auto fn = isclose_contig_dispatch_vector[typeid_]; + auto comp_ev = fn(exec_q, a.get_size(), rtol, atol, equal_nan, + a.get_data(), b.get_data(), dst.get_data(), depends); + + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {a, b, dst}, {comp_ev}); + return {ht_ev, comp_ev}; +} + +} // namespace impl + +void init_isclose(py::module_ m) +{ + impl::populate_isclose_dispatch_vector(); + m.def("_isclose", &impl::py_isclose, "", + py::arg("a"), py::arg("b"), py::arg("rtol"), py::arg("atol"), + py::arg("equal_nan"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); +} + +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp new file mode 100644 index 000000000000..2a409779cb3a --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp @@ -0,0 +1,35 @@ +//***************************************************************************** +// Copyright (c) 2024-2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +void init_isclose(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp new file mode 100644 index 000000000000..32196917aa36 --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -0,0 +1,405 @@ +//***************************************************************************** +// Copyright (c) 2024-2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + + +// #pragma once + +// #include +// #include +// #include +// #include +// #include +// #include + +// // dpctl tensor headers +// #include "kernels/alignment.hpp" +// #include "utils/sycl_utils.hpp" +// #include "utils/type_utils.hpp" + +// namespace dpnp::kernels::isclose +// { + +// template +// inline bool isclose_scalar(T a, T b, scT rtol, scT atol, bool equal_nan) +// { +// if constexpr (dpctl::tensor::type_utils::is_complex_v) { +// return isclose_scalar(a.real(), b.real(), rtol, atol, equal_nan) && +// isclose_scalar(a.imag(), b.imag(), rtol, atol, equal_nan); +// } +// else { +// if (sycl::isnan(a) || sycl::isnan(b)) { +// return equal_nan && sycl::isnan(a) && sycl::isnan(b); +// } +// return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); +// } +// } + +// template +// struct IsCloseContigFunctor +// { +// const T *in1_; +// const T *in2_; +// bool *out_; +// std::size_t n_; +// scT rtol_; +// scT atol_; +// bool equal_nan_; + +// IsCloseContigFunctor(const T *in1, const T *in2, bool *out, +// std::size_t n, scT rtol, scT atol, bool equal_nan) +// : in1_(in1), in2_(in2), out_(out), +// n_(n), rtol_(rtol), atol_(atol), equal_nan_(equal_nan) +// { +// } + +// void operator()(sycl::nd_item<1> item) const +// { +// const std::size_t gid = item.get_global_linear_id(); +// if (gid < n_) { +// out_[gid] = isclose_scalar(in1_[gid], in2_[gid], rtol_, atol_, equal_nan_); +// } +// } +// }; + +// template +// sycl::event isclose_contig_impl(sycl::queue &q, +// std::size_t n, +// scT rtol, +// scT atol, +// bool equal_nan, +// const char *in1_p, +// const char *in2_p, +// char *out_p, +// const std::vector &depends) +// { +// dpctl::tensor::type_utils::validate_type_for_device(q); + +// const T *in1 = reinterpret_cast(in1_p); +// const T *in2 = reinterpret_cast(in2_p); +// bool *out = reinterpret_cast(out_p); + +// constexpr std::size_t wg_size = 128; +// const std::size_t n_wgs = (n + wg_size - 1) / wg_size; +// const sycl::range<1> gws{n_wgs * wg_size}; +// const sycl::range<1> lws{wg_size}; + +// using Kernel = IsCloseContigFunctor; + +// return q.submit([&](sycl::handler &cgh) { +// cgh.depends_on(depends); +// cgh.parallel_for( +// sycl::nd_range<1>(gws, lws), +// Kernel(in1, in2, out, n, rtol, atol, equal_nan)); +// }); +// } + +// } // namespace dpnp::kernels::isclose + + +#pragma once + +#include +#include +#include + +#include +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "kernels/dpctl_tensor_types.hpp" +#include "utils/offset_utils.hpp" +#include "utils/sycl_utils.hpp" +#include "utils/type_utils.hpp" + +namespace dpnp::kernels::isclose +{ + +template +inline T isclose(const T a, const T b, const T rtol, const T atol, const bool equal_nan) +{ + if (sycl::isnan(a) || sycl::isnan(b)) { + //static cast? + return equal_nan && sycl::isnan(a) && sycl::isnan(b); + } + if (sycl::isinf(a) || sycl::isinf(b)) { + return a == b; + } + return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); +} + +template +struct IsCloseFunctor +{ +private: + const T *a_ = nullptr; + const T *b_ = nullptr; + T *out_ = nullptr; + const InOutIndexerT inp_out_indexer_; + const scT rtol_; + const scT atol_; + const bool equal_nan_; + +public: + IsCloseFunctor(const T *a, + const T *b, + T *out, + const InOutIndexerT &inp_out_indexer, + const scT rtol, + const scT atol, + const bool equal_nan) + : a_(a), b_(b),out_(out), inp_out_indexer_(inp_out_indexer), rtol_(rtol), + atol_(atol), equal_nan_(equal_nan) + { + } + + void operator()(sycl::id<1> wid) const + { + const auto &offsets_ = inp_out_indexer_(wid.get(0)); + const dpctl::tensor::ssize_t &inp_offset = offsets_.get_first_offset(); + const dpctl::tensor::ssize_t &out_offset = offsets_.get_second_offset(); + + using dpctl::tensor::type_utils::is_complex_v; + if constexpr (is_complex_v) { + using realT = typename T::value_type; + static_assert(std::is_same_v); + T z_a = a_[inp_offset]; + T z_b = b_[inp_offset]; + realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); + realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); + out_[out_offset] = T{x, y}; + } + else { + out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_, atol_, equal_nan_); + } + } +}; + +template +struct IsCloseContigFunctor +{ +private: + const T *a_ = nullptr; + const T *b_ = nullptr; + T *out_ = nullptr; + std::size_t nelems_; + const scT rtol_; + const scT atol_; + const bool equal_nan_; + +public: + IsCloseContigFunctor(const T *a, + const T *b, + T *out, + const std::size_t n_elems, + const scT rtol, + const scT atol, + const bool equal_nan) + : a_(a), b_(b), out_(out), nelems_(n_elems), rtol_(rtol), atol_(atol), + equal_nan_(equal_nan) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + using dpctl::tensor::type_utils::is_complex_v; + if constexpr (enable_sg_loadstore && !is_complex_v) { + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { + using dpctl::tensor::sycl_utils::sub_group_load; + using dpctl::tensor::sycl_utils::sub_group_store; + // sycl::vec res_vec; +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto a_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&a_[offset]); + auto b_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&b_[offset]); + auto out_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out_[offset]); + + const sycl::vec a_vec = + sub_group_load(sg, a_multi_ptr); + const sycl::vec b_vec = + sub_group_load(sg, b_multi_ptr); + + sycl::vec res_vec; +#pragma unroll + for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) { + res_vec[vec_id] = + isclose(a_vec[vec_id], b_vec[vec_id], rtol_, atol_, equal_nan_); + } + sub_group_store(sg, res_vec, out_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out_[k] = isclose(a_[k], b_[k], rtol_, atol_, equal_nan_); + } + } + } + else { + const std::uint16_t sgSize = + ndit.get_sub_group().get_local_range()[0]; + const std::size_t gid = ndit.get_global_linear_id(); + const std::uint16_t elems_per_sg = sgSize * elems_per_wi; + + const std::size_t start = + (gid / sgSize) * (elems_per_sg - sgSize) + gid; + const std::size_t end = std::min(nelems_, start + elems_per_sg); + for (std::size_t offset = start; offset < end; offset += sgSize) { + if constexpr (is_complex_v) { + using realT = typename T::value_type; + static_assert(std::is_same_v); + + T z_a = a_[offset]; + T z_b = b_[offset]; + realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); + realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); + out_[offset] = T{x, y}; + } + else { + out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_, equal_nan_); + } + } + } + } +}; + +// template +// sycl::event nan_to_num_strided_impl(sycl::queue &q, +// const size_t nelems, +// const int nd, +// const dpctl::tensor::ssize_t *shape_strides, +// const scT nan, +// const scT posinf, +// const scT neginf, +// const char *in_cp, +// const dpctl::tensor::ssize_t in_offset, +// char *out_cp, +// const dpctl::tensor::ssize_t out_offset, +// const std::vector &depends) +// { +// dpctl::tensor::type_utils::validate_type_for_device(q); + +// const T *in_tp = reinterpret_cast(in_cp); +// T *out_tp = reinterpret_cast(out_cp); + +// using InOutIndexerT = +// typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; +// const InOutIndexerT indexer{nd, in_offset, out_offset, shape_strides}; + +// sycl::event comp_ev = q.submit([&](sycl::handler &cgh) { +// cgh.depends_on(depends); + +// using NanToNumFunc = NanToNumFunctor; +// cgh.parallel_for( +// {nelems}, +// NanToNumFunc(in_tp, out_tp, indexer, nan, posinf, neginf)); +// }); +// return comp_ev; +// } + +template +sycl::event isclose_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const scT rtol, + const scT atol, + const bool equal_nan, + const char *a_cp, + ssize_t a_offset, + const char *b_cp, + ssize_t b_offset, + char *out_cp, + ssize_t out_offset, + const std::vector &depends = {}) +{ + constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + const std::size_t n_work_items_needed = nelems / elems_per_wi; + const std::size_t empirical_threshold = std::size_t(1) << 21; + const std::size_t lws = (n_work_items_needed <= empirical_threshold) + ? std::size_t(128) + : std::size_t(256); + + const std::size_t n_groups = + ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + const T *a_tp = reinterpret_cast(a_cp) + a_offset; + const T *b_tp = reinterpret_cast(b_cp) + b_offset; + T *out_tp = reinterpret_cast(out_cp) + out_offset; + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using dpctl::tensor::kernels::alignment_utils::is_aligned; + using dpctl::tensor::kernels::alignment_utils::required_alignment; + if (is_aligned(a_tp) && + is_aligned(b_tp) && + is_aligned(out_tp)) + { + constexpr bool enable_sg_loadstore = true; + using IsCloseFunc = IsCloseContigFunctor; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + IsCloseFunc(a_tp, b_tp, out_tp, nelems, rtol, atol, equal_nan)); + } + else { + constexpr bool disable_sg_loadstore = false; + using IsCloseFunc = IsCloseContigFunctor; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + IsCloseFunc(a_tp, b_tp, out_tp, nelems, rtol, atol, equal_nan)); + } + }); + + return comp_ev; +} + +} // namespace dpnp::kernels::isclose diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 84258f698b8f..4ebee0a26ad9 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -44,10 +44,13 @@ import dpctl.tensor as dpt import dpctl.tensor._tensor_elementwise_impl as ti +import dpctl.utils as dpu + import numpy import dpnp from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc +import dpnp.backend.extensions.ufunc._ufunc_impl as ufi from .dpnp_utils import get_usm_allocations @@ -771,7 +774,7 @@ def array_equiv(a1, a2): ) -def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): +def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): """ Returns a boolean array where two arrays are element-wise equal within a tolerance. @@ -880,22 +883,52 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): dt = dpnp.result_type(b, 1.0, rtol, atol) b = dpnp.astype(b, dt) - # Firstly handle finite values: - # result = absolute(a - b) <= atol + rtol * absolute(b) - dt = dpnp.result_type(b, rtol, atol) - _b = dpnp.abs(b, dtype=dt) - _b *= rtol - _b += atol - result = less_equal(dpnp.abs(a - b), _b) - - # Handle "inf" values: they are treated as equal if they are in the same - # place and of the same sign in both arrays - result &= isfinite(b) - result |= a == b - - if equal_nan: - result |= isnan(a) & isnan(b) - return result + if not new: + + print("OLD") + + # Firstly handle finite values: + # result = absolute(a - b) <= atol + rtol * absolute(b) + dt = dpnp.result_type(b, rtol, atol) + _b = dpnp.abs(b, dtype=dt) + _b *= rtol + _b += atol + result = less_equal(dpnp.abs(a - b), _b) + + # Handle "inf" values: they are treated as equal if they are in the same + # place and of the same sign in both arrays + result &= isfinite(b) + result |= a == b + + if equal_nan: + result |= isnan(a) & isnan(b) + return result + + else: + + print("NEW") + + usm_type, exec_q = get_usm_allocations([a, b]) + out_dtype = dpnp.bool + output = dpnp.empty( + a.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type + ) + + _manager = dpu.SequentialOrderManager[exec_q] + mem_ev, ht_ev = ufi._isclose( + a.get_array(), + b.get_array(), + rtol, + atol, + equal_nan, + output.get_array(), + exec_q, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, ht_ev) + + return output + def iscomplex(x): From 745a28c9bfb7c998201f95ea2a835dbd89457926 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 20 Jun 2025 06:51:24 -0700 Subject: [PATCH 02/32] Init IsCloseContigFunctor with explicit T1, T2, resTy --- .../ufunc/elementwise_functions/isclose.cpp | 193 ++++++++++++++---- .../kernels/elementwise_functions/isclose.hpp | 150 +++++++------- 2 files changed, 231 insertions(+), 112 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 7f1d44aa49e1..33346be35058 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -67,30 +67,59 @@ namespace impl using dpctl::tensor::usm_ndarray; using event_vector = std::vector; -using isclose_contig_fn_ptr_t = sycl::event (*)( - sycl::queue &, std::size_t, - const py::object &, const py::object &, const py::object &, - const char *, const char *, char *, const std::vector &); +using isclose_contig_fn_ptr_t = + sycl::event (*)(sycl::queue &, + std::size_t, + const py::object &, + const py::object &, + const py::object &, + const char *, + const char *, + char *, + const std::vector &); -static isclose_contig_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types]; +static isclose_contig_fn_ptr_t isclose_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; template using value_type_of_t = typename value_type_of::type; -// Template for checking if T is supported -template +// // Template for checking if T is supported +// template +// struct IsCloseOutputType +// { +// using value_type = typename std::disjunction< +// td_ns::TypeMapResultEntry, +// td_ns::TypeMapResultEntry, +// td_ns::TypeMapResultEntry, +// td_ns::TypeMapResultEntry>, +// td_ns::TypeMapResultEntry>, +// td_ns::DefaultResultEntry>::result_type; +// }; + +// Supports only float and complex types +template struct IsCloseOutputType { using value_type = typename std::disjunction< - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry>, - td_ns::TypeMapResultEntry>, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; }; -template +template sycl::event isclose_contig_call(sycl::queue &q, std::size_t nelems, const py::object &rtol_, @@ -102,65 +131,145 @@ sycl::event isclose_contig_call(sycl::queue &q, const event_vector &depends) { using dpctl::tensor::type_utils::is_complex_v; - using scT = std::conditional_t, typename value_type_of::type, T>; + using scT = std::conditional_t, + typename value_type_of::type, T1>; const scT rtol = py::cast(rtol_); const scT atol = py::cast(atol_); const bool equal_nan = py::cast(equal_nan_); - return dpnp::kernels::isclose::isclose_contig_impl( - q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, depends); + return dpnp::kernels::isclose::isclose_contig_impl( + q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, + depends); } -template +template struct IsCloseContigFactory { fnT get() { - if constexpr (std::is_same_v::value_type, void>) { + if constexpr (!IsCloseOutputType::is_defined) { return nullptr; - } else { - return isclose_contig_call; + } + else { + return isclose_contig_call; } } }; -void populate_isclose_dispatch_vector() +void populate_isclose_dispatch_table() { - td_ns::DispatchVectorBuilder + td_ns::DispatchTableBuilder dvb; - dvb.populate_dispatch_vector(isclose_contig_dispatch_vector); + dvb.populate_dispatch_table(isclose_contig_dispatch_table); } -std::pair py_isclose( - const usm_ndarray &a, - const usm_ndarray &b, - const py::object &rtol, - const py::object &atol, - const py::object &equal_nan, - const usm_ndarray &dst, - sycl::queue &exec_q, - const std::vector &depends) +std::pair + py_isclose(const usm_ndarray &a, + const usm_ndarray &b, + const py::object &rtol, + const py::object &atol, + const py::object &equal_nan, + const usm_ndarray &res, + sycl::queue &exec_q, + const std::vector &depends) { auto types = td_ns::usm_ndarray_types(); - int typeid_ = types.typenum_to_lookup_id(a.get_typenum()); + int a_typeid = types.typenum_to_lookup_id(a.get_typenum()); + int b_typeid = types.typenum_to_lookup_id(b.get_typenum()); + + if (!dpctl::utils::queues_are_compatible(exec_q, {a, b, res})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(res); + + int res_nd = res.get_ndim(); + if (res_nd != a.get_ndim() || res_nd != b.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + const py::ssize_t *a_shape = a.get_shape_raw(); + const py::ssize_t *b_shape = b.get_shape_raw(); + const py::ssize_t *res_shape = res.get_shape_raw(); + bool shapes_equal(true); + std::size_t nelems(1); + + for (int i = 0; i < res_nd; ++i) { + nelems *= static_cast(a_shape[i]); + shapes_equal = shapes_equal && (a_shape[i] == res_shape[i] && + b_shape[i] == res_shape[i]); + } + if (!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, nelems); + + // check memory overlap + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if ((overlap(a, res) && !same_logical_tensors(a, res)) || + (overlap(b, res) && !same_logical_tensors(b, res))) + { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char *a_data = a.get_data(); + const char *b_data = b.get_data(); + char *res_data = res.get_data(); + + // handle contiguous inputs + bool is_a_c_contig = a.is_c_contiguous(); + bool is_a_f_contig = a.is_f_contiguous(); + + bool is_b_c_contig = b.is_c_contiguous(); + bool is_b_f_contig = b.is_f_contiguous(); - auto fn = isclose_contig_dispatch_vector[typeid_]; - auto comp_ev = fn(exec_q, a.get_size(), rtol, atol, equal_nan, - a.get_data(), b.get_data(), dst.get_data(), depends); + bool is_res_c_contig = res.is_c_contiguous(); + bool is_res_f_contig = res.is_f_contiguous(); - sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {a, b, dst}, {comp_ev}); - return {ht_ev, comp_ev}; + bool all_c_contig = (is_a_c_contig && is_b_c_contig && is_res_c_contig); + bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig); + + if (all_c_contig || all_f_contig) { + auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for a_typeid=" + + std::to_string(a_typeid) + + " and b_typeid=" + std::to_string(b_typeid)); + } + + auto comp_ev = contig_fn(exec_q, nelems, rtol, atol, equal_nan, a_data, + b_data, res_data, depends); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + else { + throw py::value_error("Stride implementation is not implemented"); + } } } // namespace impl void init_isclose(py::module_ m) { - impl::populate_isclose_dispatch_vector(); - m.def("_isclose", &impl::py_isclose, "", - py::arg("a"), py::arg("b"), py::arg("rtol"), py::arg("atol"), - py::arg("equal_nan"), py::arg("dst"), py::arg("sycl_queue"), + impl::populate_isclose_dispatch_table(); + m.def("_isclose", &impl::py_isclose, "", py::arg("a"), py::arg("b"), + py::arg("rtol"), py::arg("atol"), py::arg("equal_nan"), + py::arg("res"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 32196917aa36..788c569d7d99 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -23,7 +23,6 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** - // #pragma once // #include @@ -56,8 +55,8 @@ // } // } -// template -// struct IsCloseContigFunctor +// template struct IsCloseContigFunctor // { // const T *in1_; // const T *in2_; @@ -78,7 +77,8 @@ // { // const std::size_t gid = item.get_global_linear_id(); // if (gid < n_) { -// out_[gid] = isclose_scalar(in1_[gid], in2_[gid], rtol_, atol_, equal_nan_); +// out_[gid] = isclose_scalar(in1_[gid], in2_[gid], rtol_, +// atol_, equal_nan_); // } // } // }; @@ -117,7 +117,6 @@ // } // namespace dpnp::kernels::isclose - #pragma once #include @@ -136,10 +135,14 @@ namespace dpnp::kernels::isclose { template -inline T isclose(const T a, const T b, const T rtol, const T atol, const bool equal_nan) +inline bool isclose(const T a, + const T b, + const T rtol, + const T atol, + const bool equal_nan) { if (sycl::isnan(a) || sycl::isnan(b)) { - //static cast? + // static cast? return equal_nan && sycl::isnan(a) && sycl::isnan(b); } if (sycl::isinf(a) || sycl::isinf(b)) { @@ -148,28 +151,32 @@ inline T isclose(const T a, const T b, const T rtol, const T atol, const bool eq return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); } -template +template struct IsCloseFunctor { private: - const T *a_ = nullptr; - const T *b_ = nullptr; - T *out_ = nullptr; + const T1 *a_ = nullptr; + const T2 *b_ = nullptr; + resTy *out_ = nullptr; const InOutIndexerT inp_out_indexer_; const scT rtol_; const scT atol_; const bool equal_nan_; public: - IsCloseFunctor(const T *a, - const T *b, - T *out, + IsCloseFunctor(const T1 *a, + const T2 *b, + resTy *out, const InOutIndexerT &inp_out_indexer, const scT rtol, const scT atol, const bool equal_nan) - : a_(a), b_(b),out_(out), inp_out_indexer_(inp_out_indexer), rtol_(rtol), - atol_(atol), equal_nan_(equal_nan) + : a_(a), b_(b), out_(out), inp_out_indexer_(inp_out_indexer), + rtol_(rtol), atol_(atol), equal_nan_(equal_nan) { } @@ -180,45 +187,46 @@ struct IsCloseFunctor const dpctl::tensor::ssize_t &out_offset = offsets_.get_second_offset(); using dpctl::tensor::type_utils::is_complex_v; - if constexpr (is_complex_v) { - using realT = typename T::value_type; - static_assert(std::is_same_v); - T z_a = a_[inp_offset]; - T z_b = b_[inp_offset]; - realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); - realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); - out_[out_offset] = T{x, y}; + if constexpr (is_complex_v) { + T1 z_a = a_[inp_offset]; + T2 z_b = b_[inp_offset]; + bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); + bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); + out_[out_offset] = x && y; } else { - out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_, atol_, equal_nan_); + out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_, + atol_, equal_nan_); } } }; -template struct IsCloseContigFunctor { private: - const T *a_ = nullptr; - const T *b_ = nullptr; - T *out_ = nullptr; + const T1 *a_ = nullptr; + const T2 *b_ = nullptr; + resTy *out_ = nullptr; std::size_t nelems_; const scT rtol_; const scT atol_; const bool equal_nan_; public: - IsCloseContigFunctor(const T *a, - const T *b, - T *out, - const std::size_t n_elems, - const scT rtol, - const scT atol, - const bool equal_nan) + IsCloseContigFunctor(const T1 *a, + const T2 *b, + resTy *out, + const std::size_t n_elems, + const scT rtol, + const scT atol, + const bool equal_nan) : a_(a), b_(b), out_(out), nelems_(n_elems), rtol_(rtol), atol_(atol), equal_nan_(equal_nan) { @@ -231,7 +239,7 @@ struct IsCloseContigFunctor /* NOTE: work-group size must be divisible by sub-group size */ using dpctl::tensor::type_utils::is_complex_v; - if constexpr (enable_sg_loadstore && !is_complex_v) { + if constexpr (enable_sg_loadstore && !is_complex_v) { auto sg = ndit.get_sub_group(); const std::uint16_t sgSize = sg.get_max_local_range()[0]; const std::size_t base = @@ -241,7 +249,6 @@ struct IsCloseContigFunctor if (base + elems_per_wi * sgSize < nelems_) { using dpctl::tensor::sycl_utils::sub_group_load; using dpctl::tensor::sycl_utils::sub_group_store; - // sycl::vec res_vec; #pragma unroll for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { const std::size_t offset = base + it * sgSize; @@ -255,16 +262,16 @@ struct IsCloseContigFunctor sycl::access::address_space::global_space, sycl::access::decorated::yes>(&out_[offset]); - const sycl::vec a_vec = + const sycl::vec a_vec = sub_group_load(sg, a_multi_ptr); - const sycl::vec b_vec = + const sycl::vec b_vec = sub_group_load(sg, b_multi_ptr); - sycl::vec res_vec; + sycl::vec res_vec; #pragma unroll for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) { - res_vec[vec_id] = - isclose(a_vec[vec_id], b_vec[vec_id], rtol_, atol_, equal_nan_); + res_vec[vec_id] = isclose(a_vec[vec_id], b_vec[vec_id], + rtol_, atol_, equal_nan_); } sub_group_store(sg, res_vec, out_multi_ptr); } @@ -286,18 +293,18 @@ struct IsCloseContigFunctor (gid / sgSize) * (elems_per_sg - sgSize) + gid; const std::size_t end = std::min(nelems_, start + elems_per_sg); for (std::size_t offset = start; offset < end; offset += sgSize) { - if constexpr (is_complex_v) { - using realT = typename T::value_type; - static_assert(std::is_same_v); - - T z_a = a_[offset]; - T z_b = b_[offset]; - realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); - realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); - out_[offset] = T{x, y}; + if constexpr (is_complex_v) { + T1 z_a = a_[offset]; + T2 z_b = b_[offset]; + bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, + equal_nan_); + bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, + equal_nan_); + out_[offset] = x && y; } else { - out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_, equal_nan_); + out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_, + equal_nan_); } } } @@ -308,15 +315,13 @@ struct IsCloseContigFunctor // sycl::event nan_to_num_strided_impl(sycl::queue &q, // const size_t nelems, // const int nd, -// const dpctl::tensor::ssize_t *shape_strides, -// const scT nan, -// const scT posinf, -// const scT neginf, -// const char *in_cp, -// const dpctl::tensor::ssize_t in_offset, -// char *out_cp, -// const dpctl::tensor::ssize_t out_offset, -// const std::vector &depends) +// const dpctl::tensor::ssize_t +// *shape_strides, const scT nan, const scT +// posinf, const scT neginf, const char +// *in_cp, const dpctl::tensor::ssize_t +// in_offset, char *out_cp, const +// dpctl::tensor::ssize_t out_offset, const +// std::vector &depends) // { // dpctl::tensor::type_utils::validate_type_for_device(q); @@ -338,7 +343,8 @@ struct IsCloseContigFunctor // return comp_ev; // } -template @@ -367,9 +373,11 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q, const auto gws_range = sycl::range<1>(n_groups * lws); const auto lws_range = sycl::range<1>(lws); - const T *a_tp = reinterpret_cast(a_cp) + a_offset; - const T *b_tp = reinterpret_cast(b_cp) + b_offset; - T *out_tp = reinterpret_cast(out_cp) + out_offset; + const T1 *a_tp = reinterpret_cast(a_cp) + a_offset; + const T2 *b_tp = reinterpret_cast(b_cp) + b_offset; + + using resTy = bool; + resTy *out_tp = reinterpret_cast(out_cp) + out_offset; sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); @@ -381,8 +389,9 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q, is_aligned(out_tp)) { constexpr bool enable_sg_loadstore = true; - using IsCloseFunc = IsCloseContigFunctor; + using IsCloseFunc = + IsCloseContigFunctor; cgh.parallel_for( sycl::nd_range<1>(gws_range, lws_range), @@ -390,8 +399,9 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q, } else { constexpr bool disable_sg_loadstore = false; - using IsCloseFunc = IsCloseContigFunctor; + using IsCloseFunc = + IsCloseContigFunctor; cgh.parallel_for( sycl::nd_range<1>(gws_range, lws_range), From 295a848f59f8c6b364a3ee808e53d15dfcb849bc Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 23 Jun 2025 05:05:41 -0700 Subject: [PATCH 03/32] Handle scalar inputs and broadcast shapes in def isclose() --- dpnp/dpnp_iface_logic.py | 49 +++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 4ebee0a26ad9..fed871bad857 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -45,12 +45,11 @@ import dpctl.tensor as dpt import dpctl.tensor._tensor_elementwise_impl as ti import dpctl.utils as dpu - import numpy import dpnp -from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc import dpnp.backend.extensions.ufunc._ufunc_impl as ufi +from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc from .dpnp_utils import get_usm_allocations @@ -885,8 +884,6 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): if not new: - print("OLD") - # Firstly handle finite values: # result = absolute(a - b) <= atol + rtol * absolute(b) dt = dpnp.result_type(b, rtol, atol) @@ -906,12 +903,49 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): else: - print("NEW") + dt = dpnp.result_type(a, b, 1.0) + + if dpnp.isscalar(a): + usm_type = b.usm_type + exec_q = b.sycl_queue + a = dpnp.array( + a, + dt, + usm_type=usm_type, + sycl_queue=exec_q, + ) + elif dpnp.isscalar(b): + usm_type = a.usm_type + exec_q = a.sycl_queue + b = dpnp.array( + b, + dt, + usm_type=usm_type, + sycl_queue=exec_q, + ) + else: + usm_type, exec_q = get_usm_allocations([a, b]) + + # remove order="C" + a = dpnp.astype(a, dt, order="C", casting="same_kind", copy=False) + b = dpnp.astype(b, dt, order="C", casting="same_kind", copy=False) + + try: + res_shape = dpnp.broadcast_shapes(a.shape, b.shape) + except ValueError: + raise ValueError( + "operands could not be broadcast together with shapes " + f"{a.shape} and {b.shape}" + ) + + if a.shape != res_shape: + a = dpnp.broadcast_to(a, res_shape) + if b.shape != res_shape: + b = dpnp.broadcast_to(b, res_shape) - usm_type, exec_q = get_usm_allocations([a, b]) out_dtype = dpnp.bool output = dpnp.empty( - a.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type + res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type ) _manager = dpu.SequentialOrderManager[exec_q] @@ -930,7 +964,6 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): return output - def iscomplex(x): """ Returns a bool array, where ``True`` if input element is complex. From 1088fa7a37f52d41e9ebe572ed40f4d04db2110a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 24 Jun 2025 04:46:30 -0700 Subject: [PATCH 04/32] Use three_offsets_indexer in IsCloseFunctor --- .../kernels/elementwise_functions/isclose.hpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 788c569d7d99..881b166b85a6 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -155,14 +155,14 @@ template + typename ThreeOffsets_IndexerT> struct IsCloseFunctor { private: const T1 *a_ = nullptr; const T2 *b_ = nullptr; resTy *out_ = nullptr; - const InOutIndexerT inp_out_indexer_; + const ThreeOffsets_IndexerT three_offsets_indexer_; const scT rtol_; const scT atol_; const bool equal_nan_; @@ -171,31 +171,35 @@ struct IsCloseFunctor IsCloseFunctor(const T1 *a, const T2 *b, resTy *out, - const InOutIndexerT &inp_out_indexer, + const ThreeOffsets_IndexerT &inps_res_indexer, const scT rtol, const scT atol, const bool equal_nan) - : a_(a), b_(b), out_(out), inp_out_indexer_(inp_out_indexer), + : a_(a), b_(b), out_(out), three_offsets_indexer_(inps_res_indexer), rtol_(rtol), atol_(atol), equal_nan_(equal_nan) { } void operator()(sycl::id<1> wid) const { - const auto &offsets_ = inp_out_indexer_(wid.get(0)); - const dpctl::tensor::ssize_t &inp_offset = offsets_.get_first_offset(); - const dpctl::tensor::ssize_t &out_offset = offsets_.get_second_offset(); + const auto &three_offsets_ = three_offsets_indexer_(wid.get(0)); + const dpctl::tensor::ssize_t &inp1_offset = + three_offsets_.get_first_offset(); + const dpctl::tensor::ssize_t &inp2_offset = + three_offsets_.get_second_offset(); + const dpctl::tensor::ssize_t &out_offset = + three_offsets_.get_third_offset(); using dpctl::tensor::type_utils::is_complex_v; if constexpr (is_complex_v) { - T1 z_a = a_[inp_offset]; - T2 z_b = b_[inp_offset]; + T1 z_a = a_[inp1_offset]; + T2 z_b = b_[inp2_offset]; bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); out_[out_offset] = x && y; } else { - out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_, + out_[out_offset] = isclose(a_[inp1_offset], b_[inp2_offset], rtol_, atol_, equal_nan_); } } From 1151fc38a46aef98979b966f2f4aa428307840b6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 17 Jul 2025 03:36:54 -0700 Subject: [PATCH 05/32] Add strided implementation of isclose() --- .../ufunc/elementwise_functions/isclose.cpp | 168 +++++++++++++++++- .../kernels/elementwise_functions/isclose.hpp | 61 ++++--- 2 files changed, 198 insertions(+), 31 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 33346be35058..6f4041b9673a 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -67,6 +67,21 @@ namespace impl using dpctl::tensor::usm_ndarray; using event_vector = std::vector; +using isclose_fn_ptr_t = sycl::event (*)(sycl::queue &, + int, + std::size_t, + const py::ssize_t *, + const py::object &, + const py::object &, + const py::object &, + const char *, + py::ssize_t, + const char *, + py::ssize_t, + char *, + py::ssize_t, + const std::vector &); + using isclose_contig_fn_ptr_t = sycl::event (*)(sycl::queue &, std::size_t, @@ -78,6 +93,8 @@ using isclose_contig_fn_ptr_t = char *, const std::vector &); +static isclose_fn_ptr_t isclose_dispatch_table[td_ns::num_types] + [td_ns::num_types]; static isclose_contig_fn_ptr_t isclose_contig_dispatch_table[td_ns::num_types] [td_ns::num_types]; @@ -119,6 +136,35 @@ struct IsCloseOutputType static constexpr bool is_defined = !std::is_same_v; }; +template +sycl::event isclose_strided_call(sycl::queue &exec_q, + int nd, + std::size_t nelems, + const py::ssize_t *shape_strides, + const py::object &rtol_, + const py::object &atol_, + const py::object &equal_nan_, + const char *in1_p, + py::ssize_t in1_offset, + const char *in2_p, + py::ssize_t in2_offset, + char *out_p, + py::ssize_t out_offset, + const std::vector &depends) +{ + using dpctl::tensor::type_utils::is_complex_v; + using scT = std::conditional_t, + typename value_type_of::type, T1>; + + const scT rtol = py::cast(rtol_); + const scT atol = py::cast(atol_); + const bool equal_nan = py::cast(equal_nan_); + + return dpnp::kernels::isclose::isclose_strided_impl( + exec_q, nd, nelems, shape_strides, rtol, atol, equal_nan, in1_p, + in1_offset, in2_p, in2_offset, out_p, out_offset, depends); +} + template sycl::event isclose_contig_call(sycl::queue &q, std::size_t nelems, @@ -143,6 +189,22 @@ sycl::event isclose_contig_call(sycl::queue &q, depends); } +template +struct IsCloseFactory +{ + fnT get() + { + if constexpr (std::is_same_v< + typename IsCloseOutputType::value_type, void>) + { + return nullptr; + } + else { + return isclose_strided_call; + } + } +}; + template struct IsCloseContigFactory { @@ -159,10 +221,15 @@ struct IsCloseContigFactory void populate_isclose_dispatch_table() { + td_ns::DispatchTableBuilder + dvb1; + dvb1.populate_dispatch_table(isclose_dispatch_table); + td_ns::DispatchTableBuilder - dvb; - dvb.populate_dispatch_table(isclose_contig_dispatch_table); + dvb2; + dvb2.populate_dispatch_table(isclose_contig_dispatch_table); } std::pair @@ -257,9 +324,101 @@ std::pair return std::make_pair(ht_ev, comp_ev); } - else { - throw py::value_error("Stride implementation is not implemented"); + + // simplify iteration space + // if 1d with strides 1 - input is contig + // dispatch to strided + + std::cout << "Strided impl run" << std::endl; + + auto const &a_strides = a.get_strides_vector(); + auto const &b_strides = b.get_strides_vector(); + auto const &res_strides = res.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_a_strides; + shT simplified_b_strides; + shT simplified_res_strides; + py::ssize_t a_offset(0); + py::ssize_t b_offset(0); + py::ssize_t res_offset(0); + + int nd = res_nd; + const py::ssize_t *shape = a_shape; + + py_internal::simplify_iteration_space_3( + nd, shape, a_strides, b_strides, res_strides, + // output + simplified_shape, simplified_a_strides, simplified_b_strides, + simplified_res_strides, a_offset, b_offset, res_offset); + + if (nd == 1 && simplified_a_strides[0] == 1 && + simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1) + { + // Special case of contiguous data + auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for a_typeid=" + + std::to_string(a_typeid) + + " and b_typeid=" + std::to_string(b_typeid)); + } + + int a_elem_size = a.get_elemsize(); + int b_elem_size = b.get_elemsize(); + int res_elem_size = res.get_elemsize(); + auto comp_ev = contig_fn( + exec_q, nelems, rtol, atol, equal_nan, + a_data + a_elem_size * a_offset, b_data + b_elem_size * b_offset, + res_data + res_elem_size * res_offset, depends); + + sycl::event ht_ev = + dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + auto fn = isclose_dispatch_table[a_typeid][b_typeid]; + + if (fn == nullptr) { + throw std::runtime_error( + "isclose implementation is missing for a_typeid=" + + std::to_string(a_typeid) + + " and b_typeid=" + std::to_string(b_typeid)); } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + + std::vector host_tasks{}; + host_tasks.reserve(2); + + auto ptr_size_event_triple_ = device_allocate_and_pack( + exec_q, host_tasks, simplified_shape, simplified_a_strides, + simplified_b_strides, simplified_res_strides); + auto shape_strides_owner = std::move(std::get<0>(ptr_size_event_triple_)); + const sycl::event ©_shape_ev = std::get<2>(ptr_size_event_triple_); + const py::ssize_t *shape_strides = shape_strides_owner.get(); + + std::vector all_deps; + all_deps.reserve(depends.size() + 1); + all_deps.insert(all_deps.end(), depends.begin(), depends.end()); + all_deps.push_back(copy_shape_ev); + + sycl::event comp_ev = + fn(exec_q, nelems, nd, shape_strides, atol, rtol, equal_nan, a_data, + a_offset, b_data, b_offset, res_data, res_offset, all_deps); + + // async free of shape_strides temporary + sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( + exec_q, {comp_ev}, shape_strides_owner); + + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(exec_q, {a, b, res}, host_tasks), + comp_ev); } } // namespace impl @@ -267,6 +426,7 @@ std::pair void init_isclose(py::module_ m) { impl::populate_isclose_dispatch_table(); + m.def("_isclose", &impl::py_isclose, "", py::arg("a"), py::arg("b"), py::arg("rtol"), py::arg("atol"), py::arg("equal_nan"), py::arg("res"), py::arg("sycl_queue"), diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 881b166b85a6..988f67e46ccd 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -315,37 +315,44 @@ struct IsCloseContigFunctor } }; -// template -// sycl::event nan_to_num_strided_impl(sycl::queue &q, -// const size_t nelems, -// const int nd, -// const dpctl::tensor::ssize_t -// *shape_strides, const scT nan, const scT -// posinf, const scT neginf, const char -// *in_cp, const dpctl::tensor::ssize_t -// in_offset, char *out_cp, const -// dpctl::tensor::ssize_t out_offset, const -// std::vector &depends) -// { -// dpctl::tensor::type_utils::validate_type_for_device(q); +template +sycl::event isclose_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + const int nd, + const dpctl::tensor::ssize_t *shape_strides, + const scT rtol, + const scT atol, + const bool equal_nan, + const char *a_cp, + const dpctl::tensor::ssize_t a_offset, + const char *b_cp, + const dpctl::tensor::ssize_t b_offset, + char *out_cp, + const dpctl::tensor::ssize_t out_offset, + const std::vector &depends) +{ + dpctl::tensor::type_utils::validate_type_for_device(exec_q); -// const T *in_tp = reinterpret_cast(in_cp); -// T *out_tp = reinterpret_cast(out_cp); + const T1 *a_tp = reinterpret_cast(a_cp); + const T2 *b_tp = reinterpret_cast(b_cp); -// using InOutIndexerT = -// typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; -// const InOutIndexerT indexer{nd, in_offset, out_offset, shape_strides}; + using resTy = bool; + resTy *out_tp = reinterpret_cast(out_cp); -// sycl::event comp_ev = q.submit([&](sycl::handler &cgh) { -// cgh.depends_on(depends); + using IndexerT = + typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer; + const IndexerT indexer{nd, a_offset, b_offset, out_offset, shape_strides}; -// using NanToNumFunc = NanToNumFunctor; -// cgh.parallel_for( -// {nelems}, -// NanToNumFunc(in_tp, out_tp, indexer, nan, posinf, neginf)); -// }); -// return comp_ev; -// } + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using IsCloseFunc = IsCloseFunctor; + cgh.parallel_for( + {nelems}, + IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan)); + }); + return comp_ev; +} template Date: Thu, 24 Jul 2025 03:42:30 -0700 Subject: [PATCH 06/32] Update strided implementation and rename functions --- .../ufunc/elementwise_functions/isclose.cpp | 163 ++++++++------- .../kernels/elementwise_functions/isclose.hpp | 192 +++++------------- 2 files changed, 135 insertions(+), 220 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 6f4041b9673a..86f251a1450b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -67,36 +67,37 @@ namespace impl using dpctl::tensor::usm_ndarray; using event_vector = std::vector; -using isclose_fn_ptr_t = sycl::event (*)(sycl::queue &, - int, - std::size_t, - const py::ssize_t *, - const py::object &, - const py::object &, - const py::object &, - const char *, - py::ssize_t, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &); - -using isclose_contig_fn_ptr_t = +using isclose_strided_scalar_fn_ptr_t = sycl::event (*)(sycl::queue &, + int, std::size_t, + const py::ssize_t *, const py::object &, const py::object &, const py::object &, const char *, + py::ssize_t, const char *, + py::ssize_t, char *, + py::ssize_t, const std::vector &); -static isclose_fn_ptr_t isclose_dispatch_table[td_ns::num_types] - [td_ns::num_types]; -static isclose_contig_fn_ptr_t isclose_contig_dispatch_table[td_ns::num_types] - [td_ns::num_types]; +using isclose_contig_scalar_fn_ptr_t = + sycl::event (*)(sycl::queue &, + std::size_t, + const py::object &, + const py::object &, + const py::object &, + const char *, + const char *, + char *, + const std::vector &); + +static isclose_strided_scalar_fn_ptr_t + isclose_strided_scalar_dispatch_table[td_ns::num_types][td_ns::num_types]; +static isclose_contig_scalar_fn_ptr_t + isclose_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; template using value_type_of_t = typename value_type_of::type; @@ -137,60 +138,60 @@ struct IsCloseOutputType }; template -sycl::event isclose_strided_call(sycl::queue &exec_q, - int nd, - std::size_t nelems, - const py::ssize_t *shape_strides, - const py::object &rtol_, - const py::object &atol_, - const py::object &equal_nan_, - const char *in1_p, - py::ssize_t in1_offset, - const char *in2_p, - py::ssize_t in2_offset, - char *out_p, - py::ssize_t out_offset, - const std::vector &depends) +sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, + int nd, + std::size_t nelems, + const py::ssize_t *shape_strides, + const py::object &py_rtol, + const py::object &py_atol, + const py::object &py_equal_nan, + const char *in1_p, + py::ssize_t in1_offset, + const char *in2_p, + py::ssize_t in2_offset, + char *out_p, + py::ssize_t out_offset, + const std::vector &depends) { using dpctl::tensor::type_utils::is_complex_v; using scT = std::conditional_t, typename value_type_of::type, T1>; - const scT rtol = py::cast(rtol_); - const scT atol = py::cast(atol_); - const bool equal_nan = py::cast(equal_nan_); + const scT rtol = py::cast(py_rtol); + const scT atol = py::cast(py_atol); + const bool equal_nan = py::cast(py_equal_nan); - return dpnp::kernels::isclose::isclose_strided_impl( + return dpnp::kernels::isclose::isclose_strided_scalar_impl( exec_q, nd, nelems, shape_strides, rtol, atol, equal_nan, in1_p, in1_offset, in2_p, in2_offset, out_p, out_offset, depends); } template -sycl::event isclose_contig_call(sycl::queue &q, - std::size_t nelems, - const py::object &rtol_, - const py::object &atol_, - const py::object &equal_nan_, - const char *in1_p, - const char *in2_p, - char *out_p, - const event_vector &depends) +sycl::event isclose_contig_scalar_call(sycl::queue &q, + std::size_t nelems, + const py::object &py_rtol, + const py::object &py_atol, + const py::object &py_equal_nan, + const char *in1_p, + const char *in2_p, + char *out_p, + const event_vector &depends) { using dpctl::tensor::type_utils::is_complex_v; using scT = std::conditional_t, typename value_type_of::type, T1>; - const scT rtol = py::cast(rtol_); - const scT atol = py::cast(atol_); - const bool equal_nan = py::cast(equal_nan_); + const scT rtol = py::cast(py_rtol); + const scT atol = py::cast(py_atol); + const bool equal_nan = py::cast(py_equal_nan); - return dpnp::kernels::isclose::isclose_contig_impl( + return dpnp::kernels::isclose::isclose_contig_scalar_impl( q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, depends); } template -struct IsCloseFactory +struct IsCloseStridedScalarFactory { fnT get() { @@ -200,13 +201,13 @@ struct IsCloseFactory return nullptr; } else { - return isclose_strided_call; + return isclose_strided_scalar_call; } } }; template -struct IsCloseContigFactory +struct IsCloseContigScalarFactory { fnT get() { @@ -214,33 +215,33 @@ struct IsCloseContigFactory return nullptr; } else { - return isclose_contig_call; + return isclose_contig_scalar_call; } } }; void populate_isclose_dispatch_table() { - td_ns::DispatchTableBuilder + td_ns::DispatchTableBuilder dvb1; - dvb1.populate_dispatch_table(isclose_dispatch_table); + dvb1.populate_dispatch_table(isclose_strided_scalar_dispatch_table); - td_ns::DispatchTableBuilder + td_ns::DispatchTableBuilder dvb2; dvb2.populate_dispatch_table(isclose_contig_dispatch_table); } std::pair - py_isclose(const usm_ndarray &a, - const usm_ndarray &b, - const py::object &rtol, - const py::object &atol, - const py::object &equal_nan, - const usm_ndarray &res, - sycl::queue &exec_q, - const std::vector &depends) + py_isclose_scalar(const usm_ndarray &a, + const usm_ndarray &b, + const py::object &py_rtol, + const py::object &py_atol, + const py::object &py_equal_nan, + const usm_ndarray &res, + sycl::queue &exec_q, + const std::vector &depends) { auto types = td_ns::usm_ndarray_types(); int a_typeid = types.typenum_to_lookup_id(a.get_typenum()); @@ -317,8 +318,8 @@ std::pair " and b_typeid=" + std::to_string(b_typeid)); } - auto comp_ev = contig_fn(exec_q, nelems, rtol, atol, equal_nan, a_data, - b_data, res_data, depends); + auto comp_ev = contig_fn(exec_q, nelems, py_rtol, py_atol, py_equal_nan, + a_data, b_data, res_data, depends); sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev}); @@ -366,11 +367,13 @@ std::pair " and b_typeid=" + std::to_string(b_typeid)); } + std::cout << "Run contig impl in strided" << std::endl; + int a_elem_size = a.get_elemsize(); int b_elem_size = b.get_elemsize(); int res_elem_size = res.get_elemsize(); auto comp_ev = contig_fn( - exec_q, nelems, rtol, atol, equal_nan, + exec_q, nelems, py_rtol, py_atol, py_equal_nan, a_data + a_elem_size * a_offset, b_data + b_elem_size * b_offset, res_data + res_elem_size * res_offset, depends); @@ -380,15 +383,17 @@ std::pair return std::make_pair(ht_ev, comp_ev); } - auto fn = isclose_dispatch_table[a_typeid][b_typeid]; + auto strided_fn = isclose_strided_scalar_dispatch_table[a_typeid][b_typeid]; - if (fn == nullptr) { + if (strided_fn == nullptr) { throw std::runtime_error( "isclose implementation is missing for a_typeid=" + std::to_string(a_typeid) + " and b_typeid=" + std::to_string(b_typeid)); } + std::cout << "Run strided impl" << std::endl; + using dpctl::tensor::offset_utils::device_allocate_and_pack; std::vector host_tasks{}; @@ -406,9 +411,9 @@ std::pair all_deps.insert(all_deps.end(), depends.begin(), depends.end()); all_deps.push_back(copy_shape_ev); - sycl::event comp_ev = - fn(exec_q, nelems, nd, shape_strides, atol, rtol, equal_nan, a_data, - a_offset, b_data, b_offset, res_data, res_offset, all_deps); + sycl::event comp_ev = strided_fn( + exec_q, nd, nelems, shape_strides, py_rtol, py_atol, py_equal_nan, + a_data, a_offset, b_data, b_offset, res_data, res_offset, all_deps); // async free of shape_strides temporary sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( @@ -427,9 +432,9 @@ void init_isclose(py::module_ m) { impl::populate_isclose_dispatch_table(); - m.def("_isclose", &impl::py_isclose, "", py::arg("a"), py::arg("b"), - py::arg("rtol"), py::arg("atol"), py::arg("equal_nan"), - py::arg("res"), py::arg("sycl_queue"), + m.def("_isclose_scalar", &impl::py_isclose_scalar, "", py::arg("a"), + py::arg("b"), py::arg("py_rtol"), py::arg("py_atol"), + py::arg("py_equal_nan"), py::arg("res"), py::arg("sycl_queue"), py::arg("depends") = py::list()); } diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 988f67e46ccd..2eb52bbd7587 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -23,100 +23,6 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -// #pragma once - -// #include -// #include -// #include -// #include -// #include -// #include - -// // dpctl tensor headers -// #include "kernels/alignment.hpp" -// #include "utils/sycl_utils.hpp" -// #include "utils/type_utils.hpp" - -// namespace dpnp::kernels::isclose -// { - -// template -// inline bool isclose_scalar(T a, T b, scT rtol, scT atol, bool equal_nan) -// { -// if constexpr (dpctl::tensor::type_utils::is_complex_v) { -// return isclose_scalar(a.real(), b.real(), rtol, atol, equal_nan) && -// isclose_scalar(a.imag(), b.imag(), rtol, atol, equal_nan); -// } -// else { -// if (sycl::isnan(a) || sycl::isnan(b)) { -// return equal_nan && sycl::isnan(a) && sycl::isnan(b); -// } -// return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); -// } -// } - -// template struct IsCloseContigFunctor -// { -// const T *in1_; -// const T *in2_; -// bool *out_; -// std::size_t n_; -// scT rtol_; -// scT atol_; -// bool equal_nan_; - -// IsCloseContigFunctor(const T *in1, const T *in2, bool *out, -// std::size_t n, scT rtol, scT atol, bool equal_nan) -// : in1_(in1), in2_(in2), out_(out), -// n_(n), rtol_(rtol), atol_(atol), equal_nan_(equal_nan) -// { -// } - -// void operator()(sycl::nd_item<1> item) const -// { -// const std::size_t gid = item.get_global_linear_id(); -// if (gid < n_) { -// out_[gid] = isclose_scalar(in1_[gid], in2_[gid], rtol_, -// atol_, equal_nan_); -// } -// } -// }; - -// template -// sycl::event isclose_contig_impl(sycl::queue &q, -// std::size_t n, -// scT rtol, -// scT atol, -// bool equal_nan, -// const char *in1_p, -// const char *in2_p, -// char *out_p, -// const std::vector &depends) -// { -// dpctl::tensor::type_utils::validate_type_for_device(q); - -// const T *in1 = reinterpret_cast(in1_p); -// const T *in2 = reinterpret_cast(in2_p); -// bool *out = reinterpret_cast(out_p); - -// constexpr std::size_t wg_size = 128; -// const std::size_t n_wgs = (n + wg_size - 1) / wg_size; -// const sycl::range<1> gws{n_wgs * wg_size}; -// const sycl::range<1> lws{wg_size}; - -// using Kernel = IsCloseContigFunctor; - -// return q.submit([&](sycl::handler &cgh) { -// cgh.depends_on(depends); -// cgh.parallel_for( -// sycl::nd_range<1>(gws, lws), -// Kernel(in1, in2, out, n, rtol, atol, equal_nan)); -// }); -// } - -// } // namespace dpnp::kernels::isclose - #pragma once #include @@ -156,7 +62,7 @@ template -struct IsCloseFunctor +struct IsCloseStridedScalarFunctor { private: const T1 *a_ = nullptr; @@ -168,13 +74,13 @@ struct IsCloseFunctor const bool equal_nan_; public: - IsCloseFunctor(const T1 *a, - const T2 *b, - resTy *out, - const ThreeOffsets_IndexerT &inps_res_indexer, - const scT rtol, - const scT atol, - const bool equal_nan) + IsCloseStridedScalarFunctor(const T1 *a, + const T2 *b, + resTy *out, + const ThreeOffsets_IndexerT &inps_res_indexer, + const scT rtol, + const scT atol, + const bool equal_nan) : a_(a), b_(b), out_(out), three_offsets_indexer_(inps_res_indexer), rtol_(rtol), atol_(atol), equal_nan_(equal_nan) { @@ -212,7 +118,7 @@ template -struct IsCloseContigFunctor +struct IsCloseContigScalarFunctor { private: const T1 *a_ = nullptr; @@ -224,13 +130,13 @@ struct IsCloseContigFunctor const bool equal_nan_; public: - IsCloseContigFunctor(const T1 *a, - const T2 *b, - resTy *out, - const std::size_t n_elems, - const scT rtol, - const scT atol, - const bool equal_nan) + IsCloseContigScalarFunctor(const T1 *a, + const T2 *b, + resTy *out, + const std::size_t n_elems, + const scT rtol, + const scT atol, + const bool equal_nan) : a_(a), b_(b), out_(out), nelems_(n_elems), rtol_(rtol), atol_(atol), equal_nan_(equal_nan) { @@ -316,20 +222,21 @@ struct IsCloseContigFunctor }; template -sycl::event isclose_strided_impl(sycl::queue &exec_q, - std::size_t nelems, - const int nd, - const dpctl::tensor::ssize_t *shape_strides, - const scT rtol, - const scT atol, - const bool equal_nan, - const char *a_cp, - const dpctl::tensor::ssize_t a_offset, - const char *b_cp, - const dpctl::tensor::ssize_t b_offset, - char *out_cp, - const dpctl::tensor::ssize_t out_offset, - const std::vector &depends) +sycl::event + isclose_strided_scalar_impl(sycl::queue &exec_q, + const int nd, + std::size_t nelems, + const dpctl::tensor::ssize_t *shape_strides, + const scT rtol, + const scT atol, + const bool equal_nan, + const char *a_cp, + const dpctl::tensor::ssize_t a_offset, + const char *b_cp, + const dpctl::tensor::ssize_t b_offset, + char *out_cp, + const dpctl::tensor::ssize_t out_offset, + const std::vector &depends) { dpctl::tensor::type_utils::validate_type_for_device(exec_q); @@ -346,7 +253,8 @@ sycl::event isclose_strided_impl(sycl::queue &exec_q, sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - using IsCloseFunc = IsCloseFunctor; + using IsCloseFunc = + IsCloseStridedScalarFunctor; cgh.parallel_for( {nelems}, IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan)); @@ -359,18 +267,19 @@ template -sycl::event isclose_contig_impl(sycl::queue &exec_q, - std::size_t nelems, - const scT rtol, - const scT atol, - const bool equal_nan, - const char *a_cp, - ssize_t a_offset, - const char *b_cp, - ssize_t b_offset, - char *out_cp, - ssize_t out_offset, - const std::vector &depends = {}) +sycl::event + isclose_contig_scalar_impl(sycl::queue &exec_q, + std::size_t nelems, + const scT rtol, + const scT atol, + const bool equal_nan, + const char *a_cp, + ssize_t a_offset, + const char *b_cp, + ssize_t b_offset, + char *out_cp, + ssize_t out_offset, + const std::vector &depends = {}) { constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; const std::size_t n_work_items_needed = nelems / elems_per_wi; @@ -384,6 +293,7 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q, const auto gws_range = sycl::range<1>(n_groups * lws); const auto lws_range = sycl::range<1>(lws); + // ? + offset const T1 *a_tp = reinterpret_cast(a_cp) + a_offset; const T2 *b_tp = reinterpret_cast(b_cp) + b_offset; @@ -401,8 +311,8 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q, { constexpr bool enable_sg_loadstore = true; using IsCloseFunc = - IsCloseContigFunctor; + IsCloseContigScalarFunctor; cgh.parallel_for( sycl::nd_range<1>(gws_range, lws_range), @@ -411,8 +321,8 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q, else { constexpr bool disable_sg_loadstore = false; using IsCloseFunc = - IsCloseContigFunctor; + IsCloseContigScalarFunctor; cgh.parallel_for( sycl::nd_range<1>(gws_range, lws_range), From 29bcb344b876004b4abfb1b656c8e5d83c5bbf34 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 24 Jul 2025 04:24:45 -0700 Subject: [PATCH 07/32] Refactor isclose scalar dispatch to use dispatch vectors --- .../ufunc/elementwise_functions/isclose.cpp | 118 ++++++++---------- .../kernels/elementwise_functions/isclose.hpp | 61 +++++---- 2 files changed, 83 insertions(+), 96 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 86f251a1450b..e2dc5913f3fe 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -61,12 +61,10 @@ using ext::common::value_type_of; namespace dpnp::extensions::ufunc { + namespace impl { -using dpctl::tensor::usm_ndarray; -using event_vector = std::vector; - using isclose_strided_scalar_fn_ptr_t = sycl::event (*)(sycl::queue &, int, @@ -95,49 +93,32 @@ using isclose_contig_scalar_fn_ptr_t = const std::vector &); static isclose_strided_scalar_fn_ptr_t - isclose_strided_scalar_dispatch_table[td_ns::num_types][td_ns::num_types]; + isclose_strided_scalar_dispatch_vector[td_ns::num_types]; static isclose_contig_scalar_fn_ptr_t - isclose_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; + isclose_contig_dispatch_vector[td_ns::num_types]; template using value_type_of_t = typename value_type_of::type; -// // Template for checking if T is supported -// template -// struct IsCloseOutputType -// { -// using value_type = typename std::disjunction< -// td_ns::TypeMapResultEntry, -// td_ns::TypeMapResultEntry, -// td_ns::TypeMapResultEntry, -// td_ns::TypeMapResultEntry>, -// td_ns::TypeMapResultEntry>, -// td_ns::DefaultResultEntry>::result_type; -// }; - -// Supports only float and complex types -template +/** + * @brief A factory to define pairs of supported types for which + * isclose function is available. + * + * @tparam T Type of input vector `a` and `b` and of result vector `y`. + */ +template struct IsCloseOutputType { using value_type = typename std::disjunction< - td_ns::BinaryTypeMapResultEntry, - td_ns::BinaryTypeMapResultEntry, - td_ns::BinaryTypeMapResultEntry, - T2, - std::complex, - bool>, - td_ns::BinaryTypeMapResultEntry, - T2, - std::complex, - bool>, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, td_ns::DefaultResultEntry>::result_type; - - static constexpr bool is_defined = !std::is_same_v; }; -template +template sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, int nd, std::size_t nelems, @@ -154,19 +135,18 @@ sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, const std::vector &depends) { using dpctl::tensor::type_utils::is_complex_v; - using scT = std::conditional_t, - typename value_type_of::type, T1>; + using scT = std::conditional_t, value_type_of_t, T>; const scT rtol = py::cast(py_rtol); const scT atol = py::cast(py_atol); const bool equal_nan = py::cast(py_equal_nan); - return dpnp::kernels::isclose::isclose_strided_scalar_impl( + return dpnp::kernels::isclose::isclose_strided_scalar_impl( exec_q, nd, nelems, shape_strides, rtol, atol, equal_nan, in1_p, in1_offset, in2_p, in2_offset, out_p, out_offset, depends); } -template +template sycl::event isclose_contig_scalar_call(sycl::queue &q, std::size_t nelems, const py::object &py_rtol, @@ -175,71 +155,74 @@ sycl::event isclose_contig_scalar_call(sycl::queue &q, const char *in1_p, const char *in2_p, char *out_p, - const event_vector &depends) + const std::vector &depends) { using dpctl::tensor::type_utils::is_complex_v; - using scT = std::conditional_t, - typename value_type_of::type, T1>; + using scT = std::conditional_t, value_type_of_t, T>; const scT rtol = py::cast(py_rtol); const scT atol = py::cast(py_atol); const bool equal_nan = py::cast(py_equal_nan); - return dpnp::kernels::isclose::isclose_contig_scalar_impl( + return dpnp::kernels::isclose::isclose_contig_scalar_impl( q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, depends); } -template +template struct IsCloseStridedScalarFactory { fnT get() { - if constexpr (std::is_same_v< - typename IsCloseOutputType::value_type, void>) + if constexpr (std::is_same_v::value_type, + void>) { return nullptr; } else { - return isclose_strided_scalar_call; + return isclose_strided_scalar_call; } } }; -template +template struct IsCloseContigScalarFactory { fnT get() { - if constexpr (!IsCloseOutputType::is_defined) { + if constexpr (std::is_same_v::value_type, + void>) + { return nullptr; } else { - return isclose_contig_scalar_call; + return isclose_contig_scalar_call; } } }; -void populate_isclose_dispatch_table() +void populate_isclose_dispatch_vectors() { - td_ns::DispatchTableBuilder + using namespace td_ns; + + DispatchVectorBuilder dvb1; - dvb1.populate_dispatch_table(isclose_strided_scalar_dispatch_table); + dvb1.populate_dispatch_vector(isclose_strided_scalar_dispatch_vector); - td_ns::DispatchTableBuilder + DispatchVectorBuilder dvb2; - dvb2.populate_dispatch_table(isclose_contig_dispatch_table); + dvb2.populate_dispatch_vector(isclose_contig_dispatch_vector); } std::pair - py_isclose_scalar(const usm_ndarray &a, - const usm_ndarray &b, + py_isclose_scalar(const dpctl::tensor::usm_ndarray &a, + const dpctl::tensor::usm_ndarray &b, const py::object &py_rtol, const py::object &py_atol, const py::object &py_equal_nan, - const usm_ndarray &res, + const dpctl::tensor::usm_ndarray &res, sycl::queue &exec_q, const std::vector &depends) { @@ -247,6 +230,10 @@ std::pair int a_typeid = types.typenum_to_lookup_id(a.get_typenum()); int b_typeid = types.typenum_to_lookup_id(b.get_typenum()); + if (a_typeid != b_typeid) { + throw py::type_error("Array data types are not the same."); + } + if (!dpctl::utils::queues_are_compatible(exec_q, {a, b, res})) { throw py::value_error( "Execution queue is not compatible with allocation queues"); @@ -309,7 +296,8 @@ std::pair bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig); if (all_c_contig || all_f_contig) { - auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid]; + // a_typeid == b_typeid + auto contig_fn = isclose_contig_dispatch_vector[a_typeid]; if (contig_fn == nullptr) { throw std::runtime_error( @@ -358,7 +346,8 @@ std::pair simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1) { // Special case of contiguous data - auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid]; + // a_typeid == b_typeid + auto contig_fn = isclose_contig_dispatch_vector[a_typeid]; if (contig_fn == nullptr) { throw std::runtime_error( @@ -383,7 +372,8 @@ std::pair return std::make_pair(ht_ev, comp_ev); } - auto strided_fn = isclose_strided_scalar_dispatch_table[a_typeid][b_typeid]; + // a_typeid == b_typeid + auto strided_fn = isclose_strided_scalar_dispatch_vector[a_typeid]; if (strided_fn == nullptr) { throw std::runtime_error( @@ -430,7 +420,7 @@ std::pair void init_isclose(py::module_ m) { - impl::populate_isclose_dispatch_table(); + impl::populate_isclose_dispatch_vectors(); m.def("_isclose_scalar", &impl::py_isclose_scalar, "", py::arg("a"), py::arg("b"), py::arg("py_rtol"), py::arg("py_atol"), diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 2eb52bbd7587..4dedcbca32ea 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -57,16 +57,15 @@ inline bool isclose(const T a, return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); } -template struct IsCloseStridedScalarFunctor { private: - const T1 *a_ = nullptr; - const T2 *b_ = nullptr; + const T *a_ = nullptr; + const T *b_ = nullptr; resTy *out_ = nullptr; const ThreeOffsets_IndexerT three_offsets_indexer_; const scT rtol_; @@ -74,8 +73,8 @@ struct IsCloseStridedScalarFunctor const bool equal_nan_; public: - IsCloseStridedScalarFunctor(const T1 *a, - const T2 *b, + IsCloseStridedScalarFunctor(const T *a, + const T *b, resTy *out, const ThreeOffsets_IndexerT &inps_res_indexer, const scT rtol, @@ -97,9 +96,9 @@ struct IsCloseStridedScalarFunctor three_offsets_.get_third_offset(); using dpctl::tensor::type_utils::is_complex_v; - if constexpr (is_complex_v) { - T1 z_a = a_[inp1_offset]; - T2 z_b = b_[inp2_offset]; + if constexpr (is_complex_v) { + T z_a = a_[inp1_offset]; + T z_b = b_[inp2_offset]; bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); out_[out_offset] = x && y; @@ -111,8 +110,7 @@ struct IsCloseStridedScalarFunctor } }; -template ) { + if constexpr (enable_sg_loadstore && !is_complex_v) { auto sg = ndit.get_sub_group(); const std::uint16_t sgSize = sg.get_max_local_range()[0]; const std::size_t base = @@ -172,9 +170,9 @@ struct IsCloseContigScalarFunctor sycl::access::address_space::global_space, sycl::access::decorated::yes>(&out_[offset]); - const sycl::vec a_vec = + const sycl::vec a_vec = sub_group_load(sg, a_multi_ptr); - const sycl::vec b_vec = + const sycl::vec b_vec = sub_group_load(sg, b_multi_ptr); sycl::vec res_vec; @@ -203,9 +201,9 @@ struct IsCloseContigScalarFunctor (gid / sgSize) * (elems_per_sg - sgSize) + gid; const std::size_t end = std::min(nelems_, start + elems_per_sg); for (std::size_t offset = start; offset < end; offset += sgSize) { - if constexpr (is_complex_v) { - T1 z_a = a_[offset]; - T2 z_b = b_[offset]; + if constexpr (is_complex_v) { + T z_a = a_[offset]; + T z_b = b_[offset]; bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, @@ -221,7 +219,7 @@ struct IsCloseContigScalarFunctor } }; -template +template sycl::event isclose_strided_scalar_impl(sycl::queue &exec_q, const int nd, @@ -238,10 +236,10 @@ sycl::event const dpctl::tensor::ssize_t out_offset, const std::vector &depends) { - dpctl::tensor::type_utils::validate_type_for_device(exec_q); + dpctl::tensor::type_utils::validate_type_for_device(exec_q); - const T1 *a_tp = reinterpret_cast(a_cp); - const T2 *b_tp = reinterpret_cast(b_cp); + const T *a_tp = reinterpret_cast(a_cp); + const T *b_tp = reinterpret_cast(b_cp); using resTy = bool; resTy *out_tp = reinterpret_cast(out_cp); @@ -254,7 +252,7 @@ sycl::event cgh.depends_on(depends); using IsCloseFunc = - IsCloseStridedScalarFunctor; + IsCloseStridedScalarFunctor; cgh.parallel_for( {nelems}, IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan)); @@ -262,8 +260,7 @@ sycl::event return comp_ev; } -template @@ -294,8 +291,8 @@ sycl::event const auto lws_range = sycl::range<1>(lws); // ? + offset - const T1 *a_tp = reinterpret_cast(a_cp) + a_offset; - const T2 *b_tp = reinterpret_cast(b_cp) + b_offset; + const T *a_tp = reinterpret_cast(a_cp) + a_offset; + const T *b_tp = reinterpret_cast(b_cp) + b_offset; using resTy = bool; resTy *out_tp = reinterpret_cast(out_cp) + out_offset; @@ -311,7 +308,7 @@ sycl::event { constexpr bool enable_sg_loadstore = true; using IsCloseFunc = - IsCloseContigScalarFunctor; cgh.parallel_for( @@ -321,7 +318,7 @@ sycl::event else { constexpr bool disable_sg_loadstore = false; using IsCloseFunc = - IsCloseContigScalarFunctor; cgh.parallel_for( From fe1f5193bebdcb06c7e56de9cc0540aeceec1896 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 24 Jul 2025 05:11:50 -0700 Subject: [PATCH 08/32] Simplify isclose function pointer declarations --- .../ufunc/elementwise_functions/isclose.cpp | 63 +++++++++---------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index e2dc5913f3fe..4926dbb1e0d7 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -65,41 +65,40 @@ namespace dpnp::extensions::ufunc namespace impl { -using isclose_strided_scalar_fn_ptr_t = - sycl::event (*)(sycl::queue &, - int, - std::size_t, - const py::ssize_t *, - const py::object &, - const py::object &, - const py::object &, - const char *, - py::ssize_t, - const char *, - py::ssize_t, - char *, - py::ssize_t, - const std::vector &); - -using isclose_contig_scalar_fn_ptr_t = - sycl::event (*)(sycl::queue &, - std::size_t, - const py::object &, - const py::object &, - const py::object &, - const char *, - const char *, - char *, - const std::vector &); - -static isclose_strided_scalar_fn_ptr_t - isclose_strided_scalar_dispatch_vector[td_ns::num_types]; -static isclose_contig_scalar_fn_ptr_t - isclose_contig_dispatch_vector[td_ns::num_types]; - template using value_type_of_t = typename value_type_of::type; +typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)( + sycl::queue &, + int, // nd + std::size_t, // nelems + const py::ssize_t *, // shape_strides + const py::object &, // rtol + const py::object &, // atol + const py::object &, // equal_nan + const char *, // a + py::ssize_t, // a_offset + const char *, // b + py::ssize_t, // b_offset + char *, // out + py::ssize_t, // out_offset + const std::vector &); + +typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)( + sycl::queue &, + std::size_t, // nelems + const py::object &, // rtol + const py::object &, // atol + const py::object &, // equal_nan + const char *, // a + const char *, // b + char *, // out + const std::vector &); + +isclose_strided_scalar_fn_ptr_t + isclose_strided_scalar_dispatch_vector[td_ns::num_types]; +isclose_contig_scalar_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types]; + /** * @brief A factory to define pairs of supported types for which * isclose function is available. From 38d84010683ceb93e50e4291293f30767fae5072 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 24 Jul 2025 05:21:19 -0700 Subject: [PATCH 09/32] Restructure isclose.cpp --- .../ufunc/elementwise_functions/isclose.cpp | 158 +++++++++--------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 4926dbb1e0d7..da2933425b58 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -84,39 +84,6 @@ typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)( py::ssize_t, // out_offset const std::vector &); -typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)( - sycl::queue &, - std::size_t, // nelems - const py::object &, // rtol - const py::object &, // atol - const py::object &, // equal_nan - const char *, // a - const char *, // b - char *, // out - const std::vector &); - -isclose_strided_scalar_fn_ptr_t - isclose_strided_scalar_dispatch_vector[td_ns::num_types]; -isclose_contig_scalar_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types]; - -/** - * @brief A factory to define pairs of supported types for which - * isclose function is available. - * - * @tparam T Type of input vector `a` and `b` and of result vector `y`. - */ -template -struct IsCloseOutputType -{ - using value_type = typename std::disjunction< - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry, - td_ns::TypeMapResultEntry>, - td_ns::TypeMapResultEntry>, - td_ns::DefaultResultEntry>::result_type; -}; - template sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, int nd, @@ -145,6 +112,17 @@ sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, in1_offset, in2_p, in2_offset, out_p, out_offset, depends); } +typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)( + sycl::queue &, + std::size_t, // nelems + const py::object &, // rtol + const py::object &, // atol + const py::object &, // equal_nan + const char *, // a + const char *, // b + char *, // out + const std::vector &); + template sycl::event isclose_contig_scalar_call(sycl::queue &q, std::size_t nelems, @@ -168,52 +146,9 @@ sycl::event isclose_contig_scalar_call(sycl::queue &q, depends); } -template -struct IsCloseStridedScalarFactory -{ - fnT get() - { - if constexpr (std::is_same_v::value_type, - void>) - { - return nullptr; - } - else { - return isclose_strided_scalar_call; - } - } -}; - -template -struct IsCloseContigScalarFactory -{ - fnT get() - { - if constexpr (std::is_same_v::value_type, - void>) - { - return nullptr; - } - else { - return isclose_contig_scalar_call; - } - } -}; - -void populate_isclose_dispatch_vectors() -{ - using namespace td_ns; - - DispatchVectorBuilder - dvb1; - dvb1.populate_dispatch_vector(isclose_strided_scalar_dispatch_vector); - - DispatchVectorBuilder - dvb2; - dvb2.populate_dispatch_vector(isclose_contig_dispatch_vector); -} +isclose_strided_scalar_fn_ptr_t + isclose_strided_scalar_dispatch_vector[td_ns::num_types]; +isclose_contig_scalar_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types]; std::pair py_isclose_scalar(const dpctl::tensor::usm_ndarray &a, @@ -415,6 +350,71 @@ std::pair comp_ev); } +/** + * @brief A factory to define pairs of supported types for which + * isclose function is available. + * + * @tparam T Type of input vector `a` and `b` and of result vector `y`. + */ +template +struct IsCloseOutputType +{ + using value_type = typename std::disjunction< + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry, + td_ns::TypeMapResultEntry>, + td_ns::TypeMapResultEntry>, + td_ns::DefaultResultEntry>::result_type; +}; + +template +struct IsCloseStridedScalarFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) + { + return nullptr; + } + else { + return isclose_strided_scalar_call; + } + } +}; + +template +struct IsCloseContigScalarFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type, + void>) + { + return nullptr; + } + else { + return isclose_contig_scalar_call; + } + } +}; + +void populate_isclose_dispatch_vectors() +{ + using namespace td_ns; + + DispatchVectorBuilder + dvb1; + dvb1.populate_dispatch_vector(isclose_strided_scalar_dispatch_vector); + + DispatchVectorBuilder + dvb2; + dvb2.populate_dispatch_vector(isclose_contig_dispatch_vector); +} + } // namespace impl void init_isclose(py::module_ m) From 821065f58014e6450b3f9dd3fdc2821f759ffa2b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 24 Jul 2025 06:05:22 -0700 Subject: [PATCH 10/32] Use common validation utilities --- .../ufunc/elementwise_functions/isclose.cpp | 76 +++++++++---------- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index da2933425b58..ac463dd1051f 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -53,11 +53,21 @@ #include "utils/type_utils.hpp" #include "ext/common.hpp" +#include "ext/validation_utils.hpp" namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; using ext::common::value_type_of; +using ext::validation::array_names; + +using ext::common::dtype_from_typenum; +using ext::validation::check_has_dtype; +using ext::validation::check_no_overlap; +using ext::validation::check_num_dims; +using ext::validation::check_queue; +using ext::validation::check_same_dtype; +using ext::validation::check_same_size; namespace dpnp::extensions::ufunc { @@ -160,25 +170,22 @@ std::pair sycl::queue &exec_q, const std::vector &depends) { - auto types = td_ns::usm_ndarray_types(); - int a_typeid = types.typenum_to_lookup_id(a.get_typenum()); - int b_typeid = types.typenum_to_lookup_id(b.get_typenum()); + array_names names = {{&a, "a"}, {&b, "b"}, {&res, "res"}}; - if (a_typeid != b_typeid) { - throw py::type_error("Array data types are not the same."); - } + check_same_dtype(&a, &b, names); + check_has_dtype(&res, td_ns::typenum_t::BOOL, names); - if (!dpctl::utils::queues_are_compatible(exec_q, {a, b, res})) { - throw py::value_error( - "Execution queue is not compatible with allocation queues"); - } + check_same_size({&a, &b, &res}, names); + int res_nd = res.get_ndim(); + check_num_dims({&a, &b}, res_nd, names); + check_queue({&a, &b, &res}, names, exec_q); + check_no_overlap({&a, &b}, {&res}, names); dpctl::tensor::validation::CheckWritable::throw_if_not_writable(res); - int res_nd = res.get_ndim(); - if (res_nd != a.get_ndim() || res_nd != b.get_ndim()) { - throw py::value_error("Array dimensions are not the same."); - } + auto types = td_ns::usm_ndarray_types(); + // a_typeid == b_typeid (check_same_dtype(&a, &b, names)) + int a_b_typeid = types.typenum_to_lookup_id(a.get_typenum()); const py::ssize_t *a_shape = a.get_shape_raw(); const py::ssize_t *b_shape = b.get_shape_raw(); @@ -192,7 +199,7 @@ std::pair b_shape[i] == res_shape[i]); } if (!shapes_equal) { - throw py::value_error("Array shapes are not the same."); + throw py::value_error("Array shapes are not the same"); } // if nelems is zero, return @@ -202,16 +209,6 @@ std::pair dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, nelems); - // check memory overlap - auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); - auto const &same_logical_tensors = - dpctl::tensor::overlap::SameLogicalTensors(); - if ((overlap(a, res) && !same_logical_tensors(a, res)) || - (overlap(b, res) && !same_logical_tensors(b, res))) - { - throw py::value_error("Arrays index overlapping segments of memory"); - } - const char *a_data = a.get_data(); const char *b_data = b.get_data(); char *res_data = res.get_data(); @@ -230,14 +227,13 @@ std::pair bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig); if (all_c_contig || all_f_contig) { - // a_typeid == b_typeid - auto contig_fn = isclose_contig_dispatch_vector[a_typeid]; + auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid]; if (contig_fn == nullptr) { + py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid); throw std::runtime_error( - "Contiguous implementation is missing for a_typeid=" + - std::to_string(a_typeid) + - " and b_typeid=" + std::to_string(b_typeid)); + "Contiguous implementation is missing for " + + std::string(py::str(a_b_dtype_py)) + "data type"); } auto comp_ev = contig_fn(exec_q, nelems, py_rtol, py_atol, py_equal_nan, @@ -280,14 +276,13 @@ std::pair simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1) { // Special case of contiguous data - // a_typeid == b_typeid - auto contig_fn = isclose_contig_dispatch_vector[a_typeid]; + auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid]; if (contig_fn == nullptr) { + py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid); throw std::runtime_error( - "Contiguous implementation is missing for a_typeid=" + - std::to_string(a_typeid) + - " and b_typeid=" + std::to_string(b_typeid)); + "Contiguous implementation is missing for " + + std::string(py::str(a_b_dtype_py)) + "data type"); } std::cout << "Run contig impl in strided" << std::endl; @@ -306,14 +301,13 @@ std::pair return std::make_pair(ht_ev, comp_ev); } - // a_typeid == b_typeid - auto strided_fn = isclose_strided_scalar_dispatch_vector[a_typeid]; + auto strided_fn = isclose_strided_scalar_dispatch_vector[a_b_typeid]; if (strided_fn == nullptr) { - throw std::runtime_error( - "isclose implementation is missing for a_typeid=" + - std::to_string(a_typeid) + - " and b_typeid=" + std::to_string(b_typeid)); + py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid); + throw std::runtime_error("Strided implementation is missing for " + + std::string(py::str(a_b_dtype_py)) + + "data type"); } std::cout << "Run strided impl" << std::endl; From 4df11369663aa251b42c8bc100f22dab6e7703e7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 25 Jul 2025 03:12:00 -0700 Subject: [PATCH 11/32] Fix argument order in IsCloseStridedScalarFunctor constructor --- dpnp/backend/kernels/elementwise_functions/isclose.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 4dedcbca32ea..338f1a2c8929 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -255,7 +255,7 @@ sycl::event IsCloseStridedScalarFunctor; cgh.parallel_for( {nelems}, - IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan)); + IsCloseFunc(a_tp, b_tp, out_tp, indexer, rtol, atol, equal_nan)); }); return comp_ev; } From d80eab71bb8de83fddff8be216680478620445c7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 25 Jul 2025 06:13:38 -0700 Subject: [PATCH 12/32] Cleanup isclose.cpp --- .../extensions/ufunc/elementwise_functions/isclose.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index ac463dd1051f..4666ba9b63dd 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -248,8 +248,6 @@ std::pair // if 1d with strides 1 - input is contig // dispatch to strided - std::cout << "Strided impl run" << std::endl; - auto const &a_strides = a.get_strides_vector(); auto const &b_strides = b.get_strides_vector(); auto const &res_strides = res.get_strides_vector(); @@ -285,8 +283,6 @@ std::pair std::string(py::str(a_b_dtype_py)) + "data type"); } - std::cout << "Run contig impl in strided" << std::endl; - int a_elem_size = a.get_elemsize(); int b_elem_size = b.get_elemsize(); int res_elem_size = res.get_elemsize(); @@ -310,8 +306,6 @@ std::pair "data type"); } - std::cout << "Run strided impl" << std::endl; - using dpctl::tensor::offset_utils::device_allocate_and_pack; std::vector host_tasks{}; From 8873008476f70bfc874d350426bb01eb0d3650b3 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 25 Jul 2025 06:31:45 -0700 Subject: [PATCH 13/32] Add support rtol/atol as complex to match NumPy --- dpnp/dpnp_iface_logic.py | 77 +++++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 36 deletions(-) diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index fed871bad857..1052ac40d3ac 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -773,7 +773,7 @@ def array_equiv(a1, a2): ) -def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): +def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): """ Returns a boolean array where two arrays are element-wise equal within a tolerance. @@ -872,37 +872,8 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): rtol, atol, scalar_type=True, all_scalars=True ) - # make sure b is an inexact type to avoid bad behavior on abs(MIN_INT) - if dpnp.isscalar(b): - dt = dpnp.result_type(a, b, 1.0, rtol, atol) - b = dpnp.asarray( - b, dtype=dt, sycl_queue=a.sycl_queue, usm_type=a.usm_type - ) - elif dpnp.issubdtype(b, dpnp.integer): - dt = dpnp.result_type(b, 1.0, rtol, atol) - b = dpnp.astype(b, dt) - - if not new: - - # Firstly handle finite values: - # result = absolute(a - b) <= atol + rtol * absolute(b) - dt = dpnp.result_type(b, rtol, atol) - _b = dpnp.abs(b, dtype=dt) - _b *= rtol - _b += atol - result = less_equal(dpnp.abs(a - b), _b) - - # Handle "inf" values: they are treated as equal if they are in the same - # place and of the same sign in both arrays - result &= isfinite(b) - result |= a == b - - if equal_nan: - result |= isnan(a) & isnan(b) - return result - - else: - + # Use own SYCL kernel for scalar rtol/atol + if dpnp.isscalar(rtol) and dpnp.isscalar(atol): dt = dpnp.result_type(a, b, 1.0) if dpnp.isscalar(a): @@ -926,10 +897,17 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): else: usm_type, exec_q = get_usm_allocations([a, b]) - # remove order="C" - a = dpnp.astype(a, dt, order="C", casting="same_kind", copy=False) - b = dpnp.astype(b, dt, order="C", casting="same_kind", copy=False) + a = dpnp.astype(a, dt, casting="same_kind", copy=False) + b = dpnp.astype(b, dt, casting="same_kind", copy=False) + # Convert complex rtol/atol to real values + # to avoid pybind11 cast errors and match NumPy behavior + if isinstance(rtol, complex): + rtol = abs(rtol) + if isinstance(atol, complex): + atol = abs(atol) + + # pylint: disable=W0707 try: res_shape = dpnp.broadcast_shapes(a.shape, b.shape) except ValueError: @@ -949,7 +927,7 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): ) _manager = dpu.SequentialOrderManager[exec_q] - mem_ev, ht_ev = ufi._isclose( + mem_ev, ht_ev = ufi._isclose_scalar( a.get_array(), b.get_array(), rtol, @@ -963,6 +941,33 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False): return output + # make sure b is an inexact type to avoid bad behavior on abs(MIN_INT) + if dpnp.isscalar(b): + dt = dpnp.result_type(a, b, 1.0, rtol, atol) + b = dpnp.asarray( + b, dtype=dt, sycl_queue=a.sycl_queue, usm_type=a.usm_type + ) + elif dpnp.issubdtype(b, dpnp.integer): + dt = dpnp.result_type(b, 1.0, rtol, atol) + b = dpnp.astype(b, dt) + + # Firstly handle finite values: + # result = absolute(a - b) <= atol + rtol * absolute(b) + dt = dpnp.result_type(b, rtol, atol) + _b = dpnp.abs(b, dtype=dt) + _b *= rtol + _b += atol + result = less_equal(dpnp.abs(a - b), _b) + + # Handle "inf" values: they are treated as equal if they are in the same + # place and of the same sign in both arrays + result &= isfinite(b) + result |= a == b + + if equal_nan: + result |= isnan(a) & isnan(b) + return result + def iscomplex(x): """ From 86c99cd021510e591a2a85813f18f6e79cfaaf5a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 25 Jul 2025 08:34:35 -0700 Subject: [PATCH 14/32] Expand and update tests for dpnp.isclose() --- dpnp/tests/test_logic.py | 133 ++++++++++++++++++++++++++++++++++----- 1 file changed, 118 insertions(+), 15 deletions(-) diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py index 4281279da450..c1a068724938 100644 --- a/dpnp/tests/test_logic.py +++ b/dpnp/tests/test_logic.py @@ -515,23 +515,126 @@ def test_infinity_sign_errors(func): getattr(dpnp, func)(x, out=out) -@pytest.mark.parametrize("dtype", get_integer_float_dtypes()) -@pytest.mark.parametrize( - "rtol", [1e-05, dpnp.array(1e-05), dpnp.full(10, 1e-05)] -) -@pytest.mark.parametrize( - "atol", [1e-08, dpnp.array(1e-08), dpnp.full(10, 1e-08)] -) -def test_isclose(dtype, rtol, atol): - a = numpy.random.rand(10) - b = a + numpy.random.rand(10) * 1e-8 +class TestIsClose: + @pytest.mark.parametrize("val", [1.0, numpy.inf, -numpy.inf, numpy.nan]) + def test_input_0d(self, val): + dp_arr = dpnp.array(val) + np_arr = numpy.array(val) - dpnp_a = dpnp.array(a, dtype=dtype) - dpnp_b = dpnp.array(b, dtype=dtype) + # array & scalar + dp_res = dpnp.isclose(dp_arr, val) + np_res = numpy.isclose(np_arr, val) + assert_allclose(dp_res, np_res) - np_res = numpy.isclose(a, b, 1e-05, 1e-08) - dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol, atol) - assert_allclose(dpnp_res, np_res) + # scalar & array + dp_res = dpnp.isclose(val, dp_arr) + np_res = numpy.isclose(val, np_arr) + assert_allclose(dp_res, np_res) + + # array & array + dp_res = dpnp.isclose(dp_arr, dp_arr) + np_res = numpy.isclose(np_arr, np_arr) + assert_allclose(dp_res, np_res) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "rtol", [1e-5, dpnp.array(1e-5), dpnp.full((10,), 1e-5)] + ) + @pytest.mark.parametrize( + "atol", [1e-8, dpnp.array(1e-8), dpnp.full((10,), 1e-8)] + ) + def test_isclose(self, dtype, rtol, atol): + a = numpy.random.rand(10) + b = a + numpy.random.rand(10) * 1e-8 + + dpnp_a = dpnp.array(a, dtype=dtype) + dpnp_b = dpnp.array(b, dtype=dtype) + + np_res = numpy.isclose(a, b, rtol=1e-5, atol=1e-8) + dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol=rtol, atol=atol) + assert_allclose(dpnp_res, np_res) + + @pytest.mark.parametrize( + "sh_a, sh_b", + [ + ((10,), (1,)), + ((3, 1, 5), (3, 5)), + ((3, 1, 5), (1, 3, 5)), + ((1, 10), (10,)), + ], + ) + def test_broadcast_shapes(self, sh_a, sh_b): + a_np = numpy.ones(sh_a) + b_np = numpy.ones(sh_b) + + a_dp = dpnp.ones(sh_a) + b_dp = dpnp.ones(sh_b) + + np_res = numpy.isclose(a_np, b_np) + dp_res = dpnp.isclose(a_dp, b_dp) + assert_allclose(dp_res, np_res) + + def test_equal_nan(self): + a = numpy.array([numpy.nan, 1.0]) + b = numpy.array([numpy.nan, 1.0]) + + dp_a = dpnp.array(a) + dp_b = dpnp.array(b) + + np_res = numpy.isclose(a, b, equal_nan=True) + dp_res = dpnp.isclose(dp_a, dp_b, equal_nan=True) + assert_allclose(dp_res, np_res) + + def test_rtol_atol_arrays(self): + a = numpy.array([2.1, 2.1, 2.1, 2.1, 5, numpy.nan]) + b = numpy.array([2, 2, 2, 2, numpy.nan, 5]) + atol = numpy.array([0.11, 0.09, 1e-8, 1e-8, 1, 1]) + rtol = numpy.array([1e-8, 1e-8, 0.06, 0.04, 1, 1]) + + dp_a = dpnp.array(a) + dp_b = dpnp.array(b) + dp_rtol = dpnp.array(rtol) + dp_atol = dpnp.array(atol) + + np_res = numpy.isclose(a, b, rtol=rtol, atol=atol) + dp_res = dpnp.isclose(dp_a, dp_b, rtol=dp_rtol, atol=dp_atol) + assert_allclose(dp_res, np_res) + + @pytest.mark.parametrize( + "rtol, atol", + [ + (1e-05 + 1j, 1e-08), + (1e-05, 1e-08 + 1j), + (1e-05 + 1j, 1e-08 + 1j), + ], + ) + def test_rtol_atol_complex(self, rtol, atol): + a = dpnp.array([1.0, 2.0]) + b = dpnp.array([1.0, 2.0 + 1e-7]) + + dpnp_res = dpnp.isclose(a, b, rtol=rtol, atol=atol) + np_res = numpy.isclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol) + assert_allclose(dpnp_res, np_res) + + def test_rtol_atol_nep50_broadcasting(self): + below_one = float(1.0 - numpy.finfo("f8").eps) + f32 = numpy.array(below_one, dtype="f4") + dp_f32 = dpnp.array(f32) + + assert_allclose( + dpnp.isclose(dp_f32, below_one, rtol=0, atol=0), + numpy.isclose(f32, below_one, rtol=0, atol=0), + ) + + def test_invalid_input(self): + # unsupported type + assert_raises(TypeError, dpnp.isclose, 1.0, 1.0) + assert_raises(TypeError, dpnp.isclose, [1.0], [1.0]) + + # broadcast error + assert_raises( + ValueError, dpnp.isclose, dpnp.ones((10,)), dpnp.ones((3, 5)) + ) @pytest.mark.parametrize("a", [numpy.array([1, 2]), numpy.array([1, 1])]) From 28745204bcc1a89002ebb5a3895a19ac92823c60 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 25 Jul 2025 08:44:53 -0700 Subject: [PATCH 15/32] Update copyright year --- dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp | 2 +- dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp | 2 +- dpnp/backend/kernels/elementwise_functions/isclose.hpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 4666ba9b63dd..a23dced2b9ca 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2024, Intel Corporation +// Copyright (c) 2025, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp index 2a409779cb3a..5216db4f3a29 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2024-2025, Intel Corporation +// Copyright (c) 2025, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 338f1a2c8929..137c574bfeb0 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2024-2025, Intel Corporation +// Copyright (c) 2025, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without From 68b38785bba2b584ee089457f8f6f2ef740e5ab2 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 25 Jul 2025 08:52:48 -0700 Subject: [PATCH 16/32] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2acb9cd4a5eb..06af8aea95e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Replaced the use of `numpy.testing.suppress_warnings` with appropriate calls from the warnings module [#2529](https://github.com/IntelPython/dpnp/pull/2529) * Improved documentations of `dpnp.ndarray` class and added a page with description of supported constants [#2422](https://github.com/IntelPython/dpnp/pull/2422) * Updated `dpnp.size` to accept tuple of ints for `axes` argument [#2536](https://github.com/IntelPython/dpnp/pull/2536) +* Improved performance of `dpnp.isclose` function by implementing a dedicated kernel for scalar `rtol` and `atol` arguments [#2540](https://github.com/IntelPython/dpnp/pull/2540) ### Deprecated From ddd2ec02f21c18f83c2f91cc2583c7941768176e Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 03:22:26 -0700 Subject: [PATCH 17/32] Fix incorrect complex isclose logic and add proper overload --- .../kernels/elementwise_functions/isclose.hpp | 74 +++++++++++-------- 1 file changed, 43 insertions(+), 31 deletions(-) diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 137c574bfeb0..4be6c103e5c7 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -25,6 +25,13 @@ #pragma once +#define SYCL_EXT_ONEAPI_COMPLEX +#if __has_include() +#include +#else +#include +#endif + #include #include #include @@ -39,6 +46,7 @@ namespace dpnp::kernels::isclose { +namespace exprm_ns = sycl::ext::oneapi::experimental; template inline bool isclose(const T a, @@ -47,14 +55,39 @@ inline bool isclose(const T a, const T atol, const bool equal_nan) { - if (sycl::isnan(a) || sycl::isnan(b)) { - // static cast? - return equal_nan && sycl::isnan(a) && sycl::isnan(b); + if (sycl::isfinite(a) && sycl::isfinite(b)) { + return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); + } + + if (sycl::isnan(a) && sycl::isnan(b)) { + return equal_nan; + } + + return a == b; +} + +template +inline bool isclose(const std::complex a, + const std::complex b, + const T rtol, + const T atol, + const bool equal_nan) +{ + const bool a_finite = sycl::isfinite(a.real()) && sycl::isfinite(a.imag()); + const bool b_finite = sycl::isfinite(b.real()) && sycl::isfinite(b.imag()); + + if (a_finite && b_finite) { + return exprm_ns::abs(exprm_ns::complex(a - b)) <= + atol + rtol * exprm_ns::abs(exprm_ns::complex(b)); } - if (sycl::isinf(a) || sycl::isinf(b)) { - return a == b; + + if (sycl::isnan(a.real()) && sycl::isnan(a.imag()) && + sycl::isnan(b.real()) && sycl::isnan(b.imag())) + { + return equal_nan; } - return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); + + return a == b; } template ) { - T z_a = a_[inp1_offset]; - T z_b = b_[inp2_offset]; - bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_); - bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_); - out_[out_offset] = x && y; - } - else { - out_[out_offset] = isclose(a_[inp1_offset], b_[inp2_offset], rtol_, - atol_, equal_nan_); - } + out_[out_offset] = + isclose(a_[inp1_offset], b_[inp2_offset], rtol_, atol_, equal_nan_); } }; @@ -201,19 +224,8 @@ struct IsCloseContigScalarFunctor (gid / sgSize) * (elems_per_sg - sgSize) + gid; const std::size_t end = std::min(nelems_, start + elems_per_sg); for (std::size_t offset = start; offset < end; offset += sgSize) { - if constexpr (is_complex_v) { - T z_a = a_[offset]; - T z_b = b_[offset]; - bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, - equal_nan_); - bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, - equal_nan_); - out_[offset] = x && y; - } - else { - out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_, - equal_nan_); - } + out_[offset] = + isclose(a_[offset], b_[offset], rtol_, atol_, equal_nan_); } } } From 819262d40629ed5075f2c9e36a95cd60ef66ab93 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 03:31:32 -0700 Subject: [PATCH 18/32] Make immutable variables const --- .../ufunc/elementwise_functions/isclose.cpp | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index a23dced2b9ca..752273fe085a 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -80,8 +80,8 @@ using value_type_of_t = typename value_type_of::type; typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)( sycl::queue &, - int, // nd - std::size_t, // nelems + const int, // nd + const std::size_t, // nelems const py::ssize_t *, // shape_strides const py::object &, // rtol const py::object &, // atol @@ -96,8 +96,8 @@ typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)( template sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, - int nd, - std::size_t nelems, + const int nd, + const std::size_t nelems, const py::ssize_t *shape_strides, const py::object &py_rtol, const py::object &py_atol, @@ -124,7 +124,7 @@ sycl::event isclose_strided_scalar_call(sycl::queue &exec_q, typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)( sycl::queue &, - std::size_t, // nelems + const std::size_t, // nelems const py::object &, // rtol const py::object &, // atol const py::object &, // equal_nan @@ -135,7 +135,7 @@ typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)( template sycl::event isclose_contig_scalar_call(sycl::queue &q, - std::size_t nelems, + const std::size_t nelems, const py::object &py_rtol, const py::object &py_atol, const py::object &py_equal_nan, @@ -214,17 +214,19 @@ std::pair char *res_data = res.get_data(); // handle contiguous inputs - bool is_a_c_contig = a.is_c_contiguous(); - bool is_a_f_contig = a.is_f_contiguous(); + const bool is_a_c_contig = a.is_c_contiguous(); + const bool is_a_f_contig = a.is_f_contiguous(); - bool is_b_c_contig = b.is_c_contiguous(); - bool is_b_f_contig = b.is_f_contiguous(); + const bool is_b_c_contig = b.is_c_contiguous(); + const bool is_b_f_contig = b.is_f_contiguous(); - bool is_res_c_contig = res.is_c_contiguous(); - bool is_res_f_contig = res.is_f_contiguous(); + const bool is_res_c_contig = res.is_c_contiguous(); + const bool is_res_f_contig = res.is_f_contiguous(); - bool all_c_contig = (is_a_c_contig && is_b_c_contig && is_res_c_contig); - bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig); + const bool all_c_contig = + (is_a_c_contig && is_b_c_contig && is_res_c_contig); + const bool all_f_contig = + (is_a_f_contig && is_b_f_contig && is_res_f_contig); if (all_c_contig || all_f_contig) { auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid]; @@ -362,8 +364,7 @@ struct IsCloseStridedScalarFactory fnT get() { if constexpr (std::is_same_v::value_type, - void>) - { + void>) { return nullptr; } else { @@ -378,8 +379,7 @@ struct IsCloseContigScalarFactory fnT get() { if constexpr (std::is_same_v::value_type, - void>) - { + void>) { return nullptr; } else { From 8691551fe60b108c66954f9c591d79b9cfd0c4d8 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 04:22:59 -0700 Subject: [PATCH 19/32] Extend check_no_overlap to handle same logical tensors --- .../common/ext/details/validation_utils_internal.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp index 2ff800aced59..816ea0453ade 100644 --- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp +++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp @@ -114,8 +114,10 @@ inline void check_no_overlap(const array_ptr &input, } const auto &overlap = dpctl::tensor::overlap::MemoryOverlap(); + const auto &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); - if (overlap(*input, *output)) { + if (overlap(*input, *output) && !same_logical_tensors(*input, *output)) { throw py::value_error(name_of(input, names) + " has overlapping memory segments with " + name_of(output, names)); From 15685e8295a39c978bd971ad2a88477805e91f90 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 04:39:52 -0700 Subject: [PATCH 20/32] Fix rtol/atol conversion getting real part of complex to match NumPy --- dpnp/dpnp_iface_logic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 1052ac40d3ac..ab37689961e6 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -900,12 +900,12 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): a = dpnp.astype(a, dt, casting="same_kind", copy=False) b = dpnp.astype(b, dt, casting="same_kind", copy=False) - # Convert complex rtol/atol to real values + # Convert complex rtol/atol to to their real parts # to avoid pybind11 cast errors and match NumPy behavior if isinstance(rtol, complex): - rtol = abs(rtol) + rtol = rtol.real if isinstance(atol, complex): - atol = abs(atol) + atol = atol.real # pylint: disable=W0707 try: From 725891c0bbc30898e56e46748fb1be92b6bdbbce Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 04:47:01 -0700 Subject: [PATCH 21/32] Apply remarks --- .../extensions/ufunc/elementwise_functions/isclose.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 752273fe085a..57f26ccf6930 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -45,7 +45,6 @@ #include "../../elementwise_functions/simplify_iteration_space.hpp" // dpctl tensor headers -#include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" @@ -58,7 +57,7 @@ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -using ext::common::value_type_of; +using ext::common::value_type_of_t; using ext::validation::array_names; using ext::common::dtype_from_typenum; @@ -68,6 +67,7 @@ using ext::validation::check_num_dims; using ext::validation::check_queue; using ext::validation::check_same_dtype; using ext::validation::check_same_size; +using ext::validation::check_writable; namespace dpnp::extensions::ufunc { @@ -75,9 +75,6 @@ namespace dpnp::extensions::ufunc namespace impl { -template -using value_type_of_t = typename value_type_of::type; - typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)( sycl::queue &, const int, // nd @@ -181,7 +178,7 @@ std::pair check_queue({&a, &b, &res}, names, exec_q); check_no_overlap({&a, &b}, {&res}, names); - dpctl::tensor::validation::CheckWritable::throw_if_not_writable(res); + check_writable({&res}, names); auto types = td_ns::usm_ndarray_types(); // a_typeid == b_typeid (check_same_dtype(&a, &b, names)) From c7c701141182bc4eaeb4c7db8dbbf071cb9b6ab0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 05:00:42 -0700 Subject: [PATCH 22/32] Refactor isclose() by using separate _isclose_scalar_tol() for scalar rtol/atol --- dpnp/dpnp_iface_logic.py | 140 +++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 66 deletions(-) diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index ab37689961e6..3031560de3d7 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -84,6 +84,79 @@ ] +def _isclose_scalar_tol(a, b, rtol, atol, equal_nan): + """ + Specialized implementation of dpnp.isclose() for scalar rtol and atol + using a dedicated SYCL kernel. + """ + dt = dpnp.result_type(a, b, 1.0) + + if dpnp.isscalar(a): + usm_type = b.usm_type + exec_q = b.sycl_queue + a = dpnp.array( + a, + dt, + usm_type=usm_type, + sycl_queue=exec_q, + ) + elif dpnp.isscalar(b): + usm_type = a.usm_type + exec_q = a.sycl_queue + b = dpnp.array( + b, + dt, + usm_type=usm_type, + sycl_queue=exec_q, + ) + else: + usm_type, exec_q = get_usm_allocations([a, b]) + + a = dpnp.astype(a, dt, casting="same_kind", copy=False) + b = dpnp.astype(b, dt, casting="same_kind", copy=False) + + # Convert complex rtol/atol to to their real parts + # to avoid pybind11 cast errors and match NumPy behavior + if isinstance(rtol, complex): + rtol = rtol.real + if isinstance(atol, complex): + atol = atol.real + + # pylint: disable=W0707 + try: + res_shape = dpnp.broadcast_shapes(a.shape, b.shape) + except ValueError: + raise ValueError( + "operands could not be broadcast together with shapes " + f"{a.shape} and {b.shape}" + ) + + if a.shape != res_shape: + a = dpnp.broadcast_to(a, res_shape) + if b.shape != res_shape: + b = dpnp.broadcast_to(b, res_shape) + + out_dtype = dpnp.bool + output = dpnp.empty( + res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type + ) + + _manager = dpu.SequentialOrderManager[exec_q] + mem_ev, ht_ev = ufi._isclose_scalar( + a.get_array(), + b.get_array(), + rtol, + atol, + equal_nan, + output.get_array(), + exec_q, + depends=_manager.submitted_events, + ) + _manager.add_event_pair(mem_ev, ht_ev) + + return output + + def all(a, /, axis=None, out=None, keepdims=False, *, where=True): """ Test whether all array elements along a given axis evaluate to ``True``. @@ -874,72 +947,7 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False): # Use own SYCL kernel for scalar rtol/atol if dpnp.isscalar(rtol) and dpnp.isscalar(atol): - dt = dpnp.result_type(a, b, 1.0) - - if dpnp.isscalar(a): - usm_type = b.usm_type - exec_q = b.sycl_queue - a = dpnp.array( - a, - dt, - usm_type=usm_type, - sycl_queue=exec_q, - ) - elif dpnp.isscalar(b): - usm_type = a.usm_type - exec_q = a.sycl_queue - b = dpnp.array( - b, - dt, - usm_type=usm_type, - sycl_queue=exec_q, - ) - else: - usm_type, exec_q = get_usm_allocations([a, b]) - - a = dpnp.astype(a, dt, casting="same_kind", copy=False) - b = dpnp.astype(b, dt, casting="same_kind", copy=False) - - # Convert complex rtol/atol to to their real parts - # to avoid pybind11 cast errors and match NumPy behavior - if isinstance(rtol, complex): - rtol = rtol.real - if isinstance(atol, complex): - atol = atol.real - - # pylint: disable=W0707 - try: - res_shape = dpnp.broadcast_shapes(a.shape, b.shape) - except ValueError: - raise ValueError( - "operands could not be broadcast together with shapes " - f"{a.shape} and {b.shape}" - ) - - if a.shape != res_shape: - a = dpnp.broadcast_to(a, res_shape) - if b.shape != res_shape: - b = dpnp.broadcast_to(b, res_shape) - - out_dtype = dpnp.bool - output = dpnp.empty( - res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type - ) - - _manager = dpu.SequentialOrderManager[exec_q] - mem_ev, ht_ev = ufi._isclose_scalar( - a.get_array(), - b.get_array(), - rtol, - atol, - equal_nan, - output.get_array(), - exec_q, - depends=_manager.submitted_events, - ) - _manager.add_event_pair(mem_ev, ht_ev) - - return output + return _isclose_scalar_tol(a, b, rtol, atol, equal_nan) # make sure b is an inexact type to avoid bad behavior on abs(MIN_INT) if dpnp.isscalar(b): From c5da321735d2b2d5178016d059c86b523391f269 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 30 Jul 2025 05:35:38 -0700 Subject: [PATCH 23/32] Update TestIsClose --- dpnp/tests/test_logic.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py index c1a068724938..9ac356729020 100644 --- a/dpnp/tests/test_logic.py +++ b/dpnp/tests/test_logic.py @@ -10,7 +10,9 @@ import dpnp from .helper import ( + generate_random_numpy_array, get_all_dtypes, + get_complex_dtypes, get_float_complex_dtypes, get_float_dtypes, get_integer_float_dtypes, @@ -554,6 +556,21 @@ def test_isclose(self, dtype, rtol, atol): dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol=rtol, atol=atol) assert_allclose(dpnp_res, np_res) + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize("shape", [(4, 4), (16, 16), (4, 4, 4)]) + def test_isclose_complex(self, dtype, shape): + a = generate_random_numpy_array(shape, dtype=dtype, seed_value=81) + b = a.copy() + + b = b + (1e-6 + 1e-6j) + + dpnp_a = dpnp.array(a, dtype=dtype) + dpnp_b = dpnp.array(b, dtype=dtype) + + np_res = numpy.isclose(a, b) + dpnp_res = dpnp.isclose(dpnp_a, dpnp_b) + assert_allclose(dpnp_res, np_res) + @pytest.mark.parametrize( "sh_a, sh_b", [ @@ -603,14 +620,14 @@ def test_rtol_atol_arrays(self): @pytest.mark.parametrize( "rtol, atol", [ - (1e-05 + 1j, 1e-08), - (1e-05, 1e-08 + 1j), - (1e-05 + 1j, 1e-08 + 1j), + (0 + 1e-5j, 1e-08), + (1e-05, 0 + 1e-8j), + (0 + 1e-5j, 0 + 1e-8j), ], ) def test_rtol_atol_complex(self, rtol, atol): - a = dpnp.array([1.0, 2.0]) - b = dpnp.array([1.0, 2.0 + 1e-7]) + a = dpnp.array([1.0, 1.0]) + b = dpnp.array([1.0, 1.0 + 1e-6]) dpnp_res = dpnp.isclose(a, b, rtol=rtol, atol=atol) np_res = numpy.isclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol) From c1112c364b21280bde4c141529092845c5e02f7f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 04:14:17 -0700 Subject: [PATCH 24/32] Add with_requires(numpy --- dpnp/tests/test_logic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py index 9ac356729020..3efad4b98aff 100644 --- a/dpnp/tests/test_logic.py +++ b/dpnp/tests/test_logic.py @@ -17,6 +17,7 @@ get_float_dtypes, get_integer_float_dtypes, ) +from .third_party.cupy import testing class TestAllAny: @@ -602,6 +603,8 @@ def test_equal_nan(self): dp_res = dpnp.isclose(dp_a, dp_b, equal_nan=True) assert_allclose(dp_res, np_res) + # array-like rtol/atol support requires NumPy >= 2.0 + @testing.with_requires("numpy>=2.0") def test_rtol_atol_arrays(self): a = numpy.array([2.1, 2.1, 2.1, 2.1, 5, numpy.nan]) b = numpy.array([2, 2, 2, 2, numpy.nan, 5]) @@ -633,7 +636,9 @@ def test_rtol_atol_complex(self, rtol, atol): np_res = numpy.isclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol) assert_allclose(dpnp_res, np_res) - def test_rtol_atol_nep50_broadcasting(self): + # NEP 50: float32 vs Python float comparison requires NumPy >= 2.0 + @testing.with_requires("numpy>=2.0") + def test_rtol_atol_nep50(self): below_one = float(1.0 - numpy.finfo("f8").eps) f32 = numpy.array(below_one, dtype="f4") dp_f32 = dpnp.array(f32) From 35fc36768ab7df5ddb78dafadbf0fe9a092750a4 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 04:36:45 -0700 Subject: [PATCH 25/32] Remove unused includes --- .../extensions/ufunc/elementwise_functions/isclose.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 57f26ccf6930..7fd1d8d65ad6 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -23,22 +23,16 @@ // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** -#include #include #include #include -#include -#include #include -#include #include #include #include "dpctl4pybind11.hpp" -#include #include -#include #include "kernels/elementwise_functions/isclose.hpp" @@ -47,7 +41,6 @@ // dpctl tensor headers #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" -#include "utils/sycl_alloc_utils.hpp" #include "utils/type_dispatch.hpp" #include "utils/type_utils.hpp" From 4b552ebbe291a0141bcd458d92b0d248716e007a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 04:42:14 -0700 Subject: [PATCH 26/32] Use dpctl tensor include to use experimental SYCL namespace for complex --- dpnp/backend/kernels/elementwise_functions/isclose.hpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 4be6c103e5c7..cfbf5276d4ca 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -25,13 +25,7 @@ #pragma once -#define SYCL_EXT_ONEAPI_COMPLEX -#if __has_include() -#include -#else -#include -#endif - +#include #include #include #include @@ -40,13 +34,13 @@ // dpctl tensor headers #include "kernels/alignment.hpp" #include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/sycl_complex.hpp" #include "utils/offset_utils.hpp" #include "utils/sycl_utils.hpp" #include "utils/type_utils.hpp" namespace dpnp::kernels::isclose { -namespace exprm_ns = sycl::ext::oneapi::experimental; template inline bool isclose(const T a, From f68249c6dbf39f8441a0d040f3a5106386d805e0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 04:48:12 -0700 Subject: [PATCH 27/32] Return inclusion pybind11/stl.h --- dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 7fd1d8d65ad6..683cb48247b1 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -33,6 +33,7 @@ #include "dpctl4pybind11.hpp" #include +#include #include "kernels/elementwise_functions/isclose.hpp" From 394727fbb47aa074ac56de55b9f4bbc085c5b7b6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 04:48:54 -0700 Subject: [PATCH 28/32] Use static_assert to isclose --- dpnp/backend/kernels/elementwise_functions/isclose.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index cfbf5276d4ca..614f90661864 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -49,6 +49,8 @@ inline bool isclose(const T a, const T atol, const bool equal_nan) { + static_assert(std::is_floating_point_v || std::is_same_v); + if (sycl::isfinite(a) && sycl::isfinite(b)) { return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b); } From c27596fa720e6bd775f8202cdb429d17ed5bfd9f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 05:07:06 -0700 Subject: [PATCH 29/32] Remove offsets passing for contig impl --- .../extensions/ufunc/elementwise_functions/isclose.cpp | 3 +-- dpnp/backend/kernels/elementwise_functions/isclose.hpp | 10 +++------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index 683cb48247b1..bd6053c9b7ab 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -143,8 +143,7 @@ sycl::event isclose_contig_scalar_call(sycl::queue &q, const bool equal_nan = py::cast(py_equal_nan); return dpnp::kernels::isclose::isclose_contig_scalar_impl( - q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, - depends); + q, nelems, rtol, atol, equal_nan, in1_p, in2_p, out_p, depends); } isclose_strided_scalar_fn_ptr_t diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp index 614f90661864..b2a0db782f5f 100644 --- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp +++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp @@ -279,11 +279,8 @@ sycl::event const scT atol, const bool equal_nan, const char *a_cp, - ssize_t a_offset, const char *b_cp, - ssize_t b_offset, char *out_cp, - ssize_t out_offset, const std::vector &depends = {}) { constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; @@ -298,12 +295,11 @@ sycl::event const auto gws_range = sycl::range<1>(n_groups * lws); const auto lws_range = sycl::range<1>(lws); - // ? + offset - const T *a_tp = reinterpret_cast(a_cp) + a_offset; - const T *b_tp = reinterpret_cast(b_cp) + b_offset; + const T *a_tp = reinterpret_cast(a_cp); + const T *b_tp = reinterpret_cast(b_cp); using resTy = bool; - resTy *out_tp = reinterpret_cast(out_cp) + out_offset; + resTy *out_tp = reinterpret_cast(out_cp); sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); From a71f1bba50543268a981d39dd2e85be53c2e5289 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 05:31:06 -0700 Subject: [PATCH 30/32] Use dpnp.broadcast_arrays() instead of dpnp.broadcast_shapes() --- dpnp/dpnp_iface_logic.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 3031560de3d7..bc4a79af20d5 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -124,21 +124,16 @@ def _isclose_scalar_tol(a, b, rtol, atol, equal_nan): # pylint: disable=W0707 try: - res_shape = dpnp.broadcast_shapes(a.shape, b.shape) + a, b = dpnp.broadcast_arrays(a, b) except ValueError: raise ValueError( "operands could not be broadcast together with shapes " f"{a.shape} and {b.shape}" ) - if a.shape != res_shape: - a = dpnp.broadcast_to(a, res_shape) - if b.shape != res_shape: - b = dpnp.broadcast_to(b, res_shape) - out_dtype = dpnp.bool output = dpnp.empty( - res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type + a.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type ) _manager = dpu.SequentialOrderManager[exec_q] From 2d22e625c0107acbbdb2283bd7d1ae15408f6511 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 31 Jul 2025 06:39:21 -0700 Subject: [PATCH 31/32] Improve code coverage --- dpnp/tests/test_logic.py | 78 ++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py index 3efad4b98aff..bf003410849b 100644 --- a/dpnp/tests/test_logic.py +++ b/dpnp/tests/test_logic.py @@ -519,26 +519,6 @@ def test_infinity_sign_errors(func): class TestIsClose: - @pytest.mark.parametrize("val", [1.0, numpy.inf, -numpy.inf, numpy.nan]) - def test_input_0d(self, val): - dp_arr = dpnp.array(val) - np_arr = numpy.array(val) - - # array & scalar - dp_res = dpnp.isclose(dp_arr, val) - np_res = numpy.isclose(np_arr, val) - assert_allclose(dp_res, np_res) - - # scalar & array - dp_res = dpnp.isclose(val, dp_arr) - np_res = numpy.isclose(val, np_arr) - assert_allclose(dp_res, np_res) - - # array & array - dp_res = dpnp.isclose(dp_arr, dp_arr) - np_res = numpy.isclose(np_arr, np_arr) - assert_allclose(dp_res, np_res) - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize( "rtol", [1e-5, dpnp.array(1e-5), dpnp.full((10,), 1e-5)] @@ -572,6 +552,51 @@ def test_isclose_complex(self, dtype, shape): dpnp_res = dpnp.isclose(dpnp_a, dpnp_b) assert_allclose(dpnp_res, np_res) + @pytest.mark.parametrize( + "rtol, atol", + [ + (1e-5, 1e-8), + (dpnp.array(1e-5), dpnp.array(1e-8)), + ], + ) + def test_empty_input(self, rtol, atol): + a = numpy.array([]) + b = numpy.array([]) + + dpnp_a = dpnp.array(a) + dpnp_b = dpnp.array(b) + + np_res = numpy.isclose(a, b, rtol=1e-5, atol=1e-8) + dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol=rtol, atol=atol) + assert_allclose(dpnp_res, np_res) + + @pytest.mark.parametrize( + "rtol, atol", + [ + (1e-5, 1e-8), + (dpnp.array(1e-5), dpnp.array(1e-8)), + ], + ) + @pytest.mark.parametrize("val", [1.0, numpy.inf, -numpy.inf, numpy.nan]) + def test_input_0d(self, val, rtol, atol): + dp_arr = dpnp.array(val) + np_arr = numpy.array(val) + + # array & scalar + dp_res = dpnp.isclose(dp_arr, val, rtol=rtol, atol=atol) + np_res = numpy.isclose(np_arr, val, rtol=1e-5, atol=1e-8) + assert_allclose(dp_res, np_res) + + # scalar & array + dp_res = dpnp.isclose(val, dp_arr, rtol=rtol, atol=atol) + np_res = numpy.isclose(val, np_arr, rtol=1e-5, atol=1e-8) + assert_allclose(dp_res, np_res) + + # array & array + dp_res = dpnp.isclose(dp_arr, dp_arr, rtol=rtol, atol=atol) + np_res = numpy.isclose(np_arr, np_arr, rtol=1e-5, atol=1e-8) + assert_allclose(dp_res, np_res) + @pytest.mark.parametrize( "sh_a, sh_b", [ @@ -592,15 +617,22 @@ def test_broadcast_shapes(self, sh_a, sh_b): dp_res = dpnp.isclose(a_dp, b_dp) assert_allclose(dp_res, np_res) - def test_equal_nan(self): + @pytest.mark.parametrize( + "rtol, atol", + [ + (1e-5, 1e-8), + (dpnp.array(1e-5), dpnp.array(1e-8)), + ], + ) + def test_equal_nan(self, rtol, atol): a = numpy.array([numpy.nan, 1.0]) b = numpy.array([numpy.nan, 1.0]) dp_a = dpnp.array(a) dp_b = dpnp.array(b) - np_res = numpy.isclose(a, b, equal_nan=True) - dp_res = dpnp.isclose(dp_a, dp_b, equal_nan=True) + np_res = numpy.isclose(a, b, rtol=1e-5, atol=1e-8, equal_nan=True) + dp_res = dpnp.isclose(dp_a, dp_b, rtol=rtol, atol=atol, equal_nan=True) assert_allclose(dp_res, np_res) # array-like rtol/atol support requires NumPy >= 2.0 From 0822f9813e6c7cc2c4e2d97671538aea721fe712 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 4 Aug 2025 03:56:50 -0700 Subject: [PATCH 32/32] Remove impossible used special case for contig data --- .../ufunc/elementwise_functions/isclose.cpp | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp index bd6053c9b7ab..43ff66a9a9cb 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp @@ -262,33 +262,6 @@ std::pair simplified_shape, simplified_a_strides, simplified_b_strides, simplified_res_strides, a_offset, b_offset, res_offset); - if (nd == 1 && simplified_a_strides[0] == 1 && - simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1) - { - // Special case of contiguous data - auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid]; - - if (contig_fn == nullptr) { - py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid); - throw std::runtime_error( - "Contiguous implementation is missing for " + - std::string(py::str(a_b_dtype_py)) + "data type"); - } - - int a_elem_size = a.get_elemsize(); - int b_elem_size = b.get_elemsize(); - int res_elem_size = res.get_elemsize(); - auto comp_ev = contig_fn( - exec_q, nelems, py_rtol, py_atol, py_equal_nan, - a_data + a_elem_size * a_offset, b_data + b_elem_size * b_offset, - res_data + res_elem_size * res_offset, depends); - - sycl::event ht_ev = - dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev}); - - return std::make_pair(ht_ev, comp_ev); - } - auto strided_fn = isclose_strided_scalar_dispatch_vector[a_b_typeid]; if (strided_fn == nullptr) {