From 31f05a0ae12d95c8eb7f13916decfe06438510f8 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 18 Jun 2025 06:46:33 -0700
Subject: [PATCH 01/32] Init commit

---
 dpnp/backend/extensions/ufunc/CMakeLists.txt  |   1 +
 .../ufunc/elementwise_functions/common.cpp    |   2 +
 .../ufunc/elementwise_functions/isclose.cpp   | 167 ++++++++
 .../ufunc/elementwise_functions/isclose.hpp   |  35 ++
 .../kernels/elementwise_functions/isclose.hpp | 405 ++++++++++++++++++
 dpnp/dpnp_iface_logic.py                      |  67 ++-
 6 files changed, 660 insertions(+), 17 deletions(-)
 create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
 create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp
 create mode 100644 dpnp/backend/kernels/elementwise_functions/isclose.hpp

diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt
index 65d30d49f549..dfd58236ad69 100644
--- a/dpnp/backend/extensions/ufunc/CMakeLists.txt
+++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt
@@ -37,6 +37,7 @@ set(_elementwise_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/heaviside.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/i0.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/interpolate.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/isclose.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/lcm.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/ldexp.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/logaddexp2.cpp
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
index c6dd3e038eb1..9d30f16273f0 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp
@@ -37,6 +37,7 @@
 #include "heaviside.hpp"
 #include "i0.hpp"
 #include "interpolate.hpp"
+#include "isclose.hpp"
 #include "lcm.hpp"
 #include "ldexp.hpp"
 #include "logaddexp2.hpp"
@@ -66,6 +67,7 @@ void init_elementwise_functions(py::module_ m)
     init_heaviside(m);
     init_i0(m);
     init_interpolate(m);
+    init_isclose(m);
     init_lcm(m);
     init_ldexp(m);
     init_logaddexp2(m);
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
new file mode 100644
index 000000000000..7f1d44aa49e1
--- /dev/null
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -0,0 +1,167 @@
+//*****************************************************************************
+// Copyright (c) 2024, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#include <algorithm>
+#include <complex>
+#include <cstddef>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpctl4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "kernels/elementwise_functions/isclose.hpp"
+
+#include "../../elementwise_functions/simplify_iteration_space.hpp"
+
+// dpctl tensor headers
+#include "utils/memory_overlap.hpp"
+#include "utils/offset_utils.hpp"
+#include "utils/output_validation.hpp"
+#include "utils/sycl_alloc_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "ext/common.hpp"
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using ext::common::value_type_of;
+
+namespace dpnp::extensions::ufunc
+{
+namespace impl
+{
+
+using dpctl::tensor::usm_ndarray;
+using event_vector = std::vector<sycl::event>;
+
+using isclose_contig_fn_ptr_t = sycl::event (*)(
+    sycl::queue &, std::size_t,
+    const py::object &, const py::object &, const py::object &,
+    const char *, const char *, char *, const std::vector<sycl::event> &);
+
+static isclose_contig_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types];
+
+template <typename T>
+using value_type_of_t = typename value_type_of<T>::type;
+
+// Template for checking if T is supported
+template <typename T>
+struct IsCloseOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename T>
+sycl::event isclose_contig_call(sycl::queue &q,
+                                std::size_t nelems,
+                                const py::object &rtol_,
+                                const py::object &atol_,
+                                const py::object &equal_nan_,
+                                const char *in1_p,
+                                const char *in2_p,
+                                char *out_p,
+                                const event_vector &depends)
+{
+    using dpctl::tensor::type_utils::is_complex_v;
+    using scT = std::conditional_t<is_complex_v<T>, typename value_type_of<T>::type, T>;
+
+    const scT rtol = py::cast<scT>(rtol_);
+    const scT atol = py::cast<scT>(atol_);
+    const bool equal_nan = py::cast<bool>(equal_nan_);
+
+    return dpnp::kernels::isclose::isclose_contig_impl<T, scT>(
+        q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, depends);
+}
+
+template <typename fnT, typename T>
+struct IsCloseContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type, void>) {
+            return nullptr;
+        } else {
+            return isclose_contig_call<T>;
+        }
+    }
+};
+
+void populate_isclose_dispatch_vector()
+{
+    td_ns::DispatchVectorBuilder<isclose_contig_fn_ptr_t, IsCloseContigFactory, td_ns::num_types>
+        dvb;
+    dvb.populate_dispatch_vector(isclose_contig_dispatch_vector);
+}
+
+std::pair<sycl::event, sycl::event> py_isclose(
+    const usm_ndarray &a,
+    const usm_ndarray &b,
+    const py::object &rtol,
+    const py::object &atol,
+    const py::object &equal_nan,
+    const usm_ndarray &dst,
+    sycl::queue &exec_q,
+    const std::vector<sycl::event> &depends)
+{
+    auto types = td_ns::usm_ndarray_types();
+    int typeid_ = types.typenum_to_lookup_id(a.get_typenum());
+
+    auto fn = isclose_contig_dispatch_vector[typeid_];
+    auto comp_ev = fn(exec_q, a.get_size(), rtol, atol, equal_nan,
+                      a.get_data(), b.get_data(), dst.get_data(), depends);
+
+    sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {a, b, dst}, {comp_ev});
+    return {ht_ev, comp_ev};
+}
+
+} // namespace impl
+
+void init_isclose(py::module_ m)
+{
+    impl::populate_isclose_dispatch_vector();
+    m.def("_isclose", &impl::py_isclose, "",
+          py::arg("a"), py::arg("b"), py::arg("rtol"), py::arg("atol"),
+          py::arg("equal_nan"), py::arg("dst"), py::arg("sycl_queue"),
+          py::arg("depends") = py::list());
+}
+
+} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp
new file mode 100644
index 000000000000..2a409779cb3a
--- /dev/null
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp
@@ -0,0 +1,35 @@
+//*****************************************************************************
+// Copyright (c) 2024-2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+#pragma once
+
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpnp::extensions::ufunc
+{
+void init_isclose(py::module_ m);
+} // namespace dpnp::extensions::ufunc
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
new file mode 100644
index 000000000000..32196917aa36
--- /dev/null
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -0,0 +1,405 @@
+//*****************************************************************************
+// Copyright (c) 2024-2025, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+
+
+// #pragma once
+
+// #include <sycl/sycl.hpp>
+// #include <complex>
+// #include <cstddef>
+// #include <cstdint>
+// #include <type_traits>
+// #include <vector>
+
+// // dpctl tensor headers
+// #include "kernels/alignment.hpp"
+// #include "utils/sycl_utils.hpp"
+// #include "utils/type_utils.hpp"
+
+// namespace dpnp::kernels::isclose
+// {
+
+// template <typename T, typename scT>
+// inline bool isclose_scalar(T a, T b, scT rtol, scT atol, bool equal_nan)
+// {
+//     if constexpr (dpctl::tensor::type_utils::is_complex_v<T>) {
+//         return isclose_scalar(a.real(), b.real(), rtol, atol, equal_nan) &&
+//                isclose_scalar(a.imag(), b.imag(), rtol, atol, equal_nan);
+//     }
+//     else {
+//         if (sycl::isnan(a) || sycl::isnan(b)) {
+//             return equal_nan && sycl::isnan(a) && sycl::isnan(b);
+//         }
+//         return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
+//     }
+// }
+
+// template <typename T, typename scT, std::uint8_t vec_sz = 4, std::uint8_t n_vecs = 2, bool enable_sg = true>
+// struct IsCloseContigFunctor
+// {
+//     const T *in1_;
+//     const T *in2_;
+//     bool *out_;
+//     std::size_t n_;
+//     scT rtol_;
+//     scT atol_;
+//     bool equal_nan_;
+
+//     IsCloseContigFunctor(const T *in1, const T *in2, bool *out,
+//                          std::size_t n, scT rtol, scT atol, bool equal_nan)
+//         : in1_(in1), in2_(in2), out_(out),
+//           n_(n), rtol_(rtol), atol_(atol), equal_nan_(equal_nan)
+//     {
+//     }
+
+//     void operator()(sycl::nd_item<1> item) const
+//     {
+//         const std::size_t gid = item.get_global_linear_id();
+//         if (gid < n_) {
+//             out_[gid] = isclose_scalar<T, scT>(in1_[gid], in2_[gid], rtol_, atol_, equal_nan_);
+//         }
+//     }
+// };
+
+// template <typename T, typename scT>
+// sycl::event isclose_contig_impl(sycl::queue &q,
+//                                 std::size_t n,
+//                                 scT rtol,
+//                                 scT atol,
+//                                 bool equal_nan,
+//                                 const char *in1_p,
+//                                 const char *in2_p,
+//                                 char *out_p,
+//                                 const std::vector<sycl::event> &depends)
+// {
+//     dpctl::tensor::type_utils::validate_type_for_device<T>(q);
+
+//     const T *in1 = reinterpret_cast<const T *>(in1_p);
+//     const T *in2 = reinterpret_cast<const T *>(in2_p);
+//     bool *out = reinterpret_cast<bool *>(out_p);
+
+//     constexpr std::size_t wg_size = 128;
+//     const std::size_t n_wgs = (n + wg_size - 1) / wg_size;
+//     const sycl::range<1> gws{n_wgs * wg_size};
+//     const sycl::range<1> lws{wg_size};
+
+//     using Kernel = IsCloseContigFunctor<T, scT>;
+
+//     return q.submit([&](sycl::handler &cgh) {
+//         cgh.depends_on(depends);
+//         cgh.parallel_for<Kernel>(
+//             sycl::nd_range<1>(gws, lws),
+//             Kernel(in1, in2, out, n, rtol, atol, equal_nan));
+//     });
+// }
+
+// } // namespace dpnp::kernels::isclose
+
+
+#pragma once
+
+#include <complex>
+#include <cstddef>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+// dpctl tensor headers
+#include "kernels/alignment.hpp"
+#include "kernels/dpctl_tensor_types.hpp"
+#include "utils/offset_utils.hpp"
+#include "utils/sycl_utils.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpnp::kernels::isclose
+{
+
+template <typename T>
+inline T isclose(const T a, const T b, const T rtol, const T atol, const bool equal_nan)
+{
+    if (sycl::isnan(a) || sycl::isnan(b)) {
+        //static cast<T>?
+        return equal_nan && sycl::isnan(a) && sycl::isnan(b);
+    }
+    if (sycl::isinf(a) || sycl::isinf(b)) {
+        return a == b;
+    }
+    return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
+}
+
+template <typename T, typename scT, typename InOutIndexerT>
+struct IsCloseFunctor
+{
+private:
+    const T *a_ = nullptr;
+    const T *b_ = nullptr;
+    T *out_ = nullptr;
+    const InOutIndexerT inp_out_indexer_;
+    const scT rtol_;
+    const scT atol_;
+    const bool equal_nan_;
+
+public:
+    IsCloseFunctor(const T *a,
+                   const T *b,
+                   T *out,
+                   const InOutIndexerT &inp_out_indexer,
+                   const scT rtol,
+                   const scT atol,
+                   const bool equal_nan)
+        : a_(a), b_(b),out_(out), inp_out_indexer_(inp_out_indexer), rtol_(rtol),
+          atol_(atol), equal_nan_(equal_nan)
+    {
+    }
+
+    void operator()(sycl::id<1> wid) const
+    {
+        const auto &offsets_ = inp_out_indexer_(wid.get(0));
+        const dpctl::tensor::ssize_t &inp_offset = offsets_.get_first_offset();
+        const dpctl::tensor::ssize_t &out_offset = offsets_.get_second_offset();
+
+        using dpctl::tensor::type_utils::is_complex_v;
+        if constexpr (is_complex_v<T>) {
+            using realT = typename T::value_type;
+            static_assert(std::is_same_v<realT, scT>);
+            T z_a = a_[inp_offset];
+            T z_b = b_[inp_offset];
+            realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
+            realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
+            out_[out_offset] = T{x, y};
+        }
+        else {
+            out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_, atol_, equal_nan_);
+        }
+    }
+};
+
+template <typename T,
+          typename scT,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+struct IsCloseContigFunctor
+{
+private:
+    const T *a_ = nullptr;
+    const T *b_ = nullptr;
+    T *out_ = nullptr;
+    std::size_t nelems_;
+    const scT rtol_;
+    const scT atol_;
+    const bool equal_nan_;
+
+public:
+    IsCloseContigFunctor(const T *a,
+                         const T *b,
+                          T *out,
+                          const std::size_t n_elems,
+                          const scT rtol,
+                          const scT atol,
+                          const bool equal_nan)
+        : a_(a), b_(b), out_(out), nelems_(n_elems), rtol_(rtol), atol_(atol),
+          equal_nan_(equal_nan)
+    {
+    }
+
+    void operator()(sycl::nd_item<1> ndit) const
+    {
+        constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz;
+        /* Each work-item processes vec_sz elements, contiguous in memory */
+        /* NOTE: work-group size must be divisible by sub-group size */
+
+        using dpctl::tensor::type_utils::is_complex_v;
+        if constexpr (enable_sg_loadstore && !is_complex_v<T>) {
+            auto sg = ndit.get_sub_group();
+            const std::uint16_t sgSize = sg.get_max_local_range()[0];
+            const std::size_t base =
+                elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) +
+                                sg.get_group_id()[0] * sgSize);
+
+            if (base + elems_per_wi * sgSize < nelems_) {
+                using dpctl::tensor::sycl_utils::sub_group_load;
+                using dpctl::tensor::sycl_utils::sub_group_store;
+                // sycl::vec<T, vec_sz> res_vec;
+#pragma unroll
+                for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) {
+                    const std::size_t offset = base + it * sgSize;
+                    auto a_multi_ptr = sycl::address_space_cast<
+                        sycl::access::address_space::global_space,
+                        sycl::access::decorated::yes>(&a_[offset]);
+                    auto b_multi_ptr = sycl::address_space_cast<
+                        sycl::access::address_space::global_space,
+                        sycl::access::decorated::yes>(&b_[offset]);
+                    auto out_multi_ptr = sycl::address_space_cast<
+                        sycl::access::address_space::global_space,
+                        sycl::access::decorated::yes>(&out_[offset]);
+
+                    const sycl::vec<T, vec_sz> a_vec =
+                        sub_group_load<vec_sz>(sg, a_multi_ptr);
+                    const sycl::vec<T, vec_sz> b_vec =
+                        sub_group_load<vec_sz>(sg, b_multi_ptr);
+
+                    sycl::vec<T, vec_sz> res_vec;
+#pragma unroll
+                    for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) {
+                        res_vec[vec_id] =
+                            isclose(a_vec[vec_id], b_vec[vec_id], rtol_, atol_, equal_nan_);
+                    }
+                    sub_group_store<vec_sz>(sg, res_vec, out_multi_ptr);
+                }
+            }
+            else {
+                const std::size_t lane_id = sg.get_local_id()[0];
+                for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) {
+                    out_[k] = isclose(a_[k], b_[k], rtol_, atol_, equal_nan_);
+                }
+            }
+        }
+        else {
+            const std::uint16_t sgSize =
+                ndit.get_sub_group().get_local_range()[0];
+            const std::size_t gid = ndit.get_global_linear_id();
+            const std::uint16_t elems_per_sg = sgSize * elems_per_wi;
+
+            const std::size_t start =
+                (gid / sgSize) * (elems_per_sg - sgSize) + gid;
+            const std::size_t end = std::min(nelems_, start + elems_per_sg);
+            for (std::size_t offset = start; offset < end; offset += sgSize) {
+                if constexpr (is_complex_v<T>) {
+                    using realT = typename T::value_type;
+                    static_assert(std::is_same_v<realT, scT>);
+
+                    T z_a = a_[offset];
+                    T z_b = b_[offset];
+                    realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
+                    realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
+                    out_[offset] = T{x, y};
+                }
+                else {
+                    out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_, equal_nan_);
+                }
+            }
+        }
+    }
+};
+
+// template <typename T, typename scT>
+// sycl::event nan_to_num_strided_impl(sycl::queue &q,
+//                                     const size_t nelems,
+//                                     const int nd,
+//                                     const dpctl::tensor::ssize_t *shape_strides,
+//                                     const scT nan,
+//                                     const scT posinf,
+//                                     const scT neginf,
+//                                     const char *in_cp,
+//                                     const dpctl::tensor::ssize_t in_offset,
+//                                     char *out_cp,
+//                                     const dpctl::tensor::ssize_t out_offset,
+//                                     const std::vector<sycl::event> &depends)
+// {
+//     dpctl::tensor::type_utils::validate_type_for_device<T>(q);
+
+//     const T *in_tp = reinterpret_cast<const T *>(in_cp);
+//     T *out_tp = reinterpret_cast<T *>(out_cp);
+
+//     using InOutIndexerT =
+//         typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
+//     const InOutIndexerT indexer{nd, in_offset, out_offset, shape_strides};
+
+//     sycl::event comp_ev = q.submit([&](sycl::handler &cgh) {
+//         cgh.depends_on(depends);
+
+//         using NanToNumFunc = NanToNumFunctor<T, scT, InOutIndexerT>;
+//         cgh.parallel_for<NanToNumFunc>(
+//             {nelems},
+//             NanToNumFunc(in_tp, out_tp, indexer, nan, posinf, neginf));
+//     });
+//     return comp_ev;
+// }
+
+template <typename T,
+          typename scT,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u>
+sycl::event isclose_contig_impl(sycl::queue &exec_q,
+                                std::size_t nelems,
+                                const scT rtol,
+                                const scT atol,
+                                const bool equal_nan,
+                                const char *a_cp,
+                                ssize_t a_offset,
+                                const char *b_cp,
+                                ssize_t b_offset,
+                                char *out_cp,
+                                ssize_t out_offset,
+                                const std::vector<sycl::event> &depends = {})
+{
+    constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz;
+    const std::size_t n_work_items_needed = nelems / elems_per_wi;
+    const std::size_t empirical_threshold = std::size_t(1) << 21;
+    const std::size_t lws = (n_work_items_needed <= empirical_threshold)
+                                ? std::size_t(128)
+                                : std::size_t(256);
+
+    const std::size_t n_groups =
+        ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi));
+    const auto gws_range = sycl::range<1>(n_groups * lws);
+    const auto lws_range = sycl::range<1>(lws);
+
+    const T *a_tp = reinterpret_cast<const T *>(a_cp) + a_offset;
+    const T *b_tp = reinterpret_cast<const T *>(b_cp) + b_offset;
+    T *out_tp = reinterpret_cast<T *>(out_cp) + out_offset;
+
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        using dpctl::tensor::kernels::alignment_utils::is_aligned;
+        using dpctl::tensor::kernels::alignment_utils::required_alignment;
+        if (is_aligned<required_alignment>(a_tp) &&
+            is_aligned<required_alignment>(b_tp) &&
+            is_aligned<required_alignment>(out_tp))
+        {
+            constexpr bool enable_sg_loadstore = true;
+            using IsCloseFunc = IsCloseContigFunctor<T, scT, vec_sz, n_vecs,
+                                                       enable_sg_loadstore>;
+
+            cgh.parallel_for<IsCloseFunc>(
+                sycl::nd_range<1>(gws_range, lws_range),
+                IsCloseFunc(a_tp, b_tp, out_tp, nelems, rtol, atol, equal_nan));
+        }
+        else {
+            constexpr bool disable_sg_loadstore = false;
+            using IsCloseFunc = IsCloseContigFunctor<T, scT, vec_sz, n_vecs,
+                                                       disable_sg_loadstore>;
+
+            cgh.parallel_for<IsCloseFunc>(
+                sycl::nd_range<1>(gws_range, lws_range),
+                IsCloseFunc(a_tp, b_tp, out_tp, nelems, rtol, atol, equal_nan));
+        }
+    });
+
+    return comp_ev;
+}
+
+} // namespace dpnp::kernels::isclose
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 84258f698b8f..4ebee0a26ad9 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -44,10 +44,13 @@
 
 import dpctl.tensor as dpt
 import dpctl.tensor._tensor_elementwise_impl as ti
+import dpctl.utils as dpu
+
 import numpy
 
 import dpnp
 from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc
+import dpnp.backend.extensions.ufunc._ufunc_impl as ufi
 
 from .dpnp_utils import get_usm_allocations
 
@@ -771,7 +774,7 @@ def array_equiv(a1, a2):
 )
 
 
-def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
+def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
     """
     Returns a boolean array where two arrays are element-wise equal within
     a tolerance.
@@ -880,22 +883,52 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
         dt = dpnp.result_type(b, 1.0, rtol, atol)
         b = dpnp.astype(b, dt)
 
-    # Firstly handle finite values:
-    # result = absolute(a - b) <= atol + rtol * absolute(b)
-    dt = dpnp.result_type(b, rtol, atol)
-    _b = dpnp.abs(b, dtype=dt)
-    _b *= rtol
-    _b += atol
-    result = less_equal(dpnp.abs(a - b), _b)
-
-    # Handle "inf" values: they are treated as equal if they are in the same
-    # place and of the same sign in both arrays
-    result &= isfinite(b)
-    result |= a == b
-
-    if equal_nan:
-        result |= isnan(a) & isnan(b)
-    return result
+    if not new:
+
+        print("OLD")
+
+        # Firstly handle finite values:
+        # result = absolute(a - b) <= atol + rtol * absolute(b)
+        dt = dpnp.result_type(b, rtol, atol)
+        _b = dpnp.abs(b, dtype=dt)
+        _b *= rtol
+        _b += atol
+        result = less_equal(dpnp.abs(a - b), _b)
+
+        # Handle "inf" values: they are treated as equal if they are in the same
+        # place and of the same sign in both arrays
+        result &= isfinite(b)
+        result |= a == b
+
+        if equal_nan:
+            result |= isnan(a) & isnan(b)
+        return result
+
+    else:
+
+        print("NEW")
+
+        usm_type, exec_q = get_usm_allocations([a, b])
+        out_dtype = dpnp.bool
+        output = dpnp.empty(
+            a.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
+        )
+
+        _manager = dpu.SequentialOrderManager[exec_q]
+        mem_ev, ht_ev = ufi._isclose(
+            a.get_array(),
+            b.get_array(),
+            rtol,
+            atol,
+            equal_nan,
+            output.get_array(),
+            exec_q,
+            depends=_manager.submitted_events,
+        )
+        _manager.add_event_pair(mem_ev, ht_ev)
+
+        return output
+
 
 
 def iscomplex(x):

From 745a28c9bfb7c998201f95ea2a835dbd89457926 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 20 Jun 2025 06:51:24 -0700
Subject: [PATCH 02/32] Init IsCloseContigFunctor with explicit T1, T2, resTy

---
 .../ufunc/elementwise_functions/isclose.cpp   | 193 ++++++++++++++----
 .../kernels/elementwise_functions/isclose.hpp | 150 +++++++-------
 2 files changed, 231 insertions(+), 112 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 7f1d44aa49e1..33346be35058 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -67,30 +67,59 @@ namespace impl
 using dpctl::tensor::usm_ndarray;
 using event_vector = std::vector<sycl::event>;
 
-using isclose_contig_fn_ptr_t = sycl::event (*)(
-    sycl::queue &, std::size_t,
-    const py::object &, const py::object &, const py::object &,
-    const char *, const char *, char *, const std::vector<sycl::event> &);
+using isclose_contig_fn_ptr_t =
+    sycl::event (*)(sycl::queue &,
+                    std::size_t,
+                    const py::object &,
+                    const py::object &,
+                    const py::object &,
+                    const char *,
+                    const char *,
+                    char *,
+                    const std::vector<sycl::event> &);
 
-static isclose_contig_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types];
+static isclose_contig_fn_ptr_t isclose_contig_dispatch_table[td_ns::num_types]
+                                                            [td_ns::num_types];
 
 template <typename T>
 using value_type_of_t = typename value_type_of<T>::type;
 
-// Template for checking if T is supported
-template <typename T>
+// // Template for checking if T is supported
+// template <typename T>
+// struct IsCloseOutputType
+// {
+//     using value_type = typename std::disjunction<
+//         td_ns::TypeMapResultEntry<T, sycl::half>,
+//         td_ns::TypeMapResultEntry<T, float>,
+//         td_ns::TypeMapResultEntry<T, double>,
+//         td_ns::TypeMapResultEntry<T, std::complex<float>>,
+//         td_ns::TypeMapResultEntry<T, std::complex<double>>,
+//         td_ns::DefaultResultEntry<void>>::result_type;
+// };
+
+// Supports only float and complex types
+template <typename T1, typename T2>
 struct IsCloseOutputType
 {
     using value_type = typename std::disjunction<
-        td_ns::TypeMapResultEntry<T, sycl::half>,
-        td_ns::TypeMapResultEntry<T, float>,
-        td_ns::TypeMapResultEntry<T, double>,
-        td_ns::TypeMapResultEntry<T, std::complex<float>>,
-        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        bool>,
         td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
 };
 
-template <typename T>
+template <typename T1, typename T2>
 sycl::event isclose_contig_call(sycl::queue &q,
                                 std::size_t nelems,
                                 const py::object &rtol_,
@@ -102,65 +131,145 @@ sycl::event isclose_contig_call(sycl::queue &q,
                                 const event_vector &depends)
 {
     using dpctl::tensor::type_utils::is_complex_v;
-    using scT = std::conditional_t<is_complex_v<T>, typename value_type_of<T>::type, T>;
+    using scT = std::conditional_t<is_complex_v<T1>,
+                                   typename value_type_of<T1>::type, T1>;
 
     const scT rtol = py::cast<scT>(rtol_);
     const scT atol = py::cast<scT>(atol_);
     const bool equal_nan = py::cast<bool>(equal_nan_);
 
-    return dpnp::kernels::isclose::isclose_contig_impl<T, scT>(
-        q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0, depends);
+    return dpnp::kernels::isclose::isclose_contig_impl<T1, T2, scT>(
+        q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0,
+        depends);
 }
 
-template <typename fnT, typename T>
+template <typename fnT, typename T1, typename T2>
 struct IsCloseContigFactory
 {
     fnT get()
     {
-        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type, void>) {
+        if constexpr (!IsCloseOutputType<T1, T2>::is_defined) {
             return nullptr;
-        } else {
-            return isclose_contig_call<T>;
+        }
+        else {
+            return isclose_contig_call<T1, T2>;
         }
     }
 };
 
-void populate_isclose_dispatch_vector()
+void populate_isclose_dispatch_table()
 {
-    td_ns::DispatchVectorBuilder<isclose_contig_fn_ptr_t, IsCloseContigFactory, td_ns::num_types>
+    td_ns::DispatchTableBuilder<isclose_contig_fn_ptr_t, IsCloseContigFactory,
+                                td_ns::num_types>
         dvb;
-    dvb.populate_dispatch_vector(isclose_contig_dispatch_vector);
+    dvb.populate_dispatch_table(isclose_contig_dispatch_table);
 }
 
-std::pair<sycl::event, sycl::event> py_isclose(
-    const usm_ndarray &a,
-    const usm_ndarray &b,
-    const py::object &rtol,
-    const py::object &atol,
-    const py::object &equal_nan,
-    const usm_ndarray &dst,
-    sycl::queue &exec_q,
-    const std::vector<sycl::event> &depends)
+std::pair<sycl::event, sycl::event>
+    py_isclose(const usm_ndarray &a,
+               const usm_ndarray &b,
+               const py::object &rtol,
+               const py::object &atol,
+               const py::object &equal_nan,
+               const usm_ndarray &res,
+               sycl::queue &exec_q,
+               const std::vector<sycl::event> &depends)
 {
     auto types = td_ns::usm_ndarray_types();
-    int typeid_ = types.typenum_to_lookup_id(a.get_typenum());
+    int a_typeid = types.typenum_to_lookup_id(a.get_typenum());
+    int b_typeid = types.typenum_to_lookup_id(b.get_typenum());
+
+    if (!dpctl::utils::queues_are_compatible(exec_q, {a, b, res})) {
+        throw py::value_error(
+            "Execution queue is not compatible with allocation queues");
+    }
+
+    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(res);
+
+    int res_nd = res.get_ndim();
+    if (res_nd != a.get_ndim() || res_nd != b.get_ndim()) {
+        throw py::value_error("Array dimensions are not the same.");
+    }
+
+    const py::ssize_t *a_shape = a.get_shape_raw();
+    const py::ssize_t *b_shape = b.get_shape_raw();
+    const py::ssize_t *res_shape = res.get_shape_raw();
+    bool shapes_equal(true);
+    std::size_t nelems(1);
+
+    for (int i = 0; i < res_nd; ++i) {
+        nelems *= static_cast<std::size_t>(a_shape[i]);
+        shapes_equal = shapes_equal && (a_shape[i] == res_shape[i] &&
+                                        b_shape[i] == res_shape[i]);
+    }
+    if (!shapes_equal) {
+        throw py::value_error("Array shapes are not the same.");
+    }
+
+    // if nelems is zero, return
+    if (nelems == 0) {
+        return std::make_pair(sycl::event(), sycl::event());
+    }
+
+    dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, nelems);
+
+    // check memory overlap
+    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    auto const &same_logical_tensors =
+        dpctl::tensor::overlap::SameLogicalTensors();
+    if ((overlap(a, res) && !same_logical_tensors(a, res)) ||
+        (overlap(b, res) && !same_logical_tensors(b, res)))
+    {
+        throw py::value_error("Arrays index overlapping segments of memory");
+    }
+
+    const char *a_data = a.get_data();
+    const char *b_data = b.get_data();
+    char *res_data = res.get_data();
+
+    // handle contiguous inputs
+    bool is_a_c_contig = a.is_c_contiguous();
+    bool is_a_f_contig = a.is_f_contiguous();
+
+    bool is_b_c_contig = b.is_c_contiguous();
+    bool is_b_f_contig = b.is_f_contiguous();
 
-    auto fn = isclose_contig_dispatch_vector[typeid_];
-    auto comp_ev = fn(exec_q, a.get_size(), rtol, atol, equal_nan,
-                      a.get_data(), b.get_data(), dst.get_data(), depends);
+    bool is_res_c_contig = res.is_c_contiguous();
+    bool is_res_f_contig = res.is_f_contiguous();
 
-    sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {a, b, dst}, {comp_ev});
-    return {ht_ev, comp_ev};
+    bool all_c_contig = (is_a_c_contig && is_b_c_contig && is_res_c_contig);
+    bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig);
+
+    if (all_c_contig || all_f_contig) {
+        auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid];
+
+        if (contig_fn == nullptr) {
+            throw std::runtime_error(
+                "Contiguous implementation is missing for a_typeid=" +
+                std::to_string(a_typeid) +
+                " and b_typeid=" + std::to_string(b_typeid));
+        }
+
+        auto comp_ev = contig_fn(exec_q, nelems, rtol, atol, equal_nan, a_data,
+                                 b_data, res_data, depends);
+        sycl::event ht_ev =
+            dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev});
+
+        return std::make_pair(ht_ev, comp_ev);
+    }
+    else {
+        throw py::value_error("Stride implementation is not implemented");
+    }
 }
 
 } // namespace impl
 
 void init_isclose(py::module_ m)
 {
-    impl::populate_isclose_dispatch_vector();
-    m.def("_isclose", &impl::py_isclose, "",
-          py::arg("a"), py::arg("b"), py::arg("rtol"), py::arg("atol"),
-          py::arg("equal_nan"), py::arg("dst"), py::arg("sycl_queue"),
+    impl::populate_isclose_dispatch_table();
+    m.def("_isclose", &impl::py_isclose, "", py::arg("a"), py::arg("b"),
+          py::arg("rtol"), py::arg("atol"), py::arg("equal_nan"),
+          py::arg("res"), py::arg("sycl_queue"),
           py::arg("depends") = py::list());
 }
 
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 32196917aa36..788c569d7d99 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -23,7 +23,6 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
-
 // #pragma once
 
 // #include <sycl/sycl.hpp>
@@ -56,8 +55,8 @@
 //     }
 // }
 
-// template <typename T, typename scT, std::uint8_t vec_sz = 4, std::uint8_t n_vecs = 2, bool enable_sg = true>
-// struct IsCloseContigFunctor
+// template <typename T, typename scT, std::uint8_t vec_sz = 4, std::uint8_t
+// n_vecs = 2, bool enable_sg = true> struct IsCloseContigFunctor
 // {
 //     const T *in1_;
 //     const T *in2_;
@@ -78,7 +77,8 @@
 //     {
 //         const std::size_t gid = item.get_global_linear_id();
 //         if (gid < n_) {
-//             out_[gid] = isclose_scalar<T, scT>(in1_[gid], in2_[gid], rtol_, atol_, equal_nan_);
+//             out_[gid] = isclose_scalar<T, scT>(in1_[gid], in2_[gid], rtol_,
+//             atol_, equal_nan_);
 //         }
 //     }
 // };
@@ -117,7 +117,6 @@
 
 // } // namespace dpnp::kernels::isclose
 
-
 #pragma once
 
 #include <complex>
@@ -136,10 +135,14 @@ namespace dpnp::kernels::isclose
 {
 
 template <typename T>
-inline T isclose(const T a, const T b, const T rtol, const T atol, const bool equal_nan)
+inline bool isclose(const T a,
+                    const T b,
+                    const T rtol,
+                    const T atol,
+                    const bool equal_nan)
 {
     if (sycl::isnan(a) || sycl::isnan(b)) {
-        //static cast<T>?
+        // static cast<T>?
         return equal_nan && sycl::isnan(a) && sycl::isnan(b);
     }
     if (sycl::isinf(a) || sycl::isinf(b)) {
@@ -148,28 +151,32 @@ inline T isclose(const T a, const T b, const T rtol, const T atol, const bool eq
     return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
 }
 
-template <typename T, typename scT, typename InOutIndexerT>
+template <typename T1,
+          typename T2,
+          typename scT,
+          typename resTy,
+          typename InOutIndexerT>
 struct IsCloseFunctor
 {
 private:
-    const T *a_ = nullptr;
-    const T *b_ = nullptr;
-    T *out_ = nullptr;
+    const T1 *a_ = nullptr;
+    const T2 *b_ = nullptr;
+    resTy *out_ = nullptr;
     const InOutIndexerT inp_out_indexer_;
     const scT rtol_;
     const scT atol_;
     const bool equal_nan_;
 
 public:
-    IsCloseFunctor(const T *a,
-                   const T *b,
-                   T *out,
+    IsCloseFunctor(const T1 *a,
+                   const T2 *b,
+                   resTy *out,
                    const InOutIndexerT &inp_out_indexer,
                    const scT rtol,
                    const scT atol,
                    const bool equal_nan)
-        : a_(a), b_(b),out_(out), inp_out_indexer_(inp_out_indexer), rtol_(rtol),
-          atol_(atol), equal_nan_(equal_nan)
+        : a_(a), b_(b), out_(out), inp_out_indexer_(inp_out_indexer),
+          rtol_(rtol), atol_(atol), equal_nan_(equal_nan)
     {
     }
 
@@ -180,45 +187,46 @@ struct IsCloseFunctor
         const dpctl::tensor::ssize_t &out_offset = offsets_.get_second_offset();
 
         using dpctl::tensor::type_utils::is_complex_v;
-        if constexpr (is_complex_v<T>) {
-            using realT = typename T::value_type;
-            static_assert(std::is_same_v<realT, scT>);
-            T z_a = a_[inp_offset];
-            T z_b = b_[inp_offset];
-            realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
-            realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
-            out_[out_offset] = T{x, y};
+        if constexpr (is_complex_v<T1>) {
+            T1 z_a = a_[inp_offset];
+            T2 z_b = b_[inp_offset];
+            bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
+            bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
+            out_[out_offset] = x && y;
         }
         else {
-            out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_, atol_, equal_nan_);
+            out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_,
+                                       atol_, equal_nan_);
         }
     }
 };
 
-template <typename T,
+template <typename T1,
+          typename T2,
           typename scT,
+          typename resTy,
           std::uint8_t vec_sz = 4u,
           std::uint8_t n_vecs = 2u,
           bool enable_sg_loadstore = true>
 struct IsCloseContigFunctor
 {
 private:
-    const T *a_ = nullptr;
-    const T *b_ = nullptr;
-    T *out_ = nullptr;
+    const T1 *a_ = nullptr;
+    const T2 *b_ = nullptr;
+    resTy *out_ = nullptr;
     std::size_t nelems_;
     const scT rtol_;
     const scT atol_;
     const bool equal_nan_;
 
 public:
-    IsCloseContigFunctor(const T *a,
-                         const T *b,
-                          T *out,
-                          const std::size_t n_elems,
-                          const scT rtol,
-                          const scT atol,
-                          const bool equal_nan)
+    IsCloseContigFunctor(const T1 *a,
+                         const T2 *b,
+                         resTy *out,
+                         const std::size_t n_elems,
+                         const scT rtol,
+                         const scT atol,
+                         const bool equal_nan)
         : a_(a), b_(b), out_(out), nelems_(n_elems), rtol_(rtol), atol_(atol),
           equal_nan_(equal_nan)
     {
@@ -231,7 +239,7 @@ struct IsCloseContigFunctor
         /* NOTE: work-group size must be divisible by sub-group size */
 
         using dpctl::tensor::type_utils::is_complex_v;
-        if constexpr (enable_sg_loadstore && !is_complex_v<T>) {
+        if constexpr (enable_sg_loadstore && !is_complex_v<T1>) {
             auto sg = ndit.get_sub_group();
             const std::uint16_t sgSize = sg.get_max_local_range()[0];
             const std::size_t base =
@@ -241,7 +249,6 @@ struct IsCloseContigFunctor
             if (base + elems_per_wi * sgSize < nelems_) {
                 using dpctl::tensor::sycl_utils::sub_group_load;
                 using dpctl::tensor::sycl_utils::sub_group_store;
-                // sycl::vec<T, vec_sz> res_vec;
 #pragma unroll
                 for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) {
                     const std::size_t offset = base + it * sgSize;
@@ -255,16 +262,16 @@ struct IsCloseContigFunctor
                         sycl::access::address_space::global_space,
                         sycl::access::decorated::yes>(&out_[offset]);
 
-                    const sycl::vec<T, vec_sz> a_vec =
+                    const sycl::vec<T1, vec_sz> a_vec =
                         sub_group_load<vec_sz>(sg, a_multi_ptr);
-                    const sycl::vec<T, vec_sz> b_vec =
+                    const sycl::vec<T2, vec_sz> b_vec =
                         sub_group_load<vec_sz>(sg, b_multi_ptr);
 
-                    sycl::vec<T, vec_sz> res_vec;
+                    sycl::vec<resTy, vec_sz> res_vec;
 #pragma unroll
                     for (std::uint8_t vec_id = 0; vec_id < vec_sz; ++vec_id) {
-                        res_vec[vec_id] =
-                            isclose(a_vec[vec_id], b_vec[vec_id], rtol_, atol_, equal_nan_);
+                        res_vec[vec_id] = isclose(a_vec[vec_id], b_vec[vec_id],
+                                                  rtol_, atol_, equal_nan_);
                     }
                     sub_group_store<vec_sz>(sg, res_vec, out_multi_ptr);
                 }
@@ -286,18 +293,18 @@ struct IsCloseContigFunctor
                 (gid / sgSize) * (elems_per_sg - sgSize) + gid;
             const std::size_t end = std::min(nelems_, start + elems_per_sg);
             for (std::size_t offset = start; offset < end; offset += sgSize) {
-                if constexpr (is_complex_v<T>) {
-                    using realT = typename T::value_type;
-                    static_assert(std::is_same_v<realT, scT>);
-
-                    T z_a = a_[offset];
-                    T z_b = b_[offset];
-                    realT x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
-                    realT y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
-                    out_[offset] = T{x, y};
+                if constexpr (is_complex_v<T1>) {
+                    T1 z_a = a_[offset];
+                    T2 z_b = b_[offset];
+                    bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_,
+                                     equal_nan_);
+                    bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_,
+                                     equal_nan_);
+                    out_[offset] = x && y;
                 }
                 else {
-                    out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_, equal_nan_);
+                    out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_,
+                                           equal_nan_);
                 }
             }
         }
@@ -308,15 +315,13 @@ struct IsCloseContigFunctor
 // sycl::event nan_to_num_strided_impl(sycl::queue &q,
 //                                     const size_t nelems,
 //                                     const int nd,
-//                                     const dpctl::tensor::ssize_t *shape_strides,
-//                                     const scT nan,
-//                                     const scT posinf,
-//                                     const scT neginf,
-//                                     const char *in_cp,
-//                                     const dpctl::tensor::ssize_t in_offset,
-//                                     char *out_cp,
-//                                     const dpctl::tensor::ssize_t out_offset,
-//                                     const std::vector<sycl::event> &depends)
+//                                     const dpctl::tensor::ssize_t
+//                                     *shape_strides, const scT nan, const scT
+//                                     posinf, const scT neginf, const char
+//                                     *in_cp, const dpctl::tensor::ssize_t
+//                                     in_offset, char *out_cp, const
+//                                     dpctl::tensor::ssize_t out_offset, const
+//                                     std::vector<sycl::event> &depends)
 // {
 //     dpctl::tensor::type_utils::validate_type_for_device<T>(q);
 
@@ -338,7 +343,8 @@ struct IsCloseContigFunctor
 //     return comp_ev;
 // }
 
-template <typename T,
+template <typename T1,
+          typename T2,
           typename scT,
           std::uint8_t vec_sz = 4u,
           std::uint8_t n_vecs = 2u>
@@ -367,9 +373,11 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q,
     const auto gws_range = sycl::range<1>(n_groups * lws);
     const auto lws_range = sycl::range<1>(lws);
 
-    const T *a_tp = reinterpret_cast<const T *>(a_cp) + a_offset;
-    const T *b_tp = reinterpret_cast<const T *>(b_cp) + b_offset;
-    T *out_tp = reinterpret_cast<T *>(out_cp) + out_offset;
+    const T1 *a_tp = reinterpret_cast<const T1 *>(a_cp) + a_offset;
+    const T2 *b_tp = reinterpret_cast<const T2 *>(b_cp) + b_offset;
+
+    using resTy = bool;
+    resTy *out_tp = reinterpret_cast<resTy *>(out_cp) + out_offset;
 
     sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(depends);
@@ -381,8 +389,9 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q,
             is_aligned<required_alignment>(out_tp))
         {
             constexpr bool enable_sg_loadstore = true;
-            using IsCloseFunc = IsCloseContigFunctor<T, scT, vec_sz, n_vecs,
-                                                       enable_sg_loadstore>;
+            using IsCloseFunc =
+                IsCloseContigFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
+                                     enable_sg_loadstore>;
 
             cgh.parallel_for<IsCloseFunc>(
                 sycl::nd_range<1>(gws_range, lws_range),
@@ -390,8 +399,9 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q,
         }
         else {
             constexpr bool disable_sg_loadstore = false;
-            using IsCloseFunc = IsCloseContigFunctor<T, scT, vec_sz, n_vecs,
-                                                       disable_sg_loadstore>;
+            using IsCloseFunc =
+                IsCloseContigFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
+                                     disable_sg_loadstore>;
 
             cgh.parallel_for<IsCloseFunc>(
                 sycl::nd_range<1>(gws_range, lws_range),

From 295a848f59f8c6b364a3ee808e53d15dfcb849bc Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 23 Jun 2025 05:05:41 -0700
Subject: [PATCH 03/32] Handle scalar inputs and broadcast shapes in def
 isclose()

---
 dpnp/dpnp_iface_logic.py | 49 +++++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 8 deletions(-)

diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 4ebee0a26ad9..fed871bad857 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -45,12 +45,11 @@
 import dpctl.tensor as dpt
 import dpctl.tensor._tensor_elementwise_impl as ti
 import dpctl.utils as dpu
-
 import numpy
 
 import dpnp
-from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc
 import dpnp.backend.extensions.ufunc._ufunc_impl as ufi
+from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc
 
 from .dpnp_utils import get_usm_allocations
 
@@ -885,8 +884,6 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
 
     if not new:
 
-        print("OLD")
-
         # Firstly handle finite values:
         # result = absolute(a - b) <= atol + rtol * absolute(b)
         dt = dpnp.result_type(b, rtol, atol)
@@ -906,12 +903,49 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
 
     else:
 
-        print("NEW")
+        dt = dpnp.result_type(a, b, 1.0)
+
+        if dpnp.isscalar(a):
+            usm_type = b.usm_type
+            exec_q = b.sycl_queue
+            a = dpnp.array(
+                a,
+                dt,
+                usm_type=usm_type,
+                sycl_queue=exec_q,
+            )
+        elif dpnp.isscalar(b):
+            usm_type = a.usm_type
+            exec_q = a.sycl_queue
+            b = dpnp.array(
+                b,
+                dt,
+                usm_type=usm_type,
+                sycl_queue=exec_q,
+            )
+        else:
+            usm_type, exec_q = get_usm_allocations([a, b])
+
+        # remove order="C"
+        a = dpnp.astype(a, dt, order="C", casting="same_kind", copy=False)
+        b = dpnp.astype(b, dt, order="C", casting="same_kind", copy=False)
+
+        try:
+            res_shape = dpnp.broadcast_shapes(a.shape, b.shape)
+        except ValueError:
+            raise ValueError(
+                "operands could not be broadcast together with shapes "
+                f"{a.shape} and {b.shape}"
+            )
+
+        if a.shape != res_shape:
+            a = dpnp.broadcast_to(a, res_shape)
+        if b.shape != res_shape:
+            b = dpnp.broadcast_to(b, res_shape)
 
-        usm_type, exec_q = get_usm_allocations([a, b])
         out_dtype = dpnp.bool
         output = dpnp.empty(
-            a.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
+            res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
         )
 
         _manager = dpu.SequentialOrderManager[exec_q]
@@ -930,7 +964,6 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
         return output
 
 
-
 def iscomplex(x):
     """
     Returns a bool array, where ``True`` if input element is complex.

From 1088fa7a37f52d41e9ebe572ed40f4d04db2110a Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 24 Jun 2025 04:46:30 -0700
Subject: [PATCH 04/32] Use three_offsets_indexer in IsCloseFunctor

---
 .../kernels/elementwise_functions/isclose.hpp | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 788c569d7d99..881b166b85a6 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -155,14 +155,14 @@ template <typename T1,
           typename T2,
           typename scT,
           typename resTy,
-          typename InOutIndexerT>
+          typename ThreeOffsets_IndexerT>
 struct IsCloseFunctor
 {
 private:
     const T1 *a_ = nullptr;
     const T2 *b_ = nullptr;
     resTy *out_ = nullptr;
-    const InOutIndexerT inp_out_indexer_;
+    const ThreeOffsets_IndexerT three_offsets_indexer_;
     const scT rtol_;
     const scT atol_;
     const bool equal_nan_;
@@ -171,31 +171,35 @@ struct IsCloseFunctor
     IsCloseFunctor(const T1 *a,
                    const T2 *b,
                    resTy *out,
-                   const InOutIndexerT &inp_out_indexer,
+                   const ThreeOffsets_IndexerT &inps_res_indexer,
                    const scT rtol,
                    const scT atol,
                    const bool equal_nan)
-        : a_(a), b_(b), out_(out), inp_out_indexer_(inp_out_indexer),
+        : a_(a), b_(b), out_(out), three_offsets_indexer_(inps_res_indexer),
           rtol_(rtol), atol_(atol), equal_nan_(equal_nan)
     {
     }
 
     void operator()(sycl::id<1> wid) const
     {
-        const auto &offsets_ = inp_out_indexer_(wid.get(0));
-        const dpctl::tensor::ssize_t &inp_offset = offsets_.get_first_offset();
-        const dpctl::tensor::ssize_t &out_offset = offsets_.get_second_offset();
+        const auto &three_offsets_ = three_offsets_indexer_(wid.get(0));
+        const dpctl::tensor::ssize_t &inp1_offset =
+            three_offsets_.get_first_offset();
+        const dpctl::tensor::ssize_t &inp2_offset =
+            three_offsets_.get_second_offset();
+        const dpctl::tensor::ssize_t &out_offset =
+            three_offsets_.get_third_offset();
 
         using dpctl::tensor::type_utils::is_complex_v;
         if constexpr (is_complex_v<T1>) {
-            T1 z_a = a_[inp_offset];
-            T2 z_b = b_[inp_offset];
+            T1 z_a = a_[inp1_offset];
+            T2 z_b = b_[inp2_offset];
             bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
             bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
             out_[out_offset] = x && y;
         }
         else {
-            out_[out_offset] = isclose(a_[inp_offset], b_[inp_offset], rtol_,
+            out_[out_offset] = isclose(a_[inp1_offset], b_[inp2_offset], rtol_,
                                        atol_, equal_nan_);
         }
     }

From 1151fc38a46aef98979b966f2f4aa428307840b6 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 17 Jul 2025 03:36:54 -0700
Subject: [PATCH 05/32] Add strided implementation of isclose()

---
 .../ufunc/elementwise_functions/isclose.cpp   | 168 +++++++++++++++++-
 .../kernels/elementwise_functions/isclose.hpp |  61 ++++---
 2 files changed, 198 insertions(+), 31 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 33346be35058..6f4041b9673a 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -67,6 +67,21 @@ namespace impl
 using dpctl::tensor::usm_ndarray;
 using event_vector = std::vector<sycl::event>;
 
+using isclose_fn_ptr_t = sycl::event (*)(sycl::queue &,
+                                         int,
+                                         std::size_t,
+                                         const py::ssize_t *,
+                                         const py::object &,
+                                         const py::object &,
+                                         const py::object &,
+                                         const char *,
+                                         py::ssize_t,
+                                         const char *,
+                                         py::ssize_t,
+                                         char *,
+                                         py::ssize_t,
+                                         const std::vector<sycl::event> &);
+
 using isclose_contig_fn_ptr_t =
     sycl::event (*)(sycl::queue &,
                     std::size_t,
@@ -78,6 +93,8 @@ using isclose_contig_fn_ptr_t =
                     char *,
                     const std::vector<sycl::event> &);
 
+static isclose_fn_ptr_t isclose_dispatch_table[td_ns::num_types]
+                                              [td_ns::num_types];
 static isclose_contig_fn_ptr_t isclose_contig_dispatch_table[td_ns::num_types]
                                                             [td_ns::num_types];
 
@@ -119,6 +136,35 @@ struct IsCloseOutputType
     static constexpr bool is_defined = !std::is_same_v<value_type, void>;
 };
 
+template <typename T1, typename T2>
+sycl::event isclose_strided_call(sycl::queue &exec_q,
+                                 int nd,
+                                 std::size_t nelems,
+                                 const py::ssize_t *shape_strides,
+                                 const py::object &rtol_,
+                                 const py::object &atol_,
+                                 const py::object &equal_nan_,
+                                 const char *in1_p,
+                                 py::ssize_t in1_offset,
+                                 const char *in2_p,
+                                 py::ssize_t in2_offset,
+                                 char *out_p,
+                                 py::ssize_t out_offset,
+                                 const std::vector<sycl::event> &depends)
+{
+    using dpctl::tensor::type_utils::is_complex_v;
+    using scT = std::conditional_t<is_complex_v<T1>,
+                                   typename value_type_of<T1>::type, T1>;
+
+    const scT rtol = py::cast<scT>(rtol_);
+    const scT atol = py::cast<scT>(atol_);
+    const bool equal_nan = py::cast<bool>(equal_nan_);
+
+    return dpnp::kernels::isclose::isclose_strided_impl<T1, T2, scT>(
+        exec_q, nd, nelems, shape_strides, rtol, atol, equal_nan, in1_p,
+        in1_offset, in2_p, in2_offset, out_p, out_offset, depends);
+}
+
 template <typename T1, typename T2>
 sycl::event isclose_contig_call(sycl::queue &q,
                                 std::size_t nelems,
@@ -143,6 +189,22 @@ sycl::event isclose_contig_call(sycl::queue &q,
         depends);
 }
 
+template <typename fnT, typename T1, typename T2>
+struct IsCloseFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename IsCloseOutputType<T1, T2>::value_type, void>)
+        {
+            return nullptr;
+        }
+        else {
+            return isclose_strided_call<T1, T2>;
+        }
+    }
+};
+
 template <typename fnT, typename T1, typename T2>
 struct IsCloseContigFactory
 {
@@ -159,10 +221,15 @@ struct IsCloseContigFactory
 
 void populate_isclose_dispatch_table()
 {
+    td_ns::DispatchTableBuilder<isclose_fn_ptr_t, IsCloseFactory,
+                                td_ns::num_types>
+        dvb1;
+    dvb1.populate_dispatch_table(isclose_dispatch_table);
+
     td_ns::DispatchTableBuilder<isclose_contig_fn_ptr_t, IsCloseContigFactory,
                                 td_ns::num_types>
-        dvb;
-    dvb.populate_dispatch_table(isclose_contig_dispatch_table);
+        dvb2;
+    dvb2.populate_dispatch_table(isclose_contig_dispatch_table);
 }
 
 std::pair<sycl::event, sycl::event>
@@ -257,9 +324,101 @@ std::pair<sycl::event, sycl::event>
 
         return std::make_pair(ht_ev, comp_ev);
     }
-    else {
-        throw py::value_error("Stride implementation is not implemented");
+
+    // simplify iteration space
+    //     if 1d with strides 1 - input is contig
+    //     dispatch to strided
+
+    std::cout << "Strided impl run" << std::endl;
+
+    auto const &a_strides = a.get_strides_vector();
+    auto const &b_strides = b.get_strides_vector();
+    auto const &res_strides = res.get_strides_vector();
+
+    using shT = std::vector<py::ssize_t>;
+    shT simplified_shape;
+    shT simplified_a_strides;
+    shT simplified_b_strides;
+    shT simplified_res_strides;
+    py::ssize_t a_offset(0);
+    py::ssize_t b_offset(0);
+    py::ssize_t res_offset(0);
+
+    int nd = res_nd;
+    const py::ssize_t *shape = a_shape;
+
+    py_internal::simplify_iteration_space_3(
+        nd, shape, a_strides, b_strides, res_strides,
+        // output
+        simplified_shape, simplified_a_strides, simplified_b_strides,
+        simplified_res_strides, a_offset, b_offset, res_offset);
+
+    if (nd == 1 && simplified_a_strides[0] == 1 &&
+        simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1)
+    {
+        // Special case of contiguous data
+        auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid];
+
+        if (contig_fn == nullptr) {
+            throw std::runtime_error(
+                "Contiguous implementation is missing for a_typeid=" +
+                std::to_string(a_typeid) +
+                " and b_typeid=" + std::to_string(b_typeid));
+        }
+
+        int a_elem_size = a.get_elemsize();
+        int b_elem_size = b.get_elemsize();
+        int res_elem_size = res.get_elemsize();
+        auto comp_ev = contig_fn(
+            exec_q, nelems, rtol, atol, equal_nan,
+            a_data + a_elem_size * a_offset, b_data + b_elem_size * b_offset,
+            res_data + res_elem_size * res_offset, depends);
+
+        sycl::event ht_ev =
+            dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev});
+
+        return std::make_pair(ht_ev, comp_ev);
+    }
+
+    auto fn = isclose_dispatch_table[a_typeid][b_typeid];
+
+    if (fn == nullptr) {
+        throw std::runtime_error(
+            "isclose implementation is missing for a_typeid=" +
+            std::to_string(a_typeid) +
+            " and b_typeid=" + std::to_string(b_typeid));
     }
+
+    using dpctl::tensor::offset_utils::device_allocate_and_pack;
+
+    std::vector<sycl::event> host_tasks{};
+    host_tasks.reserve(2);
+
+    auto ptr_size_event_triple_ = device_allocate_and_pack<py::ssize_t>(
+        exec_q, host_tasks, simplified_shape, simplified_a_strides,
+        simplified_b_strides, simplified_res_strides);
+    auto shape_strides_owner = std::move(std::get<0>(ptr_size_event_triple_));
+    const sycl::event &copy_shape_ev = std::get<2>(ptr_size_event_triple_);
+    const py::ssize_t *shape_strides = shape_strides_owner.get();
+
+    std::vector<sycl::event> all_deps;
+    all_deps.reserve(depends.size() + 1);
+    all_deps.insert(all_deps.end(), depends.begin(), depends.end());
+    all_deps.push_back(copy_shape_ev);
+
+    sycl::event comp_ev =
+        fn(exec_q, nelems, nd, shape_strides, atol, rtol, equal_nan, a_data,
+           a_offset, b_data, b_offset, res_data, res_offset, all_deps);
+
+    // async free of shape_strides temporary
+    sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
+        exec_q, {comp_ev}, shape_strides_owner);
+
+    host_tasks.push_back(tmp_cleanup_ev);
+
+    return std::make_pair(
+        dpctl::utils::keep_args_alive(exec_q, {a, b, res}, host_tasks),
+        comp_ev);
 }
 
 } // namespace impl
@@ -267,6 +426,7 @@ std::pair<sycl::event, sycl::event>
 void init_isclose(py::module_ m)
 {
     impl::populate_isclose_dispatch_table();
+
     m.def("_isclose", &impl::py_isclose, "", py::arg("a"), py::arg("b"),
           py::arg("rtol"), py::arg("atol"), py::arg("equal_nan"),
           py::arg("res"), py::arg("sycl_queue"),
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 881b166b85a6..988f67e46ccd 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -315,37 +315,44 @@ struct IsCloseContigFunctor
     }
 };
 
-// template <typename T, typename scT>
-// sycl::event nan_to_num_strided_impl(sycl::queue &q,
-//                                     const size_t nelems,
-//                                     const int nd,
-//                                     const dpctl::tensor::ssize_t
-//                                     *shape_strides, const scT nan, const scT
-//                                     posinf, const scT neginf, const char
-//                                     *in_cp, const dpctl::tensor::ssize_t
-//                                     in_offset, char *out_cp, const
-//                                     dpctl::tensor::ssize_t out_offset, const
-//                                     std::vector<sycl::event> &depends)
-// {
-//     dpctl::tensor::type_utils::validate_type_for_device<T>(q);
+template <typename T1, typename T2, typename scT>
+sycl::event isclose_strided_impl(sycl::queue &exec_q,
+                                 std::size_t nelems,
+                                 const int nd,
+                                 const dpctl::tensor::ssize_t *shape_strides,
+                                 const scT rtol,
+                                 const scT atol,
+                                 const bool equal_nan,
+                                 const char *a_cp,
+                                 const dpctl::tensor::ssize_t a_offset,
+                                 const char *b_cp,
+                                 const dpctl::tensor::ssize_t b_offset,
+                                 char *out_cp,
+                                 const dpctl::tensor::ssize_t out_offset,
+                                 const std::vector<sycl::event> &depends)
+{
+    dpctl::tensor::type_utils::validate_type_for_device<T1>(exec_q);
 
-//     const T *in_tp = reinterpret_cast<const T *>(in_cp);
-//     T *out_tp = reinterpret_cast<T *>(out_cp);
+    const T1 *a_tp = reinterpret_cast<const T1 *>(a_cp);
+    const T2 *b_tp = reinterpret_cast<const T2 *>(b_cp);
 
-//     using InOutIndexerT =
-//         typename dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer;
-//     const InOutIndexerT indexer{nd, in_offset, out_offset, shape_strides};
+    using resTy = bool;
+    resTy *out_tp = reinterpret_cast<resTy *>(out_cp);
 
-//     sycl::event comp_ev = q.submit([&](sycl::handler &cgh) {
-//         cgh.depends_on(depends);
+    using IndexerT =
+        typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer;
+    const IndexerT indexer{nd, a_offset, b_offset, out_offset, shape_strides};
 
-//         using NanToNumFunc = NanToNumFunctor<T, scT, InOutIndexerT>;
-//         cgh.parallel_for<NanToNumFunc>(
-//             {nelems},
-//             NanToNumFunc(in_tp, out_tp, indexer, nan, posinf, neginf));
-//     });
-//     return comp_ev;
-// }
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        using IsCloseFunc = IsCloseFunctor<T1, T2, scT, resTy, IndexerT>;
+        cgh.parallel_for<IsCloseFunc>(
+            {nelems},
+            IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan));
+    });
+    return comp_ev;
+}
 
 template <typename T1,
           typename T2,

From 5f90ab5cdd2f7072a503936521baa26baf7f09de Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 24 Jul 2025 03:42:30 -0700
Subject: [PATCH 06/32] Update strided implementation and rename functions

---
 .../ufunc/elementwise_functions/isclose.cpp   | 163 ++++++++-------
 .../kernels/elementwise_functions/isclose.hpp | 192 +++++-------------
 2 files changed, 135 insertions(+), 220 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 6f4041b9673a..86f251a1450b 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -67,36 +67,37 @@ namespace impl
 using dpctl::tensor::usm_ndarray;
 using event_vector = std::vector<sycl::event>;
 
-using isclose_fn_ptr_t = sycl::event (*)(sycl::queue &,
-                                         int,
-                                         std::size_t,
-                                         const py::ssize_t *,
-                                         const py::object &,
-                                         const py::object &,
-                                         const py::object &,
-                                         const char *,
-                                         py::ssize_t,
-                                         const char *,
-                                         py::ssize_t,
-                                         char *,
-                                         py::ssize_t,
-                                         const std::vector<sycl::event> &);
-
-using isclose_contig_fn_ptr_t =
+using isclose_strided_scalar_fn_ptr_t =
     sycl::event (*)(sycl::queue &,
+                    int,
                     std::size_t,
+                    const py::ssize_t *,
                     const py::object &,
                     const py::object &,
                     const py::object &,
                     const char *,
+                    py::ssize_t,
                     const char *,
+                    py::ssize_t,
                     char *,
+                    py::ssize_t,
                     const std::vector<sycl::event> &);
 
-static isclose_fn_ptr_t isclose_dispatch_table[td_ns::num_types]
-                                              [td_ns::num_types];
-static isclose_contig_fn_ptr_t isclose_contig_dispatch_table[td_ns::num_types]
-                                                            [td_ns::num_types];
+using isclose_contig_scalar_fn_ptr_t =
+    sycl::event (*)(sycl::queue &,
+                    std::size_t,
+                    const py::object &,
+                    const py::object &,
+                    const py::object &,
+                    const char *,
+                    const char *,
+                    char *,
+                    const std::vector<sycl::event> &);
+
+static isclose_strided_scalar_fn_ptr_t
+    isclose_strided_scalar_dispatch_table[td_ns::num_types][td_ns::num_types];
+static isclose_contig_scalar_fn_ptr_t
+    isclose_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
 
 template <typename T>
 using value_type_of_t = typename value_type_of<T>::type;
@@ -137,60 +138,60 @@ struct IsCloseOutputType
 };
 
 template <typename T1, typename T2>
-sycl::event isclose_strided_call(sycl::queue &exec_q,
-                                 int nd,
-                                 std::size_t nelems,
-                                 const py::ssize_t *shape_strides,
-                                 const py::object &rtol_,
-                                 const py::object &atol_,
-                                 const py::object &equal_nan_,
-                                 const char *in1_p,
-                                 py::ssize_t in1_offset,
-                                 const char *in2_p,
-                                 py::ssize_t in2_offset,
-                                 char *out_p,
-                                 py::ssize_t out_offset,
-                                 const std::vector<sycl::event> &depends)
+sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
+                                        int nd,
+                                        std::size_t nelems,
+                                        const py::ssize_t *shape_strides,
+                                        const py::object &py_rtol,
+                                        const py::object &py_atol,
+                                        const py::object &py_equal_nan,
+                                        const char *in1_p,
+                                        py::ssize_t in1_offset,
+                                        const char *in2_p,
+                                        py::ssize_t in2_offset,
+                                        char *out_p,
+                                        py::ssize_t out_offset,
+                                        const std::vector<sycl::event> &depends)
 {
     using dpctl::tensor::type_utils::is_complex_v;
     using scT = std::conditional_t<is_complex_v<T1>,
                                    typename value_type_of<T1>::type, T1>;
 
-    const scT rtol = py::cast<scT>(rtol_);
-    const scT atol = py::cast<scT>(atol_);
-    const bool equal_nan = py::cast<bool>(equal_nan_);
+    const scT rtol = py::cast<scT>(py_rtol);
+    const scT atol = py::cast<scT>(py_atol);
+    const bool equal_nan = py::cast<bool>(py_equal_nan);
 
-    return dpnp::kernels::isclose::isclose_strided_impl<T1, T2, scT>(
+    return dpnp::kernels::isclose::isclose_strided_scalar_impl<T1, T2, scT>(
         exec_q, nd, nelems, shape_strides, rtol, atol, equal_nan, in1_p,
         in1_offset, in2_p, in2_offset, out_p, out_offset, depends);
 }
 
 template <typename T1, typename T2>
-sycl::event isclose_contig_call(sycl::queue &q,
-                                std::size_t nelems,
-                                const py::object &rtol_,
-                                const py::object &atol_,
-                                const py::object &equal_nan_,
-                                const char *in1_p,
-                                const char *in2_p,
-                                char *out_p,
-                                const event_vector &depends)
+sycl::event isclose_contig_scalar_call(sycl::queue &q,
+                                       std::size_t nelems,
+                                       const py::object &py_rtol,
+                                       const py::object &py_atol,
+                                       const py::object &py_equal_nan,
+                                       const char *in1_p,
+                                       const char *in2_p,
+                                       char *out_p,
+                                       const event_vector &depends)
 {
     using dpctl::tensor::type_utils::is_complex_v;
     using scT = std::conditional_t<is_complex_v<T1>,
                                    typename value_type_of<T1>::type, T1>;
 
-    const scT rtol = py::cast<scT>(rtol_);
-    const scT atol = py::cast<scT>(atol_);
-    const bool equal_nan = py::cast<bool>(equal_nan_);
+    const scT rtol = py::cast<scT>(py_rtol);
+    const scT atol = py::cast<scT>(py_atol);
+    const bool equal_nan = py::cast<bool>(py_equal_nan);
 
-    return dpnp::kernels::isclose::isclose_contig_impl<T1, T2, scT>(
+    return dpnp::kernels::isclose::isclose_contig_scalar_impl<T1, T2, scT>(
         q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0,
         depends);
 }
 
 template <typename fnT, typename T1, typename T2>
-struct IsCloseFactory
+struct IsCloseStridedScalarFactory
 {
     fnT get()
     {
@@ -200,13 +201,13 @@ struct IsCloseFactory
             return nullptr;
         }
         else {
-            return isclose_strided_call<T1, T2>;
+            return isclose_strided_scalar_call<T1, T2>;
         }
     }
 };
 
 template <typename fnT, typename T1, typename T2>
-struct IsCloseContigFactory
+struct IsCloseContigScalarFactory
 {
     fnT get()
     {
@@ -214,33 +215,33 @@ struct IsCloseContigFactory
             return nullptr;
         }
         else {
-            return isclose_contig_call<T1, T2>;
+            return isclose_contig_scalar_call<T1, T2>;
         }
     }
 };
 
 void populate_isclose_dispatch_table()
 {
-    td_ns::DispatchTableBuilder<isclose_fn_ptr_t, IsCloseFactory,
-                                td_ns::num_types>
+    td_ns::DispatchTableBuilder<isclose_strided_scalar_fn_ptr_t,
+                                IsCloseStridedScalarFactory, td_ns::num_types>
         dvb1;
-    dvb1.populate_dispatch_table(isclose_dispatch_table);
+    dvb1.populate_dispatch_table(isclose_strided_scalar_dispatch_table);
 
-    td_ns::DispatchTableBuilder<isclose_contig_fn_ptr_t, IsCloseContigFactory,
-                                td_ns::num_types>
+    td_ns::DispatchTableBuilder<isclose_contig_scalar_fn_ptr_t,
+                                IsCloseContigScalarFactory, td_ns::num_types>
         dvb2;
     dvb2.populate_dispatch_table(isclose_contig_dispatch_table);
 }
 
 std::pair<sycl::event, sycl::event>
-    py_isclose(const usm_ndarray &a,
-               const usm_ndarray &b,
-               const py::object &rtol,
-               const py::object &atol,
-               const py::object &equal_nan,
-               const usm_ndarray &res,
-               sycl::queue &exec_q,
-               const std::vector<sycl::event> &depends)
+    py_isclose_scalar(const usm_ndarray &a,
+                      const usm_ndarray &b,
+                      const py::object &py_rtol,
+                      const py::object &py_atol,
+                      const py::object &py_equal_nan,
+                      const usm_ndarray &res,
+                      sycl::queue &exec_q,
+                      const std::vector<sycl::event> &depends)
 {
     auto types = td_ns::usm_ndarray_types();
     int a_typeid = types.typenum_to_lookup_id(a.get_typenum());
@@ -317,8 +318,8 @@ std::pair<sycl::event, sycl::event>
                 " and b_typeid=" + std::to_string(b_typeid));
         }
 
-        auto comp_ev = contig_fn(exec_q, nelems, rtol, atol, equal_nan, a_data,
-                                 b_data, res_data, depends);
+        auto comp_ev = contig_fn(exec_q, nelems, py_rtol, py_atol, py_equal_nan,
+                                 a_data, b_data, res_data, depends);
         sycl::event ht_ev =
             dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev});
 
@@ -366,11 +367,13 @@ std::pair<sycl::event, sycl::event>
                 " and b_typeid=" + std::to_string(b_typeid));
         }
 
+        std::cout << "Run contig impl in strided" << std::endl;
+
         int a_elem_size = a.get_elemsize();
         int b_elem_size = b.get_elemsize();
         int res_elem_size = res.get_elemsize();
         auto comp_ev = contig_fn(
-            exec_q, nelems, rtol, atol, equal_nan,
+            exec_q, nelems, py_rtol, py_atol, py_equal_nan,
             a_data + a_elem_size * a_offset, b_data + b_elem_size * b_offset,
             res_data + res_elem_size * res_offset, depends);
 
@@ -380,15 +383,17 @@ std::pair<sycl::event, sycl::event>
         return std::make_pair(ht_ev, comp_ev);
     }
 
-    auto fn = isclose_dispatch_table[a_typeid][b_typeid];
+    auto strided_fn = isclose_strided_scalar_dispatch_table[a_typeid][b_typeid];
 
-    if (fn == nullptr) {
+    if (strided_fn == nullptr) {
         throw std::runtime_error(
             "isclose implementation is missing for a_typeid=" +
             std::to_string(a_typeid) +
             " and b_typeid=" + std::to_string(b_typeid));
     }
 
+    std::cout << "Run strided impl" << std::endl;
+
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
 
     std::vector<sycl::event> host_tasks{};
@@ -406,9 +411,9 @@ std::pair<sycl::event, sycl::event>
     all_deps.insert(all_deps.end(), depends.begin(), depends.end());
     all_deps.push_back(copy_shape_ev);
 
-    sycl::event comp_ev =
-        fn(exec_q, nelems, nd, shape_strides, atol, rtol, equal_nan, a_data,
-           a_offset, b_data, b_offset, res_data, res_offset, all_deps);
+    sycl::event comp_ev = strided_fn(
+        exec_q, nd, nelems, shape_strides, py_rtol, py_atol, py_equal_nan,
+        a_data, a_offset, b_data, b_offset, res_data, res_offset, all_deps);
 
     // async free of shape_strides temporary
     sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free(
@@ -427,9 +432,9 @@ void init_isclose(py::module_ m)
 {
     impl::populate_isclose_dispatch_table();
 
-    m.def("_isclose", &impl::py_isclose, "", py::arg("a"), py::arg("b"),
-          py::arg("rtol"), py::arg("atol"), py::arg("equal_nan"),
-          py::arg("res"), py::arg("sycl_queue"),
+    m.def("_isclose_scalar", &impl::py_isclose_scalar, "", py::arg("a"),
+          py::arg("b"), py::arg("py_rtol"), py::arg("py_atol"),
+          py::arg("py_equal_nan"), py::arg("res"), py::arg("sycl_queue"),
           py::arg("depends") = py::list());
 }
 
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 988f67e46ccd..2eb52bbd7587 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -23,100 +23,6 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
-// #pragma once
-
-// #include <sycl/sycl.hpp>
-// #include <complex>
-// #include <cstddef>
-// #include <cstdint>
-// #include <type_traits>
-// #include <vector>
-
-// // dpctl tensor headers
-// #include "kernels/alignment.hpp"
-// #include "utils/sycl_utils.hpp"
-// #include "utils/type_utils.hpp"
-
-// namespace dpnp::kernels::isclose
-// {
-
-// template <typename T, typename scT>
-// inline bool isclose_scalar(T a, T b, scT rtol, scT atol, bool equal_nan)
-// {
-//     if constexpr (dpctl::tensor::type_utils::is_complex_v<T>) {
-//         return isclose_scalar(a.real(), b.real(), rtol, atol, equal_nan) &&
-//                isclose_scalar(a.imag(), b.imag(), rtol, atol, equal_nan);
-//     }
-//     else {
-//         if (sycl::isnan(a) || sycl::isnan(b)) {
-//             return equal_nan && sycl::isnan(a) && sycl::isnan(b);
-//         }
-//         return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
-//     }
-// }
-
-// template <typename T, typename scT, std::uint8_t vec_sz = 4, std::uint8_t
-// n_vecs = 2, bool enable_sg = true> struct IsCloseContigFunctor
-// {
-//     const T *in1_;
-//     const T *in2_;
-//     bool *out_;
-//     std::size_t n_;
-//     scT rtol_;
-//     scT atol_;
-//     bool equal_nan_;
-
-//     IsCloseContigFunctor(const T *in1, const T *in2, bool *out,
-//                          std::size_t n, scT rtol, scT atol, bool equal_nan)
-//         : in1_(in1), in2_(in2), out_(out),
-//           n_(n), rtol_(rtol), atol_(atol), equal_nan_(equal_nan)
-//     {
-//     }
-
-//     void operator()(sycl::nd_item<1> item) const
-//     {
-//         const std::size_t gid = item.get_global_linear_id();
-//         if (gid < n_) {
-//             out_[gid] = isclose_scalar<T, scT>(in1_[gid], in2_[gid], rtol_,
-//             atol_, equal_nan_);
-//         }
-//     }
-// };
-
-// template <typename T, typename scT>
-// sycl::event isclose_contig_impl(sycl::queue &q,
-//                                 std::size_t n,
-//                                 scT rtol,
-//                                 scT atol,
-//                                 bool equal_nan,
-//                                 const char *in1_p,
-//                                 const char *in2_p,
-//                                 char *out_p,
-//                                 const std::vector<sycl::event> &depends)
-// {
-//     dpctl::tensor::type_utils::validate_type_for_device<T>(q);
-
-//     const T *in1 = reinterpret_cast<const T *>(in1_p);
-//     const T *in2 = reinterpret_cast<const T *>(in2_p);
-//     bool *out = reinterpret_cast<bool *>(out_p);
-
-//     constexpr std::size_t wg_size = 128;
-//     const std::size_t n_wgs = (n + wg_size - 1) / wg_size;
-//     const sycl::range<1> gws{n_wgs * wg_size};
-//     const sycl::range<1> lws{wg_size};
-
-//     using Kernel = IsCloseContigFunctor<T, scT>;
-
-//     return q.submit([&](sycl::handler &cgh) {
-//         cgh.depends_on(depends);
-//         cgh.parallel_for<Kernel>(
-//             sycl::nd_range<1>(gws, lws),
-//             Kernel(in1, in2, out, n, rtol, atol, equal_nan));
-//     });
-// }
-
-// } // namespace dpnp::kernels::isclose
-
 #pragma once
 
 #include <complex>
@@ -156,7 +62,7 @@ template <typename T1,
           typename scT,
           typename resTy,
           typename ThreeOffsets_IndexerT>
-struct IsCloseFunctor
+struct IsCloseStridedScalarFunctor
 {
 private:
     const T1 *a_ = nullptr;
@@ -168,13 +74,13 @@ struct IsCloseFunctor
     const bool equal_nan_;
 
 public:
-    IsCloseFunctor(const T1 *a,
-                   const T2 *b,
-                   resTy *out,
-                   const ThreeOffsets_IndexerT &inps_res_indexer,
-                   const scT rtol,
-                   const scT atol,
-                   const bool equal_nan)
+    IsCloseStridedScalarFunctor(const T1 *a,
+                                const T2 *b,
+                                resTy *out,
+                                const ThreeOffsets_IndexerT &inps_res_indexer,
+                                const scT rtol,
+                                const scT atol,
+                                const bool equal_nan)
         : a_(a), b_(b), out_(out), three_offsets_indexer_(inps_res_indexer),
           rtol_(rtol), atol_(atol), equal_nan_(equal_nan)
     {
@@ -212,7 +118,7 @@ template <typename T1,
           std::uint8_t vec_sz = 4u,
           std::uint8_t n_vecs = 2u,
           bool enable_sg_loadstore = true>
-struct IsCloseContigFunctor
+struct IsCloseContigScalarFunctor
 {
 private:
     const T1 *a_ = nullptr;
@@ -224,13 +130,13 @@ struct IsCloseContigFunctor
     const bool equal_nan_;
 
 public:
-    IsCloseContigFunctor(const T1 *a,
-                         const T2 *b,
-                         resTy *out,
-                         const std::size_t n_elems,
-                         const scT rtol,
-                         const scT atol,
-                         const bool equal_nan)
+    IsCloseContigScalarFunctor(const T1 *a,
+                               const T2 *b,
+                               resTy *out,
+                               const std::size_t n_elems,
+                               const scT rtol,
+                               const scT atol,
+                               const bool equal_nan)
         : a_(a), b_(b), out_(out), nelems_(n_elems), rtol_(rtol), atol_(atol),
           equal_nan_(equal_nan)
     {
@@ -316,20 +222,21 @@ struct IsCloseContigFunctor
 };
 
 template <typename T1, typename T2, typename scT>
-sycl::event isclose_strided_impl(sycl::queue &exec_q,
-                                 std::size_t nelems,
-                                 const int nd,
-                                 const dpctl::tensor::ssize_t *shape_strides,
-                                 const scT rtol,
-                                 const scT atol,
-                                 const bool equal_nan,
-                                 const char *a_cp,
-                                 const dpctl::tensor::ssize_t a_offset,
-                                 const char *b_cp,
-                                 const dpctl::tensor::ssize_t b_offset,
-                                 char *out_cp,
-                                 const dpctl::tensor::ssize_t out_offset,
-                                 const std::vector<sycl::event> &depends)
+sycl::event
+    isclose_strided_scalar_impl(sycl::queue &exec_q,
+                                const int nd,
+                                std::size_t nelems,
+                                const dpctl::tensor::ssize_t *shape_strides,
+                                const scT rtol,
+                                const scT atol,
+                                const bool equal_nan,
+                                const char *a_cp,
+                                const dpctl::tensor::ssize_t a_offset,
+                                const char *b_cp,
+                                const dpctl::tensor::ssize_t b_offset,
+                                char *out_cp,
+                                const dpctl::tensor::ssize_t out_offset,
+                                const std::vector<sycl::event> &depends)
 {
     dpctl::tensor::type_utils::validate_type_for_device<T1>(exec_q);
 
@@ -346,7 +253,8 @@ sycl::event isclose_strided_impl(sycl::queue &exec_q,
     sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(depends);
 
-        using IsCloseFunc = IsCloseFunctor<T1, T2, scT, resTy, IndexerT>;
+        using IsCloseFunc =
+            IsCloseStridedScalarFunctor<T1, T2, scT, resTy, IndexerT>;
         cgh.parallel_for<IsCloseFunc>(
             {nelems},
             IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan));
@@ -359,18 +267,19 @@ template <typename T1,
           typename scT,
           std::uint8_t vec_sz = 4u,
           std::uint8_t n_vecs = 2u>
-sycl::event isclose_contig_impl(sycl::queue &exec_q,
-                                std::size_t nelems,
-                                const scT rtol,
-                                const scT atol,
-                                const bool equal_nan,
-                                const char *a_cp,
-                                ssize_t a_offset,
-                                const char *b_cp,
-                                ssize_t b_offset,
-                                char *out_cp,
-                                ssize_t out_offset,
-                                const std::vector<sycl::event> &depends = {})
+sycl::event
+    isclose_contig_scalar_impl(sycl::queue &exec_q,
+                               std::size_t nelems,
+                               const scT rtol,
+                               const scT atol,
+                               const bool equal_nan,
+                               const char *a_cp,
+                               ssize_t a_offset,
+                               const char *b_cp,
+                               ssize_t b_offset,
+                               char *out_cp,
+                               ssize_t out_offset,
+                               const std::vector<sycl::event> &depends = {})
 {
     constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz;
     const std::size_t n_work_items_needed = nelems / elems_per_wi;
@@ -384,6 +293,7 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q,
     const auto gws_range = sycl::range<1>(n_groups * lws);
     const auto lws_range = sycl::range<1>(lws);
 
+    // ? + offset
     const T1 *a_tp = reinterpret_cast<const T1 *>(a_cp) + a_offset;
     const T2 *b_tp = reinterpret_cast<const T2 *>(b_cp) + b_offset;
 
@@ -401,8 +311,8 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q,
         {
             constexpr bool enable_sg_loadstore = true;
             using IsCloseFunc =
-                IsCloseContigFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
-                                     enable_sg_loadstore>;
+                IsCloseContigScalarFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
+                                           enable_sg_loadstore>;
 
             cgh.parallel_for<IsCloseFunc>(
                 sycl::nd_range<1>(gws_range, lws_range),
@@ -411,8 +321,8 @@ sycl::event isclose_contig_impl(sycl::queue &exec_q,
         else {
             constexpr bool disable_sg_loadstore = false;
             using IsCloseFunc =
-                IsCloseContigFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
-                                     disable_sg_loadstore>;
+                IsCloseContigScalarFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
+                                           disable_sg_loadstore>;
 
             cgh.parallel_for<IsCloseFunc>(
                 sycl::nd_range<1>(gws_range, lws_range),

From 29bcb344b876004b4abfb1b656c8e5d83c5bbf34 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 24 Jul 2025 04:24:45 -0700
Subject: [PATCH 07/32] Refactor isclose scalar dispatch to use dispatch
 vectors

---
 .../ufunc/elementwise_functions/isclose.cpp   | 118 ++++++++----------
 .../kernels/elementwise_functions/isclose.hpp |  61 +++++----
 2 files changed, 83 insertions(+), 96 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 86f251a1450b..e2dc5913f3fe 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -61,12 +61,10 @@ using ext::common::value_type_of;
 
 namespace dpnp::extensions::ufunc
 {
+
 namespace impl
 {
 
-using dpctl::tensor::usm_ndarray;
-using event_vector = std::vector<sycl::event>;
-
 using isclose_strided_scalar_fn_ptr_t =
     sycl::event (*)(sycl::queue &,
                     int,
@@ -95,49 +93,32 @@ using isclose_contig_scalar_fn_ptr_t =
                     const std::vector<sycl::event> &);
 
 static isclose_strided_scalar_fn_ptr_t
-    isclose_strided_scalar_dispatch_table[td_ns::num_types][td_ns::num_types];
+    isclose_strided_scalar_dispatch_vector[td_ns::num_types];
 static isclose_contig_scalar_fn_ptr_t
-    isclose_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+    isclose_contig_dispatch_vector[td_ns::num_types];
 
 template <typename T>
 using value_type_of_t = typename value_type_of<T>::type;
 
-// // Template for checking if T is supported
-// template <typename T>
-// struct IsCloseOutputType
-// {
-//     using value_type = typename std::disjunction<
-//         td_ns::TypeMapResultEntry<T, sycl::half>,
-//         td_ns::TypeMapResultEntry<T, float>,
-//         td_ns::TypeMapResultEntry<T, double>,
-//         td_ns::TypeMapResultEntry<T, std::complex<float>>,
-//         td_ns::TypeMapResultEntry<T, std::complex<double>>,
-//         td_ns::DefaultResultEntry<void>>::result_type;
-// };
-
-// Supports only float and complex types
-template <typename T1, typename T2>
+/**
+ * @brief A factory to define pairs of supported types for which
+ * isclose function is available.
+ *
+ * @tparam T Type of input vector `a` and `b` and of result vector `y`.
+ */
+template <typename T>
 struct IsCloseOutputType
 {
     using value_type = typename std::disjunction<
-        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, bool>,
-        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, bool>,
-        td_ns::BinaryTypeMapResultEntry<T1,
-                                        std::complex<float>,
-                                        T2,
-                                        std::complex<float>,
-                                        bool>,
-        td_ns::BinaryTypeMapResultEntry<T1,
-                                        std::complex<double>,
-                                        T2,
-                                        std::complex<double>,
-                                        bool>,
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
         td_ns::DefaultResultEntry<void>>::result_type;
-
-    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
 };
 
-template <typename T1, typename T2>
+template <typename T>
 sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
                                         int nd,
                                         std::size_t nelems,
@@ -154,19 +135,18 @@ sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
                                         const std::vector<sycl::event> &depends)
 {
     using dpctl::tensor::type_utils::is_complex_v;
-    using scT = std::conditional_t<is_complex_v<T1>,
-                                   typename value_type_of<T1>::type, T1>;
+    using scT = std::conditional_t<is_complex_v<T>, value_type_of_t<T>, T>;
 
     const scT rtol = py::cast<scT>(py_rtol);
     const scT atol = py::cast<scT>(py_atol);
     const bool equal_nan = py::cast<bool>(py_equal_nan);
 
-    return dpnp::kernels::isclose::isclose_strided_scalar_impl<T1, T2, scT>(
+    return dpnp::kernels::isclose::isclose_strided_scalar_impl<T, scT>(
         exec_q, nd, nelems, shape_strides, rtol, atol, equal_nan, in1_p,
         in1_offset, in2_p, in2_offset, out_p, out_offset, depends);
 }
 
-template <typename T1, typename T2>
+template <typename T>
 sycl::event isclose_contig_scalar_call(sycl::queue &q,
                                        std::size_t nelems,
                                        const py::object &py_rtol,
@@ -175,71 +155,74 @@ sycl::event isclose_contig_scalar_call(sycl::queue &q,
                                        const char *in1_p,
                                        const char *in2_p,
                                        char *out_p,
-                                       const event_vector &depends)
+                                       const std::vector<sycl::event> &depends)
 {
     using dpctl::tensor::type_utils::is_complex_v;
-    using scT = std::conditional_t<is_complex_v<T1>,
-                                   typename value_type_of<T1>::type, T1>;
+    using scT = std::conditional_t<is_complex_v<T>, value_type_of_t<T>, T>;
 
     const scT rtol = py::cast<scT>(py_rtol);
     const scT atol = py::cast<scT>(py_atol);
     const bool equal_nan = py::cast<bool>(py_equal_nan);
 
-    return dpnp::kernels::isclose::isclose_contig_scalar_impl<T1, T2, scT>(
+    return dpnp::kernels::isclose::isclose_contig_scalar_impl<T, scT>(
         q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0,
         depends);
 }
 
-template <typename fnT, typename T1, typename T2>
+template <typename fnT, typename T>
 struct IsCloseStridedScalarFactory
 {
     fnT get()
     {
-        if constexpr (std::is_same_v<
-                          typename IsCloseOutputType<T1, T2>::value_type, void>)
+        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
+                                     void>)
         {
             return nullptr;
         }
         else {
-            return isclose_strided_scalar_call<T1, T2>;
+            return isclose_strided_scalar_call<T>;
         }
     }
 };
 
-template <typename fnT, typename T1, typename T2>
+template <typename fnT, typename T>
 struct IsCloseContigScalarFactory
 {
     fnT get()
     {
-        if constexpr (!IsCloseOutputType<T1, T2>::is_defined) {
+        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
+                                     void>)
+        {
             return nullptr;
         }
         else {
-            return isclose_contig_scalar_call<T1, T2>;
+            return isclose_contig_scalar_call<T>;
         }
     }
 };
 
-void populate_isclose_dispatch_table()
+void populate_isclose_dispatch_vectors()
 {
-    td_ns::DispatchTableBuilder<isclose_strided_scalar_fn_ptr_t,
-                                IsCloseStridedScalarFactory, td_ns::num_types>
+    using namespace td_ns;
+
+    DispatchVectorBuilder<isclose_strided_scalar_fn_ptr_t,
+                          IsCloseStridedScalarFactory, num_types>
         dvb1;
-    dvb1.populate_dispatch_table(isclose_strided_scalar_dispatch_table);
+    dvb1.populate_dispatch_vector(isclose_strided_scalar_dispatch_vector);
 
-    td_ns::DispatchTableBuilder<isclose_contig_scalar_fn_ptr_t,
-                                IsCloseContigScalarFactory, td_ns::num_types>
+    DispatchVectorBuilder<isclose_contig_scalar_fn_ptr_t,
+                          IsCloseContigScalarFactory, num_types>
         dvb2;
-    dvb2.populate_dispatch_table(isclose_contig_dispatch_table);
+    dvb2.populate_dispatch_vector(isclose_contig_dispatch_vector);
 }
 
 std::pair<sycl::event, sycl::event>
-    py_isclose_scalar(const usm_ndarray &a,
-                      const usm_ndarray &b,
+    py_isclose_scalar(const dpctl::tensor::usm_ndarray &a,
+                      const dpctl::tensor::usm_ndarray &b,
                       const py::object &py_rtol,
                       const py::object &py_atol,
                       const py::object &py_equal_nan,
-                      const usm_ndarray &res,
+                      const dpctl::tensor::usm_ndarray &res,
                       sycl::queue &exec_q,
                       const std::vector<sycl::event> &depends)
 {
@@ -247,6 +230,10 @@ std::pair<sycl::event, sycl::event>
     int a_typeid = types.typenum_to_lookup_id(a.get_typenum());
     int b_typeid = types.typenum_to_lookup_id(b.get_typenum());
 
+    if (a_typeid != b_typeid) {
+        throw py::type_error("Array data types are not the same.");
+    }
+
     if (!dpctl::utils::queues_are_compatible(exec_q, {a, b, res})) {
         throw py::value_error(
             "Execution queue is not compatible with allocation queues");
@@ -309,7 +296,8 @@ std::pair<sycl::event, sycl::event>
     bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig);
 
     if (all_c_contig || all_f_contig) {
-        auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid];
+        // a_typeid == b_typeid
+        auto contig_fn = isclose_contig_dispatch_vector[a_typeid];
 
         if (contig_fn == nullptr) {
             throw std::runtime_error(
@@ -358,7 +346,8 @@ std::pair<sycl::event, sycl::event>
         simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1)
     {
         // Special case of contiguous data
-        auto contig_fn = isclose_contig_dispatch_table[a_typeid][b_typeid];
+        // a_typeid == b_typeid
+        auto contig_fn = isclose_contig_dispatch_vector[a_typeid];
 
         if (contig_fn == nullptr) {
             throw std::runtime_error(
@@ -383,7 +372,8 @@ std::pair<sycl::event, sycl::event>
         return std::make_pair(ht_ev, comp_ev);
     }
 
-    auto strided_fn = isclose_strided_scalar_dispatch_table[a_typeid][b_typeid];
+    // a_typeid == b_typeid
+    auto strided_fn = isclose_strided_scalar_dispatch_vector[a_typeid];
 
     if (strided_fn == nullptr) {
         throw std::runtime_error(
@@ -430,7 +420,7 @@ std::pair<sycl::event, sycl::event>
 
 void init_isclose(py::module_ m)
 {
-    impl::populate_isclose_dispatch_table();
+    impl::populate_isclose_dispatch_vectors();
 
     m.def("_isclose_scalar", &impl::py_isclose_scalar, "", py::arg("a"),
           py::arg("b"), py::arg("py_rtol"), py::arg("py_atol"),
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 2eb52bbd7587..4dedcbca32ea 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -57,16 +57,15 @@ inline bool isclose(const T a,
     return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
 }
 
-template <typename T1,
-          typename T2,
+template <typename T,
           typename scT,
           typename resTy,
           typename ThreeOffsets_IndexerT>
 struct IsCloseStridedScalarFunctor
 {
 private:
-    const T1 *a_ = nullptr;
-    const T2 *b_ = nullptr;
+    const T *a_ = nullptr;
+    const T *b_ = nullptr;
     resTy *out_ = nullptr;
     const ThreeOffsets_IndexerT three_offsets_indexer_;
     const scT rtol_;
@@ -74,8 +73,8 @@ struct IsCloseStridedScalarFunctor
     const bool equal_nan_;
 
 public:
-    IsCloseStridedScalarFunctor(const T1 *a,
-                                const T2 *b,
+    IsCloseStridedScalarFunctor(const T *a,
+                                const T *b,
                                 resTy *out,
                                 const ThreeOffsets_IndexerT &inps_res_indexer,
                                 const scT rtol,
@@ -97,9 +96,9 @@ struct IsCloseStridedScalarFunctor
             three_offsets_.get_third_offset();
 
         using dpctl::tensor::type_utils::is_complex_v;
-        if constexpr (is_complex_v<T1>) {
-            T1 z_a = a_[inp1_offset];
-            T2 z_b = b_[inp2_offset];
+        if constexpr (is_complex_v<T>) {
+            T z_a = a_[inp1_offset];
+            T z_b = b_[inp2_offset];
             bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
             bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
             out_[out_offset] = x && y;
@@ -111,8 +110,7 @@ struct IsCloseStridedScalarFunctor
     }
 };
 
-template <typename T1,
-          typename T2,
+template <typename T,
           typename scT,
           typename resTy,
           std::uint8_t vec_sz = 4u,
@@ -121,8 +119,8 @@ template <typename T1,
 struct IsCloseContigScalarFunctor
 {
 private:
-    const T1 *a_ = nullptr;
-    const T2 *b_ = nullptr;
+    const T *a_ = nullptr;
+    const T *b_ = nullptr;
     resTy *out_ = nullptr;
     std::size_t nelems_;
     const scT rtol_;
@@ -130,8 +128,8 @@ struct IsCloseContigScalarFunctor
     const bool equal_nan_;
 
 public:
-    IsCloseContigScalarFunctor(const T1 *a,
-                               const T2 *b,
+    IsCloseContigScalarFunctor(const T *a,
+                               const T *b,
                                resTy *out,
                                const std::size_t n_elems,
                                const scT rtol,
@@ -149,7 +147,7 @@ struct IsCloseContigScalarFunctor
         /* NOTE: work-group size must be divisible by sub-group size */
 
         using dpctl::tensor::type_utils::is_complex_v;
-        if constexpr (enable_sg_loadstore && !is_complex_v<T1>) {
+        if constexpr (enable_sg_loadstore && !is_complex_v<T>) {
             auto sg = ndit.get_sub_group();
             const std::uint16_t sgSize = sg.get_max_local_range()[0];
             const std::size_t base =
@@ -172,9 +170,9 @@ struct IsCloseContigScalarFunctor
                         sycl::access::address_space::global_space,
                         sycl::access::decorated::yes>(&out_[offset]);
 
-                    const sycl::vec<T1, vec_sz> a_vec =
+                    const sycl::vec<T, vec_sz> a_vec =
                         sub_group_load<vec_sz>(sg, a_multi_ptr);
-                    const sycl::vec<T2, vec_sz> b_vec =
+                    const sycl::vec<T, vec_sz> b_vec =
                         sub_group_load<vec_sz>(sg, b_multi_ptr);
 
                     sycl::vec<resTy, vec_sz> res_vec;
@@ -203,9 +201,9 @@ struct IsCloseContigScalarFunctor
                 (gid / sgSize) * (elems_per_sg - sgSize) + gid;
             const std::size_t end = std::min(nelems_, start + elems_per_sg);
             for (std::size_t offset = start; offset < end; offset += sgSize) {
-                if constexpr (is_complex_v<T1>) {
-                    T1 z_a = a_[offset];
-                    T2 z_b = b_[offset];
+                if constexpr (is_complex_v<T>) {
+                    T z_a = a_[offset];
+                    T z_b = b_[offset];
                     bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_,
                                      equal_nan_);
                     bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_,
@@ -221,7 +219,7 @@ struct IsCloseContigScalarFunctor
     }
 };
 
-template <typename T1, typename T2, typename scT>
+template <typename T, typename scT>
 sycl::event
     isclose_strided_scalar_impl(sycl::queue &exec_q,
                                 const int nd,
@@ -238,10 +236,10 @@ sycl::event
                                 const dpctl::tensor::ssize_t out_offset,
                                 const std::vector<sycl::event> &depends)
 {
-    dpctl::tensor::type_utils::validate_type_for_device<T1>(exec_q);
+    dpctl::tensor::type_utils::validate_type_for_device<T>(exec_q);
 
-    const T1 *a_tp = reinterpret_cast<const T1 *>(a_cp);
-    const T2 *b_tp = reinterpret_cast<const T2 *>(b_cp);
+    const T *a_tp = reinterpret_cast<const T *>(a_cp);
+    const T *b_tp = reinterpret_cast<const T *>(b_cp);
 
     using resTy = bool;
     resTy *out_tp = reinterpret_cast<resTy *>(out_cp);
@@ -254,7 +252,7 @@ sycl::event
         cgh.depends_on(depends);
 
         using IsCloseFunc =
-            IsCloseStridedScalarFunctor<T1, T2, scT, resTy, IndexerT>;
+            IsCloseStridedScalarFunctor<T, scT, resTy, IndexerT>;
         cgh.parallel_for<IsCloseFunc>(
             {nelems},
             IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan));
@@ -262,8 +260,7 @@ sycl::event
     return comp_ev;
 }
 
-template <typename T1,
-          typename T2,
+template <typename T,
           typename scT,
           std::uint8_t vec_sz = 4u,
           std::uint8_t n_vecs = 2u>
@@ -294,8 +291,8 @@ sycl::event
     const auto lws_range = sycl::range<1>(lws);
 
     // ? + offset
-    const T1 *a_tp = reinterpret_cast<const T1 *>(a_cp) + a_offset;
-    const T2 *b_tp = reinterpret_cast<const T2 *>(b_cp) + b_offset;
+    const T *a_tp = reinterpret_cast<const T *>(a_cp) + a_offset;
+    const T *b_tp = reinterpret_cast<const T *>(b_cp) + b_offset;
 
     using resTy = bool;
     resTy *out_tp = reinterpret_cast<resTy *>(out_cp) + out_offset;
@@ -311,7 +308,7 @@ sycl::event
         {
             constexpr bool enable_sg_loadstore = true;
             using IsCloseFunc =
-                IsCloseContigScalarFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
+                IsCloseContigScalarFunctor<T, scT, resTy, vec_sz, n_vecs,
                                            enable_sg_loadstore>;
 
             cgh.parallel_for<IsCloseFunc>(
@@ -321,7 +318,7 @@ sycl::event
         else {
             constexpr bool disable_sg_loadstore = false;
             using IsCloseFunc =
-                IsCloseContigScalarFunctor<T1, T2, scT, resTy, vec_sz, n_vecs,
+                IsCloseContigScalarFunctor<T, scT, resTy, vec_sz, n_vecs,
                                            disable_sg_loadstore>;
 
             cgh.parallel_for<IsCloseFunc>(

From fe1f5193bebdcb06c7e56de9cc0540aeceec1896 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 24 Jul 2025 05:11:50 -0700
Subject: [PATCH 08/32] Simplify isclose function pointer declarations

---
 .../ufunc/elementwise_functions/isclose.cpp   | 63 +++++++++----------
 1 file changed, 31 insertions(+), 32 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index e2dc5913f3fe..4926dbb1e0d7 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -65,41 +65,40 @@ namespace dpnp::extensions::ufunc
 namespace impl
 {
 
-using isclose_strided_scalar_fn_ptr_t =
-    sycl::event (*)(sycl::queue &,
-                    int,
-                    std::size_t,
-                    const py::ssize_t *,
-                    const py::object &,
-                    const py::object &,
-                    const py::object &,
-                    const char *,
-                    py::ssize_t,
-                    const char *,
-                    py::ssize_t,
-                    char *,
-                    py::ssize_t,
-                    const std::vector<sycl::event> &);
-
-using isclose_contig_scalar_fn_ptr_t =
-    sycl::event (*)(sycl::queue &,
-                    std::size_t,
-                    const py::object &,
-                    const py::object &,
-                    const py::object &,
-                    const char *,
-                    const char *,
-                    char *,
-                    const std::vector<sycl::event> &);
-
-static isclose_strided_scalar_fn_ptr_t
-    isclose_strided_scalar_dispatch_vector[td_ns::num_types];
-static isclose_contig_scalar_fn_ptr_t
-    isclose_contig_dispatch_vector[td_ns::num_types];
-
 template <typename T>
 using value_type_of_t = typename value_type_of<T>::type;
 
+typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)(
+    sycl::queue &,
+    int,                 // nd
+    std::size_t,         // nelems
+    const py::ssize_t *, // shape_strides
+    const py::object &,  // rtol
+    const py::object &,  // atol
+    const py::object &,  // equal_nan
+    const char *,        // a
+    py::ssize_t,         // a_offset
+    const char *,        // b
+    py::ssize_t,         // b_offset
+    char *,              // out
+    py::ssize_t,         // out_offset
+    const std::vector<sycl::event> &);
+
+typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)(
+    sycl::queue &,
+    std::size_t,        // nelems
+    const py::object &, // rtol
+    const py::object &, // atol
+    const py::object &, // equal_nan
+    const char *,       // a
+    const char *,       // b
+    char *,             // out
+    const std::vector<sycl::event> &);
+
+isclose_strided_scalar_fn_ptr_t
+    isclose_strided_scalar_dispatch_vector[td_ns::num_types];
+isclose_contig_scalar_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types];
+
 /**
  * @brief A factory to define pairs of supported types for which
  * isclose function is available.

From 38d84010683ceb93e50e4291293f30767fae5072 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 24 Jul 2025 05:21:19 -0700
Subject: [PATCH 09/32] Restructure isclose.cpp

---
 .../ufunc/elementwise_functions/isclose.cpp   | 158 +++++++++---------
 1 file changed, 79 insertions(+), 79 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 4926dbb1e0d7..da2933425b58 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -84,39 +84,6 @@ typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)(
     py::ssize_t,         // out_offset
     const std::vector<sycl::event> &);
 
-typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)(
-    sycl::queue &,
-    std::size_t,        // nelems
-    const py::object &, // rtol
-    const py::object &, // atol
-    const py::object &, // equal_nan
-    const char *,       // a
-    const char *,       // b
-    char *,             // out
-    const std::vector<sycl::event> &);
-
-isclose_strided_scalar_fn_ptr_t
-    isclose_strided_scalar_dispatch_vector[td_ns::num_types];
-isclose_contig_scalar_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types];
-
-/**
- * @brief A factory to define pairs of supported types for which
- * isclose function is available.
- *
- * @tparam T Type of input vector `a` and `b` and of result vector `y`.
- */
-template <typename T>
-struct IsCloseOutputType
-{
-    using value_type = typename std::disjunction<
-        td_ns::TypeMapResultEntry<T, sycl::half>,
-        td_ns::TypeMapResultEntry<T, float>,
-        td_ns::TypeMapResultEntry<T, double>,
-        td_ns::TypeMapResultEntry<T, std::complex<float>>,
-        td_ns::TypeMapResultEntry<T, std::complex<double>>,
-        td_ns::DefaultResultEntry<void>>::result_type;
-};
-
 template <typename T>
 sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
                                         int nd,
@@ -145,6 +112,17 @@ sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
         in1_offset, in2_p, in2_offset, out_p, out_offset, depends);
 }
 
+typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)(
+    sycl::queue &,
+    std::size_t,        // nelems
+    const py::object &, // rtol
+    const py::object &, // atol
+    const py::object &, // equal_nan
+    const char *,       // a
+    const char *,       // b
+    char *,             // out
+    const std::vector<sycl::event> &);
+
 template <typename T>
 sycl::event isclose_contig_scalar_call(sycl::queue &q,
                                        std::size_t nelems,
@@ -168,52 +146,9 @@ sycl::event isclose_contig_scalar_call(sycl::queue &q,
         depends);
 }
 
-template <typename fnT, typename T>
-struct IsCloseStridedScalarFactory
-{
-    fnT get()
-    {
-        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
-                                     void>)
-        {
-            return nullptr;
-        }
-        else {
-            return isclose_strided_scalar_call<T>;
-        }
-    }
-};
-
-template <typename fnT, typename T>
-struct IsCloseContigScalarFactory
-{
-    fnT get()
-    {
-        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
-                                     void>)
-        {
-            return nullptr;
-        }
-        else {
-            return isclose_contig_scalar_call<T>;
-        }
-    }
-};
-
-void populate_isclose_dispatch_vectors()
-{
-    using namespace td_ns;
-
-    DispatchVectorBuilder<isclose_strided_scalar_fn_ptr_t,
-                          IsCloseStridedScalarFactory, num_types>
-        dvb1;
-    dvb1.populate_dispatch_vector(isclose_strided_scalar_dispatch_vector);
-
-    DispatchVectorBuilder<isclose_contig_scalar_fn_ptr_t,
-                          IsCloseContigScalarFactory, num_types>
-        dvb2;
-    dvb2.populate_dispatch_vector(isclose_contig_dispatch_vector);
-}
+isclose_strided_scalar_fn_ptr_t
+    isclose_strided_scalar_dispatch_vector[td_ns::num_types];
+isclose_contig_scalar_fn_ptr_t isclose_contig_dispatch_vector[td_ns::num_types];
 
 std::pair<sycl::event, sycl::event>
     py_isclose_scalar(const dpctl::tensor::usm_ndarray &a,
@@ -415,6 +350,71 @@ std::pair<sycl::event, sycl::event>
         comp_ev);
 }
 
+/**
+ * @brief A factory to define pairs of supported types for which
+ * isclose function is available.
+ *
+ * @tparam T Type of input vector `a` and `b` and of result vector `y`.
+ */
+template <typename T>
+struct IsCloseOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename fnT, typename T>
+struct IsCloseStridedScalarFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
+                                     void>)
+        {
+            return nullptr;
+        }
+        else {
+            return isclose_strided_scalar_call<T>;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct IsCloseContigScalarFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
+                                     void>)
+        {
+            return nullptr;
+        }
+        else {
+            return isclose_contig_scalar_call<T>;
+        }
+    }
+};
+
+void populate_isclose_dispatch_vectors()
+{
+    using namespace td_ns;
+
+    DispatchVectorBuilder<isclose_strided_scalar_fn_ptr_t,
+                          IsCloseStridedScalarFactory, num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(isclose_strided_scalar_dispatch_vector);
+
+    DispatchVectorBuilder<isclose_contig_scalar_fn_ptr_t,
+                          IsCloseContigScalarFactory, num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(isclose_contig_dispatch_vector);
+}
+
 } // namespace impl
 
 void init_isclose(py::module_ m)

From 821065f58014e6450b3f9dd3fdc2821f759ffa2b Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 24 Jul 2025 06:05:22 -0700
Subject: [PATCH 10/32] Use common validation utilities

---
 .../ufunc/elementwise_functions/isclose.cpp   | 76 +++++++++----------
 1 file changed, 35 insertions(+), 41 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index da2933425b58..ac463dd1051f 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -53,11 +53,21 @@
 #include "utils/type_utils.hpp"
 
 #include "ext/common.hpp"
+#include "ext/validation_utils.hpp"
 
 namespace py = pybind11;
 namespace td_ns = dpctl::tensor::type_dispatch;
 
 using ext::common::value_type_of;
+using ext::validation::array_names;
+
+using ext::common::dtype_from_typenum;
+using ext::validation::check_has_dtype;
+using ext::validation::check_no_overlap;
+using ext::validation::check_num_dims;
+using ext::validation::check_queue;
+using ext::validation::check_same_dtype;
+using ext::validation::check_same_size;
 
 namespace dpnp::extensions::ufunc
 {
@@ -160,25 +170,22 @@ std::pair<sycl::event, sycl::event>
                       sycl::queue &exec_q,
                       const std::vector<sycl::event> &depends)
 {
-    auto types = td_ns::usm_ndarray_types();
-    int a_typeid = types.typenum_to_lookup_id(a.get_typenum());
-    int b_typeid = types.typenum_to_lookup_id(b.get_typenum());
+    array_names names = {{&a, "a"}, {&b, "b"}, {&res, "res"}};
 
-    if (a_typeid != b_typeid) {
-        throw py::type_error("Array data types are not the same.");
-    }
+    check_same_dtype(&a, &b, names);
+    check_has_dtype(&res, td_ns::typenum_t::BOOL, names);
 
-    if (!dpctl::utils::queues_are_compatible(exec_q, {a, b, res})) {
-        throw py::value_error(
-            "Execution queue is not compatible with allocation queues");
-    }
+    check_same_size({&a, &b, &res}, names);
+    int res_nd = res.get_ndim();
+    check_num_dims({&a, &b}, res_nd, names);
 
+    check_queue({&a, &b, &res}, names, exec_q);
+    check_no_overlap({&a, &b}, {&res}, names);
     dpctl::tensor::validation::CheckWritable::throw_if_not_writable(res);
 
-    int res_nd = res.get_ndim();
-    if (res_nd != a.get_ndim() || res_nd != b.get_ndim()) {
-        throw py::value_error("Array dimensions are not the same.");
-    }
+    auto types = td_ns::usm_ndarray_types();
+    // a_typeid == b_typeid (check_same_dtype(&a, &b, names))
+    int a_b_typeid = types.typenum_to_lookup_id(a.get_typenum());
 
     const py::ssize_t *a_shape = a.get_shape_raw();
     const py::ssize_t *b_shape = b.get_shape_raw();
@@ -192,7 +199,7 @@ std::pair<sycl::event, sycl::event>
                                         b_shape[i] == res_shape[i]);
     }
     if (!shapes_equal) {
-        throw py::value_error("Array shapes are not the same.");
+        throw py::value_error("Array shapes are not the same");
     }
 
     // if nelems is zero, return
@@ -202,16 +209,6 @@ std::pair<sycl::event, sycl::event>
 
     dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(res, nelems);
 
-    // check memory overlap
-    auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
-    auto const &same_logical_tensors =
-        dpctl::tensor::overlap::SameLogicalTensors();
-    if ((overlap(a, res) && !same_logical_tensors(a, res)) ||
-        (overlap(b, res) && !same_logical_tensors(b, res)))
-    {
-        throw py::value_error("Arrays index overlapping segments of memory");
-    }
-
     const char *a_data = a.get_data();
     const char *b_data = b.get_data();
     char *res_data = res.get_data();
@@ -230,14 +227,13 @@ std::pair<sycl::event, sycl::event>
     bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig);
 
     if (all_c_contig || all_f_contig) {
-        // a_typeid == b_typeid
-        auto contig_fn = isclose_contig_dispatch_vector[a_typeid];
+        auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid];
 
         if (contig_fn == nullptr) {
+            py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid);
             throw std::runtime_error(
-                "Contiguous implementation is missing for a_typeid=" +
-                std::to_string(a_typeid) +
-                " and b_typeid=" + std::to_string(b_typeid));
+                "Contiguous implementation is missing for " +
+                std::string(py::str(a_b_dtype_py)) + "data type");
         }
 
         auto comp_ev = contig_fn(exec_q, nelems, py_rtol, py_atol, py_equal_nan,
@@ -280,14 +276,13 @@ std::pair<sycl::event, sycl::event>
         simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1)
     {
         // Special case of contiguous data
-        // a_typeid == b_typeid
-        auto contig_fn = isclose_contig_dispatch_vector[a_typeid];
+        auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid];
 
         if (contig_fn == nullptr) {
+            py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid);
             throw std::runtime_error(
-                "Contiguous implementation is missing for a_typeid=" +
-                std::to_string(a_typeid) +
-                " and b_typeid=" + std::to_string(b_typeid));
+                "Contiguous implementation is missing for " +
+                std::string(py::str(a_b_dtype_py)) + "data type");
         }
 
         std::cout << "Run contig impl in strided" << std::endl;
@@ -306,14 +301,13 @@ std::pair<sycl::event, sycl::event>
         return std::make_pair(ht_ev, comp_ev);
     }
 
-    // a_typeid == b_typeid
-    auto strided_fn = isclose_strided_scalar_dispatch_vector[a_typeid];
+    auto strided_fn = isclose_strided_scalar_dispatch_vector[a_b_typeid];
 
     if (strided_fn == nullptr) {
-        throw std::runtime_error(
-            "isclose implementation is missing for a_typeid=" +
-            std::to_string(a_typeid) +
-            " and b_typeid=" + std::to_string(b_typeid));
+        py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid);
+        throw std::runtime_error("Strided implementation is missing for " +
+                                 std::string(py::str(a_b_dtype_py)) +
+                                 "data type");
     }
 
     std::cout << "Run strided impl" << std::endl;

From 4df11369663aa251b42c8bc100f22dab6e7703e7 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 25 Jul 2025 03:12:00 -0700
Subject: [PATCH 11/32] Fix argument order in IsCloseStridedScalarFunctor
 constructor

---
 dpnp/backend/kernels/elementwise_functions/isclose.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 4dedcbca32ea..338f1a2c8929 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -255,7 +255,7 @@ sycl::event
             IsCloseStridedScalarFunctor<T, scT, resTy, IndexerT>;
         cgh.parallel_for<IsCloseFunc>(
             {nelems},
-            IsCloseFunc(a_tp, b_tp, out_tp, indexer, atol, rtol, equal_nan));
+            IsCloseFunc(a_tp, b_tp, out_tp, indexer, rtol, atol, equal_nan));
     });
     return comp_ev;
 }

From d80eab71bb8de83fddff8be216680478620445c7 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 25 Jul 2025 06:13:38 -0700
Subject: [PATCH 12/32] Cleanup isclose.cpp

---
 .../extensions/ufunc/elementwise_functions/isclose.cpp      | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index ac463dd1051f..4666ba9b63dd 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -248,8 +248,6 @@ std::pair<sycl::event, sycl::event>
     //     if 1d with strides 1 - input is contig
     //     dispatch to strided
 
-    std::cout << "Strided impl run" << std::endl;
-
     auto const &a_strides = a.get_strides_vector();
     auto const &b_strides = b.get_strides_vector();
     auto const &res_strides = res.get_strides_vector();
@@ -285,8 +283,6 @@ std::pair<sycl::event, sycl::event>
                 std::string(py::str(a_b_dtype_py)) + "data type");
         }
 
-        std::cout << "Run contig impl in strided" << std::endl;
-
         int a_elem_size = a.get_elemsize();
         int b_elem_size = b.get_elemsize();
         int res_elem_size = res.get_elemsize();
@@ -310,8 +306,6 @@ std::pair<sycl::event, sycl::event>
                                  "data type");
     }
 
-    std::cout << "Run strided impl" << std::endl;
-
     using dpctl::tensor::offset_utils::device_allocate_and_pack;
 
     std::vector<sycl::event> host_tasks{};

From 8873008476f70bfc874d350426bb01eb0d3650b3 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 25 Jul 2025 06:31:45 -0700
Subject: [PATCH 13/32] Add support rtol/atol as complex to match NumPy

---
 dpnp/dpnp_iface_logic.py | 77 +++++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index fed871bad857..1052ac40d3ac 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -773,7 +773,7 @@ def array_equiv(a1, a2):
 )
 
 
-def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
+def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
     """
     Returns a boolean array where two arrays are element-wise equal within
     a tolerance.
@@ -872,37 +872,8 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
         rtol, atol, scalar_type=True, all_scalars=True
     )
 
-    # make sure b is an inexact type to avoid bad behavior on abs(MIN_INT)
-    if dpnp.isscalar(b):
-        dt = dpnp.result_type(a, b, 1.0, rtol, atol)
-        b = dpnp.asarray(
-            b, dtype=dt, sycl_queue=a.sycl_queue, usm_type=a.usm_type
-        )
-    elif dpnp.issubdtype(b, dpnp.integer):
-        dt = dpnp.result_type(b, 1.0, rtol, atol)
-        b = dpnp.astype(b, dt)
-
-    if not new:
-
-        # Firstly handle finite values:
-        # result = absolute(a - b) <= atol + rtol * absolute(b)
-        dt = dpnp.result_type(b, rtol, atol)
-        _b = dpnp.abs(b, dtype=dt)
-        _b *= rtol
-        _b += atol
-        result = less_equal(dpnp.abs(a - b), _b)
-
-        # Handle "inf" values: they are treated as equal if they are in the same
-        # place and of the same sign in both arrays
-        result &= isfinite(b)
-        result |= a == b
-
-        if equal_nan:
-            result |= isnan(a) & isnan(b)
-        return result
-
-    else:
-
+    # Use own SYCL kernel for scalar rtol/atol
+    if dpnp.isscalar(rtol) and dpnp.isscalar(atol):
         dt = dpnp.result_type(a, b, 1.0)
 
         if dpnp.isscalar(a):
@@ -926,10 +897,17 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
         else:
             usm_type, exec_q = get_usm_allocations([a, b])
 
-        # remove order="C"
-        a = dpnp.astype(a, dt, order="C", casting="same_kind", copy=False)
-        b = dpnp.astype(b, dt, order="C", casting="same_kind", copy=False)
+        a = dpnp.astype(a, dt, casting="same_kind", copy=False)
+        b = dpnp.astype(b, dt, casting="same_kind", copy=False)
 
+        # Convert complex rtol/atol to real values
+        # to avoid pybind11 cast errors and match NumPy behavior
+        if isinstance(rtol, complex):
+            rtol = abs(rtol)
+        if isinstance(atol, complex):
+            atol = abs(atol)
+
+        # pylint: disable=W0707
         try:
             res_shape = dpnp.broadcast_shapes(a.shape, b.shape)
         except ValueError:
@@ -949,7 +927,7 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
         )
 
         _manager = dpu.SequentialOrderManager[exec_q]
-        mem_ev, ht_ev = ufi._isclose(
+        mem_ev, ht_ev = ufi._isclose_scalar(
             a.get_array(),
             b.get_array(),
             rtol,
@@ -963,6 +941,33 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False, new=False):
 
         return output
 
+    # make sure b is an inexact type to avoid bad behavior on abs(MIN_INT)
+    if dpnp.isscalar(b):
+        dt = dpnp.result_type(a, b, 1.0, rtol, atol)
+        b = dpnp.asarray(
+            b, dtype=dt, sycl_queue=a.sycl_queue, usm_type=a.usm_type
+        )
+    elif dpnp.issubdtype(b, dpnp.integer):
+        dt = dpnp.result_type(b, 1.0, rtol, atol)
+        b = dpnp.astype(b, dt)
+
+    # Firstly handle finite values:
+    # result = absolute(a - b) <= atol + rtol * absolute(b)
+    dt = dpnp.result_type(b, rtol, atol)
+    _b = dpnp.abs(b, dtype=dt)
+    _b *= rtol
+    _b += atol
+    result = less_equal(dpnp.abs(a - b), _b)
+
+    # Handle "inf" values: they are treated as equal if they are in the same
+    # place and of the same sign in both arrays
+    result &= isfinite(b)
+    result |= a == b
+
+    if equal_nan:
+        result |= isnan(a) & isnan(b)
+    return result
+
 
 def iscomplex(x):
     """

From 86c99cd021510e591a2a85813f18f6e79cfaaf5a Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 25 Jul 2025 08:34:35 -0700
Subject: [PATCH 14/32] Expand and update tests for dpnp.isclose()

---
 dpnp/tests/test_logic.py | 133 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 118 insertions(+), 15 deletions(-)

diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py
index 4281279da450..c1a068724938 100644
--- a/dpnp/tests/test_logic.py
+++ b/dpnp/tests/test_logic.py
@@ -515,23 +515,126 @@ def test_infinity_sign_errors(func):
         getattr(dpnp, func)(x, out=out)
 
 
-@pytest.mark.parametrize("dtype", get_integer_float_dtypes())
-@pytest.mark.parametrize(
-    "rtol", [1e-05, dpnp.array(1e-05), dpnp.full(10, 1e-05)]
-)
-@pytest.mark.parametrize(
-    "atol", [1e-08, dpnp.array(1e-08), dpnp.full(10, 1e-08)]
-)
-def test_isclose(dtype, rtol, atol):
-    a = numpy.random.rand(10)
-    b = a + numpy.random.rand(10) * 1e-8
+class TestIsClose:
+    @pytest.mark.parametrize("val", [1.0, numpy.inf, -numpy.inf, numpy.nan])
+    def test_input_0d(self, val):
+        dp_arr = dpnp.array(val)
+        np_arr = numpy.array(val)
 
-    dpnp_a = dpnp.array(a, dtype=dtype)
-    dpnp_b = dpnp.array(b, dtype=dtype)
+        # array & scalar
+        dp_res = dpnp.isclose(dp_arr, val)
+        np_res = numpy.isclose(np_arr, val)
+        assert_allclose(dp_res, np_res)
 
-    np_res = numpy.isclose(a, b, 1e-05, 1e-08)
-    dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol, atol)
-    assert_allclose(dpnp_res, np_res)
+        # scalar & array
+        dp_res = dpnp.isclose(val, dp_arr)
+        np_res = numpy.isclose(val, np_arr)
+        assert_allclose(dp_res, np_res)
+
+        # array & array
+        dp_res = dpnp.isclose(dp_arr, dp_arr)
+        np_res = numpy.isclose(np_arr, np_arr)
+        assert_allclose(dp_res, np_res)
+
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
+    @pytest.mark.parametrize(
+        "rtol", [1e-5, dpnp.array(1e-5), dpnp.full((10,), 1e-5)]
+    )
+    @pytest.mark.parametrize(
+        "atol", [1e-8, dpnp.array(1e-8), dpnp.full((10,), 1e-8)]
+    )
+    def test_isclose(self, dtype, rtol, atol):
+        a = numpy.random.rand(10)
+        b = a + numpy.random.rand(10) * 1e-8
+
+        dpnp_a = dpnp.array(a, dtype=dtype)
+        dpnp_b = dpnp.array(b, dtype=dtype)
+
+        np_res = numpy.isclose(a, b, rtol=1e-5, atol=1e-8)
+        dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol=rtol, atol=atol)
+        assert_allclose(dpnp_res, np_res)
+
+    @pytest.mark.parametrize(
+        "sh_a, sh_b",
+        [
+            ((10,), (1,)),
+            ((3, 1, 5), (3, 5)),
+            ((3, 1, 5), (1, 3, 5)),
+            ((1, 10), (10,)),
+        ],
+    )
+    def test_broadcast_shapes(self, sh_a, sh_b):
+        a_np = numpy.ones(sh_a)
+        b_np = numpy.ones(sh_b)
+
+        a_dp = dpnp.ones(sh_a)
+        b_dp = dpnp.ones(sh_b)
+
+        np_res = numpy.isclose(a_np, b_np)
+        dp_res = dpnp.isclose(a_dp, b_dp)
+        assert_allclose(dp_res, np_res)
+
+    def test_equal_nan(self):
+        a = numpy.array([numpy.nan, 1.0])
+        b = numpy.array([numpy.nan, 1.0])
+
+        dp_a = dpnp.array(a)
+        dp_b = dpnp.array(b)
+
+        np_res = numpy.isclose(a, b, equal_nan=True)
+        dp_res = dpnp.isclose(dp_a, dp_b, equal_nan=True)
+        assert_allclose(dp_res, np_res)
+
+    def test_rtol_atol_arrays(self):
+        a = numpy.array([2.1, 2.1, 2.1, 2.1, 5, numpy.nan])
+        b = numpy.array([2, 2, 2, 2, numpy.nan, 5])
+        atol = numpy.array([0.11, 0.09, 1e-8, 1e-8, 1, 1])
+        rtol = numpy.array([1e-8, 1e-8, 0.06, 0.04, 1, 1])
+
+        dp_a = dpnp.array(a)
+        dp_b = dpnp.array(b)
+        dp_rtol = dpnp.array(rtol)
+        dp_atol = dpnp.array(atol)
+
+        np_res = numpy.isclose(a, b, rtol=rtol, atol=atol)
+        dp_res = dpnp.isclose(dp_a, dp_b, rtol=dp_rtol, atol=dp_atol)
+        assert_allclose(dp_res, np_res)
+
+    @pytest.mark.parametrize(
+        "rtol, atol",
+        [
+            (1e-05 + 1j, 1e-08),
+            (1e-05, 1e-08 + 1j),
+            (1e-05 + 1j, 1e-08 + 1j),
+        ],
+    )
+    def test_rtol_atol_complex(self, rtol, atol):
+        a = dpnp.array([1.0, 2.0])
+        b = dpnp.array([1.0, 2.0 + 1e-7])
+
+        dpnp_res = dpnp.isclose(a, b, rtol=rtol, atol=atol)
+        np_res = numpy.isclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol)
+        assert_allclose(dpnp_res, np_res)
+
+    def test_rtol_atol_nep50_broadcasting(self):
+        below_one = float(1.0 - numpy.finfo("f8").eps)
+        f32 = numpy.array(below_one, dtype="f4")
+        dp_f32 = dpnp.array(f32)
+
+        assert_allclose(
+            dpnp.isclose(dp_f32, below_one, rtol=0, atol=0),
+            numpy.isclose(f32, below_one, rtol=0, atol=0),
+        )
+
+    def test_invalid_input(self):
+        # unsupported type
+        assert_raises(TypeError, dpnp.isclose, 1.0, 1.0)
+        assert_raises(TypeError, dpnp.isclose, [1.0], [1.0])
+
+        # broadcast error
+        assert_raises(
+            ValueError, dpnp.isclose, dpnp.ones((10,)), dpnp.ones((3, 5))
+        )
 
 
 @pytest.mark.parametrize("a", [numpy.array([1, 2]), numpy.array([1, 1])])

From 28745204bcc1a89002ebb5a3895a19ac92823c60 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 25 Jul 2025 08:44:53 -0700
Subject: [PATCH 15/32] Update copyright year

---
 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp | 2 +-
 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp | 2 +-
 dpnp/backend/kernels/elementwise_functions/isclose.hpp          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 4666ba9b63dd..a23dced2b9ca 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2024, Intel Corporation
+// Copyright (c) 2025, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp
index 2a409779cb3a..5216db4f3a29 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2024-2025, Intel Corporation
+// Copyright (c) 2025, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 338f1a2c8929..137c574bfeb0 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2024-2025, Intel Corporation
+// Copyright (c) 2025, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without

From 68b38785bba2b584ee089457f8f6f2ef740e5ab2 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Fri, 25 Jul 2025 08:52:48 -0700
Subject: [PATCH 16/32] Update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2acb9cd4a5eb..06af8aea95e0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 * Replaced the use of `numpy.testing.suppress_warnings` with appropriate calls from the warnings module [#2529](https://github.com/IntelPython/dpnp/pull/2529)
 * Improved documentations of `dpnp.ndarray` class and added a page with description of supported constants [#2422](https://github.com/IntelPython/dpnp/pull/2422)
 * Updated `dpnp.size` to accept tuple of ints for `axes` argument [#2536](https://github.com/IntelPython/dpnp/pull/2536)
+* Improved performance of `dpnp.isclose` function by implementing a dedicated kernel for scalar `rtol` and `atol` arguments [#2540](https://github.com/IntelPython/dpnp/pull/2540)
 
 ### Deprecated
 

From ddd2ec02f21c18f83c2f91cc2583c7941768176e Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 03:22:26 -0700
Subject: [PATCH 17/32] Fix incorrect complex isclose logic and add proper
 overload

---
 .../kernels/elementwise_functions/isclose.hpp | 74 +++++++++++--------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 137c574bfeb0..4be6c103e5c7 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -25,6 +25,13 @@
 
 #pragma once
 
+#define SYCL_EXT_ONEAPI_COMPLEX
+#if __has_include(<sycl/ext/oneapi/experimental/sycl_complex.hpp>)
+#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
+#else
+#include <sycl/ext/oneapi/experimental/complex/complex.hpp>
+#endif
+
 #include <complex>
 #include <cstddef>
 #include <vector>
@@ -39,6 +46,7 @@
 
 namespace dpnp::kernels::isclose
 {
+namespace exprm_ns = sycl::ext::oneapi::experimental;
 
 template <typename T>
 inline bool isclose(const T a,
@@ -47,14 +55,39 @@ inline bool isclose(const T a,
                     const T atol,
                     const bool equal_nan)
 {
-    if (sycl::isnan(a) || sycl::isnan(b)) {
-        // static cast<T>?
-        return equal_nan && sycl::isnan(a) && sycl::isnan(b);
+    if (sycl::isfinite(a) && sycl::isfinite(b)) {
+        return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
+    }
+
+    if (sycl::isnan(a) && sycl::isnan(b)) {
+        return equal_nan;
+    }
+
+    return a == b;
+}
+
+template <typename T>
+inline bool isclose(const std::complex<T> a,
+                    const std::complex<T> b,
+                    const T rtol,
+                    const T atol,
+                    const bool equal_nan)
+{
+    const bool a_finite = sycl::isfinite(a.real()) && sycl::isfinite(a.imag());
+    const bool b_finite = sycl::isfinite(b.real()) && sycl::isfinite(b.imag());
+
+    if (a_finite && b_finite) {
+        return exprm_ns::abs(exprm_ns::complex<T>(a - b)) <=
+               atol + rtol * exprm_ns::abs(exprm_ns::complex<T>(b));
     }
-    if (sycl::isinf(a) || sycl::isinf(b)) {
-        return a == b;
+
+    if (sycl::isnan(a.real()) && sycl::isnan(a.imag()) &&
+        sycl::isnan(b.real()) && sycl::isnan(b.imag()))
+    {
+        return equal_nan;
     }
-    return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
+
+    return a == b;
 }
 
 template <typename T,
@@ -95,18 +128,8 @@ struct IsCloseStridedScalarFunctor
         const dpctl::tensor::ssize_t &out_offset =
             three_offsets_.get_third_offset();
 
-        using dpctl::tensor::type_utils::is_complex_v;
-        if constexpr (is_complex_v<T>) {
-            T z_a = a_[inp1_offset];
-            T z_b = b_[inp2_offset];
-            bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_, equal_nan_);
-            bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_, equal_nan_);
-            out_[out_offset] = x && y;
-        }
-        else {
-            out_[out_offset] = isclose(a_[inp1_offset], b_[inp2_offset], rtol_,
-                                       atol_, equal_nan_);
-        }
+        out_[out_offset] =
+            isclose(a_[inp1_offset], b_[inp2_offset], rtol_, atol_, equal_nan_);
     }
 };
 
@@ -201,19 +224,8 @@ struct IsCloseContigScalarFunctor
                 (gid / sgSize) * (elems_per_sg - sgSize) + gid;
             const std::size_t end = std::min(nelems_, start + elems_per_sg);
             for (std::size_t offset = start; offset < end; offset += sgSize) {
-                if constexpr (is_complex_v<T>) {
-                    T z_a = a_[offset];
-                    T z_b = b_[offset];
-                    bool x = isclose(z_a.real(), z_b.real(), rtol_, atol_,
-                                     equal_nan_);
-                    bool y = isclose(z_a.imag(), z_b.imag(), rtol_, atol_,
-                                     equal_nan_);
-                    out_[offset] = x && y;
-                }
-                else {
-                    out_[offset] = isclose(a_[offset], b_[offset], rtol_, atol_,
-                                           equal_nan_);
-                }
+                out_[offset] =
+                    isclose(a_[offset], b_[offset], rtol_, atol_, equal_nan_);
             }
         }
     }

From 819262d40629ed5075f2c9e36a95cd60ef66ab93 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 03:31:32 -0700
Subject: [PATCH 18/32] Make immutable variables const

---
 .../ufunc/elementwise_functions/isclose.cpp   | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index a23dced2b9ca..752273fe085a 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -80,8 +80,8 @@ using value_type_of_t = typename value_type_of<T>::type;
 
 typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)(
     sycl::queue &,
-    int,                 // nd
-    std::size_t,         // nelems
+    const int,           // nd
+    const std::size_t,   // nelems
     const py::ssize_t *, // shape_strides
     const py::object &,  // rtol
     const py::object &,  // atol
@@ -96,8 +96,8 @@ typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)(
 
 template <typename T>
 sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
-                                        int nd,
-                                        std::size_t nelems,
+                                        const int nd,
+                                        const std::size_t nelems,
                                         const py::ssize_t *shape_strides,
                                         const py::object &py_rtol,
                                         const py::object &py_atol,
@@ -124,7 +124,7 @@ sycl::event isclose_strided_scalar_call(sycl::queue &exec_q,
 
 typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)(
     sycl::queue &,
-    std::size_t,        // nelems
+    const std::size_t,  // nelems
     const py::object &, // rtol
     const py::object &, // atol
     const py::object &, // equal_nan
@@ -135,7 +135,7 @@ typedef sycl::event (*isclose_contig_scalar_fn_ptr_t)(
 
 template <typename T>
 sycl::event isclose_contig_scalar_call(sycl::queue &q,
-                                       std::size_t nelems,
+                                       const std::size_t nelems,
                                        const py::object &py_rtol,
                                        const py::object &py_atol,
                                        const py::object &py_equal_nan,
@@ -214,17 +214,19 @@ std::pair<sycl::event, sycl::event>
     char *res_data = res.get_data();
 
     // handle contiguous inputs
-    bool is_a_c_contig = a.is_c_contiguous();
-    bool is_a_f_contig = a.is_f_contiguous();
+    const bool is_a_c_contig = a.is_c_contiguous();
+    const bool is_a_f_contig = a.is_f_contiguous();
 
-    bool is_b_c_contig = b.is_c_contiguous();
-    bool is_b_f_contig = b.is_f_contiguous();
+    const bool is_b_c_contig = b.is_c_contiguous();
+    const bool is_b_f_contig = b.is_f_contiguous();
 
-    bool is_res_c_contig = res.is_c_contiguous();
-    bool is_res_f_contig = res.is_f_contiguous();
+    const bool is_res_c_contig = res.is_c_contiguous();
+    const bool is_res_f_contig = res.is_f_contiguous();
 
-    bool all_c_contig = (is_a_c_contig && is_b_c_contig && is_res_c_contig);
-    bool all_f_contig = (is_a_f_contig && is_b_f_contig && is_res_f_contig);
+    const bool all_c_contig =
+        (is_a_c_contig && is_b_c_contig && is_res_c_contig);
+    const bool all_f_contig =
+        (is_a_f_contig && is_b_f_contig && is_res_f_contig);
 
     if (all_c_contig || all_f_contig) {
         auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid];
@@ -362,8 +364,7 @@ struct IsCloseStridedScalarFactory
     fnT get()
     {
         if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
-                                     void>)
-        {
+                                     void>) {
             return nullptr;
         }
         else {
@@ -378,8 +379,7 @@ struct IsCloseContigScalarFactory
     fnT get()
     {
         if constexpr (std::is_same_v<typename IsCloseOutputType<T>::value_type,
-                                     void>)
-        {
+                                     void>) {
             return nullptr;
         }
         else {

From 8691551fe60b108c66954f9c591d79b9cfd0c4d8 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 04:22:59 -0700
Subject: [PATCH 19/32] Extend check_no_overlap to handle same logical tensors

---
 .../common/ext/details/validation_utils_internal.hpp          | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp
index 2ff800aced59..816ea0453ade 100644
--- a/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp
+++ b/dpnp/backend/extensions/common/ext/details/validation_utils_internal.hpp
@@ -114,8 +114,10 @@ inline void check_no_overlap(const array_ptr &input,
     }
 
     const auto &overlap = dpctl::tensor::overlap::MemoryOverlap();
+    const auto &same_logical_tensors =
+        dpctl::tensor::overlap::SameLogicalTensors();
 
-    if (overlap(*input, *output)) {
+    if (overlap(*input, *output) && !same_logical_tensors(*input, *output)) {
         throw py::value_error(name_of(input, names) +
                               " has overlapping memory segments with " +
                               name_of(output, names));

From 15685e8295a39c978bd971ad2a88477805e91f90 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 04:39:52 -0700
Subject: [PATCH 20/32] Fix rtol/atol conversion getting real part of complex
 to match NumPy

---
 dpnp/dpnp_iface_logic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 1052ac40d3ac..ab37689961e6 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -900,12 +900,12 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
         a = dpnp.astype(a, dt, casting="same_kind", copy=False)
         b = dpnp.astype(b, dt, casting="same_kind", copy=False)
 
-        # Convert complex rtol/atol to real values
+        # Convert complex rtol/atol to to their real parts
         # to avoid pybind11 cast errors and match NumPy behavior
         if isinstance(rtol, complex):
-            rtol = abs(rtol)
+            rtol = rtol.real
         if isinstance(atol, complex):
-            atol = abs(atol)
+            atol = atol.real
 
         # pylint: disable=W0707
         try:

From 725891c0bbc30898e56e46748fb1be92b6bdbbce Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 04:47:01 -0700
Subject: [PATCH 21/32] Apply remarks

---
 .../extensions/ufunc/elementwise_functions/isclose.cpp   | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 752273fe085a..57f26ccf6930 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -45,7 +45,6 @@
 #include "../../elementwise_functions/simplify_iteration_space.hpp"
 
 // dpctl tensor headers
-#include "utils/memory_overlap.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/output_validation.hpp"
 #include "utils/sycl_alloc_utils.hpp"
@@ -58,7 +57,7 @@
 namespace py = pybind11;
 namespace td_ns = dpctl::tensor::type_dispatch;
 
-using ext::common::value_type_of;
+using ext::common::value_type_of_t;
 using ext::validation::array_names;
 
 using ext::common::dtype_from_typenum;
@@ -68,6 +67,7 @@ using ext::validation::check_num_dims;
 using ext::validation::check_queue;
 using ext::validation::check_same_dtype;
 using ext::validation::check_same_size;
+using ext::validation::check_writable;
 
 namespace dpnp::extensions::ufunc
 {
@@ -75,9 +75,6 @@ namespace dpnp::extensions::ufunc
 namespace impl
 {
 
-template <typename T>
-using value_type_of_t = typename value_type_of<T>::type;
-
 typedef sycl::event (*isclose_strided_scalar_fn_ptr_t)(
     sycl::queue &,
     const int,           // nd
@@ -181,7 +178,7 @@ std::pair<sycl::event, sycl::event>
 
     check_queue({&a, &b, &res}, names, exec_q);
     check_no_overlap({&a, &b}, {&res}, names);
-    dpctl::tensor::validation::CheckWritable::throw_if_not_writable(res);
+    check_writable({&res}, names);
 
     auto types = td_ns::usm_ndarray_types();
     // a_typeid == b_typeid (check_same_dtype(&a, &b, names))

From c7c701141182bc4eaeb4c7db8dbbf071cb9b6ab0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 05:00:42 -0700
Subject: [PATCH 22/32] Refactor isclose() by using separate
 _isclose_scalar_tol() for scalar rtol/atol

---
 dpnp/dpnp_iface_logic.py | 140 +++++++++++++++++++++------------------
 1 file changed, 74 insertions(+), 66 deletions(-)

diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index ab37689961e6..3031560de3d7 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -84,6 +84,79 @@
 ]
 
 
+def _isclose_scalar_tol(a, b, rtol, atol, equal_nan):
+    """
+    Specialized implementation of dpnp.isclose() for scalar rtol and atol
+    using a dedicated SYCL kernel.
+    """
+    dt = dpnp.result_type(a, b, 1.0)
+
+    if dpnp.isscalar(a):
+        usm_type = b.usm_type
+        exec_q = b.sycl_queue
+        a = dpnp.array(
+            a,
+            dt,
+            usm_type=usm_type,
+            sycl_queue=exec_q,
+        )
+    elif dpnp.isscalar(b):
+        usm_type = a.usm_type
+        exec_q = a.sycl_queue
+        b = dpnp.array(
+            b,
+            dt,
+            usm_type=usm_type,
+            sycl_queue=exec_q,
+        )
+    else:
+        usm_type, exec_q = get_usm_allocations([a, b])
+
+    a = dpnp.astype(a, dt, casting="same_kind", copy=False)
+    b = dpnp.astype(b, dt, casting="same_kind", copy=False)
+
+    # Convert complex rtol/atol to to their real parts
+    # to avoid pybind11 cast errors and match NumPy behavior
+    if isinstance(rtol, complex):
+        rtol = rtol.real
+    if isinstance(atol, complex):
+        atol = atol.real
+
+    # pylint: disable=W0707
+    try:
+        res_shape = dpnp.broadcast_shapes(a.shape, b.shape)
+    except ValueError:
+        raise ValueError(
+            "operands could not be broadcast together with shapes "
+            f"{a.shape} and {b.shape}"
+        )
+
+    if a.shape != res_shape:
+        a = dpnp.broadcast_to(a, res_shape)
+    if b.shape != res_shape:
+        b = dpnp.broadcast_to(b, res_shape)
+
+    out_dtype = dpnp.bool
+    output = dpnp.empty(
+        res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
+    )
+
+    _manager = dpu.SequentialOrderManager[exec_q]
+    mem_ev, ht_ev = ufi._isclose_scalar(
+        a.get_array(),
+        b.get_array(),
+        rtol,
+        atol,
+        equal_nan,
+        output.get_array(),
+        exec_q,
+        depends=_manager.submitted_events,
+    )
+    _manager.add_event_pair(mem_ev, ht_ev)
+
+    return output
+
+
 def all(a, /, axis=None, out=None, keepdims=False, *, where=True):
     """
     Test whether all array elements along a given axis evaluate to ``True``.
@@ -874,72 +947,7 @@ def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
 
     # Use own SYCL kernel for scalar rtol/atol
     if dpnp.isscalar(rtol) and dpnp.isscalar(atol):
-        dt = dpnp.result_type(a, b, 1.0)
-
-        if dpnp.isscalar(a):
-            usm_type = b.usm_type
-            exec_q = b.sycl_queue
-            a = dpnp.array(
-                a,
-                dt,
-                usm_type=usm_type,
-                sycl_queue=exec_q,
-            )
-        elif dpnp.isscalar(b):
-            usm_type = a.usm_type
-            exec_q = a.sycl_queue
-            b = dpnp.array(
-                b,
-                dt,
-                usm_type=usm_type,
-                sycl_queue=exec_q,
-            )
-        else:
-            usm_type, exec_q = get_usm_allocations([a, b])
-
-        a = dpnp.astype(a, dt, casting="same_kind", copy=False)
-        b = dpnp.astype(b, dt, casting="same_kind", copy=False)
-
-        # Convert complex rtol/atol to to their real parts
-        # to avoid pybind11 cast errors and match NumPy behavior
-        if isinstance(rtol, complex):
-            rtol = rtol.real
-        if isinstance(atol, complex):
-            atol = atol.real
-
-        # pylint: disable=W0707
-        try:
-            res_shape = dpnp.broadcast_shapes(a.shape, b.shape)
-        except ValueError:
-            raise ValueError(
-                "operands could not be broadcast together with shapes "
-                f"{a.shape} and {b.shape}"
-            )
-
-        if a.shape != res_shape:
-            a = dpnp.broadcast_to(a, res_shape)
-        if b.shape != res_shape:
-            b = dpnp.broadcast_to(b, res_shape)
-
-        out_dtype = dpnp.bool
-        output = dpnp.empty(
-            res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
-        )
-
-        _manager = dpu.SequentialOrderManager[exec_q]
-        mem_ev, ht_ev = ufi._isclose_scalar(
-            a.get_array(),
-            b.get_array(),
-            rtol,
-            atol,
-            equal_nan,
-            output.get_array(),
-            exec_q,
-            depends=_manager.submitted_events,
-        )
-        _manager.add_event_pair(mem_ev, ht_ev)
-
-        return output
+        return _isclose_scalar_tol(a, b, rtol, atol, equal_nan)
 
     # make sure b is an inexact type to avoid bad behavior on abs(MIN_INT)
     if dpnp.isscalar(b):

From c5da321735d2b2d5178016d059c86b523391f269 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Wed, 30 Jul 2025 05:35:38 -0700
Subject: [PATCH 23/32] Update TestIsClose

---
 dpnp/tests/test_logic.py | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py
index c1a068724938..9ac356729020 100644
--- a/dpnp/tests/test_logic.py
+++ b/dpnp/tests/test_logic.py
@@ -10,7 +10,9 @@
 import dpnp
 
 from .helper import (
+    generate_random_numpy_array,
     get_all_dtypes,
+    get_complex_dtypes,
     get_float_complex_dtypes,
     get_float_dtypes,
     get_integer_float_dtypes,
@@ -554,6 +556,21 @@ def test_isclose(self, dtype, rtol, atol):
         dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol=rtol, atol=atol)
         assert_allclose(dpnp_res, np_res)
 
+    @pytest.mark.parametrize("dtype", get_complex_dtypes())
+    @pytest.mark.parametrize("shape", [(4, 4), (16, 16), (4, 4, 4)])
+    def test_isclose_complex(self, dtype, shape):
+        a = generate_random_numpy_array(shape, dtype=dtype, seed_value=81)
+        b = a.copy()
+
+        b = b + (1e-6 + 1e-6j)
+
+        dpnp_a = dpnp.array(a, dtype=dtype)
+        dpnp_b = dpnp.array(b, dtype=dtype)
+
+        np_res = numpy.isclose(a, b)
+        dpnp_res = dpnp.isclose(dpnp_a, dpnp_b)
+        assert_allclose(dpnp_res, np_res)
+
     @pytest.mark.parametrize(
         "sh_a, sh_b",
         [
@@ -603,14 +620,14 @@ def test_rtol_atol_arrays(self):
     @pytest.mark.parametrize(
         "rtol, atol",
         [
-            (1e-05 + 1j, 1e-08),
-            (1e-05, 1e-08 + 1j),
-            (1e-05 + 1j, 1e-08 + 1j),
+            (0 + 1e-5j, 1e-08),
+            (1e-05, 0 + 1e-8j),
+            (0 + 1e-5j, 0 + 1e-8j),
         ],
     )
     def test_rtol_atol_complex(self, rtol, atol):
-        a = dpnp.array([1.0, 2.0])
-        b = dpnp.array([1.0, 2.0 + 1e-7])
+        a = dpnp.array([1.0, 1.0])
+        b = dpnp.array([1.0, 1.0 + 1e-6])
 
         dpnp_res = dpnp.isclose(a, b, rtol=rtol, atol=atol)
         np_res = numpy.isclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol)

From c1112c364b21280bde4c141529092845c5e02f7f Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 04:14:17 -0700
Subject: [PATCH 24/32] Add with_requires(numpy

---
 dpnp/tests/test_logic.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py
index 9ac356729020..3efad4b98aff 100644
--- a/dpnp/tests/test_logic.py
+++ b/dpnp/tests/test_logic.py
@@ -17,6 +17,7 @@
     get_float_dtypes,
     get_integer_float_dtypes,
 )
+from .third_party.cupy import testing
 
 
 class TestAllAny:
@@ -602,6 +603,8 @@ def test_equal_nan(self):
         dp_res = dpnp.isclose(dp_a, dp_b, equal_nan=True)
         assert_allclose(dp_res, np_res)
 
+    # array-like rtol/atol support requires NumPy >= 2.0
+    @testing.with_requires("numpy>=2.0")
     def test_rtol_atol_arrays(self):
         a = numpy.array([2.1, 2.1, 2.1, 2.1, 5, numpy.nan])
         b = numpy.array([2, 2, 2, 2, numpy.nan, 5])
@@ -633,7 +636,9 @@ def test_rtol_atol_complex(self, rtol, atol):
         np_res = numpy.isclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol)
         assert_allclose(dpnp_res, np_res)
 
-    def test_rtol_atol_nep50_broadcasting(self):
+    # NEP 50: float32 vs Python float comparison requires NumPy >= 2.0
+    @testing.with_requires("numpy>=2.0")
+    def test_rtol_atol_nep50(self):
         below_one = float(1.0 - numpy.finfo("f8").eps)
         f32 = numpy.array(below_one, dtype="f4")
         dp_f32 = dpnp.array(f32)

From 35fc36768ab7df5ddb78dafadbf0fe9a092750a4 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 04:36:45 -0700
Subject: [PATCH 25/32] Remove unused includes

---
 .../extensions/ufunc/elementwise_functions/isclose.cpp     | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 57f26ccf6930..7fd1d8d65ad6 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -23,22 +23,16 @@
 // THE POSSIBILITY OF SUCH DAMAGE.
 //*****************************************************************************
 
-#include <algorithm>
 #include <complex>
 #include <cstddef>
 #include <stdexcept>
-#include <string>
-#include <tuple>
 #include <type_traits>
-#include <utility>
 #include <vector>
 
 #include <sycl/sycl.hpp>
 
 #include "dpctl4pybind11.hpp"
-#include <pybind11/numpy.h>
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
 
 #include "kernels/elementwise_functions/isclose.hpp"
 
@@ -47,7 +41,6 @@
 // dpctl tensor headers
 #include "utils/offset_utils.hpp"
 #include "utils/output_validation.hpp"
-#include "utils/sycl_alloc_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 

From 4b552ebbe291a0141bcd458d92b0d248716e007a Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 04:42:14 -0700
Subject: [PATCH 26/32] Use dpctl tensor include to use experimental SYCL
 namespace for complex

---
 dpnp/backend/kernels/elementwise_functions/isclose.hpp | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 4be6c103e5c7..cfbf5276d4ca 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -25,13 +25,7 @@
 
 #pragma once
 
-#define SYCL_EXT_ONEAPI_COMPLEX
-#if __has_include(<sycl/ext/oneapi/experimental/sycl_complex.hpp>)
-#include <sycl/ext/oneapi/experimental/sycl_complex.hpp>
-#else
-#include <sycl/ext/oneapi/experimental/complex/complex.hpp>
-#endif
-
+#include <algorithm>
 #include <complex>
 #include <cstddef>
 #include <vector>
@@ -40,13 +34,13 @@
 // dpctl tensor headers
 #include "kernels/alignment.hpp"
 #include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/sycl_complex.hpp"
 #include "utils/offset_utils.hpp"
 #include "utils/sycl_utils.hpp"
 #include "utils/type_utils.hpp"
 
 namespace dpnp::kernels::isclose
 {
-namespace exprm_ns = sycl::ext::oneapi::experimental;
 
 template <typename T>
 inline bool isclose(const T a,

From f68249c6dbf39f8441a0d040f3a5106386d805e0 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 04:48:12 -0700
Subject: [PATCH 27/32] Return inclusion pybind11/stl.h

---
 dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 7fd1d8d65ad6..683cb48247b1 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -33,6 +33,7 @@
 
 #include "dpctl4pybind11.hpp"
 #include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
 
 #include "kernels/elementwise_functions/isclose.hpp"
 

From 394727fbb47aa074ac56de55b9f4bbc085c5b7b6 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 04:48:54 -0700
Subject: [PATCH 28/32] Use static_assert to isclose

---
 dpnp/backend/kernels/elementwise_functions/isclose.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index cfbf5276d4ca..614f90661864 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -49,6 +49,8 @@ inline bool isclose(const T a,
                     const T atol,
                     const bool equal_nan)
 {
+    static_assert(std::is_floating_point_v<T> || std::is_same_v<T, sycl::half>);
+
     if (sycl::isfinite(a) && sycl::isfinite(b)) {
         return sycl::fabs(a - b) <= atol + rtol * sycl::fabs(b);
     }

From c27596fa720e6bd775f8202cdb429d17ed5bfd9f Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 05:07:06 -0700
Subject: [PATCH 29/32] Remove offsets passing for contig impl

---
 .../extensions/ufunc/elementwise_functions/isclose.cpp |  3 +--
 dpnp/backend/kernels/elementwise_functions/isclose.hpp | 10 +++-------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index 683cb48247b1..bd6053c9b7ab 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -143,8 +143,7 @@ sycl::event isclose_contig_scalar_call(sycl::queue &q,
     const bool equal_nan = py::cast<bool>(py_equal_nan);
 
     return dpnp::kernels::isclose::isclose_contig_scalar_impl<T, scT>(
-        q, nelems, rtol, atol, equal_nan, in1_p, 0, in2_p, 0, out_p, 0,
-        depends);
+        q, nelems, rtol, atol, equal_nan, in1_p, in2_p, out_p, depends);
 }
 
 isclose_strided_scalar_fn_ptr_t
diff --git a/dpnp/backend/kernels/elementwise_functions/isclose.hpp b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
index 614f90661864..b2a0db782f5f 100644
--- a/dpnp/backend/kernels/elementwise_functions/isclose.hpp
+++ b/dpnp/backend/kernels/elementwise_functions/isclose.hpp
@@ -279,11 +279,8 @@ sycl::event
                                const scT atol,
                                const bool equal_nan,
                                const char *a_cp,
-                               ssize_t a_offset,
                                const char *b_cp,
-                               ssize_t b_offset,
                                char *out_cp,
-                               ssize_t out_offset,
                                const std::vector<sycl::event> &depends = {})
 {
     constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz;
@@ -298,12 +295,11 @@ sycl::event
     const auto gws_range = sycl::range<1>(n_groups * lws);
     const auto lws_range = sycl::range<1>(lws);
 
-    // ? + offset
-    const T *a_tp = reinterpret_cast<const T *>(a_cp) + a_offset;
-    const T *b_tp = reinterpret_cast<const T *>(b_cp) + b_offset;
+    const T *a_tp = reinterpret_cast<const T *>(a_cp);
+    const T *b_tp = reinterpret_cast<const T *>(b_cp);
 
     using resTy = bool;
-    resTy *out_tp = reinterpret_cast<resTy *>(out_cp) + out_offset;
+    resTy *out_tp = reinterpret_cast<resTy *>(out_cp);
 
     sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
         cgh.depends_on(depends);

From a71f1bba50543268a981d39dd2e85be53c2e5289 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 05:31:06 -0700
Subject: [PATCH 30/32] Use dpnp.broadcast_arrays() instead of
 dpnp.broadcast_shapes()

---
 dpnp/dpnp_iface_logic.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 3031560de3d7..bc4a79af20d5 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -124,21 +124,16 @@ def _isclose_scalar_tol(a, b, rtol, atol, equal_nan):
 
     # pylint: disable=W0707
     try:
-        res_shape = dpnp.broadcast_shapes(a.shape, b.shape)
+        a, b = dpnp.broadcast_arrays(a, b)
     except ValueError:
         raise ValueError(
             "operands could not be broadcast together with shapes "
             f"{a.shape} and {b.shape}"
         )
 
-    if a.shape != res_shape:
-        a = dpnp.broadcast_to(a, res_shape)
-    if b.shape != res_shape:
-        b = dpnp.broadcast_to(b, res_shape)
-
     out_dtype = dpnp.bool
     output = dpnp.empty(
-        res_shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
+        a.shape, dtype=out_dtype, sycl_queue=exec_q, usm_type=usm_type
     )
 
     _manager = dpu.SequentialOrderManager[exec_q]

From 2d22e625c0107acbbdb2283bd7d1ae15408f6511 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Thu, 31 Jul 2025 06:39:21 -0700
Subject: [PATCH 31/32] Improve code coverage

---
 dpnp/tests/test_logic.py | 78 ++++++++++++++++++++++++++++------------
 1 file changed, 55 insertions(+), 23 deletions(-)

diff --git a/dpnp/tests/test_logic.py b/dpnp/tests/test_logic.py
index 3efad4b98aff..bf003410849b 100644
--- a/dpnp/tests/test_logic.py
+++ b/dpnp/tests/test_logic.py
@@ -519,26 +519,6 @@ def test_infinity_sign_errors(func):
 
 
 class TestIsClose:
-    @pytest.mark.parametrize("val", [1.0, numpy.inf, -numpy.inf, numpy.nan])
-    def test_input_0d(self, val):
-        dp_arr = dpnp.array(val)
-        np_arr = numpy.array(val)
-
-        # array & scalar
-        dp_res = dpnp.isclose(dp_arr, val)
-        np_res = numpy.isclose(np_arr, val)
-        assert_allclose(dp_res, np_res)
-
-        # scalar & array
-        dp_res = dpnp.isclose(val, dp_arr)
-        np_res = numpy.isclose(val, np_arr)
-        assert_allclose(dp_res, np_res)
-
-        # array & array
-        dp_res = dpnp.isclose(dp_arr, dp_arr)
-        np_res = numpy.isclose(np_arr, np_arr)
-        assert_allclose(dp_res, np_res)
-
     @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True))
     @pytest.mark.parametrize(
         "rtol", [1e-5, dpnp.array(1e-5), dpnp.full((10,), 1e-5)]
@@ -572,6 +552,51 @@ def test_isclose_complex(self, dtype, shape):
         dpnp_res = dpnp.isclose(dpnp_a, dpnp_b)
         assert_allclose(dpnp_res, np_res)
 
+    @pytest.mark.parametrize(
+        "rtol, atol",
+        [
+            (1e-5, 1e-8),
+            (dpnp.array(1e-5), dpnp.array(1e-8)),
+        ],
+    )
+    def test_empty_input(self, rtol, atol):
+        a = numpy.array([])
+        b = numpy.array([])
+
+        dpnp_a = dpnp.array(a)
+        dpnp_b = dpnp.array(b)
+
+        np_res = numpy.isclose(a, b, rtol=1e-5, atol=1e-8)
+        dpnp_res = dpnp.isclose(dpnp_a, dpnp_b, rtol=rtol, atol=atol)
+        assert_allclose(dpnp_res, np_res)
+
+    @pytest.mark.parametrize(
+        "rtol, atol",
+        [
+            (1e-5, 1e-8),
+            (dpnp.array(1e-5), dpnp.array(1e-8)),
+        ],
+    )
+    @pytest.mark.parametrize("val", [1.0, numpy.inf, -numpy.inf, numpy.nan])
+    def test_input_0d(self, val, rtol, atol):
+        dp_arr = dpnp.array(val)
+        np_arr = numpy.array(val)
+
+        # array & scalar
+        dp_res = dpnp.isclose(dp_arr, val, rtol=rtol, atol=atol)
+        np_res = numpy.isclose(np_arr, val, rtol=1e-5, atol=1e-8)
+        assert_allclose(dp_res, np_res)
+
+        # scalar & array
+        dp_res = dpnp.isclose(val, dp_arr, rtol=rtol, atol=atol)
+        np_res = numpy.isclose(val, np_arr, rtol=1e-5, atol=1e-8)
+        assert_allclose(dp_res, np_res)
+
+        # array & array
+        dp_res = dpnp.isclose(dp_arr, dp_arr, rtol=rtol, atol=atol)
+        np_res = numpy.isclose(np_arr, np_arr, rtol=1e-5, atol=1e-8)
+        assert_allclose(dp_res, np_res)
+
     @pytest.mark.parametrize(
         "sh_a, sh_b",
         [
@@ -592,15 +617,22 @@ def test_broadcast_shapes(self, sh_a, sh_b):
         dp_res = dpnp.isclose(a_dp, b_dp)
         assert_allclose(dp_res, np_res)
 
-    def test_equal_nan(self):
+    @pytest.mark.parametrize(
+        "rtol, atol",
+        [
+            (1e-5, 1e-8),
+            (dpnp.array(1e-5), dpnp.array(1e-8)),
+        ],
+    )
+    def test_equal_nan(self, rtol, atol):
         a = numpy.array([numpy.nan, 1.0])
         b = numpy.array([numpy.nan, 1.0])
 
         dp_a = dpnp.array(a)
         dp_b = dpnp.array(b)
 
-        np_res = numpy.isclose(a, b, equal_nan=True)
-        dp_res = dpnp.isclose(dp_a, dp_b, equal_nan=True)
+        np_res = numpy.isclose(a, b, rtol=1e-5, atol=1e-8, equal_nan=True)
+        dp_res = dpnp.isclose(dp_a, dp_b, rtol=rtol, atol=atol, equal_nan=True)
         assert_allclose(dp_res, np_res)
 
     # array-like rtol/atol support requires NumPy >= 2.0

From 0822f9813e6c7cc2c4e2d97671538aea721fe712 Mon Sep 17 00:00:00 2001
From: Vladislav Perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 4 Aug 2025 03:56:50 -0700
Subject: [PATCH 32/32] Remove impossible used special case for contig data

---
 .../ufunc/elementwise_functions/isclose.cpp   | 27 -------------------
 1 file changed, 27 deletions(-)

diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
index bd6053c9b7ab..43ff66a9a9cb 100644
--- a/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
+++ b/dpnp/backend/extensions/ufunc/elementwise_functions/isclose.cpp
@@ -262,33 +262,6 @@ std::pair<sycl::event, sycl::event>
         simplified_shape, simplified_a_strides, simplified_b_strides,
         simplified_res_strides, a_offset, b_offset, res_offset);
 
-    if (nd == 1 && simplified_a_strides[0] == 1 &&
-        simplified_b_strides[0] == 1 && simplified_res_strides[0] == 1)
-    {
-        // Special case of contiguous data
-        auto contig_fn = isclose_contig_dispatch_vector[a_b_typeid];
-
-        if (contig_fn == nullptr) {
-            py::dtype a_b_dtype_py = dtype_from_typenum(a_b_typeid);
-            throw std::runtime_error(
-                "Contiguous implementation is missing for " +
-                std::string(py::str(a_b_dtype_py)) + "data type");
-        }
-
-        int a_elem_size = a.get_elemsize();
-        int b_elem_size = b.get_elemsize();
-        int res_elem_size = res.get_elemsize();
-        auto comp_ev = contig_fn(
-            exec_q, nelems, py_rtol, py_atol, py_equal_nan,
-            a_data + a_elem_size * a_offset, b_data + b_elem_size * b_offset,
-            res_data + res_elem_size * res_offset, depends);
-
-        sycl::event ht_ev =
-            dpctl::utils::keep_args_alive(exec_q, {a, b, res}, {comp_ev});
-
-        return std::make_pair(ht_ev, comp_ev);
-    }
-
     auto strided_fn = isclose_strided_scalar_dispatch_vector[a_b_typeid];
 
     if (strided_fn == nullptr) {