Skip to content

Commit 9cb9c4a

Browse files
committed
Add modf implementation to VM extension
1 parent 8db6cfc commit 9cb9c4a

File tree

6 files changed

+361
-4
lines changed

6 files changed

+361
-4
lines changed

dpnp/backend/extensions/ufunc/elementwise_functions/modf.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,9 @@ struct OutputType
7272
{
7373
using table_type = std::disjunction< // disjunction is C++17
7474
// feature, supported by DPC++
75-
td_int_ns::
76-
TypeMapTwoResultsEntry<T, sycl::half, sycl::half, sycl::half>,
77-
td_int_ns::TypeMapTwoResultsEntry<T, float, float, float>,
78-
td_int_ns::TypeMapTwoResultsEntry<T, double, double, double>,
75+
td_int_ns::TypeMapTwoResultsEntry<T, sycl::half>,
76+
td_int_ns::TypeMapTwoResultsEntry<T, float>,
77+
td_int_ns::TypeMapTwoResultsEntry<T, double>,
7978
td_int_ns::DefaultTwoResultsEntry<void>>;
8079
using value_type1 = typename table_type::result_type1;
8180
using value_type2 = typename table_type::result_type2;

dpnp/backend/extensions/vm/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ if(NOT _use_onemath)
6161
${CMAKE_CURRENT_SOURCE_DIR}/log10.cpp
6262
${CMAKE_CURRENT_SOURCE_DIR}/log1p.cpp
6363
${CMAKE_CURRENT_SOURCE_DIR}/log2.cpp
64+
${CMAKE_CURRENT_SOURCE_DIR}/modf.cpp
6465
${CMAKE_CURRENT_SOURCE_DIR}/mul.cpp
6566
${CMAKE_CURRENT_SOURCE_DIR}/nextafter.cpp
6667
${CMAKE_CURRENT_SOURCE_DIR}/pow.cpp

dpnp/backend/extensions/vm/common.hpp

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,115 @@ bool need_to_call_unary_ufunc(sycl::queue &exec_q,
155155
return true;
156156
}
157157

158+
template <typename output_typesT, typename contig_dispatchT>
159+
bool need_to_call_unary_two_outputs_ufunc(
160+
sycl::queue &exec_q,
161+
const dpctl::tensor::usm_ndarray &src,
162+
const dpctl::tensor::usm_ndarray &dst1,
163+
const dpctl::tensor::usm_ndarray &dst2,
164+
const output_typesT &output_type_vec,
165+
const contig_dispatchT &contig_dispatch_vector)
166+
{
167+
// check type_nums
168+
int src_typenum = src.get_typenum();
169+
int dst1_typenum = dst1.get_typenum();
170+
int dst2_typenum = dst2.get_typenum();
171+
172+
const auto &array_types = td_ns::usm_ndarray_types();
173+
int src_typeid = array_types.typenum_to_lookup_id(src_typenum);
174+
int dst1_typeid = array_types.typenum_to_lookup_id(dst1_typenum);
175+
int dst2_typeid = array_types.typenum_to_lookup_id(dst2_typenum);
176+
177+
std::pair<int, int> func_output_typeids = output_type_vec[src_typeid];
178+
179+
// check that types are supported
180+
if (dst1_typeid != func_output_typeids.first ||
181+
dst2_typeid != func_output_typeids.second)
182+
{
183+
return false;
184+
}
185+
186+
// OneMKL VM functions perform a copy on host if no double type support
187+
if (!exec_q.get_device().has(sycl::aspect::fp64)) {
188+
return false;
189+
}
190+
191+
// check that queues are compatible
192+
if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst1, dst2})) {
193+
return false;
194+
}
195+
196+
// dimensions must be the same
197+
int src_nd = src.get_ndim();
198+
int dst1_nd = dst1.get_ndim();
199+
int dst2_nd = dst2.get_ndim();
200+
if (src_nd != dst1_nd || src_nd != dst2_nd) {
201+
return false;
202+
}
203+
else if (dst1_nd == 0 || dst2_nd == 0) {
204+
// don't call OneMKL for 0d arrays
205+
return false;
206+
}
207+
208+
// shapes must be the same
209+
const py::ssize_t *src_shape = src.get_shape_raw();
210+
const py::ssize_t *dst1_shape = dst1.get_shape_raw();
211+
const py::ssize_t *dst2_shape = dst2.get_shape_raw();
212+
bool shapes_equal(true);
213+
size_t src_nelems(1);
214+
215+
for (int i = 0; i < src_nd; ++i) {
216+
src_nelems *= static_cast<std::size_t>(src_shape[i]);
217+
shapes_equal = shapes_equal && (src_shape[i] == dst1_shape[i]) &&
218+
(src_shape[i] == dst2_shape[i]);
219+
}
220+
if (!shapes_equal) {
221+
return false;
222+
}
223+
224+
// if nelems is zero, return false
225+
if (src_nelems == 0) {
226+
return false;
227+
}
228+
229+
// ensure that outputs are ample enough to accommodate all elements
230+
auto dst1_offsets = dst1.get_minmax_offsets();
231+
auto dst2_offsets = dst2.get_minmax_offsets();
232+
// destinations must be ample enough to accommodate all elements
233+
{
234+
size_t range1 =
235+
static_cast<size_t>(dst1_offsets.second - dst1_offsets.first);
236+
size_t range2 =
237+
static_cast<size_t>(dst2_offsets.second - dst2_offsets.first);
238+
if ((range1 + 1 < src_nelems) || (range2 + 1 < src_nelems)) {
239+
return false;
240+
}
241+
}
242+
243+
// check memory overlap
244+
auto const &overlap = dpctl::tensor::overlap::MemoryOverlap();
245+
if (overlap(src, dst1) || overlap(src, dst2) || overlap(dst1, dst2)) {
246+
return false;
247+
}
248+
249+
// support only contiguous inputs
250+
bool is_src_c_contig = src.is_c_contiguous();
251+
bool is_dst1_c_contig = dst1.is_c_contiguous();
252+
bool is_dst2_c_contig = dst2.is_c_contiguous();
253+
254+
bool all_c_contig =
255+
(is_src_c_contig && is_dst1_c_contig && is_dst2_c_contig);
256+
if (!all_c_contig) {
257+
return false;
258+
}
259+
260+
// MKL function is not defined for the type
261+
if (contig_dispatch_vector[src_typeid] == nullptr) {
262+
return false;
263+
}
264+
return true;
265+
}
266+
158267
template <typename output_typesT, typename contig_dispatchT>
159268
bool need_to_call_binary_ufunc(sycl::queue &exec_q,
160269
const dpctl::tensor::usm_ndarray &src1,
@@ -299,6 +408,54 @@ bool need_to_call_binary_ufunc(sycl::queue &exec_q,
299408
ContigFactory>(contig_dispatch_vector); \
300409
};
301410

411+
/**
 * @brief A macro used to define factories and a populating function for a
 * unary function with two output arrays, dispatching to a callback with the
 * proper OneMKL function within VM extension scope.
 *
 * Expands to a `ContigFactory` (contiguous implementation, or `nullptr` when
 * either result type is unsupported), a `TypeMapFactory` (pair of result type
 * ids per input type), and `populate_dispatch_vectors()` which fills
 * `output_typeid_vector` and `contig_dispatch_vector`.
 */
#define MACRO_POPULATE_DISPATCH_2OUTS_VECTORS(__name__)                        \
    template <typename fnT, typename T>                                        \
    struct ContigFactory                                                       \
    {                                                                          \
        fnT get()                                                              \
        {                                                                      \
            if constexpr (std::is_same_v<typename OutputType<T>::value_type1,  \
                                         void> ||                              \
                          std::is_same_v<typename OutputType<T>::value_type2,  \
                                         void>)                                \
            {                                                                  \
                return nullptr;                                                \
            }                                                                  \
            else {                                                             \
                return __name__##_contig_impl<T>;                              \
            }                                                                  \
        }                                                                      \
    };                                                                         \
                                                                               \
    template <typename fnT, typename T>                                        \
    struct TypeMapFactory                                                      \
    {                                                                          \
        std::enable_if_t<std::is_same<fnT, std::pair<int, int>>::value,        \
                         std::pair<int, int>>                                  \
        get()                                                                  \
        {                                                                      \
            using rT1 = typename OutputType<T>::value_type1;                   \
            using rT2 = typename OutputType<T>::value_type2;                   \
            return std::make_pair(td_ns::GetTypeid<rT1>{}.get(),               \
                                  td_ns::GetTypeid<rT2>{}.get());              \
        }                                                                      \
    };                                                                         \
                                                                               \
    static void populate_dispatch_vectors()                                    \
    {                                                                          \
        ext_ns::init_dispatch_vector<std::pair<int, int>, TypeMapFactory>(     \
            output_typeid_vector);                                             \
        ext_ns::init_dispatch_vector<unary_two_outputs_contig_impl_fn_ptr_t,   \
                                     ContigFactory>(contig_dispatch_vector);   \
    };
458+
302459
/**
303460
* @brief A macro used to define factories and a populating binary functions
304461
* to dispatch to a callback with proper OneMKL function within VM extension
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2025, Intel Corporation
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without
6+
// modification, are permitted provided that the following conditions are met:
7+
// - Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
// - Redistributions in binary form must reproduce the above copyright notice,
10+
// this list of conditions and the following disclaimer in the documentation
11+
// and/or other materials provided with the distribution.
12+
// - Neither the name of the copyright holder nor the names of its contributors
13+
// may be used to endorse or promote products derived from this software
14+
// without specific prior written permission.
15+
//
16+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26+
// THE POSSIBILITY OF SUCH DAMAGE.
27+
//*****************************************************************************
28+
29+
#include <complex>
30+
#include <cstddef>
31+
#include <cstdint>
32+
#include <type_traits>
33+
#include <vector>
34+
35+
#include <oneapi/mkl.hpp>
36+
#include <sycl/sycl.hpp>
37+
38+
#include "dpctl4pybind11.hpp"
39+
40+
#include "common.hpp"
41+
#include "modf.hpp"
42+
43+
// include a local copy of elementwise common header from dpctl tensor:
44+
// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
45+
// TODO: replace by including dpctl header once available
46+
#include "../elementwise_functions/elementwise_functions.hpp"
47+
48+
#include "../elementwise_functions/common.hpp"
49+
#include "../elementwise_functions/type_dispatch_building.hpp"
50+
51+
// dpctl tensor headers
52+
#include "utils/type_dispatch.hpp"
53+
#include "utils/type_utils.hpp"
54+
55+
namespace dpnp::extensions::vm
56+
{
57+
namespace py = pybind11;
58+
namespace py_int = dpnp::extensions::py_internal;
59+
namespace td_ns = dpctl::tensor::type_dispatch;
60+
61+
namespace impl
62+
{
63+
namespace ew_cmn_ns = dpnp::extensions::py_internal::elementwise_common;
64+
namespace mkl_vm = oneapi::mkl::vm; // OneMKL namespace with VM functions
65+
namespace td_int_ns = py_int::type_dispatch;
66+
namespace tu_ns = dpctl::tensor::type_utils;
67+
68+
/**
69+
* @brief A factory to define pairs of supported types for which
70+
* MKL VM library provides support in oneapi::mkl::vm::modf<T> function.
71+
*
72+
* @tparam T Type of input vector `a` and of result vectors `y` and `z`.
73+
*/
74+
template <typename T>
75+
struct OutputType
76+
{
77+
using table_type =
78+
std::disjunction<td_int_ns::TypeMapTwoResultsEntry<T, sycl::half>,
79+
td_int_ns::TypeMapTwoResultsEntry<T, float>,
80+
td_int_ns::TypeMapTwoResultsEntry<T, double>,
81+
td_int_ns::DefaultTwoResultsEntry<void>>;
82+
using value_type1 = typename table_type::result_type1;
83+
using value_type2 = typename table_type::result_type2;
84+
};
85+
86+
template <typename T>
87+
static sycl::event modf_contig_impl(sycl::queue &exec_q,
88+
std::size_t in_n,
89+
const char *in_a,
90+
char *out_y,
91+
char *out_z,
92+
const std::vector<sycl::event> &depends)
93+
{
94+
tu_ns::validate_type_for_device<T>(exec_q);
95+
96+
std::int64_t n = static_cast<std::int64_t>(in_n);
97+
const T *a = reinterpret_cast<const T *>(in_a);
98+
99+
using fractT = typename OutputType<T>::value_type1;
100+
using intT = typename OutputType<T>::value_type2;
101+
fractT *y = reinterpret_cast<fractT *>(out_y);
102+
intT *z = reinterpret_cast<intT *>(out_z);
103+
104+
return mkl_vm::modf(exec_q,
105+
n, // number of elements to be calculated
106+
a, // pointer `a` containing input vector of size n
107+
z, // pointer `z` to the output truncated integer values
108+
y, // pointer `y` to the output remaining fraction parts
109+
depends);
110+
}
111+
112+
using ew_cmn_ns::unary_two_outputs_contig_impl_fn_ptr_t;
113+
using ew_cmn_ns::unary_two_outputs_strided_impl_fn_ptr_t;
114+
115+
static std::pair<int, int> output_typeid_vector[td_ns::num_types];
116+
static unary_two_outputs_contig_impl_fn_ptr_t
117+
contig_dispatch_vector[td_ns::num_types];
118+
119+
MACRO_POPULATE_DISPATCH_2OUTS_VECTORS(modf);
120+
} // namespace impl
121+
122+
/**
 * @brief Adds pybind11 bindings for the VM `modf` implementation to @p m.
 *
 * Registers `_modf`, which computes a truncated integer value and the
 * remaining fraction part for each element of `src` via OneMKL VM, and
 * `_mkl_modf_to_call`, which reports whether the OneMKL implementation
 * can be used for the given queue and arrays.
 */
void init_modf(py::module_ m)
{
    using arrayT = dpctl::tensor::usm_ndarray;
    using event_vecT = std::vector<sycl::event>;

    impl::populate_dispatch_vectors();
    using impl::contig_dispatch_vector;
    using impl::output_typeid_vector;

    // capture-less: the lambdas reference only parameters, namespaces and
    // namespace-scope statics, so `[&]` would capture nothing anyway
    auto modf_pyapi = [](sycl::queue &exec_q, const arrayT &src,
                         const arrayT &dst1, const arrayT &dst2,
                         const event_vecT &depends = {}) {
        return py_int::py_unary_two_outputs_ufunc(
            src, dst1, dst2, exec_q, depends, output_typeid_vector,
            contig_dispatch_vector,
            // no support of strided implementation in OneMKL
            td_ns::NullPtrVector<
                impl::unary_two_outputs_strided_impl_fn_ptr_t>{});
    };
    m.def("_modf", modf_pyapi,
          "Call `modf` function from OneMKL VM library to compute "
          "a truncated integer value and the remaining fraction part "
          "for each vector element",
          py::arg("sycl_queue"), py::arg("src"), py::arg("dst1"),
          py::arg("dst2"), py::arg("depends") = py::list());

    auto modf_need_to_call_pyapi = [](sycl::queue &exec_q, const arrayT &src,
                                      const arrayT &dst1, const arrayT &dst2) {
        return py_internal::need_to_call_unary_two_outputs_ufunc(
            exec_q, src, dst1, dst2, output_typeid_vector,
            contig_dispatch_vector);
    };
    m.def("_mkl_modf_to_call", modf_need_to_call_pyapi,
          "Check input arguments to answer if `modf` function from "
          "OneMKL VM library can be used",
          py::arg("sycl_queue"), py::arg("src"), py::arg("dst1"),
          py::arg("dst2"));
}
160+
} // namespace dpnp::extensions::vm
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2025, Intel Corporation
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without
6+
// modification, are permitted provided that the following conditions are met:
7+
// - Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
// - Redistributions in binary form must reproduce the above copyright notice,
10+
// this list of conditions and the following disclaimer in the documentation
11+
// and/or other materials provided with the distribution.
12+
// - Neither the name of the copyright holder nor the names of its contributors
13+
// may be used to endorse or promote products derived from this software
14+
// without specific prior written permission.
15+
//
16+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26+
// THE POSSIBILITY OF SUCH DAMAGE.
27+
//*****************************************************************************
28+
29+
#pragma once

#include <pybind11/pybind11.h>

namespace py = pybind11;

namespace dpnp::extensions::vm
{
/// Registers the `_modf` and `_mkl_modf_to_call` bindings on module @p m.
void init_modf(py::module_ m);
} // namespace dpnp::extensions::vm

0 commit comments

Comments
 (0)