Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Improved performance of copying operation to C-/F-contig array, with optimization for batch of square matrices [gh-1850](https://github.com/IntelPython/dpctl/pull/1850)
* Improved performance of `tensor.argsort` function for all types [gh-1859](https://github.com/IntelPython/dpctl/pull/1859)
* Improved performance of `tensor.sort` and `tensor.argsort` for short arrays in the range [16, 64] elements [gh-1866](https://github.com/IntelPython/dpctl/pull/1866)
* Improved performance of element-wise unary and binary functions [gh-1879](https://github.com/IntelPython/dpctl/pull/1879)

### Fixed
* Fix for `tensor.result_type` when all inputs are Python built-in scalars [gh-1877](https://github.com/IntelPython/dpctl/pull/1877)
Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace kernels
namespace alignment_utils
{

static constexpr size_t required_alignment = 64;
static constexpr size_t required_alignment = 64UL;

template <std::uintptr_t alignment, typename Ptr> bool is_aligned(Ptr p)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,11 @@
#include <type_traits>

#include "cabs_impl.hpp"
#include "kernels/elementwise_functions/common.hpp"
#include "vec_size_util.hpp"

#include "kernels/dpctl_tensor_types.hpp"
#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -50,6 +52,7 @@ namespace abs

namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
using dpctl::tensor::type_utils::is_complex;

template <typename argT, typename resT> struct AbsFunctor
Expand Down Expand Up @@ -89,8 +92,8 @@ template <typename argT, typename resT> struct AbsFunctor

template <typename argT,
typename resT = argT,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argT, resT>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AbsContigFunctor =
elementwise_common::UnaryContigFunctor<argT,
Expand Down Expand Up @@ -132,9 +135,13 @@ sycl::event abs_contig_impl(sycl::queue &exec_q,
char *res_p,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AbsOutputType<argTy>::value_type;
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vec = 1u;

return elementwise_common::unary_contig_impl<
argTy, AbsOutputType, AbsContigFunctor, abs_contig_kernel>(
exec_q, nelems, arg_p, res_p, depends);
argTy, AbsOutputType, AbsContigFunctor, abs_contig_kernel, vec_sz,
n_vec>(exec_q, nelems, arg_p, res_p, depends);
}

template <typename fnT, typename T> struct AbsContigFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"
#include "vec_size_util.hpp"

#include "kernels/dpctl_tensor_types.hpp"
#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -48,6 +50,7 @@ namespace acos

namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
using dpctl::tensor::type_utils::is_complex;

template <typename argT, typename resT> struct AcosFunctor
Expand Down Expand Up @@ -128,8 +131,8 @@ template <typename argT, typename resT> struct AcosFunctor

template <typename argTy,
typename resTy = argTy,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argTy, resTy>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AcosContigFunctor =
elementwise_common::UnaryContigFunctor<argTy,
Expand Down Expand Up @@ -166,9 +169,13 @@ sycl::event acos_contig_impl(sycl::queue &exec_q,
char *res_p,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AcosOutputType<argTy>::value_type;
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vec = 1u;

return elementwise_common::unary_contig_impl<
argTy, AcosOutputType, AcosContigFunctor, acos_contig_kernel>(
exec_q, nelems, arg_p, res_p, depends);
argTy, AcosOutputType, AcosContigFunctor, acos_contig_kernel, vec_sz,
n_vec>(exec_q, nelems, arg_p, res_p, depends);
}

template <typename fnT, typename T> struct AcosContigFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"
#include "vec_size_util.hpp"

#include "kernels/dpctl_tensor_types.hpp"
#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -48,6 +50,7 @@ namespace acosh

namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
using dpctl::tensor::type_utils::is_complex;

template <typename argT, typename resT> struct AcoshFunctor
Expand Down Expand Up @@ -155,8 +158,8 @@ template <typename argT, typename resT> struct AcoshFunctor

template <typename argTy,
typename resTy = argTy,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argTy, resTy>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AcoshContigFunctor =
elementwise_common::UnaryContigFunctor<argTy,
Expand Down Expand Up @@ -193,9 +196,13 @@ sycl::event acosh_contig_impl(sycl::queue &exec_q,
char *res_p,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AcoshOutputType<argTy>::value_type;
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vec = 1u;

return elementwise_common::unary_contig_impl<
argTy, AcoshOutputType, AcoshContigFunctor, acosh_contig_kernel>(
exec_q, nelems, arg_p, res_p, depends);
argTy, AcoshOutputType, AcoshContigFunctor, acosh_contig_kernel, vec_sz,
n_vec>(exec_q, nelems, arg_p, res_p, depends);
}

template <typename fnT, typename T> struct AcoshContigFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
#include <type_traits>

#include "sycl_complex.hpp"
#include "vec_size_util.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -50,6 +52,8 @@ namespace add
namespace td_ns = dpctl::tensor::type_dispatch;
namespace tu_ns = dpctl::tensor::type_utils;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;

template <typename argT1, typename argT2, typename resT> struct AddFunctor
{

Expand Down Expand Up @@ -110,8 +114,8 @@ template <typename argT1, typename argT2, typename resT> struct AddFunctor
template <typename argT1,
typename argT2,
typename resT,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argT1, argT2, resT>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AddContigFunctor =
elementwise_common::BinaryContigFunctor<argT1,
Expand Down Expand Up @@ -214,10 +218,14 @@ sycl::event add_contig_impl(sycl::queue &exec_q,
ssize_t res_offset,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AddOutputType<argTy1, argTy2>::value_type;
constexpr auto vec_sz = VecSize_v<argTy1, argTy2, resTy>;
constexpr unsigned int n_vecs = 1;

return elementwise_common::binary_contig_impl<
argTy1, argTy2, AddOutputType, AddContigFunctor, add_contig_kernel>(
exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p,
res_offset, depends);
argTy1, argTy2, AddOutputType, AddContigFunctor, add_contig_kernel,
vec_sz, n_vecs>(exec_q, nelems, arg1_p, arg1_offset, arg2_p,
arg2_offset, res_p, res_offset, depends);
}

template <typename fnT, typename T1, typename T2> struct AddContigFactory
Expand Down Expand Up @@ -410,8 +418,8 @@ template <typename argT, typename resT> struct AddInplaceFunctor

template <typename argT,
typename resT,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argT, resT>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AddInplaceContigFunctor = elementwise_common::BinaryInplaceContigFunctor<
argT,
Expand Down Expand Up @@ -489,9 +497,13 @@ add_inplace_contig_impl(sycl::queue &exec_q,
ssize_t res_offset,
const std::vector<sycl::event> &depends = {})
{
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vecs = 1u;

return elementwise_common::binary_inplace_contig_impl<
argTy, resTy, AddInplaceContigFunctor, add_inplace_contig_kernel>(
exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends);
argTy, resTy, AddInplaceContigFunctor, add_inplace_contig_kernel,
vec_sz, n_vecs>(exec_q, nelems, arg_p, arg_offset, res_p, res_offset,
depends);
}

template <typename fnT, typename T1, typename T2> struct AddInplaceContigFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"
#include "vec_size_util.hpp"

#include "kernels/dpctl_tensor_types.hpp"
#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -49,6 +51,7 @@ namespace angle

namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
using dpctl::tensor::type_utils::is_complex;

template <typename argT, typename resT> struct AngleFunctor
Expand All @@ -74,8 +77,8 @@ template <typename argT, typename resT> struct AngleFunctor

template <typename argTy,
typename resTy = argTy,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argTy, resTy>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AngleContigFunctor =
elementwise_common::UnaryContigFunctor<argTy,
Expand Down Expand Up @@ -109,9 +112,13 @@ sycl::event angle_contig_impl(sycl::queue &exec_q,
char *res_p,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AngleOutputType<argTy>::value_type;
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vec = 1u;

return elementwise_common::unary_contig_impl<
argTy, AngleOutputType, AngleContigFunctor, angle_contig_kernel>(
exec_q, nelems, arg_p, res_p, depends);
argTy, AngleOutputType, AngleContigFunctor, angle_contig_kernel, vec_sz,
n_vec>(exec_q, nelems, arg_p, res_p, depends);
}

template <typename fnT, typename T> struct AngleContigFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"
#include "vec_size_util.hpp"

#include "kernels/dpctl_tensor_types.hpp"
#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -48,6 +50,7 @@ namespace asin

namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
using dpctl::tensor::type_utils::is_complex;

template <typename argT, typename resT> struct AsinFunctor
Expand Down Expand Up @@ -148,8 +151,8 @@ template <typename argT, typename resT> struct AsinFunctor

template <typename argTy,
typename resTy = argTy,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argTy, resTy>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AsinContigFunctor =
elementwise_common::UnaryContigFunctor<argTy,
Expand Down Expand Up @@ -186,9 +189,13 @@ sycl::event asin_contig_impl(sycl::queue &exec_q,
char *res_p,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AsinOutputType<argTy>::value_type;
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vec = 1u;

return elementwise_common::unary_contig_impl<
argTy, AsinOutputType, AsinContigFunctor, asin_contig_kernel>(
exec_q, nelems, arg_p, res_p, depends);
argTy, AsinOutputType, AsinContigFunctor, asin_contig_kernel, vec_sz,
n_vec>(exec_q, nelems, arg_p, res_p, depends);
}

template <typename fnT, typename T> struct AsinContigFactory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
#include <sycl/sycl.hpp>
#include <type_traits>

#include "kernels/elementwise_functions/common.hpp"
#include "sycl_complex.hpp"
#include "vec_size_util.hpp"

#include "kernels/dpctl_tensor_types.hpp"
#include "kernels/elementwise_functions/common.hpp"

#include "utils/offset_utils.hpp"
#include "utils/type_dispatch_building.hpp"
#include "utils/type_utils.hpp"
Expand All @@ -48,6 +50,7 @@ namespace asinh

namespace td_ns = dpctl::tensor::type_dispatch;

using dpctl::tensor::kernels::vec_size_utils::VecSize_v;
using dpctl::tensor::type_utils::is_complex;

template <typename argT, typename resT> struct AsinhFunctor
Expand Down Expand Up @@ -131,8 +134,8 @@ template <typename argT, typename resT> struct AsinhFunctor

template <typename argTy,
typename resTy = argTy,
unsigned int vec_sz = 4,
unsigned int n_vecs = 2,
unsigned int vec_sz = VecSize_v<argTy, resTy>,
unsigned int n_vecs = 1,
bool enable_sg_loadstore = true>
using AsinhContigFunctor =
elementwise_common::UnaryContigFunctor<argTy,
Expand Down Expand Up @@ -169,9 +172,13 @@ sycl::event asinh_contig_impl(sycl::queue &exec_q,
char *res_p,
const std::vector<sycl::event> &depends = {})
{
using resTy = typename AsinhOutputType<argTy>::value_type;
constexpr auto vec_sz = VecSize_v<argTy, resTy>;
constexpr unsigned int n_vec = 1u;

return elementwise_common::unary_contig_impl<
argTy, AsinhOutputType, AsinhContigFunctor, asinh_contig_kernel>(
exec_q, nelems, arg_p, res_p, depends);
argTy, AsinhOutputType, AsinhContigFunctor, asinh_contig_kernel, vec_sz,
n_vec>(exec_q, nelems, arg_p, res_p, depends);
}

template <typename fnT, typename T> struct AsinhContigFactory
Expand Down
Loading
Loading