Skip to content

Commit 28df416

Browse files
Generate a swizzle-friendly mask during generic reduction
This changes nothing for clang, which has a nice shuffle optimizer, but it helps gcc a lot. Somewhat related to #1132.
1 parent a625e29 commit 28df416

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

include/xsimd/arch/common/xsimd_common_math.hpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,22 +2110,22 @@ namespace xsimd
21102110
{
21112111
static constexpr T get(T i, T)
21122112
{
2113-
return i >= N ? (i % 2) : i + N;
2113+
return i < N ? (i + N) : ((i % N) + N);
21142114
}
21152115
};
21162116

21172117
template <class Op, class A, class T>
2118-
XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>) noexcept
2118+
XSIMD_INLINE T reduce(Op, batch<T, A> const& self, std::integral_constant<unsigned, 1>, requires_arch<common>) noexcept
21192119
{
21202120
return ::xsimd::kernel::first(self, A {});
21212121
}
21222122

21232123
template <class Op, class A, class T, unsigned Lvl>
2124-
XSIMD_INLINE T reduce(Op op, batch<T, A> const& self, std::integral_constant<unsigned, Lvl>) noexcept
2124+
XSIMD_INLINE T reduce(Op op, batch<T, A> const& self, std::integral_constant<unsigned, Lvl>, requires_arch<common>) noexcept
21252125
{
21262126
using index_type = as_unsigned_integer_t<T>;
21272127
batch<T, A> split = swizzle(self, make_batch_constant<index_type, split_high<index_type, Lvl / 2>, A>());
2128-
return reduce(op, op(split, self), std::integral_constant<unsigned, Lvl / 2>());
2128+
return reduce(op, op(split, self), std::integral_constant<unsigned, Lvl / 2>(), A {});
21292129
}
21302130
}
21312131

@@ -2135,7 +2135,7 @@ namespace xsimd
21352135
{
21362136
return detail::reduce([](batch<T, A> const& x, batch<T, A> const& y)
21372137
{ return max(x, y); },
2138-
self, std::integral_constant<unsigned, batch<T, A>::size>());
2138+
self, std::integral_constant<unsigned, batch<T, A>::size>(), A {});
21392139
}
21402140

21412141
// reduce_min
@@ -2144,7 +2144,8 @@ namespace xsimd
21442144
{
21452145
return detail::reduce([](batch<T, A> const& x, batch<T, A> const& y)
21462146
{ return min(x, y); },
2147-
self, std::integral_constant<unsigned, batch<T, A>::size>());
2147+
self, std::integral_constant<unsigned, batch<T, A>::size>(),
2148+
A {});
21482149
}
21492150

21502151
// remainder

include/xsimd/types/xsimd_api.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1834,7 +1834,7 @@ namespace xsimd
18341834
XSIMD_INLINE T reduce(F&& f, batch<T, A> const& x) noexcept
18351835
{
18361836
detail::static_check_supported_config<T, A>();
1837-
return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>());
1837+
return kernel::detail::reduce(std::forward<F>(f), x, std::integral_constant<unsigned, batch<T, A>::size>(), A {});
18381838
}
18391839

18401840
/**

0 commit comments

Comments
 (0)