Skip to content

Commit 830509d

Browse files
authored
Merge pull request #439 from serge-sans-paille/feature/improve-select
Provide batch_constant and batch_bool_constant, and use them to speci…
2 parents 1953f19 + a5ac6b5 commit 830509d

13 files changed

+475
-25
lines changed

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ This software is licensed under the BSD-3-Clause license. See the LICENSE file f
7777
api/instr_macros
7878
api/batch_index
7979
api/data_transfer
80+
api/batch_manip
8081
api/math_index
8182
api/aligned_allocator
8283

include/xsimd/types/xsimd_avx512_int_base.hpp

Lines changed: 40 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -114,45 +114,64 @@ namespace xsimd
114114

115115
namespace avx512_detail
116116
{
117-
template<class Tup, std::size_t... Is>
118-
__m512i revert_args_set_epi8(Tup&& t, detail::index_sequence<Is...>)
117+
/**
 * Builds a 512-bit integer register from 64 8-bit values.
 *
 * The values are given in lane order: \c t0 ends up in the lowest lane and
 * \c t63 in the highest one, on every compiler.
 *
 * @param t0..t63 lane values, lowest lane first.
 * @return the packed register.
 */
inline __m512i int_init(std::integral_constant<std::size_t, 1>,
                        int8_t t0, int8_t t1, int8_t t2, int8_t t3,
                        int8_t t4, int8_t t5, int8_t t6, int8_t t7,
                        int8_t t8, int8_t t9, int8_t t10, int8_t t11,
                        int8_t t12, int8_t t13, int8_t t14, int8_t t15,
                        int8_t t16, int8_t t17, int8_t t18, int8_t t19,
                        int8_t t20, int8_t t21, int8_t t22, int8_t t23,
                        int8_t t24, int8_t t25, int8_t t26, int8_t t27,
                        int8_t t28, int8_t t29, int8_t t30, int8_t t31,
                        int8_t t32, int8_t t33, int8_t t34, int8_t t35,
                        int8_t t36, int8_t t37, int8_t t38, int8_t t39,
                        int8_t t40, int8_t t41, int8_t t42, int8_t t43,
                        int8_t t44, int8_t t45, int8_t t46, int8_t t47,
                        int8_t t48, int8_t t49, int8_t t50, int8_t t51,
                        int8_t t52, int8_t t53, int8_t t54, int8_t t55,
                        int8_t t56, int8_t t57, int8_t t58, int8_t t59,
                        int8_t t60, int8_t t61, int8_t t62, int8_t t63)
{
#if defined(__clang__) || __GNUC__
    // A vector literal is used instead of the intrinsic on gcc/clang; its
    // elements are listed from the lowest lane to the highest one.
    return __extension__ (__m512i)(__v64qi)
    {
        t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15,
        t16, t17, t18, t19, t20, t21, t22, t23, t24, t25, t26, t27, t28, t29, t30, t31,
        t32, t33, t34, t35, t36, t37, t38, t39, t40, t41, t42, t43, t44, t45, t46, t47,
        t48, t49, t50, t51, t52, t53, t54, t55, t56, t57, t58, t59, t60, t61, t62, t63
    };
#else
    // _mm512_set_epi8 expects the highest lane first, so the arguments are
    // passed in descending order to match the layout of the branch above.
    return _mm512_set_epi8(
        t63, t62, t61, t60, t59, t58, t57, t56, t55, t54, t53, t52, t51, t50, t49, t48,
        t47, t46, t45, t44, t43, t42, t41, t40, t39, t38, t37, t36, t35, t34, t33, t32,
        t31, t30, t29, t28, t27, t26, t25, t24, t23, t22, t21, t20, t19, t18, t17, t16,
        t15, t14, t13, t12, t11, t10, t9, t8, t7, t6, t5, t4, t3, t2, t1, t0);
#endif
}
130151

131-
template<class Tup, std::size_t... Is>
132-
__m512i revert_args_set_epi16(Tup&& t, detail::index_sequence<Is...>)
152+
/**
 * Builds a 512-bit integer register from 32 16-bit values.
 *
 * The values are given in lane order: \c t0 ends up in the lowest lane and
 * \c t31 in the highest one, on every compiler.
 *
 * @param t0..t31 lane values, lowest lane first.
 * @return the packed register.
 */
inline __m512i int_init(std::integral_constant<std::size_t, 2>,
                        int16_t t0, int16_t t1, int16_t t2, int16_t t3,
                        int16_t t4, int16_t t5, int16_t t6, int16_t t7,
                        int16_t t8, int16_t t9, int16_t t10, int16_t t11,
                        int16_t t12, int16_t t13, int16_t t14, int16_t t15,
                        int16_t t16, int16_t t17, int16_t t18, int16_t t19,
                        int16_t t20, int16_t t21, int16_t t22, int16_t t23,
                        int16_t t24, int16_t t25, int16_t t26, int16_t t27,
                        int16_t t28, int16_t t29, int16_t t30, int16_t t31)
{
#if defined(__clang__) || __GNUC__
    // Vector literal: elements are listed from the lowest lane to the highest.
    return __extension__ (__m512i)(__v32hi)
    {
        t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15,
        t16, t17, t18, t19, t20, t21, t22, t23, t24, t25, t26, t27, t28, t29, t30, t31
    };
#else
    // _mm512_set_epi16 expects the highest lane first, so the arguments are
    // passed in descending order to match the layout of the branch above.
    return _mm512_set_epi16(
        t31, t30, t29, t28, t27, t26, t25, t24, t23, t22, t21, t20, t19, t18, t17, t16,
        t15, t14, t13, t12, t11, t10, t9, t8, t7, t6, t5, t4, t3, t2, t1, t0);
#endif
}
143174

144-
template <class... Args>
145-
__m512i int_init(std::integral_constant<std::size_t, 1>, Args... args)
146-
{
147-
return revert_args_set_epi8(std::forward_as_tuple(args...), detail::make_index_sequence<sizeof...(Args)>{});
148-
}
149-
150-
template <class... Args>
151-
__m512i int_init(std::integral_constant<std::size_t, 2>, Args... args)
152-
{
153-
return revert_args_set_epi16(std::forward_as_tuple(args...), detail::make_index_sequence<sizeof...(Args)>{});
154-
}
155-
156175
inline __m512i int_init(std::integral_constant<std::size_t, 4>,
157176
int32_t t0, int32_t t1, int32_t t2, int32_t t3,
158177
int32_t t4, int32_t t5, int32_t t6, int32_t t7,

include/xsimd/types/xsimd_avx_double.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,13 @@ namespace xsimd
658658
return _mm256_blendv_pd(b, a, cond);
659659
}
660660

661+
template<bool... Values>
662+
static batch_type select(const batch_bool_constant<value_type, Values...>&, const batch_type& a, const batch_type& b)
663+
{
664+
constexpr int mask = batch_bool_constant<value_type, Values...>::mask();
665+
return _mm256_blend_pd(b, a, mask);
666+
}
667+
661668
static batch_bool_type isnan(const batch_type& x)
662669
{
663670
return _mm256_cmp_pd(x, x, _CMP_UNORD_Q);

include/xsimd/types/xsimd_avx_float.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,13 @@ namespace xsimd
706706
return _mm256_blendv_ps(b, a, cond);
707707
}
708708

709+
template<bool... Values>
710+
static batch_type select(const batch_bool_constant<value_type, Values...>& cond, const batch_type& a, const batch_type& b)
711+
{
712+
constexpr int mask = batch_bool_constant<value_type, Values...>::mask();
713+
return _mm256_blend_ps(b, a, mask);
714+
}
715+
709716
static batch_bool_type isnan(const batch_type& x)
710717
{
711718
return _mm256_cmp_ps(x, x, _CMP_UNORD_Q);

include/xsimd/types/xsimd_base.hpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "../memory/xsimd_alignment.hpp"
2626
#include "xsimd_utils.hpp"
2727
#include "xsimd_base_bool.hpp"
28+
#include "xsimd_base_constant.hpp"
2829

2930
namespace xsimd
3031
{
@@ -220,7 +221,7 @@ namespace xsimd
220221
simd_batch(simd_batch&&) = default;
221222
simd_batch& operator=(simd_batch&&) = default;
222223

223-
simd_batch(storage_type value);
224+
constexpr simd_batch(storage_type value);
224225

225226
using char_itype =
226227
typename std::conditional<std::is_signed<char>::value, int8_t, uint8_t>::type;
@@ -679,7 +680,7 @@ namespace xsimd
679680
*****************************/
680681

681682
template <class X>
682-
inline simd_batch<X>::simd_batch(storage_type value)
683+
constexpr inline simd_batch<X>::simd_batch(storage_type value)
683684
: m_value(value)
684685
{
685686
}
@@ -1754,6 +1755,28 @@ namespace xsimd
17541755
return kernel::select(cond(), a(), b());
17551756
}
17561757

1758+
/**
1759+
* @ingroup simd_batch_miscellaneous
1760+
*
1761+
* Ternary operator for batches: selects values from the batches \c a or \c b
1762+
* depending on the boolean values in the constant batch \c cond. Equivalent to
1763+
* \code{.cpp}
1764+
* for(std::size_t i = 0; i < N; ++i)
1765+
* res[i] = cond[i] ? a[i] : b[i];
1766+
* \endcode
1767+
* @param cond constant batch condition.
1768+
* @param a batch values for truthy condition.
1769+
* @param b batch value for falsy condition.
1770+
* @return the result of the selection.
1771+
*/
1772+
template <class X, bool... Masks>
1773+
inline batch_type_t<X> select(const batch_bool_constant<typename simd_batch_traits<X>::value_type, Masks...>& cond, const simd_base<X>& a, const simd_base<X>& b)
1774+
{
1775+
using value_type = typename simd_batch_traits<X>::value_type;
1776+
using kernel = detail::batch_kernel<value_type, simd_batch_traits<X>::size>;
1777+
return kernel::select(cond, a(), b());
1778+
}
1779+
17571780
/**
17581781
* Determines if the scalars in the given batch \c x are NaN values.
17591782
* @param x batch of floating point values.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/***************************************************************************
2+
* Copyright (c) Serge Guelton *
3+
* Copyright (c) QuantStack *
4+
* *
5+
* Distributed under the terms of the BSD 3-Clause License. *
6+
* *
7+
* The full license is in the file LICENSE, distributed with this software. *
8+
****************************************************************************/
9+
10+
#ifndef XSIMD_BASE_CONSTANT_HPP
11+
#define XSIMD_BASE_CONSTANT_HPP
12+
13+
namespace xsimd
14+
{
15+
template <class X>
16+
class simd_base;
17+
18+
template <class T, bool... Values>
19+
struct batch_bool_constant
20+
{
21+
static constexpr std::size_t size = sizeof...(Values);
22+
using value_type = bool;
23+
using batch_type =
24+
typename simd_batch_traits<batch<T, size>>::batch_bool_type;
25+
26+
batch_type operator()() const { return *this; }
27+
28+
operator batch_type() const { return {Values...}; }
29+
30+
bool operator[](size_t i) const
31+
{
32+
return std::array<value_type, size>{Values...}[i];
33+
}
34+
static constexpr int mask()
35+
{
36+
return mask_helper(0, static_cast<int>(Values)...);
37+
}
38+
39+
private:
40+
static constexpr int mask_helper(int acc) { return acc; }
41+
template <class... Tys>
42+
static constexpr int mask_helper(int acc, int mask, Tys... masks)
43+
{
44+
return mask_helper(acc | mask, (masks << 1)...);
45+
}
46+
};
47+
48+
template <class T, T... Values>
49+
struct batch_constant
50+
{
51+
static constexpr std::size_t size = sizeof...(Values);
52+
using value_type = T;
53+
using batch_type = batch<T, size>;
54+
55+
batch_type operator()() const { return *this; }
56+
57+
operator batch_type() const { return {Values...}; }
58+
59+
constexpr T operator[](size_t i) const
60+
{
61+
return std::array<value_type, size>{Values...}[i];
62+
}
63+
};
64+
65+
namespace detail
66+
{
67+
template <class G, std::size_t... Is>
68+
constexpr auto make_batch_constant(detail::index_sequence<Is...>)
69+
-> batch_constant<decltype(G::get(0, 0)),
70+
G::get(Is, sizeof...(Is))...>
71+
{
72+
return {};
73+
}
74+
template <class T, class G, std::size_t... Is>
75+
constexpr auto make_batch_bool_constant(detail::index_sequence<Is...>)
76+
-> batch_bool_constant<T, G::get(Is, sizeof...(Is))...>
77+
{
78+
return {};
79+
}
80+
template <class T, T value, std::size_t... Is>
81+
constexpr auto make_batch_constant(detail::index_sequence<Is...>)
82+
-> batch_constant<T, (Is, value)...>
83+
{
84+
return {};
85+
}
86+
template <class T, T value, std::size_t... Is>
87+
constexpr auto make_batch_bool_constant(detail::index_sequence<Is...>)
88+
-> batch_bool_constant<T, (Is, value)...>
89+
{
90+
return {};
91+
}
92+
} // namespace detail
93+
94+
template <class G, std::size_t N>
95+
constexpr auto make_batch_constant() -> decltype(
96+
detail::make_batch_constant<G>(detail::make_index_sequence<N>()))
97+
{
98+
return detail::make_batch_constant<G>(detail::make_index_sequence<N>());
99+
}
100+
101+
template <class T, class G, std::size_t N>
102+
constexpr auto make_batch_bool_constant()
103+
-> decltype(detail::make_batch_bool_constant<T, G>(
104+
detail::make_index_sequence<N>()))
105+
{
106+
return detail::make_batch_bool_constant<T, G>(
107+
detail::make_index_sequence<N>());
108+
}
109+
110+
} // namespace xsimd
111+
112+
#endif

include/xsimd/types/xsimd_fallback.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,12 @@ namespace xsimd
917917
XSIMD_FALLBACK_MAPPING_LOOP(batch, (cond[i] ? a[i] : b[i]))
918918
}
919919

920+
// Fallback select for a condition known at compile time: the expression given
// to the macro picks a[i] when cond[i] is true and b[i] otherwise.
// NOTE(review): XSIMD_FALLBACK_MAPPING_LOOP is defined elsewhere; presumably it
// evaluates the expression for every lane index i and returns the resulting
// batch -- confirm against its definition.
template<bool... Values>
921+
static batch_type select(const batch_bool_constant<value_type, Values...>& cond, const batch_type& a, const batch_type& b)
922+
{
923+
XSIMD_FALLBACK_MAPPING_LOOP(batch, (cond[i] ? a[i] : b[i]))
924+
}
925+
920926
static batch_bool_type isnan(const batch_type& x)
921927
{
922928
XSIMD_FALLBACK_MAPPING_LOOP(batch_bool, std::isnan(x[i]))

include/xsimd/types/xsimd_sse_double.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,6 +629,17 @@ namespace xsimd
629629
#endif
630630
}
631631

632+
template<bool... Values>
633+
static batch_type select(const batch_bool_constant<value_type, Values...>& cond, const batch_type& a, const batch_type& b)
634+
{
635+
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE4_1_VERSION
636+
constexpr int mask = batch_bool_constant<value_type, Values...>::mask();
637+
return _mm_blend_pd(b, a, mask);
638+
#else
639+
return select(cond(), a, b);
640+
#endif
641+
}
642+
632643
static batch_bool_type isnan(const batch_type& x)
633644
{
634645
return _mm_cmpunord_pd(x, x);

include/xsimd/types/xsimd_sse_float.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,18 @@ namespace xsimd
692692
#endif
693693
}
694694

695+
template<bool... Values>
696+
static batch_type select(const batch_bool_constant<value_type, Values...>& cond, const batch_type& a, const batch_type& b)
697+
{
698+
#if XSIMD_X86_INSTR_SET >= XSIMD_X86_SSE4_1_VERSION
699+
(void)cond;
700+
constexpr int mask = batch_bool_constant<value_type, Values...>::mask();
701+
return _mm_blend_ps(b, a, mask);
702+
#else
703+
return select((batch_bool_type)cond, a, b);
704+
#endif
705+
}
706+
695707
static batch_bool_type isnan(const batch_type& x)
696708
{
697709
return _mm_cmpunord_ps(x, x);

include/xsimd/types/xsimd_sse_int_base.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ namespace xsimd
7070
sse_int_batch();
7171
explicit sse_int_batch(T i);
7272
template <class... Args, class Enable = detail::is_array_initializer_t<T, N, Args...>>
73-
sse_int_batch(Args... args);
73+
constexpr sse_int_batch(Args... args);
7474
explicit sse_int_batch(const T* src);
7575
sse_int_batch(const T* src, aligned_mode);
7676
sse_int_batch(const T* src, unaligned_mode);
@@ -324,7 +324,7 @@ namespace xsimd
324324

325325
template <class T, std::size_t N>
326326
template <class... Args, class>
327-
inline sse_int_batch<T, N>::sse_int_batch(Args... args)
327+
constexpr inline sse_int_batch<T, N>::sse_int_batch(Args... args)
328328
: base_type(sse_detail::int_init(std::integral_constant<std::size_t, sizeof(T)>{}, args...))
329329
{
330330
}

0 commit comments

Comments
 (0)