Skip to content

Commit ee37f7b

Browse files
committed
NEW API: Fixed shift, Rotation
1. Adds the API bitwise_[l|r]shift<N>(...) and rot[l|r]<N>(...). 2. Updates the tests to use the new API. 3. Updates the documentation.
1 parent eb3bacb commit ee37f7b

File tree

10 files changed

+423
-16
lines changed

10 files changed

+423
-16
lines changed

docs/source/api/bitwise_operators_index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ Bitwise operators
4040
+---------------------------------------+----------------------------------------------------+
4141
| :cpp:func:`bitwise_andnot` | per slot bitwise and not |
4242
+---------------------------------------+----------------------------------------------------+
43-
| :cpp:func:`bitwise_lshift` | per slot bitwise and |
43+
| :cpp:func:`bitwise_lshift` | per slot bitwise left shift |
4444
+---------------------------------------+----------------------------------------------------+
45-
| :cpp:func:`bitwise_rshift` | per slot bitwise and not |
45+
| :cpp:func:`bitwise_rshift` | per slot bitwise right shift |
4646
+---------------------------------------+----------------------------------------------------+
4747
| :cpp:func:`rotr` | per slot rotate right |
4848
+---------------------------------------+----------------------------------------------------+

include/xsimd/arch/common/xsimd_common_arithmetic.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ namespace xsimd
3434
{ return x << y; },
3535
self, other);
3636
}
37+
template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
38+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
39+
{
40+
static_assert(shift < std::numeric_limits<T>::digits, "shift must be less than the number of bits in T");
41+
return bitwise_lshift(self, shift, A {});
42+
}
3743

3844
// bitwise_rshift
3945
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
@@ -43,6 +49,12 @@ namespace xsimd
4349
{ return x >> y; },
4450
self, other);
4551
}
52+
template <size_t shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
53+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept
54+
{
55+
static_assert(shift < std::numeric_limits<T>::digits, "shift must be less than the number of bits in T");
56+
return bitwise_rshift(self, shift, A {});
57+
}
4658

4759
// decr
4860
template <class A, class T>
@@ -169,6 +181,13 @@ namespace xsimd
169181
constexpr auto N = std::numeric_limits<T>::digits;
170182
return (self << other) | (self >> (N - other));
171183
}
184+
template <size_t count, class A, class T>
185+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept
186+
{
187+
static_assert(count < std::numeric_limits<T>::digits, "count must be less than the number of bits in T");
188+
constexpr auto N = std::numeric_limits<T>::digits;
189+
return bitwise_lshift<count>(self) | bitwise_rshift<N - count>(self);
190+
}
172191

173192
// rotr
174193
template <class A, class T, class STy>
@@ -177,6 +196,13 @@ namespace xsimd
177196
constexpr auto N = std::numeric_limits<T>::digits;
178197
return (self >> other) | (self << (N - other));
179198
}
199+
template <size_t count, class A, class T>
200+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept
201+
{
202+
static_assert(count < std::numeric_limits<T>::digits, "count must be less than the number of bits in T");
203+
constexpr auto N = std::numeric_limits<T>::digits;
204+
return bitwise_rshift<count>(self) | bitwise_lshift<N - count>(self);
205+
}
180206

181207
// sadd
182208
template <class A>

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
#include "../types/xsimd_avx2_register.hpp"
1919

20+
#include <limits>
21+
2022
namespace xsimd
2123
{
2224

@@ -172,17 +174,35 @@ namespace xsimd
172174
}
173175
}
174176

177+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
178+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
179+
{
180+
static_assert(shift < std::numeric_limits<T>::digits, "Shift amount must be less than the number of value bits in the type");
181+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
182+
{
183+
return _mm256_slli_epi16(self, shift);
184+
}
185+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
186+
{
187+
return _mm256_slli_epi32(self, shift);
188+
}
189+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
190+
{
191+
return _mm256_slli_epi64(self, shift);
192+
}
193+
else
194+
{
195+
return bitwise_lshift<shift>(self, avx {});
196+
}
197+
}
198+
175199
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
176200
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
177201
{
178202
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
179203
{
180204
return _mm256_sllv_epi32(self, other);
181205
}
182-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
183-
{
184-
return _mm256_sllv_epi64(self, other);
185-
}
186206
else
187207
{
188208
return bitwise_lshift(self, other, avx {});
@@ -252,6 +272,57 @@ namespace xsimd
252272
}
253273
}
254274

275+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
276+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
277+
{
278+
static_assert(shift < std::numeric_limits<T>::digits, "Shift amount must be less than the number of value bits in the type");
279+
if (std::is_signed<T>::value)
280+
{
281+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
282+
{
283+
__m256i sign_mask = _mm256_set1_epi16((0xFF00 >> shift) & 0x00FF);
284+
__m256i cmp_is_negative = _mm256_cmpgt_epi8(_mm256_setzero_si256(), self);
285+
__m256i res = _mm256_srai_epi16(self, shift);
286+
return _mm256_or_si256(
287+
detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
288+
{ return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
289+
sign_mask, cmp_is_negative),
290+
_mm256_andnot_si256(sign_mask, res));
291+
}
292+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
293+
{
294+
return _mm256_srai_epi16(self, shift);
295+
}
296+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
297+
{
298+
return _mm256_srai_epi32(self, shift);
299+
}
300+
else
301+
{
302+
return bitwise_rshift<shift>(self, avx {});
303+
}
304+
}
305+
else
306+
{
307+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
308+
{
309+
return _mm256_srli_epi16(self, shift);
310+
}
311+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
312+
{
313+
return _mm256_srli_epi32(self, shift);
314+
}
315+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
316+
{
317+
return _mm256_srli_epi64(self, shift);
318+
}
319+
else
320+
{
321+
return bitwise_rshift<shift>(self, avx {});
322+
}
323+
}
324+
}
325+
255326
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
256327
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
257328
{
@@ -272,10 +343,6 @@ namespace xsimd
272343
{
273344
return _mm256_srlv_epi32(self, other);
274345
}
275-
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
276-
{
277-
return _mm256_srlv_epi64(self, other);
278-
}
279346
else
280347
{
281348
return bitwise_rshift(self, other, avx {});

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,98 @@ namespace xsimd
564564
}
565565
}
566566

567+
// rotl
568+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
569+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
570+
{
571+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
572+
{
573+
return _mm512_rolv_epi32(self, other);
574+
}
575+
XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
576+
{
577+
return _mm512_rolv_epi64(self, other);
578+
}
579+
return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
580+
{ return rotl(batch<T, avx2>(s), batch<T, avx2>(o), avx2 {}); },
581+
self, other);
582+
}
583+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
584+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, int32_t other, requires_arch<avx512f>) noexcept
585+
{
586+
return rotl(self, batch<T, A>(other), A {});
587+
}
588+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
589+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<avx512f>) noexcept
590+
{
591+
static_assert(count < std::numeric_limits<T>::digits, "count must be less than the number of bits in T");
592+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
593+
{
594+
return _mm512_rol_epi32(self, count);
595+
}
596+
XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
597+
{
598+
return _mm512_rol_epi64(self, count);
599+
}
600+
601+
return detail::fwd_to_avx([](__m256i s) noexcept
602+
{ return rotl<count>(batch<T, avx2>(s), avx2 {}); },
603+
self);
604+
}
605+
606+
// rotr
607+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
608+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
609+
{
610+
XSIMD_IF_CONSTEXPR(sizeof(T) < 4)
611+
{
612+
return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
613+
{ return rotr(batch<T, avx2>(s), batch<T, avx2>(o), avx2 {}); },
614+
self, other);
615+
}
616+
XSIMD_IF_CONSTEXPR(std::is_unsigned<T>::value)
617+
{
618+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
619+
{
620+
return _mm512_rorv_epi32(self, other);
621+
}
622+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
623+
{
624+
return _mm512_rorv_epi64(self, other);
625+
}
626+
}
627+
return rotr(self, other, common {});
628+
}
629+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
630+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, int32_t other, requires_arch<avx512f>) noexcept
631+
{
632+
return rotr(self, batch<T, A>(other), A {});
633+
}
634+
635+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
636+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<avx512f>) noexcept
637+
{
638+
static_assert(count < std::numeric_limits<T>::digits, "count must be less than the number of bits in T");
639+
XSIMD_IF_CONSTEXPR(sizeof(T) < 4)
640+
{
641+
return detail::fwd_to_avx([](__m256i s) noexcept
642+
{ return rotr<count>(batch<T, avx2>(s), avx2 {}); },
643+
self);
644+
}
645+
XSIMD_IF_CONSTEXPR(std::is_unsigned<T>::value)
646+
{
647+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
648+
{
649+
return _mm512_ror_epi32(self, count);
650+
}
651+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
652+
{
653+
return _mm512_ror_epi64(self, count);
654+
}
655+
}
656+
return rotr<count>(self, common {});
657+
}
658+
567659
// bitwise_xor
568660
template <class A>
569661
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
@@ -2551,7 +2643,6 @@ namespace xsimd
25512643
}
25522644

25532645
}
2554-
25552646
}
25562647

25572648
#endif

include/xsimd/arch/xsimd_avx512vbmi2.hpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,62 @@ namespace xsimd
6767
{
6868
return _mm512_maskz_expand_epi8(mask.mask(), self);
6969
}
70+
71+
// rotl
72+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
73+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, int32_t other, requires_arch<avx512vbmi2>) noexcept
74+
{
75+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
76+
{
77+
return _mm512_shldv_epi16(self, self, _mm512_set1_epi16(static_cast<uint16_t>(other)));
78+
}
79+
else
80+
{
81+
return rotl(self, other, avx512bw {});
82+
}
83+
}
84+
85+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
86+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<avx512vbmi2>) noexcept
87+
{
88+
static_assert(count < std::numeric_limits<T>::digits, "count must be less than the number of bits in T");
89+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
90+
{
91+
return _mm512_shldi_epi16(self, self, count);
92+
}
93+
else
94+
{
95+
return rotl<count>(self, avx512bw {});
96+
}
97+
}
98+
99+
// rotr
100+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
101+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, int32_t other, requires_arch<avx512vbmi2>) noexcept
102+
{
103+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
104+
{
105+
return _mm512_shrdv_epi16(self, self, _mm512_set1_epi16(static_cast<uint16_t>(other)));
106+
}
107+
else
108+
{
109+
return rotr(self, other, avx512bw {});
110+
}
111+
}
112+
113+
template <size_t count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
114+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<avx512vbmi2>) noexcept
115+
{
116+
static_assert(count < std::numeric_limits<T>::digits, "count must be less than the number of bits in T");
117+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
118+
{
119+
return _mm512_shrdi_epi16(self, self, count);
120+
}
121+
else
122+
{
123+
return rotr<count>(self, avx512bw {});
124+
}
125+
}
70126
}
71127
}
72128

include/xsimd/arch/xsimd_common_fwd.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@ namespace xsimd
2626
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<common>) noexcept;
2727
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2828
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
29+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
30+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept;
2931
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
3032
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
33+
template <size_t shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
34+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept;
3135
template <class A, class T>
3236
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
3337
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
@@ -40,6 +44,14 @@ namespace xsimd
4044
XSIMD_INLINE T reduce_add(batch<T, A> const& self, requires_arch<common>) noexcept;
4145
template <class A, class T, class = typename std::enable_if<std::is_scalar<T>::value, void>::type>
4246
XSIMD_INLINE T reduce_mul(batch<T, A> const& self, requires_arch<common>) noexcept;
47+
template <class A, class T, class STy>
48+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<common>) noexcept;
49+
template <size_t count, class A, class T>
50+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept;
51+
template <class A, class T, class STy>
52+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<common>) noexcept;
53+
template <size_t count, class A, class T>
54+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept;
4355
// Forward declarations for pack-level helpers
4456
namespace detail
4557
{

0 commit comments

Comments
 (0)