Skip to content

Commit f7ddaf5

Browse files
committed
NEW: API for fixed (compile-time) shift and rotation
1. Adds the API bitwise_[l|r]shift<N>(...) and rot[l|r]<N>(...). 2. Updates the test to use the API. 3. Updates the documentation.
1 parent 429da70 commit f7ddaf5

File tree

9 files changed

+320
-6
lines changed

9 files changed

+320
-6
lines changed

include/xsimd/arch/common/xsimd_common_arithmetic.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ namespace xsimd
3434
{ return x << y; },
3535
self, other);
3636
}
37+
// Left shift by a compile-time amount, common (fallback) kernel.
// The template constant `shift` is passed on as an ordinary argument,
// together with the arch tag A {}, to the three-argument bitwise_lshift.
template <int shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
38+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept
39+
{
40+
return bitwise_lshift(self, shift, A {});
41+
}
3742

3843
// bitwise_rshift
3944
template <class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
@@ -43,6 +48,11 @@ namespace xsimd
4348
{ return x >> y; },
4449
self, other);
4550
}
51+
// Right shift by a compile-time amount, common (fallback) kernel.
// The template constant `shift` is passed on as an ordinary argument,
// together with the arch tag A {}, to the three-argument bitwise_rshift.
template <int shift, class A, class T, class /*=typename std::enable_if<std::is_integral<T>::value, void>::type*/>
52+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept
53+
{
54+
return bitwise_rshift(self, shift, A {});
55+
}
4656

4757
// decr
4858
template <class A, class T>
@@ -183,6 +193,12 @@ namespace xsimd
183193
constexpr auto N = std::numeric_limits<T>::digits;
184194
return (self << other) | (self >> (N - other));
185195
}
196+
template <int count, class A, class T>
197+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept
198+
{
199+
constexpr auto N = std::numeric_limits<T>::digits;
200+
return bitwise_lshift<count>(self) | bitwise_rshift<N - count>(self);
201+
}
186202

187203
// rotr
188204
template <class A, class T, class STy>
@@ -191,6 +207,12 @@ namespace xsimd
191207
constexpr auto N = std::numeric_limits<T>::digits;
192208
return (self >> other) | (self << (N - other));
193209
}
210+
template <int count, class A, class T>
211+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept
212+
{
213+
constexpr auto N = std::numeric_limits<T>::digits;
214+
return bitwise_rshift<count>(self) | bitwise_lshift<N - count>(self);
215+
}
194216

195217
// sadd
196218
template <class A>

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,27 @@ namespace xsimd
172172
}
173173
}
174174

175+
// AVX2 kernel: left shift of every lane by the compile-time amount `shift`.
// 16-, 32- and 64-bit elements map to _mm256_slli_epi16/32/64 with `shift`
// as the immediate operand; any other element size is delegated to the
// avx implementation of bitwise_lshift<shift>.
template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
176+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
177+
{
178+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
179+
{
180+
return _mm256_slli_epi16(self, shift);
181+
}
182+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
183+
{
184+
return _mm256_slli_epi32(self, shift);
185+
}
186+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
187+
{
188+
return _mm256_slli_epi64(self, shift);
189+
}
190+
else
191+
{
192+
// no dedicated instruction used here for the remaining sizes
193+
return bitwise_lshift<shift>(self, avx {});
194+
}
195+
}
195+
175196
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
176197
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
177198
{
@@ -252,6 +273,56 @@ namespace xsimd
252273
}
253274
}
254275

276+
template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
277+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<avx2>) noexcept
278+
{
279+
if (std::is_signed<T>::value)
280+
{
281+
XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
282+
{
283+
__m256i sign_mask = _mm256_set1_epi16((0xFF00 >> shift) & 0x00FF);
284+
__m256i cmp_is_negative = _mm256_cmpgt_epi8(_mm256_setzero_si256(), self);
285+
__m256i res = _mm256_srai_epi16(self, shift);
286+
return _mm256_or_si256(
287+
detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
288+
{ return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
289+
sign_mask, cmp_is_negative),
290+
_mm256_andnot_si256(sign_mask, res));
291+
}
292+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
293+
{
294+
return _mm256_srai_epi16(self, shift);
295+
}
296+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
297+
{
298+
return _mm256_srai_epi32(self, shift);
299+
}
300+
else
301+
{
302+
return bitwise_rshift<shift>(self, avx {});
303+
}
304+
}
305+
else
306+
{
307+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
308+
{
309+
return _mm256_srli_epi16(self, shift);
310+
}
311+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
312+
{
313+
return _mm256_srli_epi32(self, shift);
314+
}
315+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
316+
{
317+
return _mm256_srli_epi64(self, shift);
318+
}
319+
else
320+
{
321+
return bitwise_rshift<shift>(self, avx {});
322+
}
323+
}
324+
}
325+
255326
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
256327
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx2>) noexcept
257328
{

include/xsimd/arch/xsimd_avx512bw.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,6 @@ namespace xsimd
223223
}
224224
}
225225
}
226-
227226
// decr_if
228227
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
229228
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx512bw>) noexcept

include/xsimd/arch/xsimd_avx512f.hpp

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,96 @@ namespace xsimd
564564
}
565565
}
566566

567+
// rotl
568+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
569+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
570+
{
571+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
572+
{
573+
return _mm512_rolv_epi32(self, other);
574+
}
575+
XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
576+
{
577+
return _mm512_rolv_epi64(self, other);
578+
}
579+
return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
580+
{ return rotl(batch<T, avx2>(s), batch<T, avx2>(o), avx2 {}); },
581+
self, other);
582+
}
583+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
584+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, int32_t other, requires_arch<avx512f>) noexcept
585+
{
586+
return rotl(self, batch<T, A>(other), A {});
587+
}
588+
template <int count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
589+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<avx512f>) noexcept
590+
{
591+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
592+
{
593+
return _mm512_rol_epi32(self, count);
594+
}
595+
XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
596+
{
597+
return _mm512_rol_epi64(self, count);
598+
}
599+
600+
return detail::fwd_to_avx([](__m256i s) noexcept
601+
{ return rotl<count>(batch<T, avx2>(s), avx2 {}); },
602+
self);
603+
}
604+
605+
// rotr
606+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
607+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
608+
{
609+
XSIMD_IF_CONSTEXPR(sizeof(T) < 4)
610+
{
611+
return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
612+
{ return rotr(batch<T, avx2>(s), batch<T, avx2>(o), avx2 {}); },
613+
self, other);
614+
}
615+
XSIMD_IF_CONSTEXPR(std::is_unsigned<T>::value)
616+
{
617+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
618+
{
619+
return _mm512_rorv_epi32(self, other);
620+
}
621+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
622+
{
623+
return _mm512_rorv_epi64(self, other);
624+
}
625+
}
626+
return rotr(self, other, common {});
627+
}
628+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
629+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, int32_t other, requires_arch<avx512f>) noexcept
630+
{
631+
return rotr(self, batch<T, A>(other), A {});
632+
}
633+
634+
template <int count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
635+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<avx512f>) noexcept
636+
{
637+
XSIMD_IF_CONSTEXPR(sizeof(T) < 4)
638+
{
639+
return detail::fwd_to_avx([](__m256i s) noexcept
640+
{ return rotr<count>(batch<T, avx2>(s), avx2 {}); },
641+
self);
642+
}
643+
XSIMD_IF_CONSTEXPR(std::is_unsigned<T>::value)
644+
{
645+
XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
646+
{
647+
return _mm512_ror_epi32(self, count);
648+
}
649+
else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
650+
{
651+
return _mm512_ror_epi64(self, count);
652+
}
653+
}
654+
return rotr<count>(self, common {});
655+
}
656+
567657
// bitwise_xor
568658
template <class A>
569659
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
@@ -2520,7 +2610,6 @@ namespace xsimd
25202610
}
25212611

25222612
}
2523-
25242613
}
25252614

25262615
#endif

include/xsimd/arch/xsimd_avx512vbmi2.hpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,60 @@ namespace xsimd
6767
{
6868
return _mm512_maskz_expand_epi8(mask.mask(), self);
6969
}
70+
71+
// rotl
72+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
73+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, int32_t other, requires_arch<avx512vbmi2>) noexcept
74+
{
75+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
76+
{
77+
return _mm512_shldv_epi16(self, self, _mm512_set1_epi16(static_cast<uint16_t>(other)));
78+
}
79+
else
80+
{
81+
return rotl(self, other, avx512bw {});
82+
}
83+
}
84+
85+
template <int count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
86+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<avx512vbmi2>) noexcept
87+
{
88+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
89+
{
90+
return _mm512_shldi_epi16(self, self, count);
91+
}
92+
else
93+
{
94+
return rotl<count>(self, avx512bw {});
95+
}
96+
}
97+
98+
// rotr
99+
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
100+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, int32_t other, requires_arch<avx512vbmi2>) noexcept
101+
{
102+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
103+
{
104+
return _mm512_shrdv_epi16(self, self, _mm512_set1_epi16(static_cast<uint16_t>(other)));
105+
}
106+
else
107+
{
108+
return rotr(self, other, avx512bw {});
109+
}
110+
}
111+
112+
template <int count, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
113+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<avx512vbmi2>) noexcept
114+
{
115+
XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
116+
{
117+
return _mm512_shrdi_epi16(self, self, count);
118+
}
119+
else
120+
{
121+
return rotr<count>(self, avx512bw {});
122+
}
123+
}
70124
}
71125
}
72126

include/xsimd/arch/xsimd_common_fwd.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@ namespace xsimd
2626
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<common>) noexcept;
2727
// Forward declaration: common bitwise_lshift with a per-lane shift batch.
// The enable_if default restricts it to integral T.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
2828
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
29+
// Forward declaration: common bitwise_lshift with the shift amount given
// as the template constant `shift`.
template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
30+
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, requires_arch<common>) noexcept;
2931
// Forward declaration: common bitwise_rshift with a per-lane shift batch.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
3032
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
33+
// Forward declaration: common bitwise_rshift with the shift amount given
// as the template constant `shift`.
template <int shift, class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
34+
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, requires_arch<common>) noexcept;
3135
// Forward declaration: common gt kernel taking two batches and returning a
// batch_bool; the definition is not part of this hunk.
template <class A, class T>
3236
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
3337
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
@@ -38,6 +42,14 @@ namespace xsimd
3842
XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<common>) noexcept;
3943
// Forward declaration: common hadd kernel, restricted to integral T by the
// enable_if default; takes one batch and returns a single T.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
4044
XSIMD_INLINE T hadd(batch<T, A> const& self, requires_arch<common>) noexcept;
45+
// Forward declaration: common rotl with a runtime amount of type STy.
template <class A, class T, class STy>
46+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, STy other, requires_arch<common>) noexcept;
47+
// Forward declaration: common rotl with the amount given as the template
// constant `count`.
template <int count, class A, class T>
48+
XSIMD_INLINE batch<T, A> rotl(batch<T, A> const& self, requires_arch<common>) noexcept;
49+
// Forward declaration: common rotr with a runtime amount of type STy.
template <class A, class T, class STy>
50+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, STy other, requires_arch<common>) noexcept;
51+
// Forward declaration: common rotr with the amount given as the template
// constant `count`.
template <int count, class A, class T>
52+
XSIMD_INLINE batch<T, A> rotr(batch<T, A> const& self, requires_arch<common>) noexcept;
4153
// Forward declarations for pack-level helpers
4254
namespace detail
4355
{

include/xsimd/arch/xsimd_scalar.hpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,12 +300,25 @@ namespace xsimd
300300
return x << shift;
301301
}
302302

303+
template <int shift, class T>
304+
XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
305+
bitwise_lshift(T x) noexcept
306+
{
307+
return x << shift;
308+
}
309+
303310
template <class T0, class T1>
304311
XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
305312
bitwise_rshift(T0 x, T1 shift) noexcept
306313
{
307314
return x >> shift;
308315
}
316+
template <int shift, class T>
317+
XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
318+
bitwise_rshift(T x) noexcept
319+
{
320+
return x >> shift;
321+
}
309322

310323
template <class T>
311324
XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
@@ -450,6 +463,13 @@ namespace xsimd
450463
constexpr auto N = std::numeric_limits<T0>::digits;
451464
return (x << shift) | (x >> (N - shift));
452465
}
466+
// Scalar rotate-left of an integral value by the compile-time amount `count`.
//
// Template parameters:
//   count - rotation amount; expected to satisfy 0 <= count <= N, where
//           N is std::numeric_limits<T>::digits.
// Returns (x << count) | (x >> (N - count)) converted back to T, except that
// a shift by the full bit width of T is replaced by a shift by zero.
template <int count, class T>
XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
rotl(T x) noexcept
{
    constexpr int N = std::numeric_limits<T>::digits;
    // Physical width of T in bits: digits does not count the sign bit.
    constexpr int width = N + (std::numeric_limits<T>::is_signed ? 1 : 0);
    // Shifting a 32- or 64-bit unsigned value by its full width (count == 0
    // or count == N) is undefined behaviour. Reducing modulo the width maps
    // those two cases onto a shift by 0 and leaves all others unchanged.
    constexpr int fwd = count % width;
    constexpr int back = (N - count) % width;
    return (x << fwd) | (x >> back);
}
453473

454474
template <class T0, class T1>
455475
XSIMD_INLINE typename std::enable_if<std::is_integral<T0>::value && std::is_integral<T1>::value, T0>::type
@@ -458,6 +478,13 @@ namespace xsimd
458478
constexpr auto N = std::numeric_limits<T0>::digits;
459479
return (x >> shift) | (x << (N - shift));
460480
}
481+
// Scalar rotate-right of an integral value by the compile-time amount `count`.
//
// Template parameters:
//   count - rotation amount; expected to satisfy 0 <= count <= N, where
//           N is std::numeric_limits<T>::digits.
// Returns (x >> count) | (x << (N - count)) converted back to T, except that
// a shift by the full bit width of T is replaced by a shift by zero.
template <int count, class T>
XSIMD_INLINE typename std::enable_if<std::is_integral<T>::value, T>::type
rotr(T x) noexcept
{
    constexpr int N = std::numeric_limits<T>::digits;
    // Physical width of T in bits: digits does not count the sign bit.
    constexpr int width = N + (std::numeric_limits<T>::is_signed ? 1 : 0);
    // Shifting a 32- or 64-bit unsigned value by its full width (count == 0
    // or count == N) is undefined behaviour. Reducing modulo the width maps
    // those two cases onto a shift by 0 and leaves all others unchanged.
    constexpr int fwd = count % width;
    constexpr int back = (N - count) % width;
    return (x >> fwd) | (x << back);
}
461488

462489
template <class T>
463490
XSIMD_INLINE bool isnan(std::complex<T> var) noexcept

0 commit comments

Comments
 (0)