Skip to content

Commit a969324

Browse files
Fix rotate_left implementation on ssse3 and avx2 for [u]int16_t
Also provide an optimized version for [u]int8_t
1 parent fc2659d commit a969324

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -657,11 +657,21 @@ namespace xsimd
657657

658658
// rotate_left
659659
// rotate_left for 8-bit unsigned elements on AVX2: N is passed directly to
// _mm256_alignr_epi8 as a byte count, with `self` supplied as both operands.
// This hunk retargets the overload from uint16_t (removed signature) to uint8_t
// (added signature), since a raw byte count of N only matches 8-bit elements.
// NOTE(review): per the Intel intrinsics guide, _mm256_alignr_epi8 operates on
// each 128-bit lane independently, so this looks like a per-lane rotation rather
// than a rotation across the whole 256-bit register — confirm this is intended.
template <size_t N, class A>
660-
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx2>) noexcept
660+
XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<avx2>) noexcept
661661
{
662662
return _mm256_alignr_epi8(self, self, N);
663663
}
664664
// rotate_left for 8-bit signed elements on AVX2: casts the batch to uint8_t,
// forwards to the uint8_t rotate_left with the same N, and casts the result
// back to int8_t.
template <size_t N, class A>
665+
XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<avx2>) noexcept
666+
{
667+
return bitwise_cast<int8_t>(rotate_left<N, A>(bitwise_cast<uint8_t>(self), avx2 {}));
668+
}
669+
// rotate_left for 16-bit unsigned elements on AVX2: the alignr immediate is a
// byte count, so N elements are expressed as 2 * N bytes.
// NOTE(review): per the Intel intrinsics guide, _mm256_alignr_epi8 operates on
// each 128-bit lane independently, so this looks like a per-lane rotation rather
// than a rotation across the whole 256-bit register — confirm this is intended.
template <size_t N, class A>
670+
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx2>) noexcept
671+
{
672+
return _mm256_alignr_epi8(self, self, 2 * N);
673+
}
674+
template <size_t N, class A>
665675
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<avx2>) noexcept
666676
{
667677
return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), avx2 {}));

include/xsimd/arch/xsimd_ssse3.hpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,11 +107,22 @@ namespace xsimd
107107

108108
// rotate_left
109109
// rotate_left for 8-bit unsigned elements on SSSE3: N is passed directly to
// _mm_alignr_epi8 as a byte count, with `self` supplied as both operands.
// This hunk retargets the overload from uint16_t (removed signature) to uint8_t
// (added signature), since a raw byte count of N only matches 8-bit elements.
template <size_t N, class A>
110-
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<ssse3>) noexcept
110+
XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<ssse3>) noexcept
111111
{
112112
return _mm_alignr_epi8(self, self, N);
113113
}
114114
// rotate_left for 8-bit signed elements on SSSE3: casts the batch to uint8_t,
// forwards to the uint8_t rotate_left with the same N, and casts the result
// back to int8_t.
template <size_t N, class A>
115+
XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<ssse3>) noexcept
116+
{
117+
return bitwise_cast<int8_t>(rotate_left<N, A>(bitwise_cast<uint8_t>(self), ssse3 {}));
118+
}
119+
120+
// rotate_left for 16-bit unsigned elements on SSSE3: the alignr immediate is a
// byte count, so N elements are expressed as 2 * N bytes.
template <size_t N, class A>
121+
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<ssse3>) noexcept
122+
{
123+
return _mm_alignr_epi8(self, self, 2 * N);
124+
}
125+
template <size_t N, class A>
115126
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<ssse3>) noexcept
116127
{
117128
return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), ssse3 {}));

0 commit comments

Comments
 (0)