Skip to content

Commit d5f9b78

Browse files
Fix implementation of rotate_left on avx2 and improve test coverage for rotate_left
1 parent a969324 commit d5f9b78

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,15 @@ namespace xsimd
659659
template <size_t N, class A>
660660
XSIMD_INLINE batch<uint8_t, A> rotate_left(batch<uint8_t, A> const& self, requires_arch<avx2>) noexcept
661661
{
662-
return _mm256_alignr_epi8(self, self, N);
662+
auto other = _mm256_permute2x128_si256(self, self, 0x1);
663+
if (N < 16)
664+
{
665+
return _mm256_alignr_epi8(other, self, 2 * N);
666+
}
667+
else
668+
{
669+
return _mm256_alignr_epi8(self, other, 2 * (N - 16));
670+
}
663671
}
664672
template <size_t N, class A>
665673
XSIMD_INLINE batch<int8_t, A> rotate_left(batch<int8_t, A> const& self, requires_arch<avx2>) noexcept
@@ -669,7 +677,15 @@ namespace xsimd
669677
template <size_t N, class A>
670678
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx2>) noexcept
671679
{
672-
return _mm256_alignr_epi8(self, self, 2 * N);
680+
auto other = _mm256_permute2x128_si256(self, self, 0x1);
681+
if (N < 8)
682+
{
683+
return _mm256_alignr_epi8(other, self, 2 * N);
684+
}
685+
else
686+
{
687+
return _mm256_alignr_epi8(self, other, 2 * (N - 8));
688+
}
673689
}
674690
template <size_t N, class A>
675691
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<avx2>) noexcept
@@ -886,6 +902,7 @@ namespace xsimd
886902
}
887903

888904
// swizzle (dynamic mask)
905+
889906
template <class A>
890907
XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
891908
{

test/test_batch_manip.cpp

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace xsimd
2020
struct init_swizzle_base
2121
{
2222
using swizzle_vector_type = std::array<T, N>;
23-
swizzle_vector_type lhs_in, exped_reverse, exped_fill, exped_dup, exped_ror, exped_rol;
23+
swizzle_vector_type lhs_in, exped_reverse, exped_fill, exped_dup, exped_ror, exped_rol, exped_rol2;
2424

2525
template <int... Indices>
2626
std::vector<swizzle_vector_type> create_swizzle_vectors()
@@ -42,12 +42,14 @@ namespace xsimd
4242
exped_dup[i] = lhs_in[2 * (i / 2)];
4343
exped_ror[i] = lhs_in[(i - 1) % N];
4444
exped_rol[i] = lhs_in[(i + 1) % N];
45+
exped_rol2[i] = lhs_in[(i + N - 1) % N];
4546
}
4647
vects.push_back(std::move(exped_reverse));
4748
vects.push_back(std::move(exped_fill));
4849
vects.push_back(std::move(exped_dup));
4950
vects.push_back(std::move(exped_ror));
5051
vects.push_back(std::move(exped_rol));
52+
vects.push_back(std::move(exped_rol2));
5153

5254
return vects;
5355
}
@@ -176,6 +178,20 @@ struct swizzle_test
176178
CHECK_BATCH_EQ(b_res, b_exped);
177179
}
178180

181+
void rotate_left_inv()
182+
{
183+
xsimd::init_swizzle_base<value_type, size> swizzle_base;
184+
auto swizzle_vecs = swizzle_base.create_swizzle_vectors();
185+
auto v_lhs = swizzle_vecs[0];
186+
auto v_exped = swizzle_vecs[6];
187+
188+
B b_lhs = B::load_unaligned(v_lhs.data());
189+
B b_exped = B::load_unaligned(v_exped.data());
190+
191+
B b_res = xsimd::rotate_left<size - 1>(b_lhs);
192+
CHECK_BATCH_EQ(b_res, b_exped);
193+
}
194+
179195
void swizzle_reverse()
180196
{
181197
xsimd::init_swizzle_base<value_type, size> swizzle_base;
@@ -248,6 +264,7 @@ TEST_CASE_TEMPLATE("[swizzle]", B, BATCH_SWIZZLE_TYPES)
248264
SUBCASE("rotate")
249265
{
250266
Test.rotate_left();
267+
Test.rotate_left_inv();
251268
Test.rotate_right();
252269
}
253270

0 commit comments

Comments
 (0)