Skip to content

Commit 06ae37c

Browse files
committed
small avx2 tweak
1 parent 3b478a3 commit 06ae37c

File tree

1 file changed

+4
-28
lines changed

1 file changed

+4
-28
lines changed

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 4 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -906,21 +906,9 @@ namespace xsimd
906 906
template <class A>
907 907
XSIMD_INLINE batch<float, A> swizzle(batch<float, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
908 908
{
909-
constexpr std::size_t N = batch<uint32_t, A>::size;
910-
alignas(avx2::alignment()) std::array<uint32_t, N> tmp {};
911-
mask.store_aligned(tmp.data());
912-
for (std::size_t i = 0; i < N; ++i)
913-
{
914-
for (std::size_t j = i + 1; j < N; ++j)
915-
{
916-
if (tmp[i] == tmp[j])
917-
{
918-
return swizzle(self, mask, avx {});
919-
}
920-
}
921-
}
909+
return swizzle(self, mask, avx {});
922 910
// this does not allow duplicates in the output
923-
return _mm256_permutevar8x32_ps(self, mask);
911+
// return _mm256_permutevar8x32_ps(self, mask);
924 912
}
925 913

926 914
template <class A>
@@ -944,21 +932,9 @@ namespace xsimd
944 932
template <class A>
945 933
XSIMD_INLINE batch<uint32_t, A> swizzle(batch<uint32_t, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept
946 934
{
947-
constexpr std::size_t N = batch<uint32_t, A>::size;
948-
alignas(avx2::alignment()) std::array<uint32_t, N> tmp {};
949-
mask.store_aligned(tmp.data());
950-
for (std::size_t i = 0; i < N; ++i)
951-
{
952-
for (std::size_t j = i + 1; j < N; ++j)
953-
{
954-
if (tmp[i] == tmp[j])
955-
{
956-
return swizzle(self, mask, avx {});
957-
}
958-
}
959-
}
960 935
// this does not allow duplicates in the output
961-
return _mm256_permutevar8x32_epi32(self, mask);
936+
// return _mm256_permutevar8x32_epi32(self, mask);
937+
return swizzle(self, mask, avx {});
962 938
}
963 939
template <class A>
964 940
XSIMD_INLINE batch<int32_t, A> swizzle(batch<int32_t, A> const& self, batch<uint32_t, A> mask, requires_arch<avx2>) noexcept

0 commit comments

Comments
 (0)