@@ -906,21 +906,9 @@ namespace xsimd
906906 template <class A >
907907 XSIMD_INLINE batch<float , A> swizzle (batch<float , A> const & self, batch<uint32_t , A> mask, requires_arch<avx2>) noexcept
908908 {
909- constexpr std::size_t N = batch<uint32_t , A>::size;
910- alignas (avx2::alignment ()) std::array<uint32_t , N> tmp {};
911- mask.store_aligned (tmp.data ());
912- for (std::size_t i = 0 ; i < N; ++i)
913- {
914- for (std::size_t j = i + 1 ; j < N; ++j)
915- {
916- if (tmp[i] == tmp[j])
917- {
918- return swizzle (self, mask, avx {});
919- }
920- }
921- }
909+ return swizzle (self, mask, avx {});
922910 // this does not allow duplicates in the output
923- return _mm256_permutevar8x32_ps (self, mask);
911+ // return _mm256_permutevar8x32_ps(self, mask);
924912 }
925913
926914 template <class A >
@@ -944,21 +932,9 @@ namespace xsimd
944932 template <class A >
945933 XSIMD_INLINE batch<uint32_t , A> swizzle (batch<uint32_t , A> const & self, batch<uint32_t , A> mask, requires_arch<avx2>) noexcept
946934 {
947- constexpr std::size_t N = batch<uint32_t , A>::size;
948- alignas (avx2::alignment ()) std::array<uint32_t , N> tmp {};
949- mask.store_aligned (tmp.data ());
950- for (std::size_t i = 0 ; i < N; ++i)
951- {
952- for (std::size_t j = i + 1 ; j < N; ++j)
953- {
954- if (tmp[i] == tmp[j])
955- {
956- return swizzle (self, mask, avx {});
957- }
958- }
959- }
960935 // this does not allow duplicates in the output
961- return _mm256_permutevar8x32_epi32 (self, mask);
936+ // return _mm256_permutevar8x32_epi32(self, mask);
937+ return swizzle (self, mask, avx {});
962938 }
963939 template <class A >
964940 XSIMD_INLINE batch<int32_t , A> swizzle (batch<int32_t , A> const & self, batch<uint32_t , A> mask, requires_arch<avx2>) noexcept
0 commit comments