Skip to content

Commit f7116e7

Browse files
committed
fixing shuffle
1 parent 9dc75a0 commit f7116e7

File tree

2 files changed

+4
-7
lines changed

2 files changed

+4
-7
lines changed

include/xsimd/arch/common/xsimd_common_swizzle.hpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -232,12 +232,8 @@ namespace xsimd
 template <uint32_t... Values>
 XSIMD_INLINE constexpr std::uint32_t shuffle() noexcept
 {
-return shuffle_impl<0,
-sizeof...(Values),
-log2_c<sizeof...(Values)>::value,
-Values...>::value;
+return shuffle_impl<0, sizeof...(Values), log2_c<sizeof...(Values)>::value, Values...>::value;
 }
-
 template <uint32_t... Values>
 XSIMD_INLINE constexpr std::uint32_t mod_shuffle() noexcept
 {

include/xsimd/arch/xsimd_avx2.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -964,14 +964,15 @@ namespace xsimd
 constexpr auto imm = ((V0 & 1) << 0) | ((V1 & 1) << 1) | ((V2 & 1) << 2) | ((V3 & 1) << 3);
 return _mm256_permute_pd(self, imm);
 }
+constexpr auto imm = detail::mod_shuffle<V0, V1, V2, V3>();
 // fallback to full 4-element permute
-return _mm256_permute4x64_pd(self, detail::shuffle<V0, V1, V2, V3>());
+return _mm256_permute4x64_pd(self, imm);
 }
 
 template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>
 XSIMD_INLINE batch<uint64_t, A> swizzle(batch<uint64_t, A> const& self, batch_constant<uint64_t, A, V0, V1, V2, V3>, requires_arch<avx2>) noexcept
 {
-constexpr auto mask = detail::shuffle<V0, V1, V2, V3>();
+constexpr auto mask = detail::mod_shuffle<V0, V1, V2, V3>();
 return _mm256_permute4x64_epi64(self, mask);
 }
 template <class A, uint64_t V0, uint64_t V1, uint64_t V2, uint64_t V3>

0 commit comments

Comments
 (0)