Skip to content

Commit 3ded4ee

Browse files
committed
Fix swizzle avx double type
1 parent 72f1073 commit 3ded4ee

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,14 +1447,15 @@ namespace xsimd
14471447
XSIMD_INLINE batch<double, A> swizzle(batch<double, A> const& self, batch<uint64_t, A> mask, requires_arch<avx>) noexcept
14481448
{
14491449
// swap lanes
1450-
__m256 swapped = _mm256_permute2f128_pd(self, self, 0x01); // [high | low]
1450+
__m256d swapped = _mm256_permute2f128_pd(self, self, 0x01); // [high | low]
14511451

1452-
// normalize mask taking modulo 2
1453-
batch<uint64_t, A> half_mask = mask & 0b1u;
1452+
// The half mask value is found in mask modulo 2, but the intrinsic expects it in the
1453+
// second least significant bit. We use negation as a cheap alternative to lshift.
1454+
batch<uint64_t, A> half_mask = -(mask & 0b1u);
14541455

14551456
// permute within each lane
1456-
__m256 r0 = _mm256_permutevar_pd(self, half_mask);
1457-
__m256 r1 = _mm256_permutevar_pd(swapped, half_mask);
1457+
__m256d r0 = _mm256_permutevar_pd(self, half_mask);
1458+
__m256d r1 = _mm256_permutevar_pd(swapped, half_mask);
14581459

14591460
// select lane by the mask index divided by 2
14601461
constexpr auto lane = batch_constant<uint64_t, A, 0, 0, 2, 2> {};

test/test_batch_manip.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ struct swizzle_test
247247
using idx_t = typename xsimd::as_index<value_type>::type;
248248
auto idx_batch = xsimd::make_batch_constant<idx_t, Pattern<idx_t>, arch_type>();
249249

250+
CAPTURE(idx_batch.as_batch());
250251
CHECK_BATCH_EQ(xsimd::swizzle(b_lhs, idx_batch), b_expect);
251252
CHECK_BATCH_EQ(xsimd::swizzle(b_lhs,
252253
static_cast<xsimd::batch<idx_t, arch_type>>(idx_batch)),

0 commit comments

Comments
 (0)