Skip to content

Commit 72f1073

Browse files
committed
Remove one permute from swizzle double
1 parent f7912f0 commit 72f1073

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

include/xsimd/arch/xsimd_avx.hpp

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1446,21 +1446,19 @@ namespace xsimd
14461446
template <class A>
14471447
XSIMD_INLINE batch<double, A> swizzle(batch<double, A> const& self, batch<uint64_t, A> mask, requires_arch<avx>) noexcept
14481448
{
1449-
// duplicate low and high part of input
1450-
__m256d lo = _mm256_permute2f128_pd(self, self, 0x00);
1451-
__m256d hi = _mm256_permute2f128_pd(self, self, 0x11);
1449+
// swap lanes
1450+
__m256 swapped = _mm256_permute2f128_pd(self, self, 0x01); // [high | low]
14521451

1453-
// normalize mask
1454-
batch<uint64_t, A> half_mask = -(mask & 1);
1452+
// normalize mask taking modulo 2
1453+
batch<uint64_t, A> half_mask = mask & 0b1u;
14551454

14561455
// permute within each lane
1457-
__m256d r0 = _mm256_permutevar_pd(lo, half_mask);
1458-
__m256d r1 = _mm256_permutevar_pd(hi, half_mask);
1456+
__m256 r0 = _mm256_permutevar_pd(self, half_mask);
1457+
__m256 r1 = _mm256_permutevar_pd(swapped, half_mask);
14591458

1460-
// mask to choose the right lane
1461-
batch_bool<uint64_t, A> blend_mask = mask >= 2;
1462-
1463-
// blend the two permutes
1459+
// select lane by the mask index divided by 2
1460+
constexpr auto lane = batch_constant<uint64_t, A, 0, 0, 2, 2> {};
1461+
batch_bool<uint64_t, A> blend_mask = (mask & 0b10u) != lane;
14641462
return _mm256_blendv_pd(r0, r1, batch_bool_cast<double>(blend_mask));
14651463
}
14661464

0 commit comments

Comments
 (0)