@@ -342,7 +342,8 @@ constexpr bool isOnlyFromLow(xsimd::batch_constant<T, A, Vals...>) {
342342/// Merged in xsimd 14.0, simply use swizzle
343343template <typename Arch, uint8_t ... kIdx >
344344auto swizzle_bytes (const xsimd::batch<uint8_t , Arch>& batch,
345- xsimd::batch_constant<uint8_t , Arch, kIdx ...> mask) {
345+ xsimd::batch_constant<uint8_t , Arch, kIdx ...> mask)
346+ -> xsimd::batch<uint8_t, Arch> {
346347 if constexpr (std::is_base_of_v<xsimd::avx2, Arch>) {
347348 static constexpr auto kPlan = BuildSwizzleBiLaneGenericPlan (std::array{kIdx ...});
348349 static constexpr auto kSelfSwizzleArr = kPlan .self_lane ;
@@ -370,7 +371,7 @@ auto swizzle_bytes(const xsimd::batch<uint8_t, Arch>& batch,
370371 auto self = _mm256_shuffle_epi8 (batch, kSelfSwizzle .as_batch ());
371372 auto swapped = _mm256_permute2x128_si256 (batch, batch, 0x01 );
372373 auto cross = _mm256_shuffle_epi8 (swapped, kCrossSwizzle .as_batch ());
373- return xsimd::batch< uint8_t , Arch>( _mm256_or_si256 (self, cross) );
374+ return _mm256_or_si256 (self, cross);
374375 } else {
375376 return xsimd::swizzle (batch, mask);
376377 }
@@ -386,7 +387,8 @@ auto swizzle_bytes(const xsimd::batch<uint8_t, Arch>& batch,
386387// http://arxiv.org/abs/1209.2137
387388template <typename Arch, typename Int, Int... kShifts >
388389auto left_shift_no_overflow (const xsimd::batch<Int, Arch>& batch,
389- xsimd::batch_constant<Int, Arch, kShifts ...> shifts) {
390+ xsimd::batch_constant<Int, Arch, kShifts ...> shifts)
391+ -> xsimd::batch<Int, Arch> {
390392 constexpr bool kHasSse2 = std::is_base_of_v<xsimd::sse2, Arch>;
391393 constexpr bool kHasAvx2 = std::is_base_of_v<xsimd::avx2, Arch>;
392394
@@ -407,9 +409,8 @@ auto left_shift_no_overflow(const xsimd::batch<Int, Arch>& batch,
407409 // TODO that is latency 10 so maybe it is not worth it
408410 return _mm_mullo_epi32 (batch, kMults .as_batch ());
409411 }
410- } else {
411- return batch << shifts;
412412 }
413+ return batch << shifts;
413414}
414415
415416// Intel x86-64 does not have variable right shifts before AVX2.
0 commit comments