@@ -1629,6 +1629,17 @@ namespace xsimd
16291629 }
16301630 return split;
16311631 }
1632+ constexpr auto lane_mask = mask % make_batch_constant<uint32_t , (mask.size / 2 ), A>();
1633+ XSIMD_IF_CONSTEXPR (detail::is_only_from_lo (mask))
1634+ {
1635+ __m256i broadcast = _mm256_permute2f128_pd (self, self, 0x00 ); // [low | low]
1636+ return _mm256_permutevar_ps (broadcast, lane_mask.as_batch ());
1637+ }
1638+ XSIMD_IF_CONSTEXPR (detail::is_only_from_hi (mask))
1639+ {
1640+ __m256i broadcast = _mm256_permute2f128_pd (self, self, 0x11 ); // [high | high]
1641+ return _mm256_permutevar_ps (broadcast, lane_mask.as_batch ());
1642+ }
16321643
16331644 // Fallback to general algorithm. This is the same as the dynamic version with the exception
16341645 // that possible operations are done at compile time.
@@ -1655,11 +1666,25 @@ namespace xsimd
16551666 {
16561667 // cannot use detail::mod_shuffle as the mod and shift are different in this case
16571668 constexpr auto imm = ((V0 % 2 ) << 0 ) | ((V1 % 2 ) << 1 ) | ((V2 % 2 ) << 2 ) | ((V3 % 2 ) << 3 );
1658- XSIMD_IF_CONSTEXPR (detail::is_identity (mask)) { return self; }
1669+ XSIMD_IF_CONSTEXPR (detail::is_identity (mask))
1670+ {
1671+ return self;
1672+ }
16591673 XSIMD_IF_CONSTEXPR (!detail::is_cross_lane (mask))
16601674 {
16611675 return _mm256_permute_pd (self, imm);
16621676 }
1677+ constexpr auto lane_mask = mask % make_batch_constant<uint64_t , (mask.size / 2 ), A>();
1678+ XSIMD_IF_CONSTEXPR (detail::is_only_from_lo (mask))
1679+ {
1680+ __m256i broadcast = _mm256_permute2f128_pd (self, self, 0x00 ); // [low | low]
1681+ return _mm256_permute_pd (broadcast, lane_mask.as_batch ());
1682+ }
1683+ XSIMD_IF_CONSTEXPR (detail::is_only_from_hi (mask))
1684+ {
1685+ __m256i broadcast = _mm256_permute2f128_pd (self, self, 0x11 ); // [high | high]
1686+ return _mm256_permute_pd (broadcast, lane_mask.as_batch ());
1687+ }
16631688
16641689 // Fallback to general algorithm. This is the same as the dynamic version with the exception
16651690 // that possible operations are done at compile time.
0 commit comments