File tree Expand file tree Collapse file tree 1 file changed +2
-8
lines changed
Expand file tree Collapse file tree 1 file changed +2
-8
lines changed Original file line number Diff line number Diff line change @@ -1320,15 +1320,9 @@ namespace xsimd
13201320 }
13211321 XSIMD_IF_CONSTEXPR (!detail::is_cross_lane (mask))
13221322 {
1323- // The lane mask value is found in mask modulo 2, but the intrinsic expects it in the
1324- // second least significant bit.
1325- constexpr auto two = make_batch_constant<uint64_t , 2 , A>();
1326- constexpr auto half_size = make_batch_constant<uint64_t , (mask.size / 2 ), A>();
1327- constexpr auto lane_mask = (mask % half_size) * two; // `* two` for `<< one`
1323+ constexpr uint8_t lane_mask = (V0 % 2 ) | ((V1 % 2 ) << 1 ) | ((V2 % 2 ) << 2 ) | ((V3 % 2 ) << 3 );
13281324 // Cheaper intrinsics when not crossing lanes
1329- // We could also use _mm256_permute_pd which uses an imm8 constant, though it has the
1330- // same latency/throughput according to the Intel manual.
1331- batch<double , A> permuted = _mm256_permutevar_pd (bitwise_cast<double >(self), lane_mask.as_batch ());
1325+ batch<double , A> permuted = _mm256_permute_pd (bitwise_cast<double >(self), lane_mask);
13321326 return bitwise_cast<uint64_t >(permuted);
13331327 }
13341328 constexpr auto mask_int = detail::mod_shuffle (V0, V1, V2, V3);
You can’t perform that action at this time.
0 commit comments