Skip to content

Commit e2c7e4f

Browse files
lqd authored and Amanieu committed
convert _mm256_permute4x64_pd to const generics
1 parent b15f7dc commit e2c7e4f

File tree

1 file changed

+15
-47
lines changed

1 file changed

+15
-47
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 15 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -2351,53 +2351,21 @@ pub unsafe fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i)
23512351
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd)
23522352
#[inline]
23532353
#[target_feature(enable = "avx2")]
2354-
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))]
2355-
#[rustc_args_required_const(1)]
2354+
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
2355+
#[rustc_legacy_const_generics(1)]
23562356
#[stable(feature = "simd_x86", since = "1.27.0")]
2357-
pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d {
2358-
let imm8 = (imm8 & 0xFF) as u8;
2359-
let undef = _mm256_undefined_pd();
2360-
macro_rules! shuffle_done {
2361-
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
2362-
simd_shuffle4(a, undef, [$x01, $x23, $x45, $x67])
2363-
};
2364-
}
2365-
macro_rules! shuffle_x67 {
2366-
($x01:expr, $x23:expr, $x45:expr) => {
2367-
match (imm8 >> 6) & 0b11 {
2368-
0b00 => shuffle_done!($x01, $x23, $x45, 0),
2369-
0b01 => shuffle_done!($x01, $x23, $x45, 1),
2370-
0b10 => shuffle_done!($x01, $x23, $x45, 2),
2371-
_ => shuffle_done!($x01, $x23, $x45, 3),
2372-
}
2373-
};
2374-
}
2375-
macro_rules! shuffle_x45 {
2376-
($x01:expr, $x23:expr) => {
2377-
match (imm8 >> 4) & 0b11 {
2378-
0b00 => shuffle_x67!($x01, $x23, 0),
2379-
0b01 => shuffle_x67!($x01, $x23, 1),
2380-
0b10 => shuffle_x67!($x01, $x23, 2),
2381-
_ => shuffle_x67!($x01, $x23, 3),
2382-
}
2383-
};
2384-
}
2385-
macro_rules! shuffle_x23 {
2386-
($x01:expr) => {
2387-
match (imm8 >> 2) & 0b11 {
2388-
0b00 => shuffle_x45!($x01, 0),
2389-
0b01 => shuffle_x45!($x01, 1),
2390-
0b10 => shuffle_x45!($x01, 2),
2391-
_ => shuffle_x45!($x01, 3),
2392-
}
2393-
};
2394-
}
2395-
match imm8 & 0b11 {
2396-
0b00 => shuffle_x23!(0),
2397-
0b01 => shuffle_x23!(1),
2398-
0b10 => shuffle_x23!(2),
2399-
_ => shuffle_x23!(3),
2400-
}
2357+
pub unsafe fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
2358+
static_assert_imm8!(IMM8);
2359+
simd_shuffle4(
2360+
a,
2361+
_mm256_undefined_pd(),
2362+
[
2363+
IMM8 as u32 & 0b11,
2364+
(IMM8 as u32 >> 2) & 0b11,
2365+
(IMM8 as u32 >> 4) & 0b11,
2366+
(IMM8 as u32 >> 6) & 0b11,
2367+
],
2368+
)
24012369
}
24022370

24032371
/// Shuffles eight 32-bit floating-point elements in `a` across lanes using
@@ -5406,7 +5374,7 @@ mod tests {
54065374
#[simd_test(enable = "avx2")]
54075375
unsafe fn test_mm256_permute4x64_pd() {
54085376
let a = _mm256_setr_pd(1., 2., 3., 4.);
5409-
let r = _mm256_permute4x64_pd(a, 0b00_01_00_11);
5377+
let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
54105378
let e = _mm256_setr_pd(4., 1., 2., 1.);
54115379
assert_eq_m256d(r, e);
54125380
}

0 commit comments

Comments
 (0)