Skip to content

Commit b15f7dc

Browse files
lqd authored and Amanieu committed
convert _mm256_permute4x64_epi64 to const generics
1 parent babe45c commit b15f7dc

File tree

1 file changed

+15
-47
lines changed

1 file changed

+15
-47
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 15 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -2313,54 +2313,22 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
23132313
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_epi64)
23142314
#[inline]
23152315
#[target_feature(enable = "avx2")]
2316-
#[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))]
2317-
#[rustc_args_required_const(1)]
2316+
#[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
2317+
#[rustc_legacy_const_generics(1)]
23182318
#[stable(feature = "simd_x86", since = "1.27.0")]
2319-
pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i {
2320-
let imm8 = (imm8 & 0xFF) as u8;
2319+
pub unsafe fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
2320+
static_assert_imm8!(IMM8);
23212321
let zero = _mm256_setzero_si256().as_i64x4();
2322-
let a = a.as_i64x4();
2323-
macro_rules! permute4 {
2324-
($a:expr, $b:expr, $c:expr, $d:expr) => {
2325-
simd_shuffle4(a, zero, [$a, $b, $c, $d])
2326-
};
2327-
}
2328-
macro_rules! permute3 {
2329-
($a:expr, $b:expr, $c:expr) => {
2330-
match (imm8 >> 6) & 0b11 {
2331-
0b00 => permute4!($a, $b, $c, 0),
2332-
0b01 => permute4!($a, $b, $c, 1),
2333-
0b10 => permute4!($a, $b, $c, 2),
2334-
_ => permute4!($a, $b, $c, 3),
2335-
}
2336-
};
2337-
}
2338-
macro_rules! permute2 {
2339-
($a:expr, $b:expr) => {
2340-
match (imm8 >> 4) & 0b11 {
2341-
0b00 => permute3!($a, $b, 0),
2342-
0b01 => permute3!($a, $b, 1),
2343-
0b10 => permute3!($a, $b, 2),
2344-
_ => permute3!($a, $b, 3),
2345-
}
2346-
};
2347-
}
2348-
macro_rules! permute1 {
2349-
($a:expr) => {
2350-
match (imm8 >> 2) & 0b11 {
2351-
0b00 => permute2!($a, 0),
2352-
0b01 => permute2!($a, 1),
2353-
0b10 => permute2!($a, 2),
2354-
_ => permute2!($a, 3),
2355-
}
2356-
};
2357-
}
2358-
let r: i64x4 = match imm8 & 0b11 {
2359-
0b00 => permute1!(0),
2360-
0b01 => permute1!(1),
2361-
0b10 => permute1!(2),
2362-
_ => permute1!(3),
2363-
};
2322+
let r: i64x4 = simd_shuffle4(
2323+
a.as_i64x4(),
2324+
zero,
2325+
[
2326+
IMM8 as u32 & 0b11,
2327+
(IMM8 as u32 >> 2) & 0b11,
2328+
(IMM8 as u32 >> 4) & 0b11,
2329+
(IMM8 as u32 >> 6) & 0b11,
2330+
],
2331+
);
23642332
transmute(r)
23652333
}
23662334

@@ -5422,7 +5390,7 @@ mod tests {
54225390
unsafe fn test_mm256_permute4x64_epi64() {
54235391
let a = _mm256_setr_epi64x(100, 200, 300, 400);
54245392
let expected = _mm256_setr_epi64x(400, 100, 200, 100);
5425-
let r = _mm256_permute4x64_epi64(a, 0b00010011);
5393+
let r = _mm256_permute4x64_epi64::<0b00010011>(a);
54265394
assert_eq_m256i(r, expected);
54275395
}
54285396

0 commit comments

Comments
 (0)