Skip to content

Commit 3e20523

Browse files
lqd authored and Amanieu committed
convert _mm256_shufflehi_epi16 to const generics
1 parent e2c7e4f commit 3e20523

File tree

2 files changed

+29
-52
lines changed

2 files changed

+29
-52
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 27 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -2493,57 +2493,34 @@ pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
24932493
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflehi_epi16)
24942494
#[inline]
24952495
#[target_feature(enable = "avx2")]
2496-
#[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))]
2497-
#[rustc_args_required_const(1)]
2496+
#[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
2497+
#[rustc_legacy_const_generics(1)]
24982498
#[stable(feature = "simd_x86", since = "1.27.0")]
2499-
pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i {
2500-
let imm8 = (imm8 & 0xFF) as u8;
2499+
pub unsafe fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
2500+
static_assert_imm8!(IMM8);
25012501
let a = a.as_i16x16();
2502-
macro_rules! shuffle_done {
2503-
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
2504-
#[rustfmt::skip]
2505-
simd_shuffle16(a, a, [
2506-
0, 1, 2, 3, 4+$x01, 4+$x23, 4+$x45, 4+$x67,
2507-
8, 9, 10, 11, 12+$x01, 12+$x23, 12+$x45, 12+$x67
2508-
])
2509-
};
2510-
}
2511-
macro_rules! shuffle_x67 {
2512-
($x01:expr, $x23:expr, $x45:expr) => {
2513-
match (imm8 >> 6) & 0b11 {
2514-
0b00 => shuffle_done!($x01, $x23, $x45, 0),
2515-
0b01 => shuffle_done!($x01, $x23, $x45, 1),
2516-
0b10 => shuffle_done!($x01, $x23, $x45, 2),
2517-
_ => shuffle_done!($x01, $x23, $x45, 3),
2518-
}
2519-
};
2520-
}
2521-
macro_rules! shuffle_x45 {
2522-
($x01:expr, $x23:expr) => {
2523-
match (imm8 >> 4) & 0b11 {
2524-
0b00 => shuffle_x67!($x01, $x23, 0),
2525-
0b01 => shuffle_x67!($x01, $x23, 1),
2526-
0b10 => shuffle_x67!($x01, $x23, 2),
2527-
_ => shuffle_x67!($x01, $x23, 3),
2528-
}
2529-
};
2530-
}
2531-
macro_rules! shuffle_x23 {
2532-
($x01:expr) => {
2533-
match (imm8 >> 2) & 0b11 {
2534-
0b00 => shuffle_x45!($x01, 0),
2535-
0b01 => shuffle_x45!($x01, 1),
2536-
0b10 => shuffle_x45!($x01, 2),
2537-
_ => shuffle_x45!($x01, 3),
2538-
}
2539-
};
2540-
}
2541-
let r: i16x16 = match imm8 & 0b11 {
2542-
0b00 => shuffle_x23!(0),
2543-
0b01 => shuffle_x23!(1),
2544-
0b10 => shuffle_x23!(2),
2545-
_ => shuffle_x23!(3),
2546-
};
2502+
let r: i16x16 = simd_shuffle16(
2503+
a,
2504+
a,
2505+
[
2506+
0,
2507+
1,
2508+
2,
2509+
3,
2510+
4 + (IMM8 as u32 & 0b11),
2511+
4 + ((IMM8 as u32 >> 2) & 0b11),
2512+
4 + ((IMM8 as u32 >> 4) & 0b11),
2513+
4 + ((IMM8 as u32 >> 6) & 0b11),
2514+
8,
2515+
9,
2516+
10,
2517+
11,
2518+
12 + (IMM8 as u32 & 0b11),
2519+
12 + ((IMM8 as u32 >> 2) & 0b11),
2520+
12 + ((IMM8 as u32 >> 4) & 0b11),
2521+
12 + ((IMM8 as u32 >> 6) & 0b11),
2522+
],
2523+
);
25472524
transmute(r)
25482525
}
25492526

@@ -4891,7 +4868,7 @@ mod tests {
48914868
0, 1, 2, 3, 44, 22, 22, 11,
48924869
4, 5, 6, 7, 88, 66, 66, 55,
48934870
);
4894-
let r = _mm256_shufflehi_epi16(a, 0b00_01_01_11);
4871+
let r = _mm256_shufflehi_epi16::<0b00_01_01_11>(a);
48954872
assert_eq_m256i(r, e);
48964873
}
48974874

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7384,7 +7384,7 @@ pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
73847384
a: __m256i,
73857385
) -> __m256i {
73867386
static_assert_imm8!(IMM8);
7387-
let shuffle = _mm256_shufflehi_epi16(a, IMM8);
7387+
let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
73887388
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
73897389
}
73907390

@@ -7397,7 +7397,7 @@ pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
73977397
#[rustc_legacy_const_generics(2)]
73987398
pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
73997399
static_assert_imm8!(IMM8);
7400-
let shuffle = _mm256_shufflehi_epi16(a, IMM8);
7400+
let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
74017401
let zero = _mm256_setzero_si256().as_i16x16();
74027402
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
74037403
}

0 commit comments

Comments
 (0)