diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 3a53e54b3c..9adc2f5089 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -10,6 +10,9 @@ macro_rules! simd_ty {
         #[allow(clippy::use_self)]
         impl $id {
+            /// A value of this type where all elements are zeroed out.
+            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
+
             #[inline(always)]
             pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
                 $id([$($param_name),*])
diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index 1a9330121d..4ccdb8bec1 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -2232,7 +2232,7 @@ pub fn v128_any_true(a: v128) -> bool {
 pub fn i8x16_abs(a: v128) -> v128 {
     unsafe {
         let a = a.as_i8x16();
-        let zero = simd::i8x16::splat(0);
+        let zero = simd::i8x16::ZERO;
         simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
     }
 }
@@ -2524,7 +2524,7 @@ pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16;
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i16x8_abs(a: v128) -> v128 {
     let a = a.as_i16x8();
-    let zero = simd::i16x8::splat(0);
+    let zero = simd::i16x8::ZERO;
     unsafe {
         simd_select::<simd::m16x8, simd::i16x8>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
     }
@@ -3012,7 +3012,7 @@ pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8;
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i32x4_abs(a: v128) -> v128 {
     let a = a.as_i32x4();
-    let zero = simd::i32x4::splat(0);
+    let zero = simd::i32x4::ZERO;
     unsafe {
         simd_select::<simd::m32x4, simd::i32x4>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
     }
@@ -3394,7 +3394,7 @@ pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8;
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i64x2_abs(a: v128) -> v128 {
     let a = a.as_i64x2();
-    let zero = simd::i64x2::splat(0);
+    let zero = simd::i64x2::ZERO;
     unsafe {
         simd_select::<simd::m64x2, simd::i64x2>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
     }
@@ -4105,7 +4105,7 @@ pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
     let ret: simd::i32x4 = unsafe {
         simd_shuffle!(
             llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
-            simd::i32x2::splat(0),
+            simd::i32x2::ZERO,
             [0, 1, 2, 3],
         )
     };
@@ -4129,7 +4129,7 @@ pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
     let ret: simd::i32x4 = unsafe {
         simd_shuffle!(
             llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),
-            simd::i32x2::splat(0),
+            simd::i32x2::ZERO,
             [0, 1, 2, 3],
         )
     };
@@ -4176,7 +4176,7 @@ pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 {
     unsafe {
         simd_cast::<simd::f64x4, simd::f32x4>(simd_shuffle!(
             a.as_f64x2(),
-            simd::f64x2::splat(0.0),
+            simd::f64x2::ZERO,
             [0, 1, 2, 3]
         ))
         .v128()
diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index 5b19bb95b3..fd37c6c077 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -515,7 +515,7 @@ pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vblendvpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
-    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
+    let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
     transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
 }

@@ -528,7 +528,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vblendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
-    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
+    let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
     transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
 }

@@ -983,11 +983,7 @@ pub unsafe fn _mm256_extractf128_pd(a: __m256d) -> __m128d {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
     static_assert_uimm_bits!(IMM1, 1);
-    let dst: i64x2 = simd_shuffle!(
-        a.as_i64x4(),
-        _mm256_undefined_si256().as_i64x4(),
-        [[0, 1], [2, 3]][IMM1 as usize],
-    );
+    let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
     transmute(dst)
 }

@@ -2139,7 +2135,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
 pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
-    let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
+    let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
     simd_bitmask::<i64x4, u8>(mask).into()
 }

@@ -2155,7 +2151,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
 pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
-    let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
+    let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
     simd_bitmask::<i32x8, u8>(mask).into()
 }

@@ -2167,7 +2163,7 @@
 #[cfg_attr(test, assert_instr(vxorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_setzero_pd() -> __m256d {
-    _mm256_set1_pd(0.0)
+    const { mem::zeroed() }
 }

 /// Returns vector of type __m256 with all elements set to zero.
@@ -2178,7 +2174,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d {
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_setzero_ps() -> __m256 {
-    _mm256_set1_ps(0.0)
+    const { mem::zeroed() }
 }

 /// Returns vector of type __m256i with all elements set to zero.
@@ -2189,7 +2185,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 {
 #[cfg_attr(test, assert_instr(vxor))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_setzero_si256() -> __m256i {
-    _mm256_set1_epi8(0)
+    const { mem::zeroed() }
 }

 /// Sets packed double-precision (64-bit) floating-point elements in returned
@@ -2722,7 +2718,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
     let a = a.as_i64x2();
-    let undefined = _mm_undefined_si128().as_i64x2();
+    let undefined = i64x2::ZERO;
     let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
     transmute(dst)
 }

@@ -2752,7 +2748,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
-    let b = _mm_setzero_si128().as_i64x2();
+    let b = i64x2::ZERO;
     let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
     transmute(dst)
 }

@@ -2782,7 +2778,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_undefined_ps() -> __m256 {
-    _mm256_set1_ps(0.0)
+    const { mem::zeroed() }
 }

 /// Returns vector of type `__m256d` with indeterminate elements.
@@ -2795,7 +2791,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_undefined_pd() -> __m256d {
-    _mm256_set1_pd(0.0)
+    const { mem::zeroed() }
 }

 /// Returns vector of type __m256i with indeterminate elements.
@@ -2808,7 +2804,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d {
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_undefined_si256() -> __m256i {
-    __m256i([0, 0, 0, 0])
+    const { mem::zeroed() }
 }

 /// Sets packed __m256 returned vector with the supplied values.
diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index 75a393d707..4c52939061 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -33,8 +33,7 @@ use stdarch_test::assert_instr;
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
     let a = a.as_i32x8();
-    let zero = i32x8::splat(0);
-    let r = simd_select::<i32x8, _>(simd_lt(a, zero), simd_neg(a), a);
+    let r = simd_select::<i32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
     transmute(r)
 }

@@ -47,8 +46,7 @@ pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
     let a = a.as_i16x16();
-    let zero = i16x16::splat(0);
-    let r = simd_select::<i16x16, _>(simd_lt(a, zero), simd_neg(a), a);
+    let r = simd_select::<i16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
     transmute(r)
 }

@@ -61,8 +59,7 @@ pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i {
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i {
     let a = a.as_i8x32();
-    let zero = i8x32::splat(0);
-    let r = simd_select::<i8x32, _>(simd_lt(a, zero), simd_neg(a), a);
+    let r = simd_select::<i8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
     transmute(r)
 }

@@ -168,12 +165,12 @@ pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i) -> __m256i {
     // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
     if IMM8 > 32 {
-        return _mm256_set1_epi8(0);
+        return _mm256_setzero_si256();
     }
     // If palignr is shifting the pair of input vectors more than one lane,
     // but less than two lanes, convert to shifting in zeroes.
let (a, b) = if IMM8 > 16 { - (_mm256_set1_epi8(0), a) + (_mm256_setzero_si256(), a) } else { (a, b) }; @@ -471,7 +468,7 @@ pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i) -> __m #[cfg_attr(test, assert_instr(vpblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { - let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::splat(0)); + let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO); transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32())) } @@ -484,8 +481,7 @@ pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m25 #[cfg_attr(test, assert_instr(vpbroadcastb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]); + let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]); transmute::(ret) } @@ -498,8 +494,7 @@ pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpbroadcastb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]); + let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]); transmute::(ret) } @@ -514,8 +509,7 @@ pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]); + let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]); transmute::(ret) } @@ -530,8 +524,7 @@ pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]); + let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]); transmute::(ret) } @@ -595,8 +588,7 @@ pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { #[target_feature(enable = "avx2")] #[stable(feature = "simd_x86_updates", since = "1.82.0")] pub unsafe fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); + let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); transmute::(ret) } @@ -610,8 +602,7 @@ pub unsafe fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i { #[target_feature(enable = "avx2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); + let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]); transmute::(ret) } @@ -648,8 +639,7 @@ pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { #[cfg_attr(test, assert_instr(vpbroadcastw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]); + let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]); 
transmute::(ret) } @@ -662,8 +652,7 @@ pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpbroadcastw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { - let zero = _mm_setzero_si128(); - let ret = simd_shuffle!(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]); + let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]); transmute::(ret) } @@ -917,7 +906,7 @@ pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { pub unsafe fn _mm256_extracti128_si256(a: __m256i) -> __m128i { static_assert_uimm_bits!(IMM1, 1); let a = a.as_i64x4(); - let b = _mm256_undefined_si256().as_i64x4(); + let b = i64x4::ZERO; let dst: i64x2 = simd_shuffle!(a, b, [[0, 1], [2, 3]][IMM1 as usize]); transmute(dst) } @@ -1005,7 +994,7 @@ pub unsafe fn _mm_i32gather_epi32( offsets: __m128i, ) -> __m128i { static_assert_imm8_scale!(SCALE); - let zero = _mm_setzero_si128().as_i32x4(); + let zero = i32x4::ZERO; let neg_one = _mm_set1_epi32(-1).as_i32x4(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; @@ -1054,7 +1043,7 @@ pub unsafe fn _mm256_i32gather_epi32( offsets: __m256i, ) -> __m256i { static_assert_imm8_scale!(SCALE); - let zero = _mm256_setzero_si256().as_i32x8(); + let zero = i32x8::ZERO; let neg_one = _mm256_set1_epi32(-1).as_i32x8(); let offsets = offsets.as_i32x8(); let slice = slice as *const i8; @@ -1187,7 +1176,7 @@ pub unsafe fn _mm_i32gather_epi64( offsets: __m128i, ) -> __m128i { static_assert_imm8_scale!(SCALE); - let zero = _mm_setzero_si128().as_i64x2(); + let zero = i64x2::ZERO; let neg_one = _mm_set1_epi64x(-1).as_i64x2(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; @@ -1236,7 +1225,7 @@ pub unsafe fn _mm256_i32gather_epi64( offsets: __m128i, ) -> __m256i { static_assert_imm8_scale!(SCALE); - let zero = _mm256_setzero_si256().as_i64x4(); + let zero = i64x4::ZERO; let neg_one = _mm256_set1_epi64x(-1).as_i64x4(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; @@ -1372,7 +1361,7 @@ pub unsafe fn _mm_i64gather_epi32( offsets: __m128i, ) -> __m128i { static_assert_imm8_scale!(SCALE); - let zero = _mm_setzero_si128().as_i32x4(); + let zero = i32x4::ZERO; let neg_one = _mm_set1_epi64x(-1).as_i32x4(); let offsets = offsets.as_i64x2(); let slice = slice as *const i8; @@ -1421,7 +1410,7 @@ pub unsafe fn _mm256_i64gather_epi32( offsets: __m256i, ) -> __m128i { static_assert_imm8_scale!(SCALE); - let zero = _mm_setzero_si128().as_i32x4(); + let zero = i32x4::ZERO; let neg_one = _mm_set1_epi64x(-1).as_i32x4(); let offsets = offsets.as_i64x4(); let slice = slice as *const i8; @@ -1554,7 +1543,7 @@ pub unsafe fn _mm_i64gather_epi64( offsets: __m128i, ) -> __m128i { static_assert_imm8_scale!(SCALE); - let zero = _mm_setzero_si128().as_i64x2(); + let zero = i64x2::ZERO; let neg_one = _mm_set1_epi64x(-1).as_i64x2(); let slice = slice as *const i8; let offsets = offsets.as_i64x2(); @@ -1603,7 +1592,7 @@ pub unsafe fn _mm256_i64gather_epi64( offsets: __m256i, ) -> __m256i { static_assert_imm8_scale!(SCALE); - let zero = _mm256_setzero_si256().as_i64x4(); + let zero = i64x4::ZERO; let neg_one = _mm256_set1_epi64x(-1).as_i64x4(); let slice = slice as *const i8; let offsets = offsets.as_i64x4(); @@ -2052,7 +2041,7 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { - let z = 
i8x32::splat(0); + let z = i8x32::ZERO; let m: i8x32 = simd_lt(a.as_i8x32(), z); simd_bitmask::<_, u32>(m) as i32 } @@ -2265,7 +2254,7 @@ pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_epi64(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); - let zero = _mm256_setzero_si256().as_i64x4(); + let zero = i64x4::ZERO; let r: i64x4 = simd_shuffle!( a.as_i64x4(), zero, @@ -2670,9 +2659,8 @@ pub unsafe fn _mm256_bslli_epi128(a: __m256i) -> __m256i { } } let a = a.as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); let r: i8x32 = simd_shuffle!( - zero, + i8x32::ZERO, a, [ mask(IMM8, 0), @@ -2864,7 +2852,7 @@ pub unsafe fn _mm256_srli_si256(a: __m256i) -> __m256i { pub unsafe fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let a = a.as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); + let zero = i8x32::ZERO; let r: i8x32 = match IMM8 % 16 { 0 => simd_shuffle!( a, diff --git a/crates/core_arch/src/x86/avx512bf16.rs b/crates/core_arch/src/x86/avx512bf16.rs index c70950b358..5a852e32f4 100644 --- a/crates/core_arch/src/x86/avx512bf16.rs +++ b/crates/core_arch/src/x86/avx512bf16.rs @@ -66,8 +66,7 @@ pub unsafe fn _mm_mask_cvtne2ps_pbh(src: __m128bh, k: __mmask8, a: __m128, b: __ #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] pub unsafe fn _mm_maskz_cvtne2ps_pbh(k: __mmask8, a: __m128, b: __m128) -> __m128bh { let cvt = _mm_cvtne2ps_pbh(a, b).as_u16x8(); - let zero = _mm_setzero_si128().as_u16x8(); - transmute(simd_select_bitmask(k, cvt, zero)) + transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in two 256-bit vectors @@ -110,8 +109,7 @@ pub unsafe fn _mm256_mask_cvtne2ps_pbh( #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] pub unsafe fn _mm256_maskz_cvtne2ps_pbh(k: __mmask16, a: __m256, b: __m256) -> __m256bh { let cvt = _mm256_cvtne2ps_pbh(a, b).as_u16x16(); - let zero = _mm256_setzero_si256().as_u16x16(); - transmute(simd_select_bitmask(k, cvt, zero)) + transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in two 512-bit vectors @@ -156,8 +154,7 @@ pub unsafe fn _mm512_mask_cvtne2ps_pbh( #[cfg_attr(test, assert_instr("vcvtne2ps2bf16"))] pub unsafe fn _mm512_maskz_cvtne2ps_pbh(k: __mmask32, a: __m512, b: __m512) -> __m512bh { let cvt = _mm512_cvtne2ps_pbh(a, b).as_u16x32(); - let zero = _mm512_setzero_si512().as_u16x32(); - transmute(simd_select_bitmask(k, cvt, zero)) + transmute(simd_select_bitmask(k, cvt, u16x32::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -194,8 +191,7 @@ pub unsafe fn _mm256_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m256) -> #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] pub unsafe fn _mm256_maskz_cvtneps_pbh(k: __mmask8, a: __m256) -> __m128bh { let cvt = _mm256_cvtneps_pbh(a).as_u16x8(); - let zero = _mm_setzero_si128().as_u16x8(); - transmute(simd_select_bitmask(k, cvt, zero)) + transmute(simd_select_bitmask(k, cvt, u16x8::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit) @@ -232,8 +228,7 @@ pub unsafe fn _mm512_mask_cvtneps_pbh(src: __m256bh, k: __mmask16, a: __m512) -> #[cfg_attr(test, assert_instr("vcvtneps2bf16"))] pub unsafe fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh { let cvt = 
_mm512_cvtneps_pbh(a).as_u16x16(); - let zero = _mm256_setzero_si256().as_u16x16(); - transmute(simd_select_bitmask(k, cvt, zero)) + transmute(simd_select_bitmask(k, cvt, u16x16::ZERO)) } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -314,8 +309,7 @@ pub unsafe fn _mm256_mask_dpbf16_ps(src: __m256, k: __mmask8, a: __m256bh, b: __ #[cfg_attr(test, assert_instr("vdpbf16ps"))] pub unsafe fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh) -> __m256 { let rst = _mm256_dpbf16_ps(src, a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, rst, zero)) + transmute(simd_select_bitmask(k, rst, f32x8::ZERO)) } /// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b, @@ -362,8 +356,7 @@ pub unsafe fn _mm512_maskz_dpbf16_ps( b: __m512bh, ) -> __m512 { let rst = _mm512_dpbf16_ps(src, a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, rst, zero)) + transmute(simd_select_bitmask(k, rst, f32x16::ZERO)) } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -400,8 +393,7 @@ pub unsafe fn _mm512_mask_cvtpbh_ps(src: __m512, k: __mmask16, a: __m256bh) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_cvtpbh_ps(k: __mmask16, a: __m256bh) -> __m512 { let cvt = _mm512_cvtpbh_ps(a); - let zero = _mm512_setzero_ps(); - transmute(simd_select_bitmask(k, cvt.as_f32x16(), zero.as_f32x16())) + transmute(simd_select_bitmask(k, cvt.as_f32x16(), f32x16::ZERO)) } /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit) @@ -438,8 +430,7 @@ pub unsafe fn _mm256_mask_cvtpbh_ps(src: __m256, k: __mmask8, a: __m128bh) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m256 { let cvt = _mm256_cvtpbh_ps(a); - let zero = _mm256_setzero_ps(); - transmute(simd_select_bitmask(k, cvt.as_f32x8(), zero.as_f32x8())) + transmute(simd_select_bitmask(k, cvt.as_f32x8(), f32x8::ZERO)) } /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point @@ -476,8 +467,7 @@ pub unsafe fn _mm_mask_cvtpbh_ps(src: __m128, k: __mmask8, a: __m128bh) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_cvtpbh_ps(k: __mmask8, a: __m128bh) -> __m128 { let cvt = _mm_cvtpbh_ps(a); - let zero = _mm_setzero_ps(); - transmute(simd_select_bitmask(k, cvt.as_f32x4(), zero.as_f32x4())) + transmute(simd_select_bitmask(k, cvt.as_f32x4(), f32x4::ZERO)) } /// Converts a single BF16 (16-bit) floating-point element in a to a single-precision (32-bit) floating-point diff --git a/crates/core_arch/src/x86/avx512bitalg.rs b/crates/core_arch/src/x86/avx512bitalg.rs index 0e2ef3aab8..69392dafcb 100644 --- a/crates/core_arch/src/x86/avx512bitalg.rs +++ b/crates/core_arch/src/x86/avx512bitalg.rs @@ -7,6 +7,9 @@ //! //! 
[intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +use crate::core_arch::simd::i16x16; +use crate::core_arch::simd::i16x32; +use crate::core_arch::simd::i16x8; use crate::core_arch::simd::i8x16; use crate::core_arch::simd::i8x32; use crate::core_arch::simd::i8x64; @@ -17,9 +20,6 @@ use crate::core_arch::x86::__mmask16; use crate::core_arch::x86::__mmask32; use crate::core_arch::x86::__mmask64; use crate::core_arch::x86::__mmask8; -use crate::core_arch::x86::_mm256_setzero_si256; -use crate::core_arch::x86::_mm512_setzero_si512; -use crate::core_arch::x86::_mm_setzero_si128; use crate::core_arch::x86::m128iExt; use crate::core_arch::x86::m256iExt; use crate::core_arch::x86::m512iExt; @@ -61,8 +61,11 @@ pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i { - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i16x32()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x32()), + i16x32::ZERO, + )) } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -105,8 +108,11 @@ pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i { - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i16x16()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x16()), + i16x16::ZERO, + )) } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -149,8 +155,11 @@ pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntw))] pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i { - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i16x8()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i16x8()), + i16x8::ZERO, + )) } /// For each packed 16-bit integer maps the value to the number of logical 1 bits. @@ -193,8 +202,11 @@ pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i { - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i8x64()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x64()), + i8x64::ZERO, + )) } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. @@ -237,8 +249,11 @@ pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i { - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i8x32()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x32()), + i8x32::ZERO, + )) } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. 
@@ -281,8 +296,11 @@ pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntb))] pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i { - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i8x16()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i8x16()), + i8x16::ZERO, + )) } /// For each packed 8-bit integer maps the value to the number of logical 1 bits. diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index 40b358bc2a..81b2f7c1ca 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -16,7 +16,7 @@ use stdarch_test::assert_instr; #[cfg_attr(test, assert_instr(vpabsw))] pub unsafe fn _mm512_abs_epi16(a: __m512i) -> __m512i { let a = a.as_i16x32(); - let cmp: i16x32 = simd_gt(a, i16x32::splat(0)); + let cmp: i16x32 = simd_gt(a, i16x32::ZERO); transmute(simd_select(cmp, a, simd_neg(a))) } @@ -41,8 +41,7 @@ pub unsafe fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> _ #[cfg_attr(test, assert_instr(vpabsw))] pub unsafe fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i { let abs = _mm512_abs_epi16(a).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i16x32::ZERO)) } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -66,8 +65,7 @@ pub unsafe fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> _ #[cfg_attr(test, assert_instr(vpabsw))] pub unsafe fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i { let abs = _mm256_abs_epi16(a).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i16x16::ZERO)) } /// Compute the absolute value of packed signed 16-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -91,8 +89,7 @@ pub unsafe fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vpabsw))] pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { let abs = _mm_abs_epi16(a).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i16x8::ZERO)) } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst. 
@@ -104,7 +101,7 @@ pub unsafe fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpabsb))] pub unsafe fn _mm512_abs_epi8(a: __m512i) -> __m512i { let a = a.as_i8x64(); - let cmp: i8x64 = simd_gt(a, i8x64::splat(0)); + let cmp: i8x64 = simd_gt(a, i8x64::ZERO); transmute(simd_select(cmp, a, simd_neg(a))) } @@ -129,8 +126,7 @@ pub unsafe fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __ #[cfg_attr(test, assert_instr(vpabsb))] pub unsafe fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i { let abs = _mm512_abs_epi8(a).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i8x64::ZERO)) } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -154,8 +150,7 @@ pub unsafe fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __ #[cfg_attr(test, assert_instr(vpabsb))] pub unsafe fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i { let abs = _mm256_abs_epi8(a).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i8x32::ZERO)) } /// Compute the absolute value of packed signed 8-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set) @@ -179,8 +174,7 @@ pub unsafe fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vpabsb))] pub unsafe fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i { let abs = _mm_abs_epi8(a).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i8x16::ZERO)) } /// Add packed 16-bit integers in a and b, and store the results in dst. @@ -215,8 +209,7 @@ pub unsafe fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpaddw))] pub unsafe fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_add_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i16x32::ZERO)) } /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -240,8 +233,7 @@ pub unsafe fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpaddw))] pub unsafe fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_add_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i16x16::ZERO)) } /// Add packed 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -265,8 +257,7 @@ pub unsafe fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpaddw))] pub unsafe fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let add = _mm_add_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i16x8::ZERO)) } /// Add packed 8-bit integers in a and b, and store the results in dst. @@ -301,8 +292,7 @@ pub unsafe fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpaddb))] pub unsafe fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_add_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i8x64::ZERO)) } /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -326,8 +316,7 @@ pub unsafe fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpaddb))] pub unsafe fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_add_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i8x32::ZERO)) } /// Add packed 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -351,8 +340,7 @@ pub unsafe fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpaddb))] pub unsafe fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let add = _mm_add_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i8x16::ZERO)) } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst. @@ -392,7 +380,7 @@ pub unsafe fn _mm512_mask_adds_epu16( #[cfg_attr(test, assert_instr(vpaddusw))] pub unsafe fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_adds_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, add, u16x32::splat(0))) + transmute(simd_select_bitmask(k, add, u16x32::ZERO)) } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -421,7 +409,7 @@ pub unsafe fn _mm256_mask_adds_epu16( #[cfg_attr(test, assert_instr(vpaddusw))] pub unsafe fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_adds_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, add, u16x16::splat(0))) + transmute(simd_select_bitmask(k, add, u16x16::ZERO)) } /// Add packed unsigned 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -445,7 +433,7 @@ pub unsafe fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpaddusw))] pub unsafe fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let add = _mm_adds_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, add, u16x8::splat(0))) + transmute(simd_select_bitmask(k, add, u16x8::ZERO)) } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst. @@ -480,7 +468,7 @@ pub unsafe fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpaddusb))] pub unsafe fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_adds_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, add, u8x64::splat(0))) + transmute(simd_select_bitmask(k, add, u8x64::ZERO)) } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -504,7 +492,7 @@ pub unsafe fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpaddusb))] pub unsafe fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_adds_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, add, u8x32::splat(0))) + transmute(simd_select_bitmask(k, add, u8x32::ZERO)) } /// Add packed unsigned 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -528,7 +516,7 @@ pub unsafe fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpaddusb))] pub unsafe fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let add = _mm_adds_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, add, u8x16::splat(0))) + transmute(simd_select_bitmask(k, add, u8x16::ZERO)) } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst. @@ -568,7 +556,7 @@ pub unsafe fn _mm512_mask_adds_epi16( #[cfg_attr(test, assert_instr(vpaddsw))] pub unsafe fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_adds_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, add, i16x32::splat(0))) + transmute(simd_select_bitmask(k, add, i16x32::ZERO)) } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -597,7 +585,7 @@ pub unsafe fn _mm256_mask_adds_epi16( #[cfg_attr(test, assert_instr(vpaddsw))] pub unsafe fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_adds_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, add, i16x16::splat(0))) + transmute(simd_select_bitmask(k, add, i16x16::ZERO)) } /// Add packed signed 16-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -621,7 +609,7 @@ pub unsafe fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpaddsw))] pub unsafe fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let add = _mm_adds_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, add, i16x8::splat(0))) + transmute(simd_select_bitmask(k, add, i16x8::ZERO)) } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst. @@ -656,7 +644,7 @@ pub unsafe fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpaddsb))] pub unsafe fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_adds_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, add, i8x64::splat(0))) + transmute(simd_select_bitmask(k, add, i8x64::ZERO)) } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -680,7 +668,7 @@ pub unsafe fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpaddsb))] pub unsafe fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_adds_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, add, i8x32::splat(0))) + transmute(simd_select_bitmask(k, add, i8x32::ZERO)) } /// Add packed signed 8-bit integers in a and b using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -704,7 +692,7 @@ pub unsafe fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpaddsb))] pub unsafe fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let add = _mm_adds_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, add, i8x16::splat(0))) + transmute(simd_select_bitmask(k, add, i8x16::ZERO)) } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst. @@ -739,8 +727,7 @@ pub unsafe fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpsubw))] pub unsafe fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_sub_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -764,8 +751,7 @@ pub unsafe fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpsubw))] pub unsafe fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_sub_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) } /// Subtract packed 16-bit integers in b from packed 16-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -789,8 +775,7 @@ pub unsafe fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpsubw))] pub unsafe fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_sub_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst. @@ -825,8 +810,7 @@ pub unsafe fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpsubb))] pub unsafe fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_sub_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -850,8 +834,7 @@ pub unsafe fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpsubb))] pub unsafe fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_sub_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) } /// Subtract packed 8-bit integers in b from packed 8-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -875,8 +858,7 @@ pub unsafe fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpsubb))] pub unsafe fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_sub_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst. @@ -916,7 +898,7 @@ pub unsafe fn _mm512_mask_subs_epu16( #[cfg_attr(test, assert_instr(vpsubusw))] pub unsafe fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_subs_epu16(a, b).as_u16x32(); - transmute(simd_select_bitmask(k, sub, u16x32::splat(0))) + transmute(simd_select_bitmask(k, sub, u16x32::ZERO)) } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -945,7 +927,7 @@ pub unsafe fn _mm256_mask_subs_epu16( #[cfg_attr(test, assert_instr(vpsubusw))] pub unsafe fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_subs_epu16(a, b).as_u16x16(); - transmute(simd_select_bitmask(k, sub, u16x16::splat(0))) + transmute(simd_select_bitmask(k, sub, u16x16::ZERO)) } /// Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -969,7 +951,7 @@ pub unsafe fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpsubusw))] pub unsafe fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_subs_epu16(a, b).as_u16x8(); - transmute(simd_select_bitmask(k, sub, u16x8::splat(0))) + transmute(simd_select_bitmask(k, sub, u16x8::ZERO)) } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst. @@ -1004,7 +986,7 @@ pub unsafe fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpsubusb))] pub unsafe fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_subs_epu8(a, b).as_u8x64(); - transmute(simd_select_bitmask(k, sub, u8x64::splat(0))) + transmute(simd_select_bitmask(k, sub, u8x64::ZERO)) } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1028,7 +1010,7 @@ pub unsafe fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpsubusb))] pub unsafe fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_subs_epu8(a, b).as_u8x32(); - transmute(simd_select_bitmask(k, sub, u8x32::splat(0))) + transmute(simd_select_bitmask(k, sub, u8x32::ZERO)) } /// Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1052,7 +1034,7 @@ pub unsafe fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpsubusb))] pub unsafe fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_subs_epu8(a, b).as_u8x16(); - transmute(simd_select_bitmask(k, sub, u8x16::splat(0))) + transmute(simd_select_bitmask(k, sub, u8x16::ZERO)) } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst. @@ -1092,7 +1074,7 @@ pub unsafe fn _mm512_mask_subs_epi16( #[cfg_attr(test, assert_instr(vpsubsw))] pub unsafe fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_subs_epi16(a, b).as_i16x32(); - transmute(simd_select_bitmask(k, sub, i16x32::splat(0))) + transmute(simd_select_bitmask(k, sub, i16x32::ZERO)) } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1121,7 +1103,7 @@ pub unsafe fn _mm256_mask_subs_epi16( #[cfg_attr(test, assert_instr(vpsubsw))] pub unsafe fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_subs_epi16(a, b).as_i16x16(); - transmute(simd_select_bitmask(k, sub, i16x16::splat(0))) + transmute(simd_select_bitmask(k, sub, i16x16::ZERO)) } /// Subtract packed signed 16-bit integers in b from packed 16-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1145,7 +1127,7 @@ pub unsafe fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpsubsw))] pub unsafe fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_subs_epi16(a, b).as_i16x8(); - transmute(simd_select_bitmask(k, sub, i16x8::splat(0))) + transmute(simd_select_bitmask(k, sub, i16x8::ZERO)) } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst. @@ -1180,7 +1162,7 @@ pub unsafe fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpsubsb))] pub unsafe fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_subs_epi8(a, b).as_i8x64(); - transmute(simd_select_bitmask(k, sub, i8x64::splat(0))) + transmute(simd_select_bitmask(k, sub, i8x64::ZERO)) } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1204,7 +1186,7 @@ pub unsafe fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpsubsb))] pub unsafe fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_subs_epi8(a, b).as_i8x32(); - transmute(simd_select_bitmask(k, sub, i8x32::splat(0))) + transmute(simd_select_bitmask(k, sub, i8x32::ZERO)) } /// Subtract packed signed 8-bit integers in b from packed 8-bit integers in a using saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1228,7 +1210,7 @@ pub unsafe fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpsubsb))] pub unsafe fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_subs_epi8(a, b).as_i8x16(); - transmute(simd_select_bitmask(k, sub, i8x16::splat(0))) + transmute(simd_select_bitmask(k, sub, i8x16::ZERO)) } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. @@ -1271,8 +1253,7 @@ pub unsafe fn _mm512_mask_mulhi_epu16( #[cfg_attr(test, assert_instr(vpmulhuw))] pub unsafe fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mulhi_epu16(a, b).as_u16x32(); - let zero = _mm512_setzero_si512().as_u16x32(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, u16x32::ZERO)) } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1301,8 +1282,7 @@ pub unsafe fn _mm256_mask_mulhi_epu16( #[cfg_attr(test, assert_instr(vpmulhuw))] pub unsafe fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mulhi_epu16(a, b).as_u16x16(); - let zero = _mm256_setzero_si256().as_u16x16(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, u16x16::ZERO)) } /// Multiply the packed unsigned 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1326,8 +1306,7 @@ pub unsafe fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m #[cfg_attr(test, assert_instr(vpmulhuw))] pub unsafe fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mulhi_epu16(a, b).as_u16x8(); - let zero = _mm_setzero_si128().as_u16x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, u16x8::ZERO)) } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst. @@ -1370,8 +1349,7 @@ pub unsafe fn _mm512_mask_mulhi_epi16( #[cfg_attr(test, assert_instr(vpmulhw))] pub unsafe fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mulhi_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1400,8 +1378,7 @@ pub unsafe fn _mm256_mask_mulhi_epi16( #[cfg_attr(test, assert_instr(vpmulhw))] pub unsafe fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mulhi_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) } /// Multiply the packed signed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the high 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1425,8 +1402,7 @@ pub unsafe fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m #[cfg_attr(test, assert_instr(vpmulhw))] pub unsafe fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mulhi_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst. 
@@ -1466,8 +1442,7 @@ pub unsafe fn _mm512_mask_mulhrs_epi16( #[cfg_attr(test, assert_instr(vpmulhrsw))] pub unsafe fn _mm512_maskz_mulhrs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mulhrs_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1496,8 +1471,7 @@ pub unsafe fn _mm256_mask_mulhrs_epi16( #[cfg_attr(test, assert_instr(vpmulhrsw))] pub unsafe fn _mm256_maskz_mulhrs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mulhrs_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits \[16:1\] to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1521,8 +1495,7 @@ pub unsafe fn _mm_mask_mulhrs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __ #[cfg_attr(test, assert_instr(vpmulhrsw))] pub unsafe fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mulhrs_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst. @@ -1562,8 +1535,7 @@ pub unsafe fn _mm512_mask_mullo_epi16( #[cfg_attr(test, assert_instr(vpmullw))] pub unsafe fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mullo_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x32::ZERO)) } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1592,8 +1564,7 @@ pub unsafe fn _mm256_mask_mullo_epi16( #[cfg_attr(test, assert_instr(vpmullw))] pub unsafe fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mullo_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x16::ZERO)) } /// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit integers, and store the low 16 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1617,8 +1588,7 @@ pub unsafe fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m #[cfg_attr(test, assert_instr(vpmullw))] pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mullo_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i16x8::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst. @@ -1655,8 +1625,7 @@ pub unsafe fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpmaxuw))] pub unsafe fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epu16(a, b).as_u16x32(); - let zero = _mm512_setzero_si512().as_u16x32(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u16x32::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1680,8 +1649,7 @@ pub unsafe fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpmaxuw))] pub unsafe fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epu16(a, b).as_u16x16(); - let zero = _mm256_setzero_si256().as_u16x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u16x16::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1705,8 +1673,7 @@ pub unsafe fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxuw))] pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epu16(a, b).as_u16x8(); - let zero = _mm_setzero_si128().as_u16x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u16x8::ZERO)) } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst. @@ -1743,8 +1710,7 @@ pub unsafe fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpmaxub))] pub unsafe fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epu8(a, b).as_u8x64(); - let zero = _mm512_setzero_si512().as_u8x64(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u8x64::ZERO)) } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1768,8 +1734,7 @@ pub unsafe fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmaxub))] pub unsafe fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epu8(a, b).as_u8x32(); - let zero = _mm256_setzero_si256().as_u8x32(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u8x32::ZERO)) } /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
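For readers skimming the diff, a hedged usage sketch of one of the rewritten intrinsics, `_mm_maskz_max_epu16`, under runtime feature detection. It assumes a toolchain where the AVX-512BW/VL intrinsics are usable (they were still gated behind the unstable `stdarch_x86_avx512` feature when this change landed):

```rust
#[cfg(target_arch = "x86_64")]
fn demo_maskz_max_epu16() {
    if is_x86_feature_detected!("avx512bw") && is_x86_feature_detected!("avx512vl") {
        use std::arch::x86_64::*;
        unsafe {
            let a = _mm_setr_epi16(1, 20, 3, 40, 5, 60, 7, 80);
            let b = _mm_setr_epi16(10, 2, 30, 4, 50, 6, 70, 8);
            // k = 0b0000_1111: lanes 0..=3 receive max(a, b), lanes 4..=7 are
            // zeroed -- exactly the simd_select_bitmask(k, max, ZERO) above.
            let r = _mm_maskz_max_epu16(0b0000_1111, a, b);
            let mut out = [0u16; 8];
            _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
            assert_eq!(out, [10, 20, 30, 40, 0, 0, 0, 0]);
        }
    }
}
```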
@@ -1793,8 +1758,7 @@ pub unsafe fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxub))] pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epu8(a, b).as_u8x16(); - let zero = _mm_setzero_si128().as_u8x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u8x16::ZERO)) } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst. @@ -1831,8 +1795,7 @@ pub unsafe fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpmaxsw))] pub unsafe fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i16x32::ZERO)) } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1856,8 +1819,7 @@ pub unsafe fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpmaxsw))] pub unsafe fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i16x16::ZERO)) } /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1881,8 +1843,7 @@ pub unsafe fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxsw))] pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i16x8::ZERO)) } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst. @@ -1919,8 +1880,7 @@ pub unsafe fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpmaxsb))] pub unsafe fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i8x64::ZERO)) } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1944,8 +1904,7 @@ pub unsafe fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmaxsb))] pub unsafe fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i8x32::ZERO)) } /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1969,8 +1928,7 @@ pub unsafe fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxsb))] pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i8x16::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst. @@ -2007,8 +1965,7 @@ pub unsafe fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpminuw))] pub unsafe fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epu16(a, b).as_u16x32(); - let zero = _mm512_setzero_si512().as_u16x32(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u16x32::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2032,8 +1989,7 @@ pub unsafe fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpminuw))] pub unsafe fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epu16(a, b).as_u16x16(); - let zero = _mm256_setzero_si256().as_u16x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u16x16::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2057,8 +2013,7 @@ pub unsafe fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminuw))] pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epu16(a, b).as_u16x8(); - let zero = _mm_setzero_si128().as_u16x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u16x8::ZERO)) } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst. @@ -2095,8 +2050,7 @@ pub unsafe fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpminub))] pub unsafe fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epu8(a, b).as_u8x64(); - let zero = _mm512_setzero_si512().as_u8x64(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u8x64::ZERO)) } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2120,8 +2074,7 @@ pub unsafe fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpminub))] pub unsafe fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epu8(a, b).as_u8x32(); - let zero = _mm256_setzero_si256().as_u8x32(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u8x32::ZERO)) } /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2145,8 +2098,7 @@ pub unsafe fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminub))] pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epu8(a, b).as_u8x16(); - let zero = _mm_setzero_si128().as_u8x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u8x16::ZERO)) } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst. @@ -2183,8 +2135,7 @@ pub unsafe fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpminsw))] pub unsafe fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i16x32::ZERO)) } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2208,8 +2159,7 @@ pub unsafe fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpminsw))] pub unsafe fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i16x16::ZERO)) } /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2233,8 +2183,7 @@ pub unsafe fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminsw))] pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i16x8::ZERO)) } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst. @@ -2271,8 +2220,7 @@ pub unsafe fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpminsb))] pub unsafe fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i8x64::ZERO)) } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2296,8 +2244,7 @@ pub unsafe fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpminsb))] pub unsafe fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i8x32::ZERO)) } /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -2321,8 +2268,7 @@ pub unsafe fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminsb))] pub unsafe fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i8x16::ZERO)) } /// Compare packed unsigned 16-bit integers in a and b for less-than, and store the results in mask vector k. @@ -3925,7 +3871,7 @@ pub unsafe fn _mm512_cmp_epu16_mask(a: __m512i, b: __m512i) -> 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i16x32::splat(0), + 3 => i16x32::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -3950,12 +3896,12 @@ pub unsafe fn _mm512_mask_cmp_epu16_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_u16x32(); let b = b.as_u16x32(); - let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::splat(0)); + let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x32::splat(0), + 3 => i16x32::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -3980,7 +3926,7 @@ pub unsafe fn _mm256_cmp_epu16_mask(a: __m256i, b: __m256i) -> 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i16x16::splat(0), + 3 => i16x16::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4005,12 +3951,12 @@ pub unsafe fn _mm256_mask_cmp_epu16_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_u16x16(); let b = b.as_u16x16(); - let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::splat(0)); + let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x16::splat(0), + 3 => i16x16::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4035,7 +3981,7 @@ pub unsafe fn _mm_cmp_epu16_mask(a: __m128i, b: __m128i) -> __m 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i16x8::splat(0), + 3 => i16x8::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4060,12 +4006,12 @@ pub unsafe fn _mm_mask_cmp_epu16_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_u16x8(); let b = b.as_u16x8(); - let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::splat(0)); + let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x8::splat(0), + 3 => i16x8::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4090,7 +4036,7 @@ pub unsafe fn _mm512_cmp_epu8_mask(a: __m512i, b: __m512i) -> _ 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i8x64::splat(0), + 3 => i8x64::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4115,12 +4061,12 @@ pub unsafe fn _mm512_mask_cmp_epu8_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_u8x64(); let b = b.as_u8x64(); - let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::splat(0)); + let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, 
simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x64::splat(0), + 3 => i8x64::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4145,7 +4091,7 @@ pub unsafe fn _mm256_cmp_epu8_mask(a: __m256i, b: __m256i) -> _ 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i8x32::splat(0), + 3 => i8x32::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4170,12 +4116,12 @@ pub unsafe fn _mm256_mask_cmp_epu8_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_u8x32(); let b = b.as_u8x32(); - let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::splat(0)); + let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x32::splat(0), + 3 => i8x32::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4200,7 +4146,7 @@ pub unsafe fn _mm_cmp_epu8_mask(a: __m128i, b: __m128i) -> __mm 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i8x16::splat(0), + 3 => i8x16::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4225,12 +4171,12 @@ pub unsafe fn _mm_mask_cmp_epu8_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_u8x16(); let b = b.as_u8x16(); - let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::splat(0)); + let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x16::splat(0), + 3 => i8x16::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4255,7 +4201,7 @@ pub unsafe fn _mm512_cmp_epi16_mask(a: __m512i, b: __m512i) -> 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i16x32::splat(0), + 3 => i16x32::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4280,12 +4226,12 @@ pub unsafe fn _mm512_mask_cmp_epi16_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_i16x32(); let b = b.as_i16x32(); - let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::splat(0)); + let k1 = simd_select_bitmask(k1, i16x32::splat(-1), i16x32::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x32::splat(0), + 3 => i16x32::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4310,7 +4256,7 @@ pub unsafe fn _mm256_cmp_epi16_mask(a: __m256i, b: __m256i) -> 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i16x16::splat(0), + 3 => i16x16::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4335,12 +4281,12 @@ pub unsafe fn _mm256_mask_cmp_epi16_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_i16x16(); let b = b.as_i16x16(); - let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::splat(0)); + let k1 = simd_select_bitmask(k1, i16x16::splat(-1), i16x16::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x16::splat(0), + 3 => i16x16::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4365,7 +4311,7 @@ pub unsafe fn _mm_cmp_epi16_mask(a: __m128i, b: __m128i) -> __m 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => 
simd_le(a, b), - 3 => i16x8::splat(0), + 3 => i16x8::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4390,12 +4336,12 @@ pub unsafe fn _mm_mask_cmp_epi16_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_i16x8(); let b = b.as_i16x8(); - let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::splat(0)); + let k1 = simd_select_bitmask(k1, i16x8::splat(-1), i16x8::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i16x8::splat(0), + 3 => i16x8::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4420,7 +4366,7 @@ pub unsafe fn _mm512_cmp_epi8_mask(a: __m512i, b: __m512i) -> _ 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i8x64::splat(0), + 3 => i8x64::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4445,12 +4391,12 @@ pub unsafe fn _mm512_mask_cmp_epi8_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_i8x64(); let b = b.as_i8x64(); - let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::splat(0)); + let k1 = simd_select_bitmask(k1, i8x64::splat(-1), i8x64::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x64::splat(0), + 3 => i8x64::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4475,7 +4421,7 @@ pub unsafe fn _mm256_cmp_epi8_mask(a: __m256i, b: __m256i) -> _ 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i8x32::splat(0), + 3 => i8x32::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4500,12 +4446,12 @@ pub unsafe fn _mm256_mask_cmp_epi8_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_i8x32(); let b = b.as_i8x32(); - let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::splat(0)); + let k1 = simd_select_bitmask(k1, i8x32::splat(-1), i8x32::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x32::splat(0), + 3 => i8x32::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4530,7 +4476,7 @@ pub unsafe fn _mm_cmp_epi8_mask(a: __m128i, b: __m128i) -> __mm 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i8x16::splat(0), + 3 => i8x16::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -4555,12 +4501,12 @@ pub unsafe fn _mm_mask_cmp_epi8_mask( static_assert_uimm_bits!(IMM8, 3); let a = a.as_i8x16(); let b = b.as_i8x16(); - let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::splat(0)); + let k1 = simd_select_bitmask(k1, i8x16::splat(-1), i8x16::ZERO); let r = match IMM8 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i8x16::splat(0), + 3 => i8x16::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -4586,11 +4532,7 @@ pub unsafe fn _mm256_reduce_add_epi16(a: __m256i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_add_unordered(simd_select_bitmask( - k, - a.as_i16x16(), - _mm256_setzero_si256().as_i16x16(), - )) + simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } /// Reduce the 
packed 16-bit integers in a by addition. Returns the sum of all elements in a. @@ -4610,11 +4552,7 @@ pub unsafe fn _mm_reduce_add_epi16(a: __m128i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_add_unordered(simd_select_bitmask( - k, - a.as_i16x8(), - _mm_setzero_si128().as_i16x8(), - )) + simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. @@ -4634,11 +4572,7 @@ pub unsafe fn _mm256_reduce_add_epi8(a: __m256i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_add_unordered(simd_select_bitmask( - k, - a.as_i8x32(), - _mm256_setzero_si256().as_i8x32(), - )) + simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a. @@ -4658,11 +4592,7 @@ pub unsafe fn _mm_reduce_add_epi8(a: __m128i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_add_unordered(simd_select_bitmask( - k, - a.as_i8x16(), - _mm_setzero_si128().as_i8x16(), - )) + simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } /// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a. @@ -4858,7 +4788,7 @@ pub unsafe fn _mm256_reduce_max_epu16(a: __m256i) -> u16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 { - simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0))) + simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) } /// Reduce the packed unsigned 16-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4878,7 +4808,7 @@ pub unsafe fn _mm_reduce_max_epu16(a: __m128i) -> u16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 { - simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0))) + simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) } /// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -4898,7 +4828,7 @@ pub unsafe fn _mm256_reduce_max_epu8(a: __m256i) -> u8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 { - simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0))) + simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) } /// Reduce the packed unsigned 8-bit integers in a by maximum. Returns the maximum of all elements in a. 
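On the `_mm*_cmp_ep*_mask` hunks above: the 3-bit `IMM8` selects one of Intel's `_MM_CMPINT_*` predicates, and predicate 3 is the constant-false case, which is why that match arm can simply yield the all-zero vector now spelled `ZERO`. A scalar sketch of the encoding (predicate 7, constant-true, is taken from the Intel documentation rather than from the hunks shown here):

```rust
/// Scalar model of one lane of _mm*_cmp_epu16_mask for a 3-bit predicate.
fn cmp_lane(imm8: u8, a: u16, b: u16) -> bool {
    match imm8 & 0b111 {
        0 => a == b,  // _MM_CMPINT_EQ
        1 => a < b,   // _MM_CMPINT_LT
        2 => a <= b,  // _MM_CMPINT_LE
        3 => false,   // _MM_CMPINT_FALSE -- the `ZERO` arm in the diff
        4 => a != b,  // _MM_CMPINT_NE
        5 => a >= b,  // _MM_CMPINT_NLT
        6 => a > b,   // _MM_CMPINT_NLE
        _ => true,    // _MM_CMPINT_TRUE
    }
}
```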
@@ -4918,7 +4848,7 @@ pub unsafe fn _mm_reduce_max_epu8(a: __m128i) -> u8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 { - simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0))) + simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) } /// Reduce the packed 16-bit integers in a by minimum. Returns the minimum of all elements in a. @@ -5178,11 +5108,7 @@ pub unsafe fn _mm256_reduce_or_epi16(a: __m256i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 { - simd_reduce_or(simd_select_bitmask( - k, - a.as_i16x16(), - _mm256_setzero_si256().as_i16x16(), - )) + simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) } /// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5202,11 +5128,7 @@ pub unsafe fn _mm_reduce_or_epi16(a: __m128i) -> i16 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 { - simd_reduce_or(simd_select_bitmask( - k, - a.as_i16x8(), - _mm_setzero_si128().as_i16x8(), - )) + simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) } /// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5226,11 +5148,7 @@ pub unsafe fn _mm256_reduce_or_epi8(a: __m256i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 { - simd_reduce_or(simd_select_bitmask( - k, - a.as_i8x32(), - _mm256_setzero_si256().as_i8x32(), - )) + simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) } /// Reduce the packed 8-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -5250,11 +5168,7 @@ pub unsafe fn _mm_reduce_or_epi8(a: __m128i) -> i8 { #[target_feature(enable = "avx512bw,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 { - simd_reduce_or(simd_select_bitmask( - k, - a.as_i8x16(), - _mm_setzero_si128().as_i8x16(), - )) + simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) } /// Load 512-bits (composed of 32 packed 16-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. @@ -5654,8 +5568,7 @@ pub unsafe fn _mm512_mask_madd_epi16( #[cfg_attr(test, assert_instr(vpmaddwd))] pub unsafe fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let madd = _mm512_madd_epi16(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, madd, zero)) + transmute(simd_select_bitmask(k, madd, i32x16::ZERO)) } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
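On the masked-reduction hunks above: replacing the `_mm*_setzero_*()` call with `ZERO` leans on the same fact the original code did, namely that 0 is the identity for addition, bitwise OR and unsigned max, so masked-off lanes cannot influence the reduction. A scalar sketch of the add case:

```rust
/// Scalar model of _mm256_mask_reduce_add_epi16: lanes with a clear mask bit
/// contribute the identity element 0 before the (wrapping) sum is taken.
fn mask_reduce_add_epi16(k: u16, a: &[i16; 16]) -> i16 {
    a.iter()
        .enumerate()
        .map(|(i, &x)| if (k >> i) & 1 == 1 { x } else { 0 })
        .fold(0i16, |acc, x| acc.wrapping_add(x))
}
```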
@@ -5679,8 +5592,7 @@ pub unsafe fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpmaddwd))] pub unsafe fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let madd = _mm256_madd_epi16(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, madd, zero)) + transmute(simd_select_bitmask(k, madd, i32x8::ZERO)) } /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5704,8 +5616,7 @@ pub unsafe fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vpmaddwd))] pub unsafe fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let madd = _mm_madd_epi16(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, madd, zero)) + transmute(simd_select_bitmask(k, madd, i32x4::ZERO)) } /// Vertically multiply each unsigned 8-bit integer from a with the corresponding signed 8-bit integer from b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst. @@ -5745,8 +5656,7 @@ pub unsafe fn _mm512_mask_maddubs_epi16( #[cfg_attr(test, assert_instr(vpmaddubsw))] pub unsafe fn _mm512_maskz_maddubs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let madd = _mm512_maddubs_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, madd, zero)) + transmute(simd_select_bitmask(k, madd, i16x32::ZERO)) } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5775,8 +5685,7 @@ pub unsafe fn _mm256_mask_maddubs_epi16( #[cfg_attr(test, assert_instr(vpmaddubsw))] pub unsafe fn _mm256_maskz_maddubs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let madd = _mm256_maddubs_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, madd, zero)) + transmute(simd_select_bitmask(k, madd, i16x16::ZERO)) } /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5800,8 +5709,7 @@ pub unsafe fn _mm_mask_maddubs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: _ #[cfg_attr(test, assert_instr(vpmaddubsw))] pub unsafe fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let madd = _mm_maddubs_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, madd, zero)) + transmute(simd_select_bitmask(k, madd, i16x8::ZERO)) } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst. 
@@ -5841,8 +5749,7 @@ pub unsafe fn _mm512_mask_packs_epi32( #[cfg_attr(test, assert_instr(vpackssdw))] pub unsafe fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let pack = _mm512_packs_epi32(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5871,8 +5778,7 @@ pub unsafe fn _mm256_mask_packs_epi32( #[cfg_attr(test, assert_instr(vpackssdw))] pub unsafe fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let pack = _mm256_packs_epi32(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5896,8 +5802,7 @@ pub unsafe fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m #[cfg_attr(test, assert_instr(vpackssdw))] pub unsafe fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let pack = _mm_packs_epi32(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst. @@ -5937,8 +5842,7 @@ pub unsafe fn _mm512_mask_packs_epi16( #[cfg_attr(test, assert_instr(vpacksswb))] pub unsafe fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let pack = _mm512_packs_epi16(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -5967,8 +5871,7 @@ pub unsafe fn _mm256_mask_packs_epi16( #[cfg_attr(test, assert_instr(vpacksswb))] pub unsafe fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let pack = _mm256_packs_epi16(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -5992,8 +5895,7 @@ pub unsafe fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __ #[cfg_attr(test, assert_instr(vpacksswb))] pub unsafe fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let pack = _mm_packs_epi16(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst. @@ -6033,8 +5935,7 @@ pub unsafe fn _mm512_mask_packus_epi32( #[cfg_attr(test, assert_instr(vpackusdw))] pub unsafe fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let pack = _mm512_packus_epi32(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6063,8 +5964,7 @@ pub unsafe fn _mm256_mask_packus_epi32( #[cfg_attr(test, assert_instr(vpackusdw))] pub unsafe fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let pack = _mm256_packus_epi32(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6088,8 +5988,7 @@ pub unsafe fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[cfg_attr(test, assert_instr(vpackusdw))] pub unsafe fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let pack = _mm_packus_epi32(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst. @@ -6129,8 +6028,7 @@ pub unsafe fn _mm512_mask_packus_epi16( #[cfg_attr(test, assert_instr(vpackuswb))] pub unsafe fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let pack = _mm512_packus_epi16(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
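A brief aside on the pack family touched here: `packs_*` narrows with signed saturation and `packus_*` with unsigned saturation; per lane that is just a clamp before the cast. A scalar sketch of the 32-bit-to-16-bit case:

```rust
/// Signed saturation used by packs_epi32: clamp the i32 into i16 range.
fn packs_i32_to_i16(x: i32) -> i16 {
    x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
}

/// Unsigned saturation used by packus_epi32: clamp the i32 into u16 range.
fn packus_i32_to_u16(x: i32) -> u16 {
    x.clamp(0, u16::MAX as i32) as u16
}
```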
@@ -6159,8 +6057,7 @@ pub unsafe fn _mm256_mask_packus_epi16( #[cfg_attr(test, assert_instr(vpackuswb))] pub unsafe fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let pack = _mm256_packus_epi16(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6184,8 +6081,7 @@ pub unsafe fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: _ #[cfg_attr(test, assert_instr(vpackuswb))] pub unsafe fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let pack = _mm_packus_epi16(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, pack, zero)) + transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst. @@ -6223,8 +6119,7 @@ pub unsafe fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpavgw))] pub unsafe fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let avg = _mm512_avg_epu16(a, b).as_u16x32(); - let zero = _mm512_setzero_si512().as_u16x32(); - transmute(simd_select_bitmask(k, avg, zero)) + transmute(simd_select_bitmask(k, avg, u16x32::ZERO)) } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6248,8 +6143,7 @@ pub unsafe fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vpavgw))] pub unsafe fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let avg = _mm256_avg_epu16(a, b).as_u16x16(); - let zero = _mm256_setzero_si256().as_u16x16(); - transmute(simd_select_bitmask(k, avg, zero)) + transmute(simd_select_bitmask(k, avg, u16x16::ZERO)) } /// Average packed unsigned 16-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6273,8 +6167,7 @@ pub unsafe fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpavgw))] pub unsafe fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let avg = _mm_avg_epu16(a, b).as_u16x8(); - let zero = _mm_setzero_si128().as_u16x8(); - transmute(simd_select_bitmask(k, avg, zero)) + transmute(simd_select_bitmask(k, avg, u16x8::ZERO)) } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst. @@ -6312,8 +6205,7 @@ pub unsafe fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpavgb))] pub unsafe fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let avg = _mm512_avg_epu8(a, b).as_u8x64(); - let zero = _mm512_setzero_si512().as_u8x64(); - transmute(simd_select_bitmask(k, avg, zero)) + transmute(simd_select_bitmask(k, avg, u8x64::ZERO)) } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
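On the `_mm*_avg_epu*` hunks here and just below: the instruction computes a rounding average, which per lane is `(a + b + 1) >> 1` evaluated in a wider type so the addition cannot overflow. A scalar sketch:

```rust
/// Per-lane model of vpavgw: rounding average of two u16 values.
fn avg_epu16_lane(a: u16, b: u16) -> u16 {
    ((a as u32 + b as u32 + 1) >> 1) as u16
}
```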
@@ -6337,8 +6229,7 @@ pub unsafe fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpavgb))] pub unsafe fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let avg = _mm256_avg_epu8(a, b).as_u8x32(); - let zero = _mm256_setzero_si256().as_u8x32(); - transmute(simd_select_bitmask(k, avg, zero)) + transmute(simd_select_bitmask(k, avg, u8x32::ZERO)) } /// Average packed unsigned 8-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6362,8 +6253,7 @@ pub unsafe fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpavgb))] pub unsafe fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let avg = _mm_avg_epu8(a, b).as_u8x16(); - let zero = _mm_setzero_si128().as_u8x16(); - transmute(simd_select_bitmask(k, avg, zero)) + transmute(simd_select_bitmask(k, avg, u8x16::ZERO)) } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst. @@ -6403,8 +6293,7 @@ pub unsafe fn _mm512_mask_sll_epi16( #[cfg_attr(test, assert_instr(vpsllw))] pub unsafe fn _mm512_maskz_sll_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi16(a, count).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6433,8 +6322,7 @@ pub unsafe fn _mm256_mask_sll_epi16( #[cfg_attr(test, assert_instr(vpsllw))] pub unsafe fn _mm256_maskz_sll_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_sll_epi16(a, count).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Shift packed 16-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6458,8 +6346,7 @@ pub unsafe fn _mm_mask_sll_epi16(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsllw))] pub unsafe fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sll_epi16(a, count).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Shift packed 16-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
@@ -6494,7 +6381,7 @@ pub unsafe fn _mm512_mask_slli_epi16( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 16 { - u16x32::splat(0) + u16x32::ZERO } else { simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)) }; @@ -6515,8 +6402,7 @@ pub unsafe fn _mm512_maskz_slli_epi16(k: __mmask32, a: __m512i) _mm512_setzero_si512() } else { let shf = simd_shl(a.as_u16x32(), u16x32::splat(IMM8 as u16)); - let zero = u16x32::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u16x32::ZERO)) } } @@ -6535,7 +6421,7 @@ pub unsafe fn _mm256_mask_slli_epi16( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 16 { - u16x16::splat(0) + u16x16::ZERO } else { simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)) }; @@ -6556,8 +6442,7 @@ pub unsafe fn _mm256_maskz_slli_epi16(k: __mmask16, a: __m256i) _mm256_setzero_si256() } else { let shf = simd_shl(a.as_u16x16(), u16x16::splat(IMM8 as u16)); - let zero = u16x16::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u16x16::ZERO)) } } @@ -6576,7 +6461,7 @@ pub unsafe fn _mm_mask_slli_epi16( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 16 { - u16x8::splat(0) + u16x8::ZERO } else { simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)) }; @@ -6597,8 +6482,7 @@ pub unsafe fn _mm_maskz_slli_epi16(k: __mmask8, a: __m128i) -> _mm_setzero_si128() } else { let shf = simd_shl(a.as_u16x8(), u16x8::splat(IMM8 as u16)); - let zero = u16x8::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u16x8::ZERO)) } } @@ -6639,8 +6523,7 @@ pub unsafe fn _mm512_mask_sllv_epi16( #[cfg_attr(test, assert_instr(vpsllvw))] pub unsafe fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi16(a, count).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6680,8 +6563,7 @@ pub unsafe fn _mm256_mask_sllv_epi16( #[cfg_attr(test, assert_instr(vpsllvw))] pub unsafe fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_sllv_epi16(a, count).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -6721,8 +6603,7 @@ pub unsafe fn _mm_mask_sllv_epi16( #[cfg_attr(test, assert_instr(vpsllvw))] pub unsafe fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sllv_epi16(a, count).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst. 
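The immediate-shift hunks above special-case `IMM8 >= 16` because shifting a 16-bit lane by its full width (or more) always yields zero; the branch simply returns the all-zero vector, now via `ZERO` where a vector value is needed. A per-lane sketch (a plain `<<` by 16 on a `u16` would panic in debug builds, so the scalar model needs the same guard):

```rust
/// Per-lane model of _mm*_slli_epi16: shift counts of 16 or more produce 0.
fn slli_epi16_lane(x: u16, imm8: u32) -> u16 {
    if imm8 >= 16 { 0 } else { x << imm8 }
}
```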
@@ -6762,8 +6643,7 @@ pub unsafe fn _mm512_mask_srl_epi16( #[cfg_attr(test, assert_instr(vpsrlw))] pub unsafe fn _mm512_maskz_srl_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi16(a, count).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6792,8 +6672,7 @@ pub unsafe fn _mm256_mask_srl_epi16( #[cfg_attr(test, assert_instr(vpsrlw))] pub unsafe fn _mm256_maskz_srl_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_srl_epi16(a, count).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Shift packed 16-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6817,8 +6696,7 @@ pub unsafe fn _mm_mask_srl_epi16(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsrlw))] pub unsafe fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srl_epi16(a, count).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. @@ -6853,7 +6731,7 @@ pub unsafe fn _mm512_mask_srli_epi16( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 16 { - u16x32::splat(0) + u16x32::ZERO } else { simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)) }; @@ -6875,8 +6753,7 @@ pub unsafe fn _mm512_maskz_srli_epi16(k: __mmask32, a: __m512i) _mm512_setzero_si512() } else { let shf = simd_shr(a.as_u16x32(), u16x32::splat(IMM8 as u16)); - let zero = u16x32::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u16x32::ZERO)) } } @@ -6909,8 +6786,7 @@ pub unsafe fn _mm256_mask_srli_epi16( pub unsafe fn _mm256_maskz_srli_epi16(k: __mmask16, a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_srli_epi16::(a); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf.as_i16x16(), zero)) + transmute(simd_select_bitmask(k, shf.as_i16x16(), i16x16::ZERO)) } /// Shift packed 16-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6942,8 +6818,7 @@ pub unsafe fn _mm_mask_srli_epi16( pub unsafe fn _mm_maskz_srli_epi16(k: __mmask8, a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_srli_epi16::(a); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf.as_i16x8(), zero)) + transmute(simd_select_bitmask(k, shf.as_i16x8(), i16x8::ZERO)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. 
@@ -6983,8 +6858,7 @@ pub unsafe fn _mm512_mask_srlv_epi16( #[cfg_attr(test, assert_instr(vpsrlvw))] pub unsafe fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi16(a, count).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -7024,8 +6898,7 @@ pub unsafe fn _mm256_mask_srlv_epi16( #[cfg_attr(test, assert_instr(vpsrlvw))] pub unsafe fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_srlv_epi16(a, count).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -7065,8 +6938,7 @@ pub unsafe fn _mm_mask_srlv_epi16( #[cfg_attr(test, assert_instr(vpsrlvw))] pub unsafe fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srlv_epi16(a, count).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -7106,8 +6978,7 @@ pub unsafe fn _mm512_mask_sra_epi16( #[cfg_attr(test, assert_instr(vpsraw))] pub unsafe fn _mm512_maskz_sra_epi16(k: __mmask32, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi16(a, count).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7136,8 +7007,7 @@ pub unsafe fn _mm256_mask_sra_epi16( #[cfg_attr(test, assert_instr(vpsraw))] pub unsafe fn _mm256_maskz_sra_epi16(k: __mmask16, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_sra_epi16(a, count).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Shift packed 16-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7161,8 +7031,7 @@ pub unsafe fn _mm_mask_sra_epi16(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsraw))] pub unsafe fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sra_epi16(a, count).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. 
@@ -7207,8 +7076,7 @@ pub unsafe fn _mm512_mask_srai_epi16( pub unsafe fn _mm512_maskz_srai_epi16(k: __mmask32, a: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)); - let zero = i16x32::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7240,8 +7108,7 @@ pub unsafe fn _mm256_mask_srai_epi16( pub unsafe fn _mm256_maskz_srai_epi16(k: __mmask16, a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16)); - let zero = i16x16::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i16x16::ZERO)) } /// Shift packed 16-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7273,8 +7140,7 @@ pub unsafe fn _mm_mask_srai_epi16( pub unsafe fn _mm_maskz_srai_epi16(k: __mmask8, a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16)); - let zero = i16x8::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i16x8::ZERO)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -7314,8 +7180,7 @@ pub unsafe fn _mm512_mask_srav_epi16( #[cfg_attr(test, assert_instr(vpsravw))] pub unsafe fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi16(a, count).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -7355,8 +7220,7 @@ pub unsafe fn _mm256_mask_srav_epi16( #[cfg_attr(test, assert_instr(vpsravw))] pub unsafe fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_srav_epi16(a, count).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -7396,8 +7260,7 @@ pub unsafe fn _mm_mask_srav_epi16( #[cfg_attr(test, assert_instr(vpsravw))] pub unsafe fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srav_epi16(a, count).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. 
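For the arithmetic-shift variants above, the count is clamped with IMM8.min(15) before the vector shift, so oversized counts saturate to a full sign fill instead of hitting the out-of-range zero case of the logical shifts. A hedged scalar sketch of that clamp (the function name is illustrative):

// Scalar model of the `IMM8.min(15)` clamp used by the srai intrinsics:
// shifting an i16 arithmetically right by 15 leaves only the sign bit,
// replicated across the lane.
fn srai_epi16_model(a: i16, imm8: u32) -> i16 {
    a >> imm8.min(15)
}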
@@ -7442,8 +7305,7 @@ pub unsafe fn _mm512_maskz_permutex2var_epi16( b: __m512i, ) -> __m512i { let permute = _mm512_permutex2var_epi16(a, idx, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -7505,8 +7367,7 @@ pub unsafe fn _mm256_maskz_permutex2var_epi16( b: __m256i, ) -> __m256i { let permute = _mm256_permutex2var_epi16(a, idx, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -7568,8 +7429,7 @@ pub unsafe fn _mm_maskz_permutex2var_epi16( b: __m128i, ) -> __m128i { let permute = _mm_permutex2var_epi16(a, idx, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) } /// Shuffle 16-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -7626,8 +7486,7 @@ pub unsafe fn _mm512_mask_permutexvar_epi16( #[cfg_attr(test, assert_instr(vpermw))] pub unsafe fn _mm512_maskz_permutexvar_epi16(k: __mmask32, idx: __m512i, a: __m512i) -> __m512i { let permute = _mm512_permutexvar_epi16(idx, a).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i16x32::ZERO)) } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -7667,8 +7526,7 @@ pub unsafe fn _mm256_mask_permutexvar_epi16( #[cfg_attr(test, assert_instr(vpermw))] pub unsafe fn _mm256_maskz_permutexvar_epi16(k: __mmask16, idx: __m256i, a: __m256i) -> __m256i { let permute = _mm256_permutexvar_epi16(idx, a).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i16x16::ZERO)) } /// Shuffle 16-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -7708,8 +7566,7 @@ pub unsafe fn _mm_mask_permutexvar_epi16( #[cfg_attr(test, assert_instr(vpermw))] pub unsafe fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m128i { let permute = _mm_permutexvar_epi16(idx, a).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i16x8::ZERO)) } /// Blend packed 16-bit integers from a and b using control mask k, and store the results in dst. 
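The permute hunks reuse the same select-against-ZERO pattern; the permute itself picks each destination word by index. A scalar sketch of the 128-bit vpermw case (an illustration only, assuming the low three index bits select the source lane):

// Scalar model of `_mm_permutexvar_epi16`: destination lane i copies the
// a-lane selected by the low 3 bits of idx lane i.
fn permutexvar_epi16_model(idx: [u16; 8], a: [i16; 8]) -> [i16; 8] {
    core::array::from_fn(|i| a[(idx[i] & 0b111) as usize])
}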
@@ -7819,8 +7676,7 @@ pub unsafe fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128 #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i { let broadcast = _mm512_broadcastw_epi16(a).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO)) } /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7844,8 +7700,7 @@ pub unsafe fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128 #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i { let broadcast = _mm256_broadcastw_epi16(a).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO)) } /// Broadcast the low packed 16-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7869,8 +7724,7 @@ pub unsafe fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i { let broadcast = _mm_broadcastw_epi16(a).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO)) } /// Broadcast the low packed 8-bit integer from a to all elements of dst. @@ -7915,8 +7769,7 @@ pub unsafe fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i #[cfg_attr(test, assert_instr(vpbroadcastb))] pub unsafe fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i { let broadcast = _mm512_broadcastb_epi8(a).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO)) } /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7940,8 +7793,7 @@ pub unsafe fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i #[cfg_attr(test, assert_instr(vpbroadcastb))] pub unsafe fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i { let broadcast = _mm256_broadcastb_epi8(a).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO)) } /// Broadcast the low packed 8-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -7965,8 +7817,7 @@ pub unsafe fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) - #[cfg_attr(test, assert_instr(vpbroadcastb))] pub unsafe fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i { let broadcast = _mm_broadcastb_epi8(a).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO)) } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. 
@@ -8023,8 +7874,7 @@ pub unsafe fn _mm512_mask_unpackhi_epi16( #[cfg_attr(test, assert_instr(vpunpckhwd))] pub unsafe fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO)) } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8053,8 +7903,7 @@ pub unsafe fn _mm256_mask_unpackhi_epi16( #[cfg_attr(test, assert_instr(vpunpckhwd))] pub unsafe fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO)) } /// Unpack and interleave 16-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8083,8 +7932,7 @@ pub unsafe fn _mm_mask_unpackhi_epi16( #[cfg_attr(test, assert_instr(vpunpckhwd))] pub unsafe fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO)) } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -8149,8 +7997,7 @@ pub unsafe fn _mm512_mask_unpackhi_epi8( #[cfg_attr(test, assert_instr(vpunpckhbw))] pub unsafe fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO)) } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8179,8 +8026,7 @@ pub unsafe fn _mm256_mask_unpackhi_epi8( #[cfg_attr(test, assert_instr(vpunpckhbw))] pub unsafe fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO)) } /// Unpack and interleave 8-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
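The unpack intrinsics interleave the upper (or lower) halves of each 128-bit lane before the zero-masking select. A per-lane scalar sketch of the 16-bit high unpack, assuming a single 128-bit lane of eight words:

// Per-128-bit-lane model of `punpckhwd`: interleave the four high words
// of a and b.
fn unpackhi_epi16_lane_model(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
    [a[4], b[4], a[5], b[5], a[6], b[6], a[7], b[7]]
}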
@@ -8209,8 +8055,7 @@ pub unsafe fn _mm_mask_unpackhi_epi8( #[cfg_attr(test, assert_instr(vpunpckhbw))] pub unsafe fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO)) } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -8267,8 +8112,7 @@ pub unsafe fn _mm512_mask_unpacklo_epi16( #[cfg_attr(test, assert_instr(vpunpcklwd))] pub unsafe fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO)) } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8297,8 +8141,7 @@ pub unsafe fn _mm256_mask_unpacklo_epi16( #[cfg_attr(test, assert_instr(vpunpcklwd))] pub unsafe fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO)) } /// Unpack and interleave 16-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8327,8 +8170,7 @@ pub unsafe fn _mm_mask_unpacklo_epi16( #[cfg_attr(test, assert_instr(vpunpcklwd))] pub unsafe fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO)) } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -8393,8 +8235,7 @@ pub unsafe fn _mm512_mask_unpacklo_epi8( #[cfg_attr(test, assert_instr(vpunpcklbw))] pub unsafe fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO)) } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8423,8 +8264,7 @@ pub unsafe fn _mm256_mask_unpacklo_epi8( #[cfg_attr(test, assert_instr(vpunpcklbw))] pub unsafe fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO)) } /// Unpack and interleave 8-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8453,8 +8293,7 @@ pub unsafe fn _mm_mask_unpacklo_epi8( #[cfg_attr(test, assert_instr(vpunpcklbw))] pub unsafe fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO)) } /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8478,8 +8317,7 @@ pub unsafe fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> _ #[cfg_attr(test, assert_instr(vmovdqu16))] pub unsafe fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i { let mov = a.as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i16x32::ZERO)) } /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8503,8 +8341,7 @@ pub unsafe fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> _ #[cfg_attr(test, assert_instr(vmovdqu16))] pub unsafe fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i { let mov = a.as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i16x16::ZERO)) } /// Move packed 16-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8528,8 +8365,7 @@ pub unsafe fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vmovdqu16))] pub unsafe fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i { let mov = a.as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i16x8::ZERO)) } /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8553,8 +8389,7 @@ pub unsafe fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __ #[cfg_attr(test, assert_instr(vmovdqu8))] pub unsafe fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i { let mov = a.as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i8x64::ZERO)) } /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
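With ZERO as the not-selected operand, the maskz_mov forms reduce to masking each lane by its k-bit. A minimal scalar sketch (illustrative function name, 128-bit lane count):

// Scalar model of `_mm_maskz_mov_epi8`: keep lane i when bit i of k is set,
// otherwise zero it.
fn maskz_mov_epi8_model(k: u16, a: [i8; 16]) -> [i8; 16] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { a[i] } else { 0 })
}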
@@ -8578,8 +8413,7 @@ pub unsafe fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __ #[cfg_attr(test, assert_instr(vmovdqu8))] pub unsafe fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i { let mov = a.as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i8x32::ZERO)) } /// Move packed 8-bit integers from a into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8603,8 +8437,7 @@ pub unsafe fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vmovdqu8))] pub unsafe fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i { let mov = a.as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i8x16::ZERO)) } /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8628,8 +8461,7 @@ pub unsafe fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m5 #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i { let r = _mm512_set1_epi16(a).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i16x32::ZERO)) } /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8653,8 +8485,7 @@ pub unsafe fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m2 #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i { let r = _mm256_set1_epi16(a).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i16x16::ZERO)) } /// Broadcast 16-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8678,8 +8509,7 @@ pub unsafe fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i #[cfg_attr(test, assert_instr(vpbroadcastw))] pub unsafe fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i { let r = _mm_set1_epi16(a).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i16x8::ZERO)) } /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8703,8 +8533,7 @@ pub unsafe fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512 #[cfg_attr(test, assert_instr(vpbroadcast))] pub unsafe fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i { let r = _mm512_set1_epi8(a).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i8x64::ZERO)) } /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -8728,8 +8557,7 @@ pub unsafe fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256 #[cfg_attr(test, assert_instr(vpbroadcast))] pub unsafe fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i { let r = _mm256_set1_epi8(a).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i8x32::ZERO)) } /// Broadcast 8-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8753,8 +8581,7 @@ pub unsafe fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i { #[cfg_attr(test, assert_instr(vpbroadcast))] pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { let r = _mm_set1_epi8(a).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i8x16::ZERO)) } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst. @@ -8838,8 +8665,7 @@ pub unsafe fn _mm512_mask_shufflelo_epi16( pub unsafe fn _mm512_maskz_shufflelo_epi16(k: __mmask32, a: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let r = _mm512_shufflelo_epi16::(a); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) + transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8871,8 +8697,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16( pub unsafe fn _mm256_maskz_shufflelo_epi16(k: __mmask16, a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shuffle = _mm256_shufflelo_epi16::(a); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) } /// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -8904,8 +8729,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16( pub unsafe fn _mm_maskz_shufflelo_epi16(k: __mmask8, a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shuffle = _mm_shufflelo_epi16::(a); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst. 
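The shufflelo/shufflehi intrinsics reorder only one 64-bit half of each 128-bit lane, driven by two-bit fields of imm8; the other half passes through unchanged. A scalar sketch for the low-half case within a single lane (assumption: field i of imm8 selects the source word for destination word i):

// Per-128-bit-lane model of `pshuflw`: words 0..=3 are reordered by two-bit
// selectors taken from imm8, words 4..=7 are copied unchanged.
fn shufflelo_epi16_lane_model(a: [i16; 8], imm8: u8) -> [i16; 8] {
    let mut r = a;
    for i in 0..4 {
        r[i] = a[((imm8 >> (2 * i)) & 0b11) as usize];
    }
    r
}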
@@ -8989,8 +8813,7 @@ pub unsafe fn _mm512_mask_shufflehi_epi16( pub unsafe fn _mm512_maskz_shufflehi_epi16(k: __mmask32, a: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let r = _mm512_shufflehi_epi16::(a); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) + transmute(simd_select_bitmask(k, r.as_i16x32(), i16x32::ZERO)) } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -9022,8 +8845,7 @@ pub unsafe fn _mm256_mask_shufflehi_epi16( pub unsafe fn _mm256_maskz_shufflehi_epi16(k: __mmask16, a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shuffle = _mm256_shufflehi_epi16::(a); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) + transmute(simd_select_bitmask(k, shuffle.as_i16x16(), i16x16::ZERO)) } /// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -9055,8 +8877,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16( pub unsafe fn _mm_maskz_shufflehi_epi16(k: __mmask8, a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shuffle = _mm_shufflehi_epi16::(a); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) + transmute(simd_select_bitmask(k, shuffle.as_i16x8(), i16x8::ZERO)) } /// Shuffle packed 8-bit integers in a according to shuffle control mask in the corresponding 8-bit element of b, and store the results in dst. @@ -9096,8 +8917,7 @@ pub unsafe fn _mm512_mask_shuffle_epi8( #[cfg_attr(test, assert_instr(vpshufb))] pub unsafe fn _mm512_maskz_shuffle_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let shuffle = _mm512_shuffle_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, shuffle, zero)) + transmute(simd_select_bitmask(k, shuffle, i8x64::ZERO)) } /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -9126,8 +8946,7 @@ pub unsafe fn _mm256_mask_shuffle_epi8( #[cfg_attr(test, assert_instr(vpshufb))] pub unsafe fn _mm256_maskz_shuffle_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let shuffle = _mm256_shuffle_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, shuffle, zero)) + transmute(simd_select_bitmask(k, shuffle, i8x32::ZERO)) } /// Shuffle 8-bit integers in a within 128-bit lanes using the control in the corresponding 8-bit element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
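vpshufb works byte-wise within each 128-bit lane: a selector byte with its top bit set forces zero, otherwise its low four bits pick the source byte; the maskz forms then apply the usual zero-select on top. A per-lane scalar sketch (illustrative only):

// Per-128-bit-lane model of `pshufb`: selector byte b[i] either zeroes the
// output (bit 7 set) or indexes into a with its low four bits.
fn shuffle_epi8_lane_model(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
    core::array::from_fn(|i| {
        if b[i] & 0x80 != 0 { 0 } else { a[(b[i] & 0x0f) as usize] }
    })
}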
@@ -9151,8 +8970,7 @@ pub unsafe fn _mm_mask_shuffle_epi8(src: __m128i, k: __mmask16, a: __m128i, b: _ #[cfg_attr(test, assert_instr(vpshufb))] pub unsafe fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let shuffle = _mm_shuffle_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, shuffle, zero)) + transmute(simd_select_bitmask(k, shuffle, i8x16::ZERO)) } /// Compute the bitwise AND of packed 16-bit integers in a and b, producing intermediate 16-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero. @@ -9576,11 +9394,7 @@ pub unsafe fn _mm512_maskz_dbsad_epu8( let a = a.as_u8x64(); let b = b.as_u8x64(); let r = vdbpsadbw(a, b, IMM8); - transmute(simd_select_bitmask( - k, - r, - _mm512_setzero_si512().as_u16x32(), - )) + transmute(simd_select_bitmask(k, r, u16x32::ZERO)) } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9637,11 +9451,7 @@ pub unsafe fn _mm256_maskz_dbsad_epu8( let a = a.as_u8x32(); let b = b.as_u8x32(); let r = vdbpsadbw256(a, b, IMM8); - transmute(simd_select_bitmask( - k, - r, - _mm256_setzero_si256().as_u16x16(), - )) + transmute(simd_select_bitmask(k, r, u16x16::ZERO)) } /// Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in a compared to those in b, and store the 16-bit results in dst. Four SADs are performed on four 8-bit quadruplets for each 64-bit lane. The first two SADs use the lower 8-bit quadruplet of the lane from a, and the last two SADs use the uppper 8-bit quadruplet of the lane from a. Quadruplets from b are selected from within 128-bit lanes according to the control in imm8, and each SAD in each 64-bit lane uses the selected quadruplet at 8-bit offsets. @@ -9698,7 +9508,7 @@ pub unsafe fn _mm_maskz_dbsad_epu8( let a = a.as_u8x16(); let b = b.as_u8x16(); let r = vdbpsadbw128(a, b, IMM8); - transmute(simd_select_bitmask(k, r, _mm_setzero_si128().as_u16x8())) + transmute(simd_select_bitmask(k, r, u16x8::ZERO)) } /// Set each bit of mask register k based on the most significant bit of the corresponding packed 16-bit integer in a. @@ -9808,8 +9618,7 @@ pub unsafe fn _mm512_movm_epi16(k: __mmask32) -> __m512i { | 1 << 0, ) .as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, one, zero)) + transmute(simd_select_bitmask(k, one, i16x32::ZERO)) } /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9839,8 +9648,7 @@ pub unsafe fn _mm256_movm_epi16(k: __mmask16) -> __m256i { | 1 << 0, ) .as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, one, zero)) + transmute(simd_select_bitmask(k, one, i16x16::ZERO)) } /// Set each packed 16-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. 
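In the movm hunks the _mm*_set1_epi16(1 << 15 | … | 1 << 0) call merely materialises an all-ones lane, so selecting it against ZERO yields all ones or all zeros per bit of k. A scalar sketch of the 128-bit case:

// Scalar model of `_mm_movm_epi16`: each destination word becomes all ones
// (-1) or all zeros depending on the corresponding bit of k.
fn movm_epi16_model(k: u8) -> [i16; 8] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { -1 } else { 0 })
}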
@@ -9870,8 +9678,7 @@ pub unsafe fn _mm_movm_epi16(k: __mmask8) -> __m128i { | 1 << 0, ) .as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, one, zero)) + transmute(simd_select_bitmask(k, one, i16x8::ZERO)) } /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9885,8 +9692,7 @@ pub unsafe fn _mm512_movm_epi8(k: __mmask64) -> __m512i { let one = _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) .as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, one, zero)) + transmute(simd_select_bitmask(k, one, i8x64::ZERO)) } /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9900,8 +9706,7 @@ pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i { let one = _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) .as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, one, zero)) + transmute(simd_select_bitmask(k, one, i8x32::ZERO)) } /// Set each packed 8-bit integer in dst to all ones or all zeros based on the value of the corresponding bit in k. @@ -9914,8 +9719,7 @@ pub unsafe fn _mm256_movm_epi8(k: __mmask32) -> __m256i { pub unsafe fn _mm_movm_epi8(k: __mmask16) -> __m128i { let one = _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0) .as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, one, zero)) + transmute(simd_select_bitmask(k, one, i8x16::ZERO)) } /// Convert 32-bit mask a into an integer value, and store the result in dst. @@ -10317,11 +10121,7 @@ pub unsafe fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) #[cfg_attr(test, assert_instr(vpmovwb))] pub unsafe fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { let convert = _mm512_cvtepi16_epi8(a).as_i8x32(); - transmute(simd_select_bitmask( - k, - convert, - _mm256_setzero_si256().as_i8x32(), - )) + transmute(simd_select_bitmask(k, convert, i8x32::ZERO)) } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -10357,11 +10157,7 @@ pub unsafe fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) #[cfg_attr(test, assert_instr(vpmovwb))] pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { let convert = _mm256_cvtepi16_epi8(a).as_i8x16(); - transmute(simd_select_bitmask( - k, - convert, - _mm_setzero_si128().as_i8x16(), - )) + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) } /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. 
@@ -10373,8 +10169,11 @@ pub unsafe fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { #[cfg_attr(test, assert_instr(vpmovwb))] pub unsafe fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i { let a = a.as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - let v256: i16x16 = simd_shuffle!(a, zero, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8]); + let v256: i16x16 = simd_shuffle!( + a, + i16x8::ZERO, + [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8] + ); transmute::(simd_cast(v256)) } @@ -10401,8 +10200,7 @@ pub unsafe fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> _ pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi16_epi8(a).as_i8x16(); let k: __mmask16 = 0b11111111_11111111 & k as __mmask16; - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -10415,7 +10213,7 @@ pub unsafe fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { pub unsafe fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i { transmute(vpmovswb( a.as_i16x32(), - _mm256_setzero_si256().as_i8x32(), + i8x32::ZERO, 0b11111111_11111111_11111111_11111111, )) } @@ -10439,11 +10237,7 @@ pub unsafe fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { - transmute(vpmovswb( - a.as_i16x32(), - _mm256_setzero_si256().as_i8x32(), - k, - )) + transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -10454,11 +10248,7 @@ pub unsafe fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] pub unsafe fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i { - transmute(vpmovswb256( - a.as_i16x16(), - _mm_setzero_si128().as_i8x16(), - 0b11111111_11111111, - )) + transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10480,11 +10270,7 @@ pub unsafe fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { - transmute(vpmovswb256( - a.as_i16x16(), - _mm_setzero_si128().as_i8x16(), - k, - )) + transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. 
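Unlike the plain truncating conversion above it, vpmovswb narrows with signed saturation: out-of-range words clamp to the i8 limits. A per-element scalar sketch (illustrative function name):

// Scalar model of the signed-saturating narrowing done by vpmovswb.
fn cvtsepi16_epi8_model(a: i16) -> i8 {
    a.clamp(i8::MIN as i16, i8::MAX as i16) as i8
}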
@@ -10495,11 +10281,7 @@ pub unsafe fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] pub unsafe fn _mm_cvtsepi16_epi8(a: __m128i) -> __m128i { - transmute(vpmovswb128( - a.as_i16x8(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, 0b11111111)) } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10521,7 +10303,7 @@ pub unsafe fn _mm_mask_cvtsepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovswb))] pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovswb128(a.as_i16x8(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovswb128(a.as_i16x8(), i8x16::ZERO, k)) } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -10534,7 +10316,7 @@ pub unsafe fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { pub unsafe fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i { transmute(vpmovuswb( a.as_u16x32(), - _mm256_setzero_si256().as_u8x32(), + u8x32::ZERO, 0b11111111_11111111_11111111_11111111, )) } @@ -10558,11 +10340,7 @@ pub unsafe fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i { - transmute(vpmovuswb( - a.as_u16x32(), - _mm256_setzero_si256().as_u8x32(), - k, - )) + transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -10575,7 +10353,7 @@ pub unsafe fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i pub unsafe fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i { transmute(vpmovuswb256( a.as_u16x16(), - _mm_setzero_si128().as_u8x16(), + u8x16::ZERO, 0b11111111_11111111, )) } @@ -10599,11 +10377,7 @@ pub unsafe fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i { - transmute(vpmovuswb256( - a.as_u16x16(), - _mm_setzero_si128().as_u8x16(), - k, - )) + transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. 
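The unsigned counterpart, vpmovuswb, clamps to the u8 range instead. A matching per-element sketch:

// Scalar model of the unsigned-saturating narrowing done by vpmovuswb.
fn cvtusepi16_epi8_model(a: u16) -> u8 {
    a.min(u8::MAX as u16) as u8
}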
@@ -10614,11 +10388,7 @@ pub unsafe fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] pub unsafe fn _mm_cvtusepi16_epi8(a: __m128i) -> __m128i { - transmute(vpmovuswb128( - a.as_u16x8(), - _mm_setzero_si128().as_u8x16(), - 0b11111111, - )) + transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, 0b11111111)) } /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10640,11 +10410,7 @@ pub unsafe fn _mm_mask_cvtusepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovuswb))] pub unsafe fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovuswb128( - a.as_u16x8(), - _mm_setzero_si128().as_u8x16(), - k, - )) + transmute(vpmovuswb128(a.as_u16x8(), u8x16::ZERO, k)) } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst. @@ -10680,11 +10446,7 @@ pub unsafe fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) #[cfg_attr(test, assert_instr(vpmovsxbw))] pub unsafe fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i { let convert = _mm512_cvtepi8_epi16(a).as_i16x32(); - transmute(simd_select_bitmask( - k, - convert, - _mm512_setzero_si512().as_i16x32(), - )) + transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10708,11 +10470,7 @@ pub unsafe fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) #[cfg_attr(test, assert_instr(vpmovsxbw))] pub unsafe fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i { let convert = _mm256_cvtepi8_epi16(a).as_i16x16(); - transmute(simd_select_bitmask( - k, - convert, - _mm256_setzero_si256().as_i16x16(), - )) + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) } /// Sign extend packed 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10736,11 +10494,7 @@ pub unsafe fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> _ #[cfg_attr(test, assert_instr(vpmovsxbw))] pub unsafe fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi8_epi16(a).as_i16x8(); - transmute(simd_select_bitmask( - k, - convert, - _mm_setzero_si128().as_i16x8(), - )) + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst. 
@@ -10776,11 +10530,7 @@ pub unsafe fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) #[cfg_attr(test, assert_instr(vpmovzxbw))] pub unsafe fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i { let convert = _mm512_cvtepu8_epi16(a).as_i16x32(); - transmute(simd_select_bitmask( - k, - convert, - _mm512_setzero_si512().as_i16x32(), - )) + transmute(simd_select_bitmask(k, convert, i16x32::ZERO)) } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10804,11 +10554,7 @@ pub unsafe fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) #[cfg_attr(test, assert_instr(vpmovzxbw))] pub unsafe fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i { let convert = _mm256_cvtepu8_epi16(a).as_i16x16(); - transmute(simd_select_bitmask( - k, - convert, - _mm256_setzero_si256().as_i16x16(), - )) + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) } /// Zero extend packed unsigned 8-bit integers in a to packed 16-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10832,11 +10578,7 @@ pub unsafe fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> _ #[cfg_attr(test, assert_instr(vpmovzxbw))] pub unsafe fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepu8_epi16(a).as_i16x8(); - transmute(simd_select_bitmask( - k, - convert, - _mm_setzero_si128().as_i16x8(), - )) + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) } /// Shift 128-bit lanes in a left by imm8 bytes while shifting in zeros, and store the results in dst. @@ -10858,7 +10600,7 @@ pub unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { } } let a = a.as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); + let zero = i8x64::ZERO; let r: i8x64 = simd_shuffle!( zero, a, @@ -10943,7 +10685,7 @@ pub unsafe fn _mm512_bslli_epi128(a: __m512i) -> __m512i { pub unsafe fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let a = a.as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); + let zero = i8x64::ZERO; let r: i8x64 = match IMM8 % 16 { 0 => simd_shuffle!( a, @@ -11119,12 +10861,12 @@ pub unsafe fn _mm512_alignr_epi8(a: __m512i, b: __m512i) -> __m // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. if IMM8 > 32 { - return _mm512_set1_epi8(0); + return _mm512_setzero_si512(); } // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. let (a, b) = if IMM8 > 16 { - (_mm512_set1_epi8(0), a) + (_mm512_setzero_si512(), a) } else { (a, b) }; @@ -11328,8 +11070,7 @@ pub unsafe fn _mm512_maskz_alignr_epi8( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let r = _mm512_alignr_epi8::(a, b); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, r.as_i8x64(), zero)) + transmute(simd_select_bitmask(k, r.as_i8x64(), i8x64::ZERO)) } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
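The alignr rewrite above (returning _mm512_setzero_si512() and swapping in a zero operand rather than _mm512_set1_epi8(0)) follows from palignr's semantics: per 128-bit lane, b and a are concatenated and a 16-byte window is taken at byte offset IMM8, with zero bytes shifted in from above. A per-lane scalar sketch, illustrative only:

// Per-128-bit-lane model of palignr: b supplies the low 16 bytes, a the
// high 16; window positions past the concatenation read as zero, so
// IMM8 > 32 gives an all-zero result and 16 < IMM8 <= 32 reads only from
// a and zeros.
fn alignr_lane_model(a: [u8; 16], b: [u8; 16], imm8: usize) -> [u8; 16] {
    let mut concat = [0u8; 32];
    concat[..16].copy_from_slice(&b);
    concat[16..].copy_from_slice(&a);
    core::array::from_fn(|i| concat.get(imm8 + i).copied().unwrap_or(0))
}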
@@ -11366,11 +11107,7 @@ pub unsafe fn _mm256_maskz_alignr_epi8( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = _mm256_alignr_epi8::(a, b); - transmute(simd_select_bitmask( - k, - r.as_i8x32(), - _mm256_setzero_si256().as_i8x32(), - )) + transmute(simd_select_bitmask(k, r.as_i8x32(), i8x32::ZERO)) } /// Concatenate pairs of 16-byte blocks in a and b into a 32-byte temporary result, shift the result right by imm8 bytes, and store the low 16 bytes in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11407,8 +11144,7 @@ pub unsafe fn _mm_maskz_alignr_epi8( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = _mm_alignr_epi8::(a, b); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, r.as_i8x16(), zero)) + transmute(simd_select_bitmask(k, r.as_i8x16(), i8x16::ZERO)) } /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. diff --git a/crates/core_arch/src/x86/avx512cd.rs b/crates/core_arch/src/x86/avx512cd.rs index 648d134040..f8e3a37307 100644 --- a/crates/core_arch/src/x86/avx512cd.rs +++ b/crates/core_arch/src/x86/avx512cd.rs @@ -102,8 +102,7 @@ pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) #[cfg_attr(test, assert_instr(vpconflictd))] pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i { let conflict = _mm512_conflict_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, conflict, zero)) + transmute(simd_select_bitmask(k, conflict, i32x16::ZERO)) } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -138,8 +137,7 @@ pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) #[cfg_attr(test, assert_instr(vpconflictd))] pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i { let conflict = _mm256_conflict_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, conflict, zero)) + transmute(simd_select_bitmask(k, conflict, i32x8::ZERO)) } /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -174,8 +172,7 @@ pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpconflictd))] pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i { let conflict = _mm_conflict_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, conflict, zero)) + transmute(simd_select_bitmask(k, conflict, i32x4::ZERO)) } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. 
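The conflict-detection hunks keep the same zero-masking shape; vpconflictd itself compares each lane against all lower-indexed lanes and packs the equality bits, as the doc comment above describes. A scalar sketch over eight 32-bit lanes (names illustrative):

// Scalar model of `vpconflictd`: bit j of result lane i is set when lane j
// (for j < i) holds the same value as lane i.
fn conflict_epi32_model(a: [u32; 8]) -> [u32; 8] {
    core::array::from_fn(|i| {
        (0..i).fold(0u32, |acc, j| acc | ((a[j] == a[i]) as u32) << j)
    })
}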
@@ -210,8 +207,7 @@ pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) #[cfg_attr(test, assert_instr(vpconflictq))] pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i { let conflict = _mm512_conflict_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, conflict, zero)) + transmute(simd_select_bitmask(k, conflict, i64x8::ZERO)) } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -246,8 +242,7 @@ pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) #[cfg_attr(test, assert_instr(vpconflictq))] pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i { let conflict = _mm256_conflict_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, conflict, zero)) + transmute(simd_select_bitmask(k, conflict, i64x4::ZERO)) } /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. @@ -282,8 +277,7 @@ pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpconflictq))] pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i { let conflict = _mm_conflict_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, conflict, zero)) + transmute(simd_select_bitmask(k, conflict, i64x2::ZERO)) } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. @@ -318,8 +312,7 @@ pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> #[cfg_attr(test, assert_instr(vplzcntd))] pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, zerocount, zero)) + transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO)) } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. @@ -354,8 +347,7 @@ pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> #[cfg_attr(test, assert_instr(vplzcntd))] pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, zerocount, zero)) + transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO)) } /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. @@ -390,8 +382,7 @@ pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m #[cfg_attr(test, assert_instr(vplzcntd))] pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, zerocount, zero)) + transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO)) } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. 
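vplzcntd/vplzcntq count leading zero bits per lane (an all-zero lane reports the full lane width), and the maskz forms then zero the unselected lanes. A scalar sketch of the masked 32-bit variant (illustrative only):

// Scalar model of `_mm256_maskz_lzcnt_epi32`: leading-zero count per lane,
// zeroed where the mask bit is clear (a zero lane counts as 32).
fn maskz_lzcnt_epi32_model(k: u8, a: [u32; 8]) -> [u32; 8] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { a[i].leading_zeros() } else { 0 })
}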
@@ -426,8 +417,7 @@ pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> #[cfg_attr(test, assert_instr(vplzcntq))] pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, zerocount, zero)) + transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO)) } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. @@ -462,8 +452,7 @@ pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> #[cfg_attr(test, assert_instr(vplzcntq))] pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, zerocount, zero)) + transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO)) } /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. @@ -498,8 +487,7 @@ pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m #[cfg_attr(test, assert_instr(vplzcntq))] pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, zerocount, zero)) + transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO)) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512dq.rs b/crates/core_arch/src/x86/avx512dq.rs index 3fe03b78ac..757231279e 100644 --- a/crates/core_arch/src/x86/avx512dq.rs +++ b/crates/core_arch/src/x86/avx512dq.rs @@ -30,8 +30,7 @@ pub unsafe fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let and = _mm_and_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, f64x2::ZERO)) } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b @@ -58,8 +57,7 @@ pub unsafe fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let and = _mm256_and_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, f64x4::ZERO)) } /// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b @@ -98,8 +96,7 @@ pub unsafe fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let and = _mm512_and_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, f64x8::ZERO)) } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -126,8 +123,7 @@ pub unsafe fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: 
__m128) -> __m128 { let and = _mm_and_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, f32x4::ZERO)) } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -154,8 +150,7 @@ pub unsafe fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let and = _mm256_and_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, f32x8::ZERO)) } /// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b @@ -197,8 +192,7 @@ pub unsafe fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let and = _mm512_and_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, f32x16::ZERO)) } // Andnot @@ -228,8 +222,7 @@ pub unsafe fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let andnot = _mm_andnot_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, f64x2::ZERO)) } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -257,8 +250,7 @@ pub unsafe fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let andnot = _mm256_andnot_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, f64x4::ZERO)) } /// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then @@ -298,8 +290,7 @@ pub unsafe fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let andnot = _mm512_andnot_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, f64x8::ZERO)) } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -327,8 +318,7 @@ pub unsafe fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let andnot = _mm_andnot_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, f32x4::ZERO)) } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -356,8 +346,7 @@ pub unsafe fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, 
a: __m256, b: __m2 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let andnot = _mm256_andnot_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, f32x8::ZERO)) } /// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then @@ -397,8 +386,7 @@ pub unsafe fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let andnot = _mm512_andnot_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, f32x16::ZERO)) } // Or @@ -427,8 +415,7 @@ pub unsafe fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let or = _mm_or_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, f64x2::ZERO)) } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b @@ -455,8 +442,7 @@ pub unsafe fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let or = _mm256_or_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, f64x4::ZERO)) } /// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b @@ -495,8 +481,7 @@ pub unsafe fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let or = _mm512_or_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, f64x8::ZERO)) } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b @@ -523,8 +508,7 @@ pub unsafe fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let or = _mm_or_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, f32x4::ZERO)) } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b @@ -551,8 +535,7 @@ pub unsafe fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let or = _mm256_or_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, f32x8::ZERO)) } /// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b @@ -594,8 
+577,7 @@ pub unsafe fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let or = _mm512_or_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, f32x16::ZERO)) } // Xor @@ -624,8 +606,7 @@ pub unsafe fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let xor = _mm_xor_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, f64x2::ZERO)) } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b @@ -652,8 +633,7 @@ pub unsafe fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let xor = _mm256_xor_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, f64x4::ZERO)) } /// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b @@ -692,8 +672,7 @@ pub unsafe fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let xor = _mm512_xor_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, f64x8::ZERO)) } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b @@ -720,8 +699,7 @@ pub unsafe fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let xor = _mm_xor_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, f32x4::ZERO)) } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b @@ -748,8 +726,7 @@ pub unsafe fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let xor = _mm256_xor_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, f32x8::ZERO)) } /// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b @@ -791,8 +768,7 @@ pub unsafe fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let xor = _mm512_xor_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, f32x16::ZERO)) } // Broadcast @@ -832,8 +808,7 @@ 
pub unsafe fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 { let b = _mm256_broadcast_f32x2(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) } /// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all @@ -871,8 +846,7 @@ pub unsafe fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 { let b = _mm512_broadcast_f32x2(a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x16::ZERO)) } /// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all @@ -908,8 +882,7 @@ pub unsafe fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 { let b = _mm512_broadcast_f32x8(a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x16::ZERO)) } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -945,8 +918,7 @@ pub unsafe fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d { let b = _mm256_broadcast_f64x2(a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) } /// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all @@ -982,8 +954,7 @@ pub unsafe fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d { let b = _mm512_broadcast_f64x2(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. @@ -1021,8 +992,7 @@ pub unsafe fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i { let b = _mm_broadcast_i32x2(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i32x4::ZERO)) } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. 
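// Illustrative aside, not part of the patch: a scalar model (plain arrays and
// hypothetical helper names, standing in for the vector types) of the
// zero-masking pattern that every maskz_* hunk above writes as
// `simd_select_bitmask(k, value, T::ZERO)`. Lane i keeps value[i] when bit i
// of the mask is set and becomes 0 otherwise, which is why the fallback
// operand is always the all-zero vector.
fn select_bitmask_model(k: u8, value: [f32; 8], fallback: [f32; 8]) -> [f32; 8] {
    let mut out = [0.0f32; 8];
    for i in 0..8 {
        out[i] = if (k >> i) & 1 == 1 { value[i] } else { fallback[i] };
    }
    out
}

fn maskz_model(k: u8, value: [f32; 8]) -> [f32; 8] {
    // The maskz_* intrinsics pass an all-zero fallback vector.
    select_bitmask_model(k, value, [0.0; 8])
}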
@@ -1060,8 +1030,7 @@ pub unsafe fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i { let b = _mm256_broadcast_i32x2(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i32x8::ZERO)) } /// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst. @@ -1099,8 +1068,7 @@ pub unsafe fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i { let b = _mm512_broadcast_i32x2(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i32x16::ZERO)) } /// Broadcasts the 8 packed 32-bit integers from a to all elements of dst. @@ -1136,8 +1104,7 @@ pub unsafe fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i { let b = _mm512_broadcast_i32x8(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i32x16::ZERO)) } /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. @@ -1173,8 +1140,7 @@ pub unsafe fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i { let b = _mm256_broadcast_i64x2(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x4::ZERO)) } /// Broadcasts the 2 packed 64-bit integers from a to all elements of dst. 
@@ -1210,8 +1176,7 @@ pub unsafe fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i { let b = _mm512_broadcast_i64x2(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x8::ZERO)) } // Extract @@ -1265,8 +1230,7 @@ pub unsafe fn _mm512_mask_extractf32x8_ps( pub unsafe fn _mm512_maskz_extractf32x8_ps(k: __mmask8, a: __m512) -> __m256 { static_assert_uimm_bits!(IMM8, 1); let b = _mm512_extractf32x8_ps::(a); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, b.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO)) } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, @@ -1318,8 +1282,7 @@ pub unsafe fn _mm256_mask_extractf64x2_pd( pub unsafe fn _mm256_maskz_extractf64x2_pd(k: __mmask8, a: __m256d) -> __m128d { static_assert_uimm_bits!(IMM8, 1); let b = _mm256_extractf64x2_pd::(a); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, b.as_f64x2(), zero)) + transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO)) } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a, @@ -1373,8 +1336,7 @@ pub unsafe fn _mm512_mask_extractf64x2_pd( pub unsafe fn _mm512_maskz_extractf64x2_pd(k: __mmask8, a: __m512d) -> __m128d { static_assert_uimm_bits!(IMM8, 2); let b = _mm512_extractf64x2_pd::(a).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) } /// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores @@ -1426,8 +1388,7 @@ pub unsafe fn _mm512_mask_extracti32x8_epi32( pub unsafe fn _mm512_maskz_extracti32x8_epi32(k: __mmask8, a: __m512i) -> __m256i { static_assert_uimm_bits!(IMM8, 1); let b = _mm512_extracti32x8_epi32::(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i32x8::ZERO)) } /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores @@ -1478,8 +1439,7 @@ pub unsafe fn _mm256_mask_extracti64x2_epi64( pub unsafe fn _mm256_maskz_extracti64x2_epi64(k: __mmask8, a: __m256i) -> __m128i { static_assert_uimm_bits!(IMM8, 1); let b = _mm256_extracti64x2_epi64::(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) } /// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores @@ -1532,8 +1492,7 @@ pub unsafe fn _mm512_mask_extracti64x2_epi64( pub unsafe fn _mm512_maskz_extracti64x2_epi64(k: __mmask8, a: __m512i) -> __m128i { static_assert_uimm_bits!(IMM8, 2); let b = _mm512_extracti64x2_epi64::(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) } // Insert @@ -1601,8 +1560,7 @@ pub unsafe fn _mm512_maskz_insertf32x8( ) -> __m512 { static_assert_uimm_bits!(IMM8, 1); let c = _mm512_insertf32x8::(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, c, zero)) + transmute(simd_select_bitmask(k, c, f32x16::ZERO)) } 
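// Illustrative aside, not part of the patch (hypothetical names, plain arrays
// standing in for the vector types): the maskz extract hunks above select one
// half of a wider vector by a const immediate and then apply the same
// zero-masking step. A scalar sketch of that shape:
fn extract_half_model<const IMM1: usize>(a: [i32; 8]) -> [i32; 4] {
    // IMM1 selects the low (0) or high (1) 4-lane half, mirroring the 1-bit
    // immediate checked by static_assert_uimm_bits!(IMM8, 1) in the hunks.
    let base = IMM1 * 4;
    [a[base], a[base + 1], a[base + 2], a[base + 3]]
}

fn maskz_extract_half_model<const IMM1: usize>(k: u8, a: [i32; 8]) -> [i32; 4] {
    let b = extract_half_model::<IMM1>(a);
    let mut out = [0i32; 4];
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            out[i] = b[i];
        }
    }
    out
}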
/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point @@ -1660,8 +1618,7 @@ pub unsafe fn _mm256_maskz_insertf64x2( ) -> __m256d { static_assert_uimm_bits!(IMM8, 1); let c = _mm256_insertf64x2::(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, c, zero)) + transmute(simd_select_bitmask(k, c, f64x4::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point @@ -1721,8 +1678,7 @@ pub unsafe fn _mm512_maskz_insertf64x2( ) -> __m512d { static_assert_uimm_bits!(IMM8, 2); let c = _mm512_insertf64x2::(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, c, zero)) + transmute(simd_select_bitmask(k, c, f64x8::ZERO)) } /// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the @@ -1790,8 +1746,7 @@ pub unsafe fn _mm512_maskz_inserti32x8( ) -> __m512i { static_assert_uimm_bits!(IMM8, 1); let c = _mm512_inserti32x8::(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, c, zero)) + transmute(simd_select_bitmask(k, c, i32x16::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the @@ -1850,8 +1805,7 @@ pub unsafe fn _mm256_maskz_inserti64x2( ) -> __m256i { static_assert_uimm_bits!(IMM8, 1); let c = _mm256_inserti64x2::(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, c, zero)) + transmute(simd_select_bitmask(k, c, i64x4::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the @@ -1912,8 +1866,7 @@ pub unsafe fn _mm512_maskz_inserti64x2( ) -> __m512i { static_assert_uimm_bits!(IMM8, 2); let c = _mm512_inserti64x2::(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, c, zero)) + transmute(simd_select_bitmask(k, c, i64x8::ZERO)) } // Convert @@ -1986,8 +1939,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepi64_pd( ) -> __m512d { static_assert_rounding!(ROUNDING); let b = _mm512_cvt_roundepi64_pd::(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2026,8 +1978,7 @@ pub unsafe fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d { let b = _mm_cvtepi64_pd(a).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2066,8 +2017,7 @@ pub unsafe fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d { let b = _mm256_cvtepi64_pd(a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) } /// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2106,8 +2056,7 @@ 
pub unsafe fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d { let b = _mm512_cvtepi64_pd(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2178,8 +2127,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepi64_ps( ) -> __m256 { static_assert_rounding!(ROUNDING); let b = _mm512_cvt_roundepi64_ps::(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2255,8 +2203,7 @@ pub unsafe fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 { let b = _mm256_cvtepi64_ps(a).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x4::ZERO)) } /// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2295,8 +2242,7 @@ pub unsafe fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 { let b = _mm512_cvtepi64_ps(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2367,8 +2313,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu64_pd( ) -> __m512d { static_assert_rounding!(ROUNDING); let b = _mm512_cvt_roundepu64_pd::(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2407,8 +2352,7 @@ pub unsafe fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d { let b = _mm_cvtepu64_pd(a).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x2::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2447,8 +2391,7 @@ pub unsafe fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d { let b = _mm256_cvtepu64_pd(a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x4::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements, @@ -2487,8 +2430,7 @@ pub unsafe fn _mm512_mask_cvtepu64_pd(src: 
__m512d, k: __mmask8, a: __m512i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d { let b = _mm512_cvtepu64_pd(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f64x8::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2559,8 +2501,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu64_ps( ) -> __m256 { static_assert_rounding!(ROUNDING); let b = _mm512_cvt_roundepu64_ps::(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2636,8 +2577,7 @@ pub unsafe fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 { let b = _mm256_cvtepu64_ps(a).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x4::ZERO)) } /// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements, @@ -2676,8 +2616,7 @@ pub unsafe fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 { let b = _mm512_cvtepu64_ps(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, f32x8::ZERO)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers, @@ -4131,8 +4070,7 @@ pub unsafe fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let b = _mm_mullo_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x2::ZERO)) } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4177,8 +4115,7 @@ pub unsafe fn _mm256_mask_mullo_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let b = _mm256_mullo_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x4::ZERO)) } /// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store @@ -4223,8 +4160,7 @@ pub unsafe fn _mm512_mask_mullo_epi64( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let b = _mm512_mullo_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, b, zero)) + transmute(simd_select_bitmask(k, b, i64x8::ZERO)) } // Mask Registers diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 8a5ba26956..d33bcfbc0c 100644 
--- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -18,8 +18,7 @@ use stdarch_test::assert_instr; #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { let a = a.as_i32x16(); - let zero = i32x16::splat(0); - let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + let r = simd_select::(simd_lt(a, i32x16::ZERO), simd_neg(a), a); transmute(r) } @@ -48,8 +47,7 @@ pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _ #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { let abs = _mm512_abs_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i32x16::ZERO)) } /// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -73,8 +71,7 @@ pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __ #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i { let abs = _mm256_abs_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i32x8::ZERO)) } /// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -98,8 +95,7 @@ pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { let abs = _mm_abs_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i32x4::ZERO)) } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. @@ -111,8 +107,7 @@ pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i { let a = a.as_i64x8(); - let zero = i64x8::splat(0); - let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + let r = simd_select::(simd_lt(a, i64x8::ZERO), simd_neg(a), a); transmute(r) } @@ -137,8 +132,7 @@ pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __ #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { let abs = _mm512_abs_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i64x8::ZERO)) } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. 
@@ -150,8 +144,7 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i { let a = a.as_i64x4(); - let zero = i64x4::splat(0); - let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + let r = simd_select::(simd_lt(a, i64x4::ZERO), simd_neg(a), a); transmute(r) } @@ -176,8 +169,7 @@ pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __ #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i { let abs = _mm256_abs_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i64x4::ZERO)) } /// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. @@ -189,8 +181,7 @@ pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm_abs_epi64(a: __m128i) -> __m128i { let a = a.as_i64x2(); - let zero = i64x2::splat(0); - let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); + let r = simd_select::(simd_lt(a, i64x2::ZERO), simd_neg(a), a); transmute(r) } @@ -215,8 +206,7 @@ pub unsafe fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vpabsq))] pub unsafe fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i { let abs = _mm_abs_epi64(a).as_i64x2(); - let zero = i64x2::splat(0); - transmute(simd_select_bitmask(k, abs, zero)) + transmute(simd_select_bitmask(k, abs, i64x2::ZERO)) } /// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst. @@ -284,8 +274,7 @@ pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> _ #[cfg_attr(test, assert_instr(vmovdqa32))] pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i { let mov = a.as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i32x16::ZERO)) } /// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -309,8 +298,7 @@ pub unsafe fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __ #[cfg_attr(test, assert_instr(vmovdqa32))] pub unsafe fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i { let mov = a.as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i32x8::ZERO)) } /// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -334,8 +322,7 @@ pub unsafe fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vmovdqa32))] pub unsafe fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i { let mov = a.as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i32x4::ZERO)) } /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
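// Illustrative aside, not part of the patch (hypothetical name, plain arrays
// as a stand-in): the _mm*_abs_epi* hunks above compute per-lane absolute
// values as `simd_select(simd_lt(a, ZERO), simd_neg(a), a)`, i.e. a lane is
// negated exactly when it compares less than zero. A scalar model:
fn abs_epi64_model(a: [i64; 2]) -> [i64; 2] {
    let mut out = [0i64; 2];
    for i in 0..2 {
        // wrapping_neg matches the vector negation: i64::MIN stays i64::MIN.
        out[i] = if a[i] < 0 { a[i].wrapping_neg() } else { a[i] };
    }
    out
}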
@@ -359,8 +346,7 @@ pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __ #[cfg_attr(test, assert_instr(vmovdqa64))] pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i { let mov = a.as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i64x8::ZERO)) } /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -384,8 +370,7 @@ pub unsafe fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __ #[cfg_attr(test, assert_instr(vmovdqa64))] pub unsafe fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i { let mov = a.as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i64x4::ZERO)) } /// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -409,8 +394,7 @@ pub unsafe fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m12 #[cfg_attr(test, assert_instr(vmovdqa64))] pub unsafe fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i { let mov = a.as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, i64x2::ZERO)) } /// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -434,8 +418,7 @@ pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 #[cfg_attr(test, assert_instr(vmovaps))] pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 { let mov = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) } /// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -459,8 +442,7 @@ pub unsafe fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 #[cfg_attr(test, assert_instr(vmovaps))] pub unsafe fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 { let mov = a.as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f32x8::ZERO)) } /// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -484,8 +466,7 @@ pub unsafe fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { #[cfg_attr(test, assert_instr(vmovaps))] pub unsafe fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 { let mov = a.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f32x4::ZERO)) } /// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -509,8 +490,7 @@ pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m51 #[cfg_attr(test, assert_instr(vmovapd))] pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d { let mov = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) } /// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -534,8 +514,7 @@ pub unsafe fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m25 #[cfg_attr(test, assert_instr(vmovapd))] pub unsafe fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d { let mov = a.as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f64x4::ZERO)) } /// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -559,8 +538,7 @@ pub unsafe fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d #[cfg_attr(test, assert_instr(vmovapd))] pub unsafe fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d { let mov = a.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f64x2::ZERO)) } /// Add packed 32-bit integers in a and b, and store the results in dst. @@ -595,8 +573,7 @@ pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpaddd))] pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_add_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i32x16::ZERO)) } /// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -620,8 +597,7 @@ pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpaddd))] pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_add_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i32x8::ZERO)) } /// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -645,8 +621,7 @@ pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpaddd))] pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let add = _mm_add_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i32x4::ZERO)) } /// Add packed 64-bit integers in a and b, and store the results in dst. 
@@ -681,8 +656,7 @@ pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpaddq))] pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let add = _mm512_add_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i64x8::ZERO)) } /// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -706,8 +680,7 @@ pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpaddq))] pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let add = _mm256_add_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i64x4::ZERO)) } /// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -731,8 +704,7 @@ pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpaddq))] pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let add = _mm_add_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, i64x2::ZERO)) } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst. @@ -767,8 +739,7 @@ pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[cfg_attr(test, assert_instr(vaddps))] pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let add = _mm512_add_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, f32x16::ZERO)) } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -792,8 +763,7 @@ pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[cfg_attr(test, assert_instr(vaddps))] pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let add = _mm256_add_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, f32x8::ZERO)) } /// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -817,8 +787,7 @@ pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[cfg_attr(test, assert_instr(vaddps))] pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let add = _mm_add_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, f32x4::ZERO)) } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst. 
@@ -853,8 +822,7 @@ pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[cfg_attr(test, assert_instr(vaddpd))] pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let add = _mm512_add_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, f64x8::ZERO)) } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -878,8 +846,7 @@ pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[cfg_attr(test, assert_instr(vaddpd))] pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let add = _mm256_add_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, f64x4::ZERO)) } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -903,8 +870,7 @@ pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[cfg_attr(test, assert_instr(vaddpd))] pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let add = _mm_add_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, add, zero)) + transmute(simd_select_bitmask(k, add, f64x2::ZERO)) } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst. @@ -939,8 +905,7 @@ pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpsubd))] pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_sub_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i32x16::ZERO)) } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -964,8 +929,7 @@ pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpsubd))] pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_sub_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i32x8::ZERO)) } /// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -989,8 +953,7 @@ pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpsubd))] pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_sub_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i32x4::ZERO)) } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst. 
@@ -1025,8 +988,7 @@ pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpsubq))] pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let sub = _mm512_sub_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i64x8::ZERO)) } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1050,8 +1012,7 @@ pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpsubq))] pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let sub = _mm256_sub_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i64x4::ZERO)) } /// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1075,8 +1036,7 @@ pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpsubq))] pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let sub = _mm_sub_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, i64x2::ZERO)) } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst. @@ -1111,8 +1071,7 @@ pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[cfg_attr(test, assert_instr(vsubps))] pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let sub = _mm512_sub_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, f32x16::ZERO)) } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1136,8 +1095,7 @@ pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[cfg_attr(test, assert_instr(vsubps))] pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let sub = _mm256_sub_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, f32x8::ZERO)) } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1161,8 +1119,7 @@ pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[cfg_attr(test, assert_instr(vsubps))] pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let sub = _mm_sub_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, f32x4::ZERO)) } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst. @@ -1197,8 +1154,7 @@ pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[cfg_attr(test, assert_instr(vsubpd))] pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let sub = _mm512_sub_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, f64x8::ZERO)) } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1222,8 +1178,7 @@ pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[cfg_attr(test, assert_instr(vsubpd))] pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let sub = _mm256_sub_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, f64x4::ZERO)) } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1247,8 +1202,7 @@ pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[cfg_attr(test, assert_instr(vsubpd))] pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let sub = _mm_sub_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, sub, zero)) + transmute(simd_select_bitmask(k, sub, f64x2::ZERO)) } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst. @@ -1285,8 +1239,7 @@ pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpmuldq))] pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mul_epi32(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i64x8::ZERO)) } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1310,8 +1263,7 @@ pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmuldq))] pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mul_epi32(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i64x4::ZERO)) } /// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1335,8 +1287,7 @@ pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmuldq))] pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mul_epi32(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i64x2::ZERO)) } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst. @@ -1376,8 +1327,7 @@ pub unsafe fn _mm512_mask_mullo_epi32( #[cfg_attr(test, assert_instr(vpmulld))] pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mullo_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i32x16::ZERO)) } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1406,8 +1356,7 @@ pub unsafe fn _mm256_mask_mullo_epi32( #[cfg_attr(test, assert_instr(vpmulld))] pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mullo_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i32x8::ZERO)) } /// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1431,8 +1380,7 @@ pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m #[cfg_attr(test, assert_instr(vpmulld))] pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mullo_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, i32x4::ZERO)) } /// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst. 
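// Illustrative aside, not part of the patch (hypothetical name, plain arrays
// as a stand-in): "storing the lower 64 bits" of a 64x64-bit product is
// per-lane wrapping multiplication, which the masked multiply hunks then
// combine with the zero-masking step sketched earlier.
fn mullo_epi64_model(a: [i64; 2], b: [i64; 2]) -> [i64; 2] {
    [a[0].wrapping_mul(b[0]), a[1].wrapping_mul(b[1])]
}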
@@ -1500,8 +1448,7 @@ pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpmuludq))] pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let mul = _mm512_mul_epu32(a, b).as_u64x8(); - let zero = _mm512_setzero_si512().as_u64x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, u64x8::ZERO)) } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1525,8 +1472,7 @@ pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmuludq))] pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let mul = _mm256_mul_epu32(a, b).as_u64x4(); - let zero = _mm256_setzero_si256().as_u64x4(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, u64x4::ZERO)) } /// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1550,8 +1496,7 @@ pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmuludq))] pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let mul = _mm_mul_epu32(a, b).as_u64x2(); - let zero = _mm_setzero_si128().as_u64x2(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, u64x2::ZERO)) } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst. @@ -1586,8 +1531,7 @@ pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[cfg_attr(test, assert_instr(vmulps))] pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let mul = _mm512_mul_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, f32x16::ZERO)) } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1611,8 +1555,7 @@ pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[cfg_attr(test, assert_instr(vmulps))] pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let mul = _mm256_mul_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, f32x8::ZERO)) } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1636,8 +1579,7 @@ pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[cfg_attr(test, assert_instr(vmulps))] pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let mul = _mm_mul_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, f32x4::ZERO)) } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst. @@ -1672,8 +1614,7 @@ pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[cfg_attr(test, assert_instr(vmulpd))] pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let mul = _mm512_mul_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, f64x8::ZERO)) } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1697,8 +1638,7 @@ pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[cfg_attr(test, assert_instr(vmulpd))] pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let mul = _mm256_mul_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, f64x4::ZERO)) } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1722,8 +1662,7 @@ pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[cfg_attr(test, assert_instr(vmulpd))] pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let mul = _mm_mul_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, mul, zero)) + transmute(simd_select_bitmask(k, mul, f64x2::ZERO)) } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst. @@ -1758,8 +1697,7 @@ pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[cfg_attr(test, assert_instr(vdivps))] pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let div = _mm512_div_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, div, zero)) + transmute(simd_select_bitmask(k, div, f32x16::ZERO)) } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -1783,8 +1721,7 @@ pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[cfg_attr(test, assert_instr(vdivps))] pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let div = _mm256_div_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, div, zero)) + transmute(simd_select_bitmask(k, div, f32x8::ZERO)) } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1808,8 +1745,7 @@ pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[cfg_attr(test, assert_instr(vdivps))] pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let div = _mm_div_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, div, zero)) + transmute(simd_select_bitmask(k, div, f32x4::ZERO)) } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst. @@ -1844,8 +1780,7 @@ pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[cfg_attr(test, assert_instr(vdivpd))] pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let div = _mm512_div_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, div, zero)) + transmute(simd_select_bitmask(k, div, f64x8::ZERO)) } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1869,8 +1804,7 @@ pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[cfg_attr(test, assert_instr(vdivpd))] pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let div = _mm256_div_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, div, zero)) + transmute(simd_select_bitmask(k, div, f64x4::ZERO)) } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1894,8 +1828,7 @@ pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[cfg_attr(test, assert_instr(vdivpd))] pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let div = _mm_div_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, div, zero)) + transmute(simd_select_bitmask(k, div, f64x2::ZERO)) } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst. 
@@ -1932,8 +1865,7 @@ pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpmaxsd))] pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i32x16::ZERO)) } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1957,8 +1889,7 @@ pub unsafe fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmaxsd))] pub unsafe fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i32x8::ZERO)) } /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -1982,8 +1913,7 @@ pub unsafe fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxsd))] pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i32x4::ZERO)) } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst. @@ -2020,8 +1950,7 @@ pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpmaxsq))] pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i64x8::ZERO)) } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst. @@ -2058,8 +1987,7 @@ pub unsafe fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmaxsq))] pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i64x4::ZERO)) } /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst. @@ -2096,8 +2024,7 @@ pub unsafe fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxsq))] pub unsafe fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, i64x2::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst. 
@@ -2136,8 +2063,7 @@ pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[cfg_attr(test, assert_instr(vmaxps))] pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let max = _mm512_max_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, f32x16::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2161,8 +2087,7 @@ pub unsafe fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[cfg_attr(test, assert_instr(vmaxps))] pub unsafe fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let max = _mm256_max_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, f32x8::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2186,8 +2111,7 @@ pub unsafe fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[cfg_attr(test, assert_instr(vmaxps))] pub unsafe fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let max = _mm_max_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, f32x4::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst. @@ -2222,8 +2146,7 @@ pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[cfg_attr(test, assert_instr(vmaxpd))] pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let max = _mm512_max_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, f64x8::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2247,8 +2170,7 @@ pub unsafe fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[cfg_attr(test, assert_instr(vmaxpd))] pub unsafe fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let max = _mm256_max_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, f64x4::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2272,8 +2194,7 @@ pub unsafe fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[cfg_attr(test, assert_instr(vmaxpd))] pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let max = _mm_max_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, f64x2::ZERO)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst. 
@@ -2310,8 +2231,7 @@ pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpmaxud))] pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epu32(a, b).as_u32x16(); - let zero = _mm512_setzero_si512().as_u32x16(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u32x16::ZERO)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2335,8 +2255,7 @@ pub unsafe fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmaxud))] pub unsafe fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epu32(a, b).as_u32x8(); - let zero = _mm256_setzero_si256().as_u32x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u32x8::ZERO)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2360,8 +2279,7 @@ pub unsafe fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxud))] pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epu32(a, b).as_u32x4(); - let zero = _mm_setzero_si128().as_u32x4(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u32x4::ZERO)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst. @@ -2398,8 +2316,7 @@ pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpmaxuq))] pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let max = _mm512_max_epu64(a, b).as_u64x8(); - let zero = _mm512_setzero_si512().as_u64x8(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u64x8::ZERO)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst. @@ -2436,8 +2353,7 @@ pub unsafe fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpmaxuq))] pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let max = _mm256_max_epu64(a, b).as_u64x4(); - let zero = _mm256_setzero_si256().as_u64x4(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u64x4::ZERO)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst. @@ -2474,8 +2390,7 @@ pub unsafe fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpmaxuq))] pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let max = _mm_max_epu64(a, b).as_u64x2(); - let zero = _mm_setzero_si128().as_u64x2(); - transmute(simd_select_bitmask(k, max, zero)) + transmute(simd_select_bitmask(k, max, u64x2::ZERO)) } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst. 
@@ -2512,8 +2427,7 @@ pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpminsd))] pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i32x16::ZERO)) } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2537,8 +2451,7 @@ pub unsafe fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpminsd))] pub unsafe fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i32x8::ZERO)) } /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2562,8 +2475,7 @@ pub unsafe fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminsd))] pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i32x4::ZERO)) } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. @@ -2600,8 +2512,7 @@ pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpminsq))] pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i64x8::ZERO)) } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. @@ -2638,8 +2549,7 @@ pub unsafe fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpminsq))] pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i64x4::ZERO)) } /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst. @@ -2676,8 +2586,7 @@ pub unsafe fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminsq))] pub unsafe fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, i64x2::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst. 
@@ -2716,8 +2625,7 @@ pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512 #[cfg_attr(test, assert_instr(vminps))] pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let min = _mm512_min_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, f32x16::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2741,8 +2649,7 @@ pub unsafe fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) #[cfg_attr(test, assert_instr(vminps))] pub unsafe fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let min = _mm256_min_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, f32x8::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2766,8 +2673,7 @@ pub unsafe fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> #[cfg_attr(test, assert_instr(vminps))] pub unsafe fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let min = _mm_min_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, f32x4::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst. @@ -2802,8 +2708,7 @@ pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m51 #[cfg_attr(test, assert_instr(vminpd))] pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let min = _mm512_min_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, f64x8::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2827,8 +2732,7 @@ pub unsafe fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m25 #[cfg_attr(test, assert_instr(vminpd))] pub unsafe fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let min = _mm256_min_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, f64x4::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2852,8 +2756,7 @@ pub unsafe fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) #[cfg_attr(test, assert_instr(vminpd))] pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let min = _mm_min_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, f64x2::ZERO)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst. 
@@ -2890,8 +2793,7 @@ pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpminud))] pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epu32(a, b).as_u32x16(); - let zero = _mm512_setzero_si512().as_u32x16(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u32x16::ZERO)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2915,8 +2817,7 @@ pub unsafe fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpminud))] pub unsafe fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epu32(a, b).as_u32x8(); - let zero = _mm256_setzero_si256().as_u32x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u32x8::ZERO)) } /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -2940,8 +2841,7 @@ pub unsafe fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminud))] pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epu32(a, b).as_u32x4(); - let zero = _mm_setzero_si128().as_u32x4(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u32x4::ZERO)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. @@ -2978,8 +2878,7 @@ pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpminuq))] pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let min = _mm512_min_epu64(a, b).as_u64x8(); - let zero = _mm512_setzero_si512().as_u64x8(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u64x8::ZERO)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. @@ -3016,8 +2915,7 @@ pub unsafe fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpminuq))] pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let min = _mm256_min_epu64(a, b).as_u64x4(); - let zero = _mm256_setzero_si256().as_u64x4(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u64x4::ZERO)) } /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst. @@ -3054,8 +2952,7 @@ pub unsafe fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpminuq))] pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let min = _mm_min_epu64(a, b).as_u64x2(); - let zero = _mm_setzero_si128().as_u64x2(); - transmute(simd_select_bitmask(k, min, zero)) + transmute(simd_select_bitmask(k, min, u64x2::ZERO)) } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. 
@@ -4556,11 +4453,7 @@ pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 { - transmute(vrcp14ps( - a.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), - 0b11111111_11111111, - )) + transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4582,7 +4475,7 @@ pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m5 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k)) + transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4593,11 +4486,7 @@ pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] pub unsafe fn _mm256_rcp14_ps(a: __m256) -> __m256 { - transmute(vrcp14ps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - 0b11111111, - )) + transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4619,7 +4508,7 @@ pub unsafe fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m25 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vrcp14ps256(a.as_f32x8(), _mm256_setzero_ps().as_f32x8(), k)) + transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4630,11 +4519,7 @@ pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] pub unsafe fn _mm_rcp14_ps(a: __m128) -> __m128 { - transmute(vrcp14ps128( - a.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - 0b00001111, - )) + transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) } /// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
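Aside: the rcp14/rsqrt14 wrappers above hand the zero vector to the underlying masked intrinsic as the passthrough operand together with an all-ones literal mask, so the passthrough lanes are never selected and f32x16::ZERO is interchangeable with the old _mm512_setzero_ps() value. A hedged scalar sketch of that (value, passthrough, mask) convention, with a hypothetical four-lane helper:

fn masked_unary_model(vals: [f32; 4], passthrough: [f32; 4], mask: u8) -> [f32; 4] {
    // Lane i comes from the computed value when mask bit i is set, and from the
    // passthrough operand otherwise; with an all-ones mask the passthrough never
    // appears in the result, so any well-defined value works there.
    core::array::from_fn(|i| if (mask >> i) & 1 == 1 { vals[i] } else { passthrough[i] })
}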
@@ -4656,7 +4541,7 @@ pub unsafe fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ps))] pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vrcp14ps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k)) + transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4667,11 +4552,7 @@ pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d { - transmute(vrcp14pd( - a.as_f64x8(), - _mm512_setzero_pd().as_f64x8(), - 0b11111111, - )) + transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4693,7 +4574,7 @@ pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k)) + transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4704,11 +4585,7 @@ pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] pub unsafe fn _mm256_rcp14_pd(a: __m256d) -> __m256d { - transmute(vrcp14pd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - 0b00001111, - )) + transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4730,7 +4607,7 @@ pub unsafe fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vrcp14pd256(a.as_f64x4(), _mm256_setzero_pd().as_f64x4(), k)) + transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. 
@@ -4741,11 +4618,7 @@ pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] pub unsafe fn _mm_rcp14_pd(a: __m128d) -> __m128d { - transmute(vrcp14pd128( - a.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - 0b00000011, - )) + transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) } /// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4767,7 +4640,7 @@ pub unsafe fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14pd))] pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vrcp14pd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k)) + transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4778,11 +4651,7 @@ pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 { - transmute(vrsqrt14ps( - a.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), - 0b11111111_11111111, - )) + transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4804,11 +4673,7 @@ pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vrsqrt14ps( - a.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), - k, - )) + transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4819,11 +4684,7 @@ pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] pub unsafe fn _mm256_rsqrt14_ps(a: __m256) -> __m256 { - transmute(vrsqrt14ps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - 0b11111111, - )) + transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. 
@@ -4845,11 +4706,7 @@ pub unsafe fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vrsqrt14ps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - k, - )) + transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4860,11 +4717,7 @@ pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] pub unsafe fn _mm_rsqrt14_ps(a: __m128) -> __m128 { - transmute(vrsqrt14ps128( - a.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - 0b00001111, - )) + transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) } /// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4886,7 +4739,7 @@ pub unsafe fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ps))] pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vrsqrt14ps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k)) + transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4897,11 +4750,7 @@ pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d { - transmute(vrsqrt14pd( - a.as_f64x8(), - _mm512_setzero_pd().as_f64x8(), - 0b11111111, - )) + transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4923,7 +4772,7 @@ pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k)) + transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. 
@@ -4934,11 +4783,7 @@ pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] pub unsafe fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d { - transmute(vrsqrt14pd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - 0b00001111, - )) + transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -4960,11 +4805,7 @@ pub unsafe fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vrsqrt14pd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - k, - )) + transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14. @@ -4975,11 +4816,7 @@ pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] pub unsafe fn _mm_rsqrt14_pd(a: __m128d) -> __m128d { - transmute(vrsqrt14pd128( - a.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - 0b00000011, - )) + transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) } /// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14. @@ -5001,7 +4838,7 @@ pub unsafe fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14pd))] pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vrsqrt14pd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k)) + transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. 
@@ -5014,7 +4851,7 @@ pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d { pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 { transmute(vgetexpps( a.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), + f32x16::ZERO, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -5046,7 +4883,7 @@ pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 { transmute(vgetexpps( a.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), + f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -5060,11 +4897,7 @@ pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] pub unsafe fn _mm256_getexp_ps(a: __m256) -> __m256 { - transmute(vgetexpps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - 0b11111111, - )) + transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5086,11 +4919,7 @@ pub unsafe fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m2 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vgetexpps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - k, - )) + transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5101,11 +4930,7 @@ pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] pub unsafe fn _mm_getexp_ps(a: __m128) -> __m128 { - transmute(vgetexpps128( - a.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - 0b00001111, - )) + transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5127,7 +4952,7 @@ pub unsafe fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexpps))] pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vgetexpps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k)) + transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. 
This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5140,7 +4965,7 @@ pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 { pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d { transmute(vgetexppd( a.as_f64x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -5172,7 +4997,7 @@ pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __ pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d { transmute(vgetexppd( a.as_f64x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -5186,11 +5011,7 @@ pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] pub unsafe fn _mm256_getexp_pd(a: __m256d) -> __m256d { - transmute(vgetexppd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - 0b00001111, - )) + transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5212,11 +5033,7 @@ pub unsafe fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vgetexppd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - k, - )) + transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element. @@ -5227,11 +5044,7 @@ pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] pub unsafe fn _mm_getexp_pd(a: __m128d) -> __m128d { - transmute(vgetexppd128( - a.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - 0b00000011, - )) + transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) } /// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element. 
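Aside: the getexp hunks make the same substitution; per the doc comments, the operation itself is essentially floor(log2(x)) per element. A toy scalar model under the assumption of a finite, non-zero input (zeros, infinities, NaNs, denormals and exact behaviour at binade boundaries are not modelled):

fn getexp_model(x: f64) -> f64 {
    // Rough model of vgetexp* on one finite, non-zero lane: the unbiased
    // exponent of |x|, returned as a floating-point value.
    x.abs().log2().floor()
}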
@@ -5253,7 +5066,7 @@ pub unsafe fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgetexppd))] pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vgetexppd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k)) + transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) } /// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\ @@ -5273,8 +5086,13 @@ pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d { pub unsafe fn _mm512_roundscale_ps(a: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vrndscaleps(a, IMM8, zero, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION); + let r = vrndscaleps( + a, + IMM8, + f32x16::ZERO, + 0b11111111_11111111, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -5321,8 +5139,7 @@ pub unsafe fn _mm512_mask_roundscale_ps( pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512) -> __m512 { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vrndscaleps(a, IMM8, zero, k, _MM_FROUND_CUR_DIRECTION); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5343,8 +5160,7 @@ pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m51 pub unsafe fn _mm256_roundscale_ps(a: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - let r = vrndscaleps256(a, IMM8, zero, 0b11111111); + let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111); transmute(r) } @@ -5391,8 +5207,7 @@ pub unsafe fn _mm256_mask_roundscale_ps( pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256) -> __m256 { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - let r = vrndscaleps256(a, IMM8, zero, k); + let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k); transmute(r) } @@ -5413,8 +5228,7 @@ pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256 pub unsafe fn _mm_roundscale_ps(a: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vrndscaleps128(a, IMM8, zero, 0b00001111); + let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111); transmute(r) } @@ -5461,8 +5275,7 @@ pub unsafe fn _mm_mask_roundscale_ps( pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) -> __m128 { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vrndscaleps128(a, IMM8, zero, k); + let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k); transmute(r) } @@ -5483,8 +5296,7 @@ pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128) - pub unsafe fn _mm512_roundscale_pd(a: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vrndscalepd(a, IMM8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5531,8 +5343,7 @@ pub unsafe fn _mm512_mask_roundscale_pd( pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d) -> __m512d { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = 
vrndscalepd(a, IMM8, zero, k, _MM_FROUND_CUR_DIRECTION); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -5553,8 +5364,7 @@ pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512 pub unsafe fn _mm256_roundscale_pd(a: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - let r = vrndscalepd256(a, IMM8, zero, 0b00001111); + let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111); transmute(r) } @@ -5601,8 +5411,7 @@ pub unsafe fn _mm256_mask_roundscale_pd( pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d) -> __m256d { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - let r = vrndscalepd256(a, IMM8, zero, k); + let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k); transmute(r) } @@ -5623,8 +5432,7 @@ pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256 pub unsafe fn _mm_roundscale_pd(a: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vrndscalepd128(a, IMM8, zero, 0b00000011); + let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011); transmute(r) } @@ -5671,8 +5479,7 @@ pub unsafe fn _mm_mask_roundscale_pd( pub unsafe fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d) -> __m128d { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vrndscalepd128(a, IMM8, zero, k); + let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k); transmute(r) } @@ -5687,7 +5494,7 @@ pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 { transmute(vscalefps( a.as_f32x16(), b.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), + f32x16::ZERO, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -5721,7 +5528,7 @@ pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m5 transmute(vscalefps( a.as_f32x16(), b.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), + f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -5738,7 +5545,7 @@ pub unsafe fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 { transmute(vscalefps256( a.as_f32x8(), b.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), + f32x8::ZERO, 0b11111111, )) } @@ -5762,12 +5569,7 @@ pub unsafe fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m2 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] pub unsafe fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { - transmute(vscalefps256( - a.as_f32x8(), - b.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - k, - )) + transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst. 
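Aside on the roundscale hunks above: the doc comments describe rounding to the number of fraction bits specified by IMM8; as I understand that encoding, the computation amounts to 2^-M * round(2^M * x) for M fraction bits. A tiny round-to-nearest sketch of that formula (M is passed directly here rather than extracted from IMM8, which also carries rounding-control bits):

fn roundscale_model(x: f64, m: u32) -> f64 {
    // 2^-M * round(2^M * x); e.g. roundscale_model(2.27, 1) == 2.5.
    let scale = (1u64 << m) as f64;
    (x * scale).round() / scale
}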
@@ -5781,7 +5583,7 @@ pub unsafe fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 { transmute(vscalefps128( a.as_f32x4(), b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, 0b00001111, )) } @@ -5805,12 +5607,7 @@ pub unsafe fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefps))] pub unsafe fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vscalefps128( - a.as_f32x4(), - b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - k, - )) + transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5824,7 +5621,7 @@ pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d { transmute(vscalefpd( a.as_f64x8(), b.as_f64x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -5858,7 +5655,7 @@ pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m transmute(vscalefpd( a.as_f64x8(), b.as_f64x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -5875,7 +5672,7 @@ pub unsafe fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d { transmute(vscalefpd256( a.as_f64x4(), b.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), + f64x4::ZERO, 0b00001111, )) } @@ -5899,12 +5696,7 @@ pub unsafe fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] pub unsafe fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { - transmute(vscalefpd256( - a.as_f64x4(), - b.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - k, - )) + transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) } /// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst. @@ -5918,7 +5710,7 @@ pub unsafe fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d { transmute(vscalefpd128( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, 0b00000011, )) } @@ -5942,12 +5734,7 @@ pub unsafe fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vscalefpd))] pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vscalefpd128( - a.as_f64x2(), - b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - k, - )) + transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } /// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting. @@ -6378,8 +6165,7 @@ pub unsafe fn _mm512_maskz_ternarylogic_epi32( let b = b.as_i32x16(); let c = c.as_i32x16(); let r = vpternlogd(a, b, c, IMM8); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. 
For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6444,8 +6230,7 @@ pub unsafe fn _mm256_maskz_ternarylogic_epi32( let b = b.as_i32x8(); let c = c.as_i32x8(); let r = vpternlogd256(a, b, c, IMM8); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6510,8 +6295,7 @@ pub unsafe fn _mm_maskz_ternarylogic_epi32( let b = b.as_i32x4(); let c = c.as_i32x4(); let r = vpternlogd128(a, b, c, IMM8); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6576,8 +6360,7 @@ pub unsafe fn _mm512_maskz_ternarylogic_epi64( let b = b.as_i64x8(); let c = c.as_i64x8(); let r = vpternlogq(a, b, c, IMM8); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6642,8 +6425,7 @@ pub unsafe fn _mm256_maskz_ternarylogic_epi64( let b = b.as_i64x4(); let c = c.as_i64x4(); let r = vpternlogq256(a, b, c, IMM8); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) } /// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst. @@ -6708,8 +6490,7 @@ pub unsafe fn _mm_maskz_ternarylogic_epi64( let b = b.as_i64x2(); let c = c.as_i64x2(); let r = vpternlogq128(a, b, c, IMM8); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) } /// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign. 
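The vpternlog hunks above treat IMM8 as an 8-entry truth table: for every bit position, the bits taken from a, b and c form a 3-bit index (a is the most significant bit), and the bit of IMM8 at that index is the output. As a rough illustration only (a scalar sketch with a hypothetical helper name, not the crate's implementation):

    // Scalar model of vpternlog on a single 32-bit lane.
    fn ternarylogic_u32(a: u32, b: u32, c: u32, imm8: u8) -> u32 {
        let mut out = 0u32;
        for bit in 0..32 {
            let idx = (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) | ((c >> bit) & 1);
            out |= (((imm8 as u32) >> idx) & 1) << bit;
        }
        out
    }

    fn main() {
        let (a, b, c) = (0b1100u32, 0b1010, 0b1001);
        // 0xE8 encodes the majority function: output 1 when at least two inputs are 1.
        assert_eq!(ternarylogic_u32(a, b, c, 0xE8), (a & b) | (a & c) | (b & c));
        // 0x96 encodes three-way XOR.
        assert_eq!(ternarylogic_u32(a, b, c, 0x96), a ^ b ^ c);
    }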
@@ -6738,7 +6519,7 @@ pub unsafe fn _mm512_getmant_ps< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); + let zero = f32x16::ZERO; let r = vgetmantps( a, SIGN << 2 | NORM, @@ -6809,8 +6590,13 @@ pub unsafe fn _mm512_maskz_getmant_ps< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION); + let r = vgetmantps( + a, + SIGN << 2 | NORM, + f32x16::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -6840,8 +6626,7 @@ pub unsafe fn _mm256_getmant_ps< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - let r = vgetmantps256(a, SIGN << 2 | NORM, zero, 0b11111111); + let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111); transmute(r) } @@ -6905,8 +6690,7 @@ pub unsafe fn _mm256_maskz_getmant_ps< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - let r = vgetmantps256(a, SIGN << 2 | NORM, zero, k); + let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k); transmute(r) } @@ -6936,8 +6720,7 @@ pub unsafe fn _mm_getmant_ps< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetmantps128(a, SIGN << 2 | NORM, zero, 0b00001111); + let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111); transmute(r) } @@ -7001,8 +6784,7 @@ pub unsafe fn _mm_maskz_getmant_ps< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetmantps128(a, SIGN << 2 | NORM, zero, k); + let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k); transmute(r) } @@ -7032,7 +6814,7 @@ pub unsafe fn _mm512_getmant_pd< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); + let zero = f64x8::ZERO; let r = vgetmantpd( a, SIGN << 2 | NORM, @@ -7103,8 +6885,13 @@ pub unsafe fn _mm512_maskz_getmant_pd< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION); + let r = vgetmantpd( + a, + SIGN << 2 | NORM, + f64x8::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -7134,8 +6921,7 @@ pub unsafe fn _mm256_getmant_pd< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - let r = vgetmantpd256(a, SIGN << 2 | NORM, zero, 0b00001111); + let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111); transmute(r) } @@ -7199,8 +6985,7 @@ pub unsafe fn _mm256_maskz_getmant_pd< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - let r = vgetmantpd256(a, SIGN << 2 | NORM, zero, k); + let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k); transmute(r) } @@ -7230,8 +7015,7 @@ pub unsafe fn _mm_getmant_pd< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetmantpd128(a, SIGN << 2 | NORM, zero, 
0b00000011); + let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011); transmute(r) } @@ -7295,8 +7079,7 @@ pub unsafe fn _mm_maskz_getmant_pd< static_assert_uimm_bits!(NORM, 4); static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetmantpd128(a, SIGN << 2 | NORM, zero, k); + let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k); transmute(r) } @@ -7375,8 +7158,7 @@ pub unsafe fn _mm512_maskz_add_round_ps( let a = a.as_f32x16(); let b = b.as_f32x16(); let r = vaddps(a, b, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7454,8 +7236,7 @@ pub unsafe fn _mm512_maskz_add_round_pd( let a = a.as_f64x8(); let b = b.as_f64x8(); let r = vaddpd(a, b, ROUNDING); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ @@ -7533,8 +7314,7 @@ pub unsafe fn _mm512_maskz_sub_round_ps( let a = a.as_f32x16(); let b = b.as_f32x16(); let r = vsubps(a, b, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\ @@ -7612,8 +7392,7 @@ pub unsafe fn _mm512_maskz_sub_round_pd( let a = a.as_f64x8(); let b = b.as_f64x8(); let r = vsubpd(a, b, ROUNDING); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7691,8 +7470,7 @@ pub unsafe fn _mm512_maskz_mul_round_ps( let a = a.as_f32x16(); let b = b.as_f32x16(); let r = vmulps(a, b, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\ @@ -7770,8 +7548,7 @@ pub unsafe fn _mm512_maskz_mul_round_pd( let a = a.as_f64x8(); let b = b.as_f64x8(); let r = vmulpd(a, b, ROUNDING); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\ @@ -7849,8 +7626,7 @@ pub unsafe fn _mm512_maskz_div_round_ps( let a = a.as_f32x16(); let b = b.as_f32x16(); let r = vdivps(a, b, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, =and store the results in dst.\ @@ -7928,8 +7704,7 @@ pub unsafe fn _mm512_maskz_div_round_pd( let a = a.as_f64x8(); let b = b.as_f64x8(); let r = vdivpd(a, b, ROUNDING); - let zero = _mm512_setzero_pd().as_f64x8(); - 
transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\ @@ -7999,8 +7774,7 @@ pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: _ static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let r = vsqrtps(a, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\ @@ -8070,8 +7844,7 @@ pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __ static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let r = vsqrtpd(a, ROUNDING); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\ @@ -9343,8 +9116,7 @@ pub unsafe fn _mm512_maskz_max_round_ps( let a = a.as_f32x16(); let b = b.as_f32x16(); let r = vmaxps(a, b, SAE); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\ @@ -9404,8 +9176,7 @@ pub unsafe fn _mm512_maskz_max_round_pd( let a = a.as_f64x8(); let b = b.as_f64x8(); let r = vmaxpd(a, b, SAE); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\ @@ -9465,8 +9236,7 @@ pub unsafe fn _mm512_maskz_min_round_ps( let a = a.as_f32x16(); let b = b.as_f32x16(); let r = vminps(a, b, SAE); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\ @@ -9526,8 +9296,7 @@ pub unsafe fn _mm512_maskz_min_round_pd( let a = a.as_f64x8(); let b = b.as_f64x8(); let r = vminpd(a, b, SAE); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f64x8::ZERO)) } /// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. 
This intrinsic essentially calculates floor(log2(x)) for each element.\ @@ -9542,8 +9311,7 @@ pub unsafe fn _mm512_maskz_min_round_pd( pub unsafe fn _mm512_getexp_round_ps(a: __m512) -> __m512 { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vgetexpps(a, zero, 0b11111111_11111111, SAE); + let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE); transmute(r) } @@ -9580,8 +9348,7 @@ pub unsafe fn _mm512_mask_getexp_round_ps( pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512) -> __m512 { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vgetexpps(a, zero, k, SAE); + let r = vgetexpps(a, f32x16::ZERO, k, SAE); transmute(r) } @@ -9597,8 +9364,7 @@ pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m5 pub unsafe fn _mm512_getexp_round_pd(a: __m512d) -> __m512d { static_assert_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vgetexppd(a, zero, 0b11111111, SAE); + let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE); transmute(r) } @@ -9635,8 +9401,7 @@ pub unsafe fn _mm512_mask_getexp_round_pd( pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d) -> __m512d { static_assert_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vgetexppd(a, zero, k, SAE); + let r = vgetexppd(a, f64x8::ZERO, k, SAE); transmute(r) } @@ -9659,8 +9424,7 @@ pub unsafe fn _mm512_roundscale_round_ps(a: __m static_assert_uimm_bits!(IMM8, 8); static_assert_mantissas_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vrndscaleps(a, IMM8, zero, 0b11111111_11111111, SAE); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE); transmute(r) } @@ -9714,8 +9478,7 @@ pub unsafe fn _mm512_maskz_roundscale_round_ps( static_assert_uimm_bits!(IMM8, 8); static_assert_mantissas_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vrndscaleps(a, IMM8, zero, k, SAE); + let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE); transmute(r) } @@ -9738,8 +9501,7 @@ pub unsafe fn _mm512_roundscale_round_pd(a: __m static_assert_uimm_bits!(IMM8, 8); static_assert_mantissas_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vrndscalepd(a, IMM8, zero, 0b11111111, SAE); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE); transmute(r) } @@ -9793,8 +9555,7 @@ pub unsafe fn _mm512_maskz_roundscale_round_pd( static_assert_uimm_bits!(IMM8, 8); static_assert_mantissas_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vrndscalepd(a, IMM8, zero, k, SAE); + let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE); transmute(r) } @@ -9817,8 +9578,7 @@ pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512) static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let b = b.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vscalefps(a, b, zero, 0b11111111_11111111, ROUNDING); + let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -9874,8 +9634,7 @@ pub unsafe fn _mm512_maskz_scalef_round_ps( static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); let b = b.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vscalefps(a, b, zero, k, ROUNDING); + let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING); transmute(r) } @@ -9898,8 +9657,7 @@ pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d 
static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let b = b.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vscalefpd(a, b, zero, 0b11111111, ROUNDING); + let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -9955,8 +9713,7 @@ pub unsafe fn _mm512_maskz_scalef_round_pd( static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); let b = b.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vscalefpd(a, b, zero, k, ROUNDING); + let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING); transmute(r) } @@ -10131,8 +9888,7 @@ pub unsafe fn _mm512_getmant_round_ps< static_assert_uimm_bits!(SIGN, 2); static_assert_mantissas_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, zero, 0b11111111_11111111, SAE); + let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE); transmute(r) } @@ -10202,8 +9958,7 @@ pub unsafe fn _mm512_maskz_getmant_round_ps< static_assert_uimm_bits!(SIGN, 2); static_assert_mantissas_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vgetmantps(a, SIGN << 2 | NORM, zero, k, SAE); + let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE); transmute(r) } @@ -10236,8 +9991,7 @@ pub unsafe fn _mm512_getmant_round_pd< static_assert_uimm_bits!(SIGN, 2); static_assert_mantissas_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, zero, 0b11111111, SAE); + let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE); transmute(r) } @@ -10307,8 +10061,7 @@ pub unsafe fn _mm512_maskz_getmant_round_pd< static_assert_uimm_bits!(SIGN, 2); static_assert_mantissas_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vgetmantpd(a, SIGN << 2 | NORM, zero, k, SAE); + let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE); transmute(r) } @@ -10322,7 +10075,7 @@ pub unsafe fn _mm512_maskz_getmant_round_pd< pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i { transmute(vcvtps2dq( a.as_f32x16(), - _mm512_setzero_si512().as_i32x16(), + i32x16::ZERO, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10354,7 +10107,7 @@ pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i { transmute(vcvtps2dq( a.as_f32x16(), - _mm512_setzero_si512().as_i32x16(), + i32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -10381,8 +10134,7 @@ pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> _ #[cfg_attr(test, assert_instr(vcvtps2dq))] pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i { let convert = _mm256_cvtps_epi32(a); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, convert.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
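All of the `_maskz_` hunks in this file express the same idea: select the computed lane where the mask bit is set, and zero (now spelled `::ZERO`) where it is clear, while the `_mask_` variants merge from `src` instead. A minimal scalar sketch of that distinction, illustrative only and with hypothetical helper names:

    // Merge-masking: unselected lanes come from `src`.
    fn mask_merge(src: [f32; 4], k: u8, r: [f32; 4]) -> [f32; 4] {
        let mut out = src;
        for i in 0..4 {
            if (k >> i) & 1 == 1 {
                out[i] = r[i];
            }
        }
        out
    }

    // Zero-masking is merge-masking against an all-zero source, which is the
    // role the `::ZERO` constants play in `simd_select_bitmask(k, r, ..::ZERO)`.
    fn maskz(k: u8, r: [f32; 4]) -> [f32; 4] {
        mask_merge([0.0; 4], k, r)
    }

    fn main() {
        let src = [9.0, 9.0, 9.0, 9.0];
        let r = [1.0, 2.0, 3.0, 4.0];
        assert_eq!(mask_merge(src, 0b0101, r), [1.0, 9.0, 3.0, 9.0]);
        assert_eq!(maskz(0b0101, r), [1.0, 0.0, 3.0, 0.0]);
    }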
@@ -10406,8 +10158,7 @@ pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m1 #[cfg_attr(test, assert_instr(vcvtps2dq))] pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i { let convert = _mm_cvtps_epi32(a); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, convert.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. @@ -10420,7 +10171,7 @@ pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i { pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i { transmute(vcvtps2udq( a.as_f32x16(), - _mm512_setzero_si512().as_u32x16(), + u32x16::ZERO, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10452,7 +10203,7 @@ pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i { transmute(vcvtps2udq( a.as_f32x16(), - _mm512_setzero_si512().as_u32x16(), + u32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -10466,11 +10217,7 @@ pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i { - transmute(vcvtps2udq256( - a.as_f32x8(), - _mm256_setzero_si256().as_u32x8(), - 0b11111111, - )) + transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10492,11 +10239,7 @@ pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i { - transmute(vcvtps2udq256( - a.as_f32x8(), - _mm256_setzero_si256().as_u32x8(), - k, - )) + transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. @@ -10507,11 +10250,7 @@ pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i { - transmute(vcvtps2udq128( - a.as_f32x4(), - _mm_setzero_si128().as_u32x4(), - 0b11111111, - )) + transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10533,11 +10272,7 @@ pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtps2udq))] pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { - transmute(vcvtps2udq128( - a.as_f32x4(), - _mm_setzero_si128().as_u32x4(), - k, - )) + transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -10550,7 +10285,7 @@ pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d { transmute(vcvtps2pd( a.as_f32x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10582,7 +10317,7 @@ pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m5 pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d { transmute(vcvtps2pd( a.as_f32x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -10598,7 +10333,7 @@ pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d { pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d { transmute(vcvtps2pd( _mm512_castps512_ps256(v2).as_f32x8(), - _mm512_setzero_pd().as_f64x8(), + f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10630,7 +10365,7 @@ pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> _ pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 { transmute(vcvtpd2ps( a.as_f64x8(), - _mm256_setzero_ps().as_f32x8(), + f32x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10662,7 +10397,7 @@ pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m2 pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 { transmute(vcvtpd2ps( a.as_f64x8(), - _mm256_setzero_ps().as_f32x8(), + f32x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -10689,8 +10424,7 @@ pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m1 #[cfg_attr(test, assert_instr(vcvtpd2ps))] pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 { let convert = _mm256_cvtpd_ps(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, convert.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10714,8 +10448,7 @@ pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 #[cfg_attr(test, assert_instr(vcvtpd2ps))] pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { let convert = _mm_cvtpd_ps(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, convert.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. 
@@ -10728,7 +10461,7 @@ pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i { transmute(vcvtpd2dq( a.as_f64x8(), - _mm256_setzero_si256().as_i32x8(), + i32x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10760,7 +10493,7 @@ pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i { transmute(vcvtpd2dq( a.as_f64x8(), - _mm256_setzero_si256().as_i32x8(), + i32x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -10787,11 +10520,7 @@ pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> #[cfg_attr(test, assert_instr(vcvtpd2dq))] pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i { let convert = _mm256_cvtpd_epi32(a); - transmute(simd_select_bitmask( - k, - convert.as_i32x4(), - _mm_setzero_si128().as_i32x4(), - )) + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10815,11 +10544,7 @@ pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m #[cfg_attr(test, assert_instr(vcvtpd2dq))] pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { let convert = _mm_cvtpd_epi32(a); - transmute(simd_select_bitmask( - k, - convert.as_i32x4(), - _mm_setzero_si128().as_i32x4(), - )) + transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. @@ -10832,7 +10557,7 @@ pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i { transmute(vcvtpd2udq( a.as_f64x8(), - _mm256_setzero_si256().as_u32x8(), + u32x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -10864,7 +10589,7 @@ pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { transmute(vcvtpd2udq( a.as_f64x8(), - _mm256_setzero_si256().as_u32x8(), + u32x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -10878,11 +10603,7 @@ pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i { - transmute(vcvtpd2udq256( - a.as_f64x4(), - _mm_setzero_si128().as_u32x4(), - 0b11111111, - )) + transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -10904,11 +10625,7 @@ pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvtpd2udq256( - a.as_f64x4(), - _mm_setzero_si128().as_u32x4(), - k, - )) + transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. @@ -10919,11 +10636,7 @@ pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i { - transmute(vcvtpd2udq128( - a.as_f64x2(), - _mm_setzero_si128().as_u32x4(), - 0b11111111, - )) + transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -10945,11 +10658,7 @@ pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtpd2udq))] pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvtpd2udq128( - a.as_f64x2(), - _mm_setzero_si128().as_u32x4(), - k, - )) + transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) } /// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. @@ -10962,13 +10671,13 @@ pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 { let r: f32x8 = vcvtpd2ps( v2.as_f64x8(), - _mm256_setzero_ps().as_f32x8(), + f32x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, ); simd_shuffle!( r, - _mm256_setzero_ps().as_f32x8(), + f32x8::ZERO, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], ) } @@ -10989,7 +10698,7 @@ pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> _ ); simd_shuffle!( r, - _mm256_setzero_ps().as_f32x8(), + f32x8::ZERO, [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], ) } @@ -11027,8 +10736,7 @@ pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) #[cfg_attr(test, assert_instr(vpmovsxbd))] pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i { let convert = _mm512_cvtepi8_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
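The `_mm512_cvtpd_pslo` hunk above is one of the few places where `f32x8::ZERO` appears twice: once as the unused pass-through operand of vcvtpd2ps and once as the second shuffle input, where indices 8 and above select lanes of the all-zero vector. A scalar sketch of the resulting semantics (illustrative only, ignoring MXCSR rounding details):

    // Eight f64 lanes are narrowed to f32 and placed in the low half of a
    // 16-lane result; the upper half stays zero, mirroring the shuffle with
    // indices [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, ...] against f32x8::ZERO.
    fn cvtpd_pslo(v2: [f64; 8]) -> [f32; 16] {
        let mut out = [0.0f32; 16];
        for i in 0..8 {
            out[i] = v2[i] as f32;
        }
        out
    }

    fn main() {
        let r = cvtpd_pslo([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        assert_eq!(&r[..8], &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        assert!(r[8..].iter().all(|&x| x == 0.0));
    }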
@@ -11052,8 +10760,7 @@ pub unsafe fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpmovsxbd))] pub unsafe fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepi8_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) } /// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11077,8 +10784,7 @@ pub unsafe fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> _ #[cfg_attr(test, assert_instr(vpmovsxbd))] pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi8_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) } /// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst. @@ -11115,8 +10821,7 @@ pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpmovsxbq))] pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i { let convert = _mm512_cvtepi8_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) } /// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11140,8 +10845,7 @@ pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpmovsxbq))] pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) } /// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11165,8 +10869,7 @@ pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> _ #[cfg_attr(test, assert_instr(vpmovsxbq))] pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi8_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) } /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst. 
@@ -11202,8 +10905,7 @@ pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) #[cfg_attr(test, assert_instr(vpmovzxbd))] pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i { let convert = _mm512_cvtepu8_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) } /// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11227,8 +10929,7 @@ pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpmovzxbd))] pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) } /// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11252,8 +10953,7 @@ pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> _ #[cfg_attr(test, assert_instr(vpmovzxbd))] pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepu8_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) } /// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst. @@ -11290,8 +10990,7 @@ pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpmovzxbq))] pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i { let convert = _mm512_cvtepu8_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) } /// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11315,8 +11014,7 @@ pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpmovzxbq))] pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) } /// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
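The widening conversions in these hunks differ only in how the narrow lanes are extended: vpmovsxbd sign-extends, vpmovzxbd zero-extends, and the maskz variants then zero unselected lanes exactly as in the masking sketch earlier. A scalar illustration (hypothetical helper names, not the crate's code):

    fn cvtepi8_epi32(a: [i8; 4]) -> [i32; 4] {
        a.map(|x| x as i32) // sign extension
    }

    fn cvtepu8_epi32(a: [i8; 4]) -> [i32; 4] {
        a.map(|x| x as u8 as i32) // zero extension of the same bytes
    }

    fn main() {
        let a = [-1i8, 2, -3, 4];
        assert_eq!(cvtepi8_epi32(a), [-1, 2, -3, 4]);
        assert_eq!(cvtepu8_epi32(a), [255, 2, 253, 4]);
    }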
@@ -11340,8 +11038,7 @@ pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> _ #[cfg_attr(test, assert_instr(vpmovzxbq))] pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepu8_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst. @@ -11377,8 +11074,7 @@ pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) #[cfg_attr(test, assert_instr(vpmovsxwd))] pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i { let convert = _mm512_cvtepi16_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11402,8 +11098,7 @@ pub unsafe fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovsxwd))] pub unsafe fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepi16_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) } /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11427,8 +11122,7 @@ pub unsafe fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpmovsxwd))] pub unsafe fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi16_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst. @@ -11464,8 +11158,7 @@ pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovsxwq))] pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i { let convert = _mm512_cvtepi16_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11489,8 +11182,7 @@ pub unsafe fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovsxwq))] pub unsafe fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepi16_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) } /// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11514,8 +11206,7 @@ pub unsafe fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpmovsxwq))] pub unsafe fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi16_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst. @@ -11551,8 +11242,7 @@ pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) #[cfg_attr(test, assert_instr(vpmovzxwd))] pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i { let convert = _mm512_cvtepu16_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x16::ZERO)) } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11576,8 +11266,7 @@ pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovzxwd))] pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) } /// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11601,8 +11290,7 @@ pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpmovzxwd))] pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepu16_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) } /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst. 
@@ -11638,8 +11326,7 @@ pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovzxwq))] pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i { let convert = _mm512_cvtepu16_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) } /// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11663,8 +11350,7 @@ pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovzxwq))] pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) } /// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11688,8 +11374,7 @@ pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpmovzxwq))] pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepu16_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst. @@ -11725,8 +11410,7 @@ pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) #[cfg_attr(test, assert_instr(vpmovsxdq))] pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i { let convert = _mm512_cvtepi32_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11750,8 +11434,7 @@ pub unsafe fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovsxdq))] pub unsafe fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepi32_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) } /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -11775,8 +11458,7 @@ pub unsafe fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpmovsxdq))] pub unsafe fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepi32_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst. @@ -11812,8 +11494,7 @@ pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) #[cfg_attr(test, assert_instr(vpmovzxdq))] pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i { let convert = _mm512_cvtepu32_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x8::ZERO)) } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11837,8 +11518,7 @@ pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) #[cfg_attr(test, assert_instr(vpmovzxdq))] pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i { let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x4::ZERO)) } /// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11862,8 +11542,7 @@ pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vpmovzxdq))] pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i { let convert = _mm_cvtepu32_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i64x2::ZERO)) } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. @@ -11899,8 +11578,7 @@ pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> #[cfg_attr(test, assert_instr(vcvtdq2ps))] pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 { let convert = _mm512_cvtepi32_ps(a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
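Similarly, the int-to-float hunks differ only in whether the 32-bit lanes are read as signed (vcvtdq2ps) or unsigned (vcvtudq2ps) before conversion; the maskz forms again zero the unselected lanes. A rough scalar illustration, not the crate's implementation:

    fn cvtepi32_ps(a: [i32; 4]) -> [f32; 4] {
        a.map(|x| x as f32) // signed interpretation
    }

    fn cvtepu32_ps(a: [i32; 4]) -> [f32; 4] {
        a.map(|x| x as u32 as f32) // unsigned interpretation of the same bits
    }

    fn main() {
        let a = [-1i32, 7, -8, 9];
        assert_eq!(cvtepi32_ps(a), [-1.0, 7.0, -8.0, 9.0]);
        assert_eq!(cvtepu32_ps(a)[0], u32::MAX as f32); // the -1 bit pattern read as u32::MAX
    }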
@@ -11924,8 +11602,7 @@ pub unsafe fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> _ #[cfg_attr(test, assert_instr(vcvtdq2ps))] pub unsafe fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 { let convert = _mm256_cvtepi32_ps(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f32x8::ZERO)) } /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -11949,8 +11626,7 @@ pub unsafe fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m1 #[cfg_attr(test, assert_instr(vcvtdq2ps))] pub unsafe fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 { let convert = _mm_cvtepi32_ps(a).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f32x4::ZERO)) } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -11986,8 +11662,7 @@ pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> #[cfg_attr(test, assert_instr(vcvtdq2pd))] pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d { let convert = _mm512_cvtepi32_pd(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12011,8 +11686,7 @@ pub unsafe fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vcvtdq2pd))] pub unsafe fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d { let convert = _mm256_cvtepi32_pd(a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) } /// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12036,8 +11710,7 @@ pub unsafe fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[cfg_attr(test, assert_instr(vcvtdq2pd))] pub unsafe fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d { let convert = _mm_cvtepi32_pd(a).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. 
@@ -12073,8 +11746,7 @@ pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> #[cfg_attr(test, assert_instr(vcvtudq2ps))] pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 { let convert = _mm512_cvtepu32_ps(a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f32x16::ZERO)) } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -12110,8 +11782,7 @@ pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> #[cfg_attr(test, assert_instr(vcvtudq2pd))] pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d { let convert = _mm512_cvtepu32_pd(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f64x8::ZERO)) } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -12147,8 +11818,7 @@ pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> #[cfg_attr(test, assert_instr(vcvtudq2pd))] pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d { let convert = _mm256_cvtepu32_pd(a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f64x4::ZERO)) } /// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. @@ -12185,8 +11855,7 @@ pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m #[cfg_attr(test, assert_instr(vcvtudq2pd))] pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d { let convert = _mm_cvtepu32_pd(a).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, f64x2::ZERO)) } /// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst. @@ -12272,8 +11941,7 @@ pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) #[cfg_attr(test, assert_instr(vpmovdw))] pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { let convert = _mm512_cvtepi32_epi16(a).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i16x16::ZERO)) } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -12309,8 +11977,7 @@ pub unsafe fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) #[cfg_attr(test, assert_instr(vpmovdw))] pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { let convert = _mm256_cvtepi32_epi16(a).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. 
@@ -12321,11 +11988,7 @@ pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] pub unsafe fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i { - transmute(vpmovdw128( - a.as_i32x4(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12347,7 +12010,7 @@ pub unsafe fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] pub unsafe fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12383,8 +12046,7 @@ pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) #[cfg_attr(test, assert_instr(vpmovdb))] pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { let convert = _mm512_cvtepi32_epi8(a).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i8x16::ZERO)) } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12395,11 +12057,7 @@ pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] pub unsafe fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i { - transmute(vpmovdb256( - a.as_i32x8(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12421,7 +12079,7 @@ pub unsafe fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12432,11 +12090,7 @@ pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] pub unsafe fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i { - transmute(vpmovdb128( - a.as_i32x4(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12458,7 +12112,7 @@ pub unsafe fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdb))] pub unsafe fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst. @@ -12494,8 +12148,7 @@ pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) #[cfg_attr(test, assert_instr(vpmovqd))] pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { let convert = _mm512_cvtepi64_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x8::ZERO)) } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst. @@ -12531,8 +12184,7 @@ pub unsafe fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) #[cfg_attr(test, assert_instr(vpmovqd))] pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { let convert = _mm256_cvtepi64_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i32x4::ZERO)) } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst. @@ -12543,11 +12195,7 @@ pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] pub unsafe fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i { - transmute(vpmovqd128( - a.as_i64x2(), - _mm_setzero_si128().as_i32x4(), - 0b11111111, - )) + transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) } /// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12569,7 +12217,7 @@ pub unsafe fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] pub unsafe fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k)) + transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -12605,8 +12253,7 @@ pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) #[cfg_attr(test, assert_instr(vpmovqw))] pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { let convert = _mm512_cvtepi64_epi16(a).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, convert, zero)) + transmute(simd_select_bitmask(k, convert, i16x8::ZERO)) } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. 
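The plain `cvtepi*_epi*` conversions in these hunks narrow by truncation: each wide lane keeps only its low-order bits, exactly like a scalar `as` cast. A small sketch of the per-lane behaviour (hypothetical helper, not crate code):

// Truncating narrowing drops the high bits of every lane.
fn truncate_i64_to_i32(lanes: [i64; 2]) -> [i32; 2] {
    [lanes[0] as i32, lanes[1] as i32]
}

fn main() {
    // 0x1_0000_0005 loses its upper 32 bits and becomes 5; -1 stays -1.
    assert_eq!(truncate_i64_to_i32([0x1_0000_0005, -1]), [5, -1]);
}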
@@ -12617,11 +12264,7 @@ pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] pub unsafe fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i { - transmute(vpmovqw256( - a.as_i64x4(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12643,7 +12286,7 @@ pub unsafe fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst. @@ -12654,11 +12297,7 @@ pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] pub unsafe fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i { - transmute(vpmovqw128( - a.as_i64x2(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12680,7 +12319,7 @@ pub unsafe fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12691,11 +12330,7 @@ pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i { - transmute(vpmovqb( - a.as_i64x8(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12717,7 +12352,7 @@ pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. 
@@ -12728,11 +12363,7 @@ pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] pub unsafe fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i { - transmute(vpmovqb256( - a.as_i64x4(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12754,7 +12385,7 @@ pub unsafe fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst. @@ -12765,11 +12396,7 @@ pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] pub unsafe fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i { - transmute(vpmovqb128( - a.as_i64x2(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12791,7 +12418,7 @@ pub unsafe fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqb))] pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12802,11 +12429,7 @@ pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i { - transmute(vpmovsdw( - a.as_i32x16(), - _mm256_setzero_si256().as_i16x16(), - 0b11111111_11111111, - )) + transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12828,11 +12451,7 @@ pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { - transmute(vpmovsdw( - a.as_i32x16(), - _mm256_setzero_si256().as_i16x16(), - k, - )) + transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. 
@@ -12843,11 +12462,7 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i { - transmute(vpmovsdw256( - a.as_i32x8(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12869,7 +12484,7 @@ pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsdw256(a.as_i32x8(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -12880,11 +12495,7 @@ pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i { - transmute(vpmovsdw128( - a.as_i32x4(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12906,7 +12517,7 @@ pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12917,11 +12528,7 @@ pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i { - transmute(vpmovsdb( - a.as_i32x16(), - _mm_setzero_si128().as_i8x16(), - 0b11111111_11111111, - )) + transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -12943,7 +12550,7 @@ pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { - transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12954,11 +12561,7 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i { - transmute(vpmovsdb256( - a.as_i32x8(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -12980,7 +12583,7 @@ pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -12991,11 +12594,7 @@ pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i { - transmute(vpmovsdb128( - a.as_i32x4(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) } /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13017,7 +12616,7 @@ pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdb))] pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. 
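The `cvtsepi*` intrinsics in the surrounding hunks narrow with signed saturation instead of truncation: out-of-range lanes clamp to the destination type's extremes. A scalar sketch of the per-lane rule (hypothetical helper):

// Signed saturating narrowing clamps into the i32 range before the cast.
fn saturate_i64_to_i32(lane: i64) -> i32 {
    lane.clamp(i32::MIN as i64, i32::MAX as i64) as i32
}

fn main() {
    assert_eq!(saturate_i64_to_i32(1 << 40), i32::MAX);
    assert_eq!(saturate_i64_to_i32(-(1 << 40)), i32::MIN);
    assert_eq!(saturate_i64_to_i32(42), 42);
}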
@@ -13028,11 +12627,7 @@ pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i { - transmute(vpmovsqd( - a.as_i64x8(), - _mm256_setzero_si256().as_i32x8(), - 0b11111111, - )) + transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13054,7 +12649,7 @@ pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { - transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k)) + transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. @@ -13065,11 +12660,7 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i { - transmute(vpmovsqd256( - a.as_i64x4(), - _mm_setzero_si128().as_i32x4(), - 0b11111111, - )) + transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13091,7 +12682,7 @@ pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqd256(a.as_i64x4(), _mm_setzero_si128().as_i32x4(), k)) + transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. @@ -13102,11 +12693,7 @@ pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i { - transmute(vpmovsqd128( - a.as_i64x2(), - _mm_setzero_si128().as_i32x4(), - 0b11111111, - )) + transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13128,7 +12715,7 @@ pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k)) + transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -13139,11 +12726,7 @@ pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i { - transmute(vpmovsqw( - a.as_i64x8(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13165,7 +12748,7 @@ pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. @@ -13176,11 +12759,7 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i { - transmute(vpmovsqw256( - a.as_i64x4(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13202,7 +12781,7 @@ pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. 
@@ -13213,11 +12792,7 @@ pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i { - transmute(vpmovsqw128( - a.as_i64x2(), - _mm_setzero_si128().as_i16x8(), - 0b11111111, - )) + transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13239,7 +12814,7 @@ pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k)) + transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -13250,11 +12825,7 @@ pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i { - transmute(vpmovsqb( - a.as_i64x8(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13276,7 +12847,7 @@ pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -13287,11 +12858,7 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i { - transmute(vpmovsqb256( - a.as_i64x4(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13313,7 +12880,7 @@ pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovsqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. @@ -13324,11 +12891,7 @@ pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i { - transmute(vpmovsqb128( - a.as_i64x2(), - _mm_setzero_si128().as_i8x16(), - 0b11111111, - )) + transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) } /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13350,7 +12913,7 @@ pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqb))] pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovsqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k)) + transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -13361,11 +12924,7 @@ pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i { - transmute(vpmovusdw( - a.as_u32x16(), - _mm256_setzero_si256().as_u16x16(), - 0b11111111_11111111, - )) + transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13387,11 +12946,7 @@ pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i { - transmute(vpmovusdw( - a.as_u32x16(), - _mm256_setzero_si256().as_u16x16(), - k, - )) + transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. 
@@ -13402,11 +12957,7 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i { - transmute(vpmovusdw256( - a.as_u32x8(), - _mm_setzero_si128().as_u16x8(), - 0b11111111, - )) + transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13428,11 +12979,7 @@ pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusdw256( - a.as_u32x8(), - _mm_setzero_si128().as_u16x8(), - k, - )) + transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -13443,11 +12990,7 @@ pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i { - transmute(vpmovusdw128( - a.as_u32x4(), - _mm_setzero_si128().as_u16x8(), - 0b11111111, - )) + transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13469,11 +13012,7 @@ pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusdw128( - a.as_u32x4(), - _mm_setzero_si128().as_u16x8(), - k, - )) + transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13484,11 +13023,7 @@ pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i { - transmute(vpmovusdb( - a.as_u32x16(), - _mm_setzero_si128().as_u8x16(), - 0b11111111_11111111, - )) + transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13510,7 +13045,7 @@ pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { - transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k)) + transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13521,11 +13056,7 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i { - transmute(vpmovusdb256( - a.as_u32x8(), - _mm_setzero_si128().as_u8x16(), - 0b11111111, - )) + transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13547,11 +13078,7 @@ pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusdb256( - a.as_u32x8(), - _mm_setzero_si128().as_u8x16(), - k, - )) + transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13562,11 +13089,7 @@ pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i { - transmute(vpmovusdb128( - a.as_u32x4(), - _mm_setzero_si128().as_u8x16(), - 0b11111111, - )) + transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) } /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13588,11 +13111,7 @@ pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdb))] pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusdb128( - a.as_u32x4(), - _mm_setzero_si128().as_u8x16(), - k, - )) + transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. 
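The unsigned `cvtusepi*` variants only need to clamp at the top of the range, since their inputs are already non-negative; the hunks here accordingly switch to the unsigned `u16x16`/`u8x16`/`u32x8` ZERO constants. A scalar sketch (hypothetical helper):

// Unsigned saturating narrowing caps each lane at the destination maximum.
fn saturate_u64_to_u32(lane: u64) -> u32 {
    lane.min(u32::MAX as u64) as u32
}

fn main() {
    assert_eq!(saturate_u64_to_u32(u64::MAX), u32::MAX);
    assert_eq!(saturate_u64_to_u32(123), 123);
}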
@@ -13603,11 +13122,7 @@ pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i { - transmute(vpmovusqd( - a.as_u64x8(), - _mm256_setzero_si256().as_u32x8(), - 0b11111111, - )) + transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13629,11 +13144,7 @@ pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { - transmute(vpmovusqd( - a.as_u64x8(), - _mm256_setzero_si256().as_u32x8(), - k, - )) + transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. @@ -13644,11 +13155,7 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i { - transmute(vpmovusqd256( - a.as_u64x4(), - _mm_setzero_si128().as_u32x4(), - 0b11111111, - )) + transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13670,11 +13177,7 @@ pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqd256( - a.as_u64x4(), - _mm_setzero_si128().as_u32x4(), - k, - )) + transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. @@ -13685,11 +13188,7 @@ pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i { - transmute(vpmovusqd128( - a.as_u64x2(), - _mm_setzero_si128().as_u32x4(), - 0b11111111, - )) + transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13711,11 +13210,7 @@ pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqd128( - a.as_u64x2(), - _mm_setzero_si128().as_u32x4(), - k, - )) + transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -13726,11 +13221,7 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i { - transmute(vpmovusqw( - a.as_u64x8(), - _mm_setzero_si128().as_u16x8(), - 0b11111111, - )) + transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13752,7 +13243,7 @@ pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k)) + transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. @@ -13763,11 +13254,7 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i { - transmute(vpmovusqw256( - a.as_u64x4(), - _mm_setzero_si128().as_u16x8(), - 0b11111111, - )) + transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13789,11 +13276,7 @@ pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqw256( - a.as_u64x4(), - _mm_setzero_si128().as_u16x8(), - k, - )) + transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. 
@@ -13804,11 +13287,7 @@ pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i { - transmute(vpmovusqw128( - a.as_u64x2(), - _mm_setzero_si128().as_u16x8(), - 0b11111111, - )) + transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13830,11 +13309,7 @@ pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqw128( - a.as_u64x2(), - _mm_setzero_si128().as_u16x8(), - k, - )) + transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13845,11 +13320,7 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i { - transmute(vpmovusqb( - a.as_u64x8(), - _mm_setzero_si128().as_u8x16(), - 0b11111111, - )) + transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13871,7 +13342,7 @@ pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { - transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k)) + transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13882,11 +13353,7 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i { - transmute(vpmovusqb256( - a.as_u64x4(), - _mm_setzero_si128().as_u8x16(), - 0b11111111, - )) + transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -13908,11 +13375,7 @@ pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { - transmute(vpmovusqb256( - a.as_u64x4(), - _mm_setzero_si128().as_u8x16(), - k, - )) + transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. @@ -13923,11 +13386,7 @@ pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i { - transmute(vpmovusqb128( - a.as_u64x2(), - _mm_setzero_si128().as_u8x16(), - 0b11111111, - )) + transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) } /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -13949,11 +13408,7 @@ pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqb))] pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpmovusqb128( - a.as_u64x2(), - _mm_setzero_si128().as_u8x16(), - k, - )) + transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. 
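The `_cvt_round*` intrinsics that follow take the rounding mode as a const generic (`ROUNDING`, validated by `static_assert_rounding!`). As a rough scalar analogue of the explicit modes, here is a sketch; the numeric `mode` values are local to this example and are not the `_MM_FROUND_*` constants themselves:

// 0: to nearest (ties to even), 1: toward -inf, 2: toward +inf, other: toward zero.
fn round_with_mode(x: f32, mode: u8) -> f32 {
    match mode {
        0 => x.round_ties_even(),
        1 => x.floor(),
        2 => x.ceil(),
        _ => x.trunc(),
    }
}

fn main() {
    assert_eq!(round_with_mode(2.5, 0), 2.0);
    assert_eq!(round_with_mode(-1.2, 1), -2.0);
    assert_eq!(round_with_mode(-1.2, 3), -1.0);
}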
@@ -13974,8 +13429,7 @@ pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512) -> __m512i { static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - let r = vcvtps2dq(a, zero, 0b11111111_11111111, ROUNDING); + let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -14027,8 +13481,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epi32( ) -> __m512i { static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - let r = vcvtps2dq(a, zero, k, ROUNDING); + let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING); transmute(r) } @@ -14050,8 +13503,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epi32( pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512) -> __m512i { static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_u32x16(); - let r = vcvtps2udq(a, zero, 0b11111111_11111111, ROUNDING); + let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING); transmute(r) } @@ -14103,8 +13555,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epu32( ) -> __m512i { static_assert_rounding!(ROUNDING); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_u32x16(); - let r = vcvtps2udq(a, zero, k, ROUNDING); + let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING); transmute(r) } @@ -14120,8 +13571,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_epu32( pub unsafe fn _mm512_cvt_roundps_pd(a: __m256) -> __m512d { static_assert_sae!(SAE); let a = a.as_f32x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vcvtps2pd(a, zero, 0b11111111, SAE); + let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE); transmute(r) } @@ -14158,8 +13608,7 @@ pub unsafe fn _mm512_mask_cvt_roundps_pd( pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256) -> __m512d { static_assert_sae!(SAE); let a = a.as_f32x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - let r = vcvtps2pd(a, zero, k, SAE); + let r = vcvtps2pd(a, f64x8::ZERO, k, SAE); transmute(r) } @@ -14181,8 +13630,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256 pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d) -> __m256i { static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - let r = vcvtpd2dq(a, zero, 0b11111111, ROUNDING); + let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -14234,8 +13682,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epi32( ) -> __m256i { static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - let r = vcvtpd2dq(a, zero, k, ROUNDING); + let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING); transmute(r) } @@ -14257,8 +13704,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epi32( pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d) -> __m256i { static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_u32x8(); - let r = vcvtpd2udq(a, zero, 0b11111111, ROUNDING); + let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -14310,8 +13756,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epu32( ) -> __m256i { static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_u32x8(); - let r = vcvtpd2udq(a, zero, k, ROUNDING); + let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING); transmute(r) } @@ -14333,8 +13778,7 @@ pub unsafe fn _mm512_maskz_cvt_roundpd_epu32( pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d) -> 
__m256 { static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - let r = vcvtpd2ps(a, zero, 0b11111111, ROUNDING); + let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -14383,8 +13827,7 @@ pub unsafe fn _mm512_mask_cvt_roundpd_ps( pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d) -> __m256 { static_assert_rounding!(ROUNDING); let a = a.as_f64x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - let r = vcvtpd2ps(a, zero, k, ROUNDING); + let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING); transmute(r) } @@ -14458,8 +13901,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepi32_ps( static_assert_rounding!(ROUNDING); let a = a.as_i32x16(); let r = vcvtdq2ps(a, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ @@ -14532,8 +13974,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( static_assert_rounding!(ROUNDING); let a = a.as_u32x16(); let r = vcvtudq2ps(a, ROUNDING); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, f32x16::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\ @@ -14548,8 +13989,7 @@ pub unsafe fn _mm512_maskz_cvt_roundepu32_ps( pub unsafe fn _mm512_cvt_roundps_ph(a: __m512) -> __m256i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - let r = vcvtps2ph(a, SAE, zero, 0b11111111_11111111); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); transmute(r) } @@ -14586,8 +14026,7 @@ pub unsafe fn _mm512_mask_cvt_roundps_ph( pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512) -> __m256i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - let r = vcvtps2ph(a, SAE, zero, k); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); transmute(r) } @@ -14634,8 +14073,7 @@ pub unsafe fn _mm256_mask_cvt_roundps_ph( pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x8(); - let zero = _mm_setzero_si128().as_i16x8(); - let r = vcvtps2ph256(a, IMM8, zero, k); + let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); transmute(r) } @@ -14682,8 +14120,7 @@ pub unsafe fn _mm_mask_cvt_roundps_ph( pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x4(); - let zero = _mm_setzero_si128().as_i16x8(); - let r = vcvtps2ph128(a, IMM8, zero, k); + let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); transmute(r) } @@ -14699,8 +14136,7 @@ pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128) pub unsafe fn _mm512_cvtps_ph(a: __m512) -> __m256i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - let r = vcvtps2ph(a, SAE, zero, 0b11111111_11111111); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); transmute(r) } @@ -14737,8 +14173,7 @@ pub unsafe fn _mm512_mask_cvtps_ph( pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512) -> __m256i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - let r = vcvtps2ph(a, SAE, zero, 
k); + let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); transmute(r) } @@ -14785,8 +14220,7 @@ pub unsafe fn _mm256_mask_cvtps_ph( pub unsafe fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x8(); - let zero = _mm_setzero_si128().as_i16x8(); - let r = vcvtps2ph256(a, IMM8, zero, k); + let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k); transmute(r) } @@ -14829,8 +14263,7 @@ pub unsafe fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: _ pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x4(); - let zero = _mm_setzero_si128().as_i16x8(); - let r = vcvtps2ph128(a, IMM8, zero, k); + let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k); transmute(r) } @@ -14846,8 +14279,7 @@ pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128) -> __m pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i) -> __m512 { static_assert_sae!(SAE); let a = a.as_i16x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vcvtph2ps(a, zero, 0b11111111_11111111, SAE); + let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE); transmute(r) } @@ -14884,8 +14316,7 @@ pub unsafe fn _mm512_mask_cvt_roundph_ps( pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i) -> __m512 { static_assert_sae!(SAE); let a = a.as_i16x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - let r = vcvtph2ps(a, zero, k, SAE); + let r = vcvtph2ps(a, f32x16::ZERO, k, SAE); transmute(r) } @@ -14899,7 +14330,7 @@ pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m25 pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 { transmute(vcvtph2ps( a.as_i16x16(), - _mm512_setzero_ps().as_f32x16(), + f32x16::ZERO, 0b11111111_11111111, _MM_FROUND_NO_EXC, )) @@ -14929,12 +14360,7 @@ pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvtph2ps))] pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 { - transmute(vcvtph2ps( - a.as_i16x16(), - _mm512_setzero_ps().as_f32x16(), - k, - _MM_FROUND_NO_EXC, - )) + transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -14958,8 +14384,7 @@ pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m2 #[cfg_attr(test, assert_instr(vcvtph2ps))] pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 { let convert = _mm256_cvtph_ps(a); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, convert.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO)) } /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
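The maskz conversion hunks above (from `_mm512_maskz_cvt_roundps_epi32` through `_mm512_maskz_cvt_roundps_ph`) all share one shape: compute the full-width result, then select each lane between that result and zero according to the mask bits; the various `*::ZERO` constants now supply that zero operand directly instead of routing through `_mm512_setzero_*` plus an `as_*` cast. A minimal plain-Rust sketch of the zero-masking select, with a made-up helper name and a 4-lane width chosen only for readability:

```rust
/// Zero-masking select: lane i keeps `computed[i]` when bit i of `k` is set,
/// otherwise it is zeroed, which is what `simd_select_bitmask(k, r, T::ZERO)`
/// does in the intrinsics above. (Hypothetical helper, not part of stdarch.)
fn maskz_select<const N: usize>(k: u16, computed: [f32; N]) -> [f32; N] {
    let mut out = [0.0f32; N];
    for i in 0..N {
        if (k >> i) & 1 == 1 {
            out[i] = computed[i];
        }
    }
    out
}

fn main() {
    let r = maskz_select(0b0101, [1.0, 2.0, 3.0, 4.0]);
    assert_eq!(r, [1.0, 0.0, 3.0, 0.0]);
}
```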
@@ -14983,8 +14408,7 @@ pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 #[cfg_attr(test, assert_instr(vcvtph2ps))] pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { let convert = _mm_cvtph_ps(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, convert.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\ @@ -14999,8 +14423,7 @@ pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512) -> __m512i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - let r = vcvttps2dq(a, zero, 0b11111111_11111111, SAE); + let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE); transmute(r) } @@ -15037,8 +14460,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epi32( pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512) -> __m512i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - let r = vcvttps2dq(a, zero, k, SAE); + let r = vcvttps2dq(a, i32x16::ZERO, k, SAE); transmute(r) } @@ -15054,8 +14476,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: _ pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512) -> __m512i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_u32x16(); - let r = vcvttps2udq(a, zero, 0b11111111_11111111, SAE); + let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE); transmute(r) } @@ -15092,8 +14513,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32( pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512) -> __m512i { static_assert_sae!(SAE); let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_u32x16(); - let r = vcvttps2udq(a, zero, k, SAE); + let r = vcvttps2udq(a, u32x16::ZERO, k, SAE); transmute(r) } @@ -15109,8 +14529,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: _ pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d) -> __m256i { static_assert_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - let r = vcvttpd2dq(a, zero, 0b11111111, SAE); + let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE); transmute(r) } @@ -15147,8 +14566,7 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epi32( pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d) -> __m256i { static_assert_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - let r = vcvttpd2dq(a, zero, k, SAE); + let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE); transmute(r) } @@ -15164,8 +14582,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __ pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d) -> __m256i { static_assert_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - let r = vcvttpd2udq(a, zero, 0b11111111, SAE); + let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE); transmute(r) } @@ -15200,7 +14617,7 @@ pub unsafe fn _mm512_mask_cvtt_roundpd_epu32( pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i { transmute(vcvttps2dq( a.as_f32x16(), - _mm512_setzero_si512().as_i32x16(), + i32x16::ZERO, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -15232,7 +14649,7 @@ pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> pub unsafe fn 
_mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { transmute(vcvttps2dq( a.as_f32x16(), - _mm512_setzero_si512().as_i32x16(), + i32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -15257,11 +14674,7 @@ pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i { - transmute(vcvttps2dq256( - a.as_f32x8(), - _mm256_setzero_si256().as_i32x8(), - k, - )) + transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -15283,11 +14696,7 @@ pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2dq))] pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2dq128( - a.as_f32x4(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. @@ -15300,7 +14709,7 @@ pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i { pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i { transmute(vcvttps2udq( a.as_f32x16(), - _mm512_setzero_si512().as_u32x16(), + u32x16::ZERO, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -15332,7 +14741,7 @@ pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i { transmute(vcvttps2udq( a.as_f32x16(), - _mm512_setzero_si512().as_u32x16(), + u32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -15346,11 +14755,7 @@ pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i { - transmute(vcvttps2udq256( - a.as_f32x8(), - _mm256_setzero_si256().as_u32x8(), - 0b11111111, - )) + transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -15372,11 +14777,7 @@ pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i { - transmute(vcvttps2udq256( - a.as_f32x8(), - _mm256_setzero_si256().as_u32x8(), - k, - )) + transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. 
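The `cvtt*` hunks (for example `_mm512_maskz_cvtt_roundps_epi32` and `_mm512_maskz_cvttps_epi32`) are the truncating conversions: rounding is always toward zero regardless of the rounding mode, and the maskz forms zero the inactive lanes, which is where `i32x16::ZERO` comes in. A hedged scalar model of one 128-bit lane group follows; note that the real vcvttps2dq returns the integer indefinite value 0x8000_0000 for out-of-range inputs, while Rust's `as` cast saturates, so the sketch only matches for in-range values:

```rust
/// Scalar model of a zero-masked truncating f32 -> i32 conversion.
fn maskz_cvtt_ps_epi32(k: u8, a: [f32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            out[i] = a[i] as i32; // `as` truncates toward zero (and saturates)
        }
    }
    out
}

fn main() {
    assert_eq!(maskz_cvtt_ps_epi32(0b1011, [1.9, -2.9, 3.5, 7.0]), [1, -2, 0, 7]);
}
```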
@@ -15387,11 +14788,7 @@ pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i { - transmute(vcvttps2udq128( - a.as_f32x4(), - _mm_setzero_si128().as_u32x4(), - 0b11111111, - )) + transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) } /// Convert packed double-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -15413,11 +14810,7 @@ pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttps2udq))] pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { - transmute(vcvttps2udq128( - a.as_f32x4(), - _mm_setzero_si128().as_u32x4(), - k, - )) + transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ @@ -15432,8 +14825,7 @@ pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d) -> __m256i { static_assert_sae!(SAE); let a = a.as_f64x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - let r = vcvttpd2udq(a, zero, k, SAE); + let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE); transmute(r) } @@ -15447,7 +14839,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __ pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i { transmute(vcvttpd2dq( a.as_f64x8(), - _mm256_setzero_si256().as_i32x8(), + i32x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -15479,7 +14871,7 @@ pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i { transmute(vcvttpd2dq( a.as_f64x8(), - _mm256_setzero_si256().as_i32x8(), + i32x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -15504,11 +14896,7 @@ pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvttpd2dq256( - a.as_f64x4(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
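The unmasked wrappers in these hunks (`_mm512_cvttps_epu32`, `_mm256_cvttps_epu32`, `_mm_cvttps_epu32`) still call the masked LLVM builtins; they just pass an all-ones mask such as `0b11111111`, so the zero vector only satisfies the builtin's signature and is never selected. A small sketch of why the fallback operand is inert under a full mask (`select_lanes` is a hypothetical helper, not a stdarch function):

```rust
fn select_lanes(k: u8, r: [u32; 4], fallback: [u32; 4]) -> [u32; 4] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { r[i] } else { fallback[i] })
}

fn main() {
    let r = [10, 20, 30, 40];
    // With every mask bit set, the fallback (the ZERO vector in the patch) is never read.
    assert_eq!(select_lanes(0b1111, r, [0; 4]), r);
    // Only the mask/maskz variants can actually expose it.
    assert_eq!(select_lanes(0b0101, r, [0; 4]), [10, 0, 30, 0]);
}
```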
@@ -15530,11 +14918,7 @@ pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2dq128( - a.as_f64x2(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. @@ -15547,7 +14931,7 @@ pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i { pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i { transmute(vcvttpd2udq( a.as_f64x8(), - _mm256_setzero_si256().as_i32x8(), + i32x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -15579,7 +14963,7 @@ pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i { transmute(vcvttpd2udq( a.as_f64x8(), - _mm256_setzero_si256().as_i32x8(), + i32x8::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -15593,11 +14977,7 @@ pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i { - transmute(vcvttpd2udq256( - a.as_f64x4(), - _mm_setzero_si128().as_i32x4(), - 0b11111111, - )) + transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -15619,11 +14999,7 @@ pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i { - transmute(vcvttpd2udq256( - a.as_f64x4(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. @@ -15634,11 +15010,7 @@ pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i { - transmute(vcvttpd2udq128( - a.as_f64x2(), - _mm_setzero_si128().as_i32x4(), - 0b11111111, - )) + transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) } /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
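Throughout this section the `_mm*_mask_*` variants merge (inactive lanes are copied from `src`) while the `_mm*_maskz_*` variants zero (inactive lanes become 0). Only the zeroing and unmasked bodies need a zero vector, which is why the `mask_*` functions are untouched by this patch. A short sketch of the two behaviours, assuming nothing beyond plain arrays:

```rust
fn merge_mask(src: [i32; 4], k: u8, r: [i32; 4]) -> [i32; 4] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { r[i] } else { src[i] })
}

fn zero_mask(k: u8, r: [i32; 4]) -> [i32; 4] {
    merge_mask([0; 4], k, r) // maskz is just merge-masking with a zero source
}

fn main() {
    let r = [1, 2, 3, 4];
    assert_eq!(merge_mask([-1; 4], 0b0110, r), [-1, 2, 3, -1]);
    assert_eq!(zero_mask(0b0110, r), [0, 2, 3, 0]);
}
```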
@@ -15660,11 +15032,7 @@ pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcvttpd2udq))] pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i { - transmute(vcvttpd2udq128( - a.as_f64x2(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) } /// Returns vector of type `__m512d` with all elements set to zero. @@ -15995,7 +15363,7 @@ pub unsafe fn _mm512_setr_epi64( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8) -> __m512d { static_assert_imm8_scale!(SCALE); - let zero = _mm512_setzero_pd().as_f64x8(); + let zero = f64x8::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i32x8(); @@ -16035,7 +15403,7 @@ pub unsafe fn _mm512_mask_i32gather_pd( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8) -> __m512d { static_assert_imm8_scale!(SCALE); - let zero = _mm512_setzero_pd().as_f64x8(); + let zero = f64x8::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i64x8(); @@ -16075,7 +15443,7 @@ pub unsafe fn _mm512_mask_i64gather_pd( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8) -> __m256 { static_assert_imm8_scale!(SCALE); - let zero = _mm256_setzero_ps().as_f32x8(); + let zero = f32x8::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i64x8(); @@ -16115,7 +15483,7 @@ pub unsafe fn _mm512_mask_i64gather_ps( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8) -> __m512 { static_assert_imm8_scale!(SCALE); - let zero = _mm512_setzero_ps().as_f32x16(); + let zero = f32x16::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i32x16(); @@ -16158,7 +15526,7 @@ pub unsafe fn _mm512_i32gather_epi32( slice: *const u8, ) -> __m512i { static_assert_imm8_scale!(SCALE); - let zero = _mm512_setzero_si512().as_i32x16(); + let zero = i32x16::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i32x16(); @@ -16202,7 +15570,7 @@ pub unsafe fn _mm512_i32gather_epi64( slice: *const u8, ) -> __m512i { static_assert_imm8_scale!(SCALE); - let zero = _mm512_setzero_si512().as_i64x8(); + let zero = i64x8::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i32x8(); @@ -16246,7 +15614,7 @@ pub unsafe fn _mm512_i64gather_epi64( slice: *const u8, ) -> __m512i { static_assert_imm8_scale!(SCALE); - let zero = _mm512_setzero_si512().as_i64x8(); + let zero = i64x8::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i64x8(); @@ -16290,7 +15658,7 @@ pub unsafe fn _mm512_i64gather_epi32( slice: *const u8, ) -> __m256i { static_assert_imm8_scale!(SCALE); - let zeros = _mm256_setzero_si256().as_i32x8(); + let zeros = i32x8::ZERO; let neg_one = -1; let slice = slice as *const i8; let offsets = offsets.as_i64x8(); @@ -17831,11 +17199,7 @@ pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i { - transmute(vpcompressd( - a.as_i32x16(), - _mm512_setzero_si512().as_i32x16(), - k, - )) + transmute(vpcompressd(a.as_i32x16(), 
i32x16::ZERO, k)) } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17857,11 +17221,7 @@ pub unsafe fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] pub unsafe fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpcompressd256( - a.as_i32x8(), - _mm256_setzero_si256().as_i32x8(), - k, - )) + transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17883,11 +17243,7 @@ pub unsafe fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] pub unsafe fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressd128( - a.as_i32x4(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17909,11 +17265,7 @@ pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i { - transmute(vpcompressq( - a.as_i64x8(), - _mm512_setzero_si512().as_i64x8(), - k, - )) + transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17935,11 +17287,7 @@ pub unsafe fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] pub unsafe fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpcompressq256( - a.as_i64x4(), - _mm256_setzero_si256().as_i64x4(), - k, - )) + transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) } /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -17961,11 +17309,7 @@ pub unsafe fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] pub unsafe fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressq128( - a.as_i64x2(), - _mm_setzero_si128().as_i64x2(), - k, - )) + transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
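The gather hunks above (`_mm512_i32gather_pd`, `_mm512_i64gather_ps`, `_mm512_i32gather_epi32`, and so on) follow a related convention: the unmasked form calls the masked builtin with a zero source vector and a mask of -1 (all lanes active), so the `ZERO` constant again only fills a slot that is never observed. A rough scalar model of a masked gather; it indexes by element rather than by the byte offsets and SCALE factor the real intrinsics use:

```rust
/// Scalar model of a masked gather: lane i loads slice[offsets[i]] when its
/// mask bit is set, otherwise it keeps the corresponding lane of `src`.
fn mask_gather<const N: usize>(src: [f64; N], k: u8, offsets: [usize; N], slice: &[f64]) -> [f64; N] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { slice[offsets[i]] } else { src[i] })
}

fn main() {
    let data = [0.0, 10.0, 20.0, 30.0, 40.0];
    // The unmasked wrapper corresponds to src = ZERO and an all-ones mask.
    let r = mask_gather([0.0; 4], 0b1111, [4, 0, 2, 1], &data);
    assert_eq!(r, [40.0, 0.0, 20.0, 10.0]);
}
```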
@@ -17987,11 +17331,7 @@ pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vcompressps( - a.as_f32x16(), - _mm512_setzero_ps().as_f32x16(), - k, - )) + transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -18013,11 +17353,7 @@ pub unsafe fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] pub unsafe fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vcompressps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - k, - )) + transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) } /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -18039,7 +17375,7 @@ pub unsafe fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] pub unsafe fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vcompressps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k)) + transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -18061,7 +17397,7 @@ pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vcompresspd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k)) + transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -18083,11 +17419,7 @@ pub unsafe fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] pub unsafe fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vcompresspd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - k, - )) + transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) } /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
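The vpcompressd/vpcompressq/vcompressps/vcompresspd hunks store the active lanes contiguously at the bottom of the result; in the maskz form the remaining lanes are zero-filled, which is the role of `i32x16::ZERO` and friends here. A scalar sketch of the zero-masked compress:

```rust
/// Scalar model of vpcompressd with zero-masking: active lanes are packed
/// contiguously into the low elements, the tail is zero-filled
/// (the mask_* variants fill the tail from `src` instead).
fn maskz_compress(k: u8, a: [i32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    let mut j = 0;
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            out[j] = a[i];
            j += 1;
        }
    }
    out
}

fn main() {
    assert_eq!(maskz_compress(0b1010, [1, 2, 3, 4]), [2, 4, 0, 0]);
}
```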
@@ -18109,7 +17441,7 @@ pub unsafe fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vcompresspd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k)) + transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) } /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -18263,11 +17595,7 @@ pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i { - transmute(vpexpandd( - a.as_i32x16(), - _mm512_setzero_si512().as_i32x16(), - k, - )) + transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18289,11 +17617,7 @@ pub unsafe fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] pub unsafe fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpexpandd256( - a.as_i32x8(), - _mm256_setzero_si256().as_i32x8(), - k, - )) + transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) } /// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18315,11 +17639,7 @@ pub unsafe fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandd))] pub unsafe fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandd128( - a.as_i32x4(), - _mm_setzero_si128().as_i32x4(), - k, - )) + transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18341,11 +17661,7 @@ pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i { - transmute(vpexpandq( - a.as_i64x8(), - _mm512_setzero_si512().as_i64x8(), - k, - )) + transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -18367,11 +17683,7 @@ pub unsafe fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] pub unsafe fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i { - transmute(vpexpandq256( - a.as_i64x4(), - _mm256_setzero_si256().as_i64x4(), - k, - )) + transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) } /// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18393,11 +17705,7 @@ pub unsafe fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandq))] pub unsafe fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandq128( - a.as_i64x2(), - _mm_setzero_si128().as_i64x2(), - k, - )) + transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18419,7 +17727,7 @@ pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 { - transmute(vexpandps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k)) + transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18441,11 +17749,7 @@ pub unsafe fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m2 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] pub unsafe fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 { - transmute(vexpandps256( - a.as_f32x8(), - _mm256_setzero_ps().as_f32x8(), - k, - )) + transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) } /// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18467,7 +17771,7 @@ pub unsafe fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandps))] pub unsafe fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 { - transmute(vexpandps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k)) + transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
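The vpexpandd/vpexpandq/vexpandps hunks are the inverse operation: consecutive elements from the source are scattered into the lanes whose mask bit is set, and in the maskz form every other lane becomes zero. Sketch:

```rust
/// Scalar model of vpexpandd with zero-masking: consecutive source elements are
/// written to the lanes whose mask bit is set; unset lanes become zero.
fn maskz_expand(k: u8, a: [i32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    let mut j = 0;
    for i in 0..4 {
        if (k >> i) & 1 == 1 {
            out[i] = a[j];
            j += 1;
        }
    }
    out
}

fn main() {
    assert_eq!(maskz_expand(0b1010, [7, 8, 0, 0]), [0, 7, 0, 8]);
}
```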
@@ -18489,7 +17793,7 @@ pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d { - transmute(vexpandpd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k)) + transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18511,11 +17815,7 @@ pub unsafe fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] pub unsafe fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d { - transmute(vexpandpd256( - a.as_f64x4(), - _mm256_setzero_pd().as_f64x4(), - k, - )) + transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) } /// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -18537,7 +17837,7 @@ pub unsafe fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vexpandpd))] pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d { - transmute(vexpandpd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k)) + transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18586,8 +17886,7 @@ pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i) static_assert_uimm_bits!(IMM8, 8); let a = a.as_i32x16(); let r = vprold(a, IMM8); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18636,8 +17935,7 @@ pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i) - static_assert_uimm_bits!(IMM8, 8); let a = a.as_i32x8(); let r = vprold256(a, IMM8); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18686,8 +17984,7 @@ pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i) -> _ static_assert_uimm_bits!(IMM8, 8); let a = a.as_i32x4(); let r = vprold128(a, IMM8); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
@@ -18736,8 +18033,7 @@ pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i) static_assert_uimm_bits!(IMM8, 8); let a = a.as_i32x16(); let r = vprord(a, IMM8); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -18786,8 +18082,7 @@ pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i) - static_assert_uimm_bits!(IMM8, 8); let a = a.as_i32x8(); let r = vprord256(a, IMM8); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -18836,8 +18131,7 @@ pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i) -> _ static_assert_uimm_bits!(IMM8, 8); let a = a.as_i32x4(); let r = vprord128(a, IMM8); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18886,8 +18180,7 @@ pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i) - static_assert_uimm_bits!(IMM8, 8); let a = a.as_i64x8(); let r = vprolq(a, IMM8); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18936,8 +18229,7 @@ pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i) - static_assert_uimm_bits!(IMM8, 8); let a = a.as_i64x4(); let r = vprolq256(a, IMM8); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. @@ -18986,8 +18278,7 @@ pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i) -> _ static_assert_uimm_bits!(IMM8, 8); let a = a.as_i64x2(); let r = vprolq128(a, IMM8); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19036,8 +18327,7 @@ pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i) - static_assert_uimm_bits!(IMM8, 8); let a = a.as_i64x8(); let r = vprorq(a, IMM8); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. 
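The vprold/vprorq-style hunks rotate each lane by an immediate and then apply the usual zero-masked select. Rotation is total (any count is valid, reduced modulo the lane width), so unlike the shift intrinsics further down there is no out-of-range guard. An illustrative sketch using `u32::rotate_left`:

```rust
/// Scalar model of a zero-masked 32-bit rotate-left by immediate (vprold + maskz).
/// `rotate_left` already reduces the count modulo 32, matching the hardware's
/// treatment of imm8 for 32-bit rotates.
fn maskz_rol_epi32<const IMM8: u32>(k: u8, a: [u32; 4]) -> [u32; 4] {
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { a[i].rotate_left(IMM8) } else { 0 })
}

fn main() {
    let r = maskz_rol_epi32::<8>(0b0011, [0x1200_0034, 0xff00_0000, 1, 1]);
    assert_eq!(r, [0x0000_3412, 0x0000_00ff, 0, 0]);
}
```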
@@ -19086,8 +18376,7 @@ pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i) - static_assert_uimm_bits!(IMM8, 8); let a = a.as_i64x4(); let r = vprorq256(a, IMM8); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. @@ -19136,8 +18425,7 @@ pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i) -> _ static_assert_uimm_bits!(IMM8, 8); let a = a.as_i64x2(); let r = vprorq128(a, IMM8); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) } /// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. @@ -19172,7 +18460,7 @@ pub unsafe fn _mm512_mask_slli_epi32( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 32 { - u32x16::splat(0) + u32x16::ZERO } else { simd_shl(a.as_u32x16(), u32x16::splat(IMM8)) }; @@ -19193,8 +18481,7 @@ pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i) _mm512_setzero_si512() } else { let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8)); - let zero = u32x16::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) } } @@ -19213,7 +18500,7 @@ pub unsafe fn _mm256_mask_slli_epi32( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 32 { - u32x8::splat(0) + u32x8::ZERO } else { simd_shl(a.as_u32x8(), u32x8::splat(IMM8)) }; @@ -19234,8 +18521,7 @@ pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i) _mm256_setzero_si256() } else { let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8)); - let zero = u32x8::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u32x8::ZERO)) } } @@ -19254,7 +18540,7 @@ pub unsafe fn _mm_mask_slli_epi32( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 32 { - u32x4::splat(0) + u32x4::ZERO } else { simd_shl(a.as_u32x4(), u32x4::splat(IMM8)) }; @@ -19275,8 +18561,7 @@ pub unsafe fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i) -> _mm_setzero_si128() } else { let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8)); - let zero = u32x4::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u32x4::ZERO)) } } @@ -19312,7 +18597,7 @@ pub unsafe fn _mm512_mask_srli_epi32( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 32 { - u32x16::splat(0) + u32x16::ZERO } else { simd_shr(a.as_u32x16(), u32x16::splat(IMM8)) }; @@ -19333,8 +18618,7 @@ pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i) _mm512_setzero_si512() } else { let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8)); - let zero = u32x16::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u32x16::ZERO)) } } @@ -19353,7 +18637,7 @@ pub unsafe fn _mm256_mask_srli_epi32( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 32 { - u32x8::splat(0) + u32x8::ZERO } else { simd_shr(a.as_u32x8(), u32x8::splat(IMM8)) }; @@ -19374,8 +18658,7 @@ pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i) _mm256_setzero_si256() } else { let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8)); - let zero = u32x8::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u32x8::ZERO)) } } @@ -19394,7 
+18677,7 @@ pub unsafe fn _mm_mask_srli_epi32( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 32 { - u32x4::splat(0) + u32x4::ZERO } else { simd_shr(a.as_u32x4(), u32x4::splat(IMM8)) }; @@ -19415,8 +18698,7 @@ pub unsafe fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i) -> _mm_setzero_si128() } else { let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8)); - let zero = u32x4::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u32x4::ZERO)) } } @@ -19452,7 +18734,7 @@ pub unsafe fn _mm512_mask_slli_epi64( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 64 { - u64x8::splat(0) + u64x8::ZERO } else { simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)) }; @@ -19473,8 +18755,7 @@ pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i) _mm512_setzero_si512() } else { let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)); - let zero = u64x8::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) } } @@ -19493,7 +18774,7 @@ pub unsafe fn _mm256_mask_slli_epi64( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 64 { - u64x4::splat(0) + u64x4::ZERO } else { simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)) }; @@ -19514,8 +18795,7 @@ pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i) _mm256_setzero_si256() } else { let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64)); - let zero = u64x4::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u64x4::ZERO)) } } @@ -19534,7 +18814,7 @@ pub unsafe fn _mm_mask_slli_epi64( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 64 { - u64x2::splat(0) + u64x2::ZERO } else { simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)) }; @@ -19555,8 +18835,7 @@ pub unsafe fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i) -> _mm_setzero_si128() } else { let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64)); - let zero = u64x2::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u64x2::ZERO)) } } @@ -19592,7 +18871,7 @@ pub unsafe fn _mm512_mask_srli_epi64( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = if IMM8 >= 64 { - u64x8::splat(0) + u64x8::ZERO } else { simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)) }; @@ -19613,8 +18892,7 @@ pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i) _mm512_setzero_si512() } else { let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)); - let zero = u64x8::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, u64x8::ZERO)) } } @@ -19633,7 +18911,7 @@ pub unsafe fn _mm256_mask_srli_epi64( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 64 { - u64x4::splat(0) + u64x4::ZERO } else { simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)) }; @@ -19654,8 +18932,7 @@ pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i) _mm256_setzero_si256() } else { let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64)); - let zero = u64x4::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u64x4::ZERO)) } } @@ -19674,7 +18951,7 @@ pub unsafe fn _mm_mask_srli_epi64( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = if IMM8 >= 64 { - u64x2::splat(0) + u64x2::ZERO } else { simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)) }; @@ -19695,8 +18972,7 @@ pub unsafe fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i) -> _mm_setzero_si128() } else { 
let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64)); - let zero = u64x2::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, u64x2::ZERO)) } } @@ -19737,8 +19013,7 @@ pub unsafe fn _mm512_mask_sll_epi32( #[cfg_attr(test, assert_instr(vpslld))] pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19767,8 +19042,7 @@ pub unsafe fn _mm256_mask_sll_epi32( #[cfg_attr(test, assert_instr(vpslld))] pub unsafe fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_sll_epi32(a, count).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19792,8 +19066,7 @@ pub unsafe fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpslld))] pub unsafe fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sll_epi32(a, count).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. @@ -19833,8 +19106,7 @@ pub unsafe fn _mm512_mask_srl_epi32( #[cfg_attr(test, assert_instr(vpsrld))] pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19863,8 +19135,7 @@ pub unsafe fn _mm256_mask_srl_epi32( #[cfg_attr(test, assert_instr(vpsrld))] pub unsafe fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_srl_epi32(a, count).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
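The slli/srli hunks keep their explicit `if IMM8 >= 32 { u32x16::ZERO } else { ... }` guard because a logical shift by the full lane width is defined to produce zero on the hardware, while a plain Rust shift by 32 or more would overflow rather than yield 0. A scalar sketch of the guarded, zero-masked immediate shift (the count is a runtime argument here purely to keep the example small):

```rust
/// Scalar model of vpslld-by-immediate with zero-masking: counts of 32 or more
/// zero every lane, mirroring the IMM8 >= 32 guard in the hunks above.
fn maskz_slli_epi32(k: u8, a: [u32; 4], imm8: u32) -> [u32; 4] {
    core::array::from_fn(|i| {
        if (k >> i) & 1 == 1 && imm8 < 32 { a[i] << imm8 } else { 0 }
    })
}

fn main() {
    assert_eq!(maskz_slli_epi32(0b1111, [1, 2, 3, 4], 4), [16, 32, 48, 64]);
    assert_eq!(maskz_slli_epi32(0b1111, [1, 2, 3, 4], 32), [0, 0, 0, 0]);
}
```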
@@ -19888,8 +19159,7 @@ pub unsafe fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsrld))] pub unsafe fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srl_epi32(a, count).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. @@ -19929,8 +19199,7 @@ pub unsafe fn _mm512_mask_sll_epi64( #[cfg_attr(test, assert_instr(vpsllq))] pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sll_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19959,8 +19228,7 @@ pub unsafe fn _mm256_mask_sll_epi64( #[cfg_attr(test, assert_instr(vpsllq))] pub unsafe fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_sll_epi64(a, count).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -19984,8 +19252,7 @@ pub unsafe fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsllq))] pub unsafe fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sll_epi64(a, count).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst. @@ -20025,8 +19292,7 @@ pub unsafe fn _mm512_mask_srl_epi64( #[cfg_attr(test, assert_instr(vpsrlq))] pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_srl_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20055,8 +19321,7 @@ pub unsafe fn _mm256_mask_srl_epi64( #[cfg_attr(test, assert_instr(vpsrlq))] pub unsafe fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_srl_epi64(a, count).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
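The sll/srl-by-count hunks (`_mm512_maskz_sll_epi32`, `_mm512_maskz_srl_epi64`, ...) shift every lane by the same amount, taken from the low 64 bits of the count vector; counts of lane-width or more zero the whole result, and the maskz wrapper then zero-masks on top. Sketch for the 64-bit case:

```rust
/// Scalar model of a zero-masked vpsllq-style "shift by count vector": every
/// lane shifts by the same count, and counts of 64 or more flush lanes to zero.
fn maskz_sll_epi64(k: u8, a: [u64; 2], count: u64) -> [u64; 2] {
    core::array::from_fn(|i| {
        if (k >> i) & 1 == 1 && count < 64 { a[i] << count } else { 0 }
    })
}

fn main() {
    assert_eq!(maskz_sll_epi64(0b11, [1, 3], 4), [16, 48]);
    assert_eq!(maskz_sll_epi64(0b01, [1, 3], 4), [16, 0]);
    assert_eq!(maskz_sll_epi64(0b11, [1, 3], 64), [0, 0]);
}
```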
@@ -20080,8 +19345,7 @@ pub unsafe fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsrlq))] pub unsafe fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srl_epi64(a, count).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -20121,8 +19385,7 @@ pub unsafe fn _mm512_mask_sra_epi32( #[cfg_attr(test, assert_instr(vpsrad))] pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20151,8 +19414,7 @@ pub unsafe fn _mm256_mask_sra_epi32( #[cfg_attr(test, assert_instr(vpsrad))] pub unsafe fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_sra_epi32(a, count).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20176,8 +19438,7 @@ pub unsafe fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsrad))] pub unsafe fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sra_epi32(a, count).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -20217,8 +19478,7 @@ pub unsafe fn _mm512_mask_sra_epi64( #[cfg_attr(test, assert_instr(vpsraq))] pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { let shf = _mm512_sra_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. @@ -20258,8 +19518,7 @@ pub unsafe fn _mm256_mask_sra_epi64( #[cfg_attr(test, assert_instr(vpsraq))] pub unsafe fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i { let shf = _mm256_sra_epi64(a, count).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. 
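The sra hunks are the arithmetic right shifts: the sign bit is replicated instead of shifting in zeros, and oversized counts saturate to a full sign fill rather than producing zero. A scalar model (the `min(31)` clamp mirrors the `IMM8.min(31)` used by the srai hunks that follow):

```rust
/// Scalar model of a zero-masked vpsrad: arithmetic shift, with counts of 32 or
/// more behaving like a shift by 31 (every bit becomes a copy of the sign bit).
fn maskz_sra_epi32(k: u8, a: [i32; 4], count: u32) -> [i32; 4] {
    let count = count.min(31);
    core::array::from_fn(|i| if (k >> i) & 1 == 1 { a[i] >> count } else { 0 })
}

fn main() {
    assert_eq!(maskz_sra_epi32(0b1111, [-8, 8, -1, 1], 2), [-2, 2, -1, 0]);
    assert_eq!(maskz_sra_epi32(0b1111, [-8, 8, -1, 1], 40), [-1, 0, -1, 0]);
}
```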
@@ -20294,8 +19553,7 @@ pub unsafe fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: _ #[cfg_attr(test, assert_instr(vpsraq))] pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sra_epi64(a, count).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -20340,8 +19598,7 @@ pub unsafe fn _mm512_mask_srai_epi32( pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)); - let zero = i32x16::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20371,8 +19628,7 @@ pub unsafe fn _mm256_mask_srai_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i) -> __m256i { let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32)); - let zero = i32x8::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20402,8 +19658,7 @@ pub unsafe fn _mm_mask_srai_epi32( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i) -> __m128i { let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32)); - let zero = i32x4::splat(0); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -20448,8 +19703,7 @@ pub unsafe fn _mm512_mask_srai_epi64( pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)); - let zero = i64x8::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. @@ -20494,8 +19748,7 @@ pub unsafe fn _mm256_mask_srai_epi64( pub unsafe fn _mm256_maskz_srai_epi64(k: __mmask8, a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)); - let zero = i64x4::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. 
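A note on why the srai hunks clamp with `IMM8.min(31)`/`IMM8.min(63)` instead of branching to a zero vector the way slli/srli do: for an arithmetic shift, shifting by width - 1 already yields the all-sign-bits result the hardware defines for oversized immediates, so clamping is both safe in Rust (no shift overflow) and semantically equivalent. This reading is an inference from the code, not something stated in the patch:

```rust
/// One lane of an srai-style shift: the clamp makes oversized counts land on
/// "all sign bits" (-1 for negative inputs, 0 for non-negative ones).
fn srai_epi64_lane(a: i64, imm8: u32) -> i64 {
    a >> imm8.min(63)
}

fn main() {
    assert_eq!(srai_epi64_lane(-1234, 255), -1);
    assert_eq!(srai_epi64_lane(1234, 255), 0);
}
```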
@@ -20540,8 +19793,7 @@ pub unsafe fn _mm_mask_srai_epi64( pub unsafe fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)); - let zero = i64x2::splat(0); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -20581,8 +19833,7 @@ pub unsafe fn _mm512_mask_srav_epi32( #[cfg_attr(test, assert_instr(vpsravd))] pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20611,8 +19862,7 @@ pub unsafe fn _mm256_mask_srav_epi32( #[cfg_attr(test, assert_instr(vpsravd))] pub unsafe fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_srav_epi32(a, count).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -20641,8 +19891,7 @@ pub unsafe fn _mm_mask_srav_epi32( #[cfg_attr(test, assert_instr(vpsravd))] pub unsafe fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srav_epi32(a, count).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -20682,8 +19931,7 @@ pub unsafe fn _mm512_mask_srav_epi64( #[cfg_attr(test, assert_instr(vpsravq))] pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srav_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. @@ -20723,8 +19971,7 @@ pub unsafe fn _mm256_mask_srav_epi64( #[cfg_attr(test, assert_instr(vpsravq))] pub unsafe fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_srav_epi64(a, count).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. 
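The variable arithmetic shifts (`vpsravd`/`vpsravq`, `srav`) read a separate count per lane; counts of the element width or more again degrade to a pure sign fill. A scalar sketch of one 32-bit lane (illustrative only):

    // Per-lane model of `srav_epi32`: each lane shifts by its own count,
    // and counts >= 32 behave like a shift by 31.
    fn srav_epi32_lane(a: i32, count: u32) -> i32 {
        a >> count.min(31)
    }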
@@ -20764,8 +20011,7 @@ pub unsafe fn _mm_mask_srav_epi64( #[cfg_attr(test, assert_instr(vpsravq))] pub unsafe fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srav_epi64(a, count).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20805,8 +20051,7 @@ pub unsafe fn _mm512_mask_rolv_epi32( #[cfg_attr(test, assert_instr(vprolvd))] pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, rol, i32x16::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20841,8 +20086,7 @@ pub unsafe fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vprolvd))] pub unsafe fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let rol = _mm256_rolv_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, rol, i32x8::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20877,8 +20121,7 @@ pub unsafe fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vprolvd))] pub unsafe fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let rol = _mm_rolv_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, rol, i32x4::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20918,8 +20161,7 @@ pub unsafe fn _mm512_mask_rorv_epi32( #[cfg_attr(test, assert_instr(vprorvd))] pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, ror, i32x16::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -20954,8 +20196,7 @@ pub unsafe fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vprorvd))] pub unsafe fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let ror = _mm256_rorv_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, ror, i32x8::ZERO)) } /// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
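For the rotate hunks: `vprolvd`/`vprorvd` rotate each 32-bit lane by its own count, taken modulo the element width. A per-lane sketch (`u32::rotate_left` already reduces the count modulo 32; the explicit `% 32` only spells the semantics out):

    // Per-lane models of the variable rotates used by `_mm*_rolv_epi32`
    // and `_mm*_rorv_epi32`.
    fn rolv_epi32_lane(a: u32, b: u32) -> u32 {
        a.rotate_left(b % 32)
    }

    fn rorv_epi32_lane(a: u32, b: u32) -> u32 {
        a.rotate_right(b % 32)
    }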
@@ -20990,8 +20231,7 @@ pub unsafe fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vprorvd))] pub unsafe fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let ror = _mm_rorv_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, ror, i32x4::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -21026,8 +20266,7 @@ pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vprolvq))] pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let rol = _mm512_rolv_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, rol, i64x8::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -21062,8 +20301,7 @@ pub unsafe fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vprolvq))] pub unsafe fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let rol = _mm256_rolv_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, rol, i64x4::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -21098,8 +20336,7 @@ pub unsafe fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vprolvq))] pub unsafe fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let rol = _mm_rolv_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, rol, zero)) + transmute(simd_select_bitmask(k, rol, i64x2::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -21134,8 +20371,7 @@ pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vprorvq))] pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let ror = _mm512_rorv_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, ror, i64x8::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. @@ -21170,8 +20406,7 @@ pub unsafe fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: _ #[cfg_attr(test, assert_instr(vprorvq))] pub unsafe fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let ror = _mm256_rorv_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, ror, i64x4::ZERO)) } /// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
@@ -21206,8 +20441,7 @@ pub unsafe fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m1 #[cfg_attr(test, assert_instr(vprorvq))] pub unsafe fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let ror = _mm_rorv_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, ror, zero)) + transmute(simd_select_bitmask(k, ror, i64x2::ZERO)) } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -21247,8 +20481,7 @@ pub unsafe fn _mm512_mask_sllv_epi32( #[cfg_attr(test, assert_instr(vpsllvd))] pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21277,8 +20510,7 @@ pub unsafe fn _mm256_mask_sllv_epi32( #[cfg_attr(test, assert_instr(vpsllvd))] pub unsafe fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_sllv_epi32(a, count).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21307,8 +20539,7 @@ pub unsafe fn _mm_mask_sllv_epi32( #[cfg_attr(test, assert_instr(vpsllvd))] pub unsafe fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sllv_epi32(a, count).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -21348,8 +20579,7 @@ pub unsafe fn _mm512_mask_srlv_epi32( #[cfg_attr(test, assert_instr(vpsrlvd))] pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
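Unlike the arithmetic variants above, the variable logical shifts (`vpsllvd`/`vpsrlvd`, `sllv`/`srlv`) zero a lane outright once its count reaches the element width. `checked_shl`/`checked_shr` make that easy to model in scalar Rust (sketch only, helper names are hypothetical):

    // Per-lane models of `sllv_epi32` / `srlv_epi32`: an out-of-range count
    // does not wrap, it simply produces 0.
    fn sllv_epi32_lane(a: u32, count: u32) -> u32 {
        a.checked_shl(count).unwrap_or(0)
    }

    fn srlv_epi32_lane(a: u32, count: u32) -> u32 {
        a.checked_shr(count).unwrap_or(0)
    }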
@@ -21378,8 +20608,7 @@ pub unsafe fn _mm256_mask_srlv_epi32( #[cfg_attr(test, assert_instr(vpsrlvd))] pub unsafe fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_srlv_epi32(a, count).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21408,8 +20637,7 @@ pub unsafe fn _mm_mask_srlv_epi32( #[cfg_attr(test, assert_instr(vpsrlvd))] pub unsafe fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srlv_epi32(a, count).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. @@ -21449,8 +20677,7 @@ pub unsafe fn _mm512_mask_sllv_epi64( #[cfg_attr(test, assert_instr(vpsllvq))] pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_sllv_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21479,8 +20706,7 @@ pub unsafe fn _mm256_mask_sllv_epi64( #[cfg_attr(test, assert_instr(vpsllvq))] pub unsafe fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_sllv_epi64(a, count).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21509,8 +20735,7 @@ pub unsafe fn _mm_mask_sllv_epi64( #[cfg_attr(test, assert_instr(vpsllvq))] pub unsafe fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_sllv_epi64(a, count).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. 
@@ -21550,8 +20775,7 @@ pub unsafe fn _mm512_mask_srlv_epi64( #[cfg_attr(test, assert_instr(vpsrlvq))] pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { let shf = _mm512_srlv_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21580,8 +20804,7 @@ pub unsafe fn _mm256_mask_srlv_epi64( #[cfg_attr(test, assert_instr(vpsrlvq))] pub unsafe fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i { let shf = _mm256_srlv_epi64(a, count).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21610,8 +20833,7 @@ pub unsafe fn _mm_mask_srlv_epi64( #[cfg_attr(test, assert_instr(vpsrlvq))] pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i { let shf = _mm_srlv_epi64(a, count).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. @@ -21677,8 +20899,7 @@ pub unsafe fn _mm512_mask_permute_ps( pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512) -> __m512 { static_assert_uimm_bits!(MASK, 8); let r = _mm512_permute_ps::(a); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21708,8 +20929,7 @@ pub unsafe fn _mm256_mask_permute_ps( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_maskz_permute_ps(k: __mmask8, a: __m256) -> __m256 { let r = _mm256_permute_ps::(a); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
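For the `permute_ps` hunks: the immediate form of `vpermilps` reorders the four 32-bit elements inside each 128-bit lane, with a 2-bit field of the immediate selecting the source of each destination element. Per-lane sketch (hypothetical helper, not from the diff):

    // One 128-bit lane of `permute_ps::<MASK>`: element i of the result is
    // a[(MASK >> (2 * i)) & 3].
    fn permute_ps_lane(a: [f32; 4], mask: u8) -> [f32; 4] {
        let mut out = [0.0f32; 4];
        for i in 0..4 {
            out[i] = a[((mask >> (2 * i)) & 0b11) as usize];
        }
        out
    }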
@@ -21735,8 +20955,7 @@ pub unsafe fn _mm_mask_permute_ps(src: __m128, k: __mmask8, a: #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_maskz_permute_ps(k: __mmask8, a: __m128) -> __m128 { let r = _mm_permute_ps::(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. @@ -21794,8 +21013,7 @@ pub unsafe fn _mm512_mask_permute_pd( pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d) -> __m512d { static_assert_uimm_bits!(MASK, 8); let r = _mm512_permute_pd::(a); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21827,8 +21045,7 @@ pub unsafe fn _mm256_mask_permute_pd( pub unsafe fn _mm256_maskz_permute_pd(k: __mmask8, a: __m256d) -> __m256d { static_assert_uimm_bits!(MASK, 4); let r = _mm256_permute_pd::(a); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -21860,8 +21077,7 @@ pub unsafe fn _mm_mask_permute_pd( pub unsafe fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d) -> __m128d { static_assert_uimm_bits!(IMM2, 2); let r = _mm_permute_pd::(a); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, r.as_f64x2(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -21919,8 +21135,7 @@ pub unsafe fn _mm512_mask_permutex_epi64( pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i) -> __m512i { static_assert_uimm_bits!(MASK, 8); let r = _mm512_permutex_epi64::(a); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) } /// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -21974,8 +21189,7 @@ pub unsafe fn _mm256_mask_permutex_epi64( pub unsafe fn _mm256_maskz_permutex_epi64(k: __mmask8, a: __m256i) -> __m256i { static_assert_uimm_bits!(MASK, 8); let r = _mm256_permutex_epi64::(a); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst. 
@@ -22031,8 +21245,7 @@ pub unsafe fn _mm512_mask_permutex_pd( #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d) -> __m512d { let r = _mm512_permutex_pd::(a); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst. @@ -22086,8 +21299,7 @@ pub unsafe fn _mm256_mask_permutex_pd( pub unsafe fn _mm256_maskz_permutex_pd(k: __mmask8, a: __m256d) -> __m256d { static_assert_uimm_bits!(MASK, 8); let r = _mm256_permutex_pd::(a); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name. @@ -22155,8 +21367,7 @@ pub unsafe fn _mm512_mask_permutevar_ps( #[cfg_attr(test, assert_instr(vpermilps))] pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 { let permute = _mm512_permutevar_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22180,8 +21391,7 @@ pub unsafe fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: #[cfg_attr(test, assert_instr(vpermilps))] pub unsafe fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 { let permute = _mm256_permutevar_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22205,8 +21415,7 @@ pub unsafe fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m #[cfg_attr(test, assert_instr(vpermilps))] pub unsafe fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 { let permute = _mm_permutevar_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst. 
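`permutevar_ps` (`vpermilps` with a vector control) sits between the immediate form above and the cross-lane `permutexvar` family below: the selectors come from a vector, but selection still stays within each 128-bit lane and only the low 2 bits of each control element matter. Per-lane sketch:

    // One 128-bit lane of `permutevar_ps`: element i of the result is
    // a[b[i] & 0b11].
    fn permutevar_ps_lane(a: [f32; 4], b: [u32; 4]) -> [f32; 4] {
        let mut out = [0.0f32; 4];
        for i in 0..4 {
            out[i] = a[(b[i] & 0b11) as usize];
        }
        out
    }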
@@ -22246,8 +21455,7 @@ pub unsafe fn _mm512_mask_permutevar_pd( #[cfg_attr(test, assert_instr(vpermilpd))] pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d { let permute = _mm512_permutevar_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22276,8 +21484,7 @@ pub unsafe fn _mm256_mask_permutevar_pd( #[cfg_attr(test, assert_instr(vpermilpd))] pub unsafe fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d { let permute = _mm256_permutevar_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -22301,8 +21508,7 @@ pub unsafe fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: _ #[cfg_attr(test, assert_instr(vpermilpd))] pub unsafe fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d { let permute = _mm_permutevar_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -22342,8 +21548,7 @@ pub unsafe fn _mm512_mask_permutexvar_epi32( #[cfg_attr(test, assert_instr(vpermd))] pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i { let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) } /// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -22383,8 +21588,7 @@ pub unsafe fn _mm256_mask_permutexvar_epi32( #[cfg_attr(test, assert_instr(vpermd))] pub unsafe fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -22424,8 +21628,7 @@ pub unsafe fn _mm512_mask_permutexvar_epi64( #[cfg_attr(test, assert_instr(vpermq))] pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i { let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) } /// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
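The `permutexvar` hunks (`vpermd`/`vpermq`) shuffle across the whole register: each destination lane is fetched from `a` at the position given by the corresponding element of `idx`, with only the low bits of the index significant. Scalar sketch for the 16-lane 32-bit case:

    // Model of `_mm512_permutexvar_epi32`: 16 lanes, so only the low 4 bits
    // of each index are used.
    fn permutexvar_epi32(idx: [u32; 16], a: [i32; 16]) -> [i32; 16] {
        let mut out = [0i32; 16];
        for i in 0..16 {
            out[i] = a[(idx[i] & 0xf) as usize];
        }
        out
    }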
@@ -22465,8 +21668,7 @@ pub unsafe fn _mm256_mask_permutexvar_epi64( #[cfg_attr(test, assert_instr(vpermq))] pub unsafe fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i { let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx. @@ -22506,8 +21708,7 @@ pub unsafe fn _mm512_mask_permutexvar_ps( #[cfg_attr(test, assert_instr(vpermps))] pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 { let permute = _mm512_permutexvar_ps(idx, a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx. @@ -22547,8 +21748,7 @@ pub unsafe fn _mm256_mask_permutexvar_ps( #[cfg_attr(test, assert_instr(vpermps))] pub unsafe fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 { let permute = _mm256_permutexvar_ps(idx, a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst. @@ -22588,8 +21788,7 @@ pub unsafe fn _mm512_mask_permutexvar_pd( #[cfg_attr(test, assert_instr(vpermpd))] pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d { let permute = _mm512_permutexvar_pd(idx, a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst. @@ -22629,8 +21828,7 @@ pub unsafe fn _mm256_mask_permutexvar_pd( #[cfg_attr(test, assert_instr(vpermpd))] pub unsafe fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d { let permute = _mm256_permutexvar_pd(idx, a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst. @@ -22675,8 +21873,7 @@ pub unsafe fn _mm512_maskz_permutex2var_epi32( b: __m512i, ) -> __m512i { let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i32x16::ZERO)) } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
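The two-source `permutex2var` hunks (`vpermi2d`/`vpermt2d`) treat `a` and `b` as one 32-entry table: the low 4 bits of each index pick a position and the next bit selects which source it is read from. Illustrative sketch for the 32-bit, 512-bit case:

    // Model of `_mm512_permutex2var_epi32`: bit 4 of each index switches
    // between table `a` (bit clear) and table `b` (bit set).
    fn permutex2var_epi32(a: [i32; 16], idx: [u32; 16], b: [i32; 16]) -> [i32; 16] {
        let mut out = [0i32; 16];
        for i in 0..16 {
            let j = (idx[i] & 0xf) as usize;
            out[i] = if idx[i] & 0x10 != 0 { b[j] } else { a[j] };
        }
        out
    }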
@@ -22738,8 +21935,7 @@ pub unsafe fn _mm256_maskz_permutex2var_epi32( b: __m256i, ) -> __m256i { let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i32x8::ZERO)) } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -22801,8 +21997,7 @@ pub unsafe fn _mm_maskz_permutex2var_epi32( b: __m128i, ) -> __m128i { let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i32x4::ZERO)) } /// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -22864,8 +22059,7 @@ pub unsafe fn _mm512_maskz_permutex2var_epi64( b: __m512i, ) -> __m512i { let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i64x8::ZERO)) } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -22927,8 +22121,7 @@ pub unsafe fn _mm256_maskz_permutex2var_epi64( b: __m256i, ) -> __m256i { let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i64x4::ZERO)) } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -22990,8 +22183,7 @@ pub unsafe fn _mm_maskz_permutex2var_epi64( b: __m128i, ) -> __m128i { let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i64x2::ZERO)) } /// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -23053,8 +22245,7 @@ pub unsafe fn _mm512_maskz_permutex2var_ps( b: __m512, ) -> __m512 { let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x16::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). 
@@ -23117,8 +22308,7 @@ pub unsafe fn _mm256_maskz_permutex2var_ps( b: __m256, ) -> __m256 { let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x8::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -23171,8 +22361,7 @@ pub unsafe fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: #[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps pub unsafe fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 { let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f32x4::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set). @@ -23230,8 +22419,7 @@ pub unsafe fn _mm512_maskz_permutex2var_pd( b: __m512d, ) -> __m512d { let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) @@ -23294,8 +22482,7 @@ pub unsafe fn _mm256_maskz_permutex2var_pd( b: __m256d, ) -> __m256d { let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) @@ -23358,8 +22545,7 @@ pub unsafe fn _mm_maskz_permutex2var_pd( b: __m128d, ) -> __m128d { let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, f64x2::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set) @@ -23447,8 +22633,7 @@ pub unsafe fn _mm512_maskz_shuffle_epi32( ) -> __m512i { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_epi32::(a); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -23483,8 +22668,7 @@ pub unsafe fn _mm256_maskz_shuffle_epi32( ) -> __m256i { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_epi32::(a); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) } /// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23519,8 +22703,7 @@ pub unsafe fn _mm_maskz_shuffle_epi32( ) -> __m128i { static_assert_uimm_bits!(MASK, 8); let r = _mm_shuffle_epi32::(a); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst. @@ -23591,8 +22774,7 @@ pub unsafe fn _mm512_maskz_shuffle_ps( ) -> __m512 { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_ps::(a, b); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23629,8 +22811,7 @@ pub unsafe fn _mm256_maskz_shuffle_ps( ) -> __m256 { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_ps::(a, b); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) } /// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23663,8 +22844,7 @@ pub unsafe fn _mm_mask_shuffle_ps( pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { static_assert_uimm_bits!(MASK, 8); let r = _mm_shuffle_ps::(a, b); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst. @@ -23727,8 +22907,7 @@ pub unsafe fn _mm512_maskz_shuffle_pd( ) -> __m512d { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_pd::(a, b); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
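For the `shuffle_ps` hunks: each 128-bit lane of the result is built from two elements of `a` followed by two elements of `b`, each chosen by a 2-bit field of the immediate. Per-lane sketch (helper name is hypothetical):

    // One 128-bit lane of `shuffle_ps::<MASK>`:
    // [a[sel0], a[sel1], b[sel2], b[sel3]].
    fn shuffle_ps_lane(a: [f32; 4], b: [f32; 4], mask: u8) -> [f32; 4] {
        let sel = |n: u8| ((mask >> (2 * n)) & 0b11) as usize;
        [a[sel(0)], a[sel(1)], b[sel(2)], b[sel(3)]]
    }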
@@ -23765,8 +22944,7 @@ pub unsafe fn _mm256_maskz_shuffle_pd( ) -> __m256d { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_pd::(a, b); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) } /// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -23803,8 +22981,7 @@ pub unsafe fn _mm_maskz_shuffle_pd( ) -> __m128d { static_assert_uimm_bits!(MASK, 8); let r = _mm_shuffle_pd::(a, b); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, r.as_f64x2(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO)) } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -23878,8 +23055,7 @@ pub unsafe fn _mm512_maskz_shuffle_i32x4( ) -> __m512i { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_i32x4::(a, b); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) } /// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -23945,8 +23121,7 @@ pub unsafe fn _mm256_maskz_shuffle_i32x4( ) -> __m256i { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_i32x4::(a, b); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -24012,8 +23187,7 @@ pub unsafe fn _mm512_maskz_shuffle_i64x2( ) -> __m512i { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_i64x2::(a, b); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) } /// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst. @@ -24075,8 +23249,7 @@ pub unsafe fn _mm256_maskz_shuffle_i64x2( ) -> __m256i { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_i64x2::(a, b); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -24150,8 +23323,7 @@ pub unsafe fn _mm512_maskz_shuffle_f32x4( ) -> __m512 { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_f32x4::(a, b); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) } /// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. 
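The `shuffle_i32x4` / `shuffle_i64x2` hunks work at 128-bit granularity: of the four chunks in a 512-bit result, the lower two are picked from `a` and the upper two from `b`, again via 2-bit fields of the immediate. Sketch over chunk indices (assuming the 512-bit, 32-bit-element case):

    // Model of `_mm512_shuffle_i32x4::<MASK>` with each [i32; 4] standing in
    // for one 128-bit chunk.
    fn shuffle_i32x4_chunks(a: [[i32; 4]; 4], b: [[i32; 4]; 4], mask: u8) -> [[i32; 4]; 4] {
        let sel = |n: u8| ((mask >> (2 * n)) & 0b11) as usize;
        [a[sel(0)], a[sel(1)], b[sel(2)], b[sel(3)]]
    }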
@@ -24217,8 +23389,7 @@ pub unsafe fn _mm256_maskz_shuffle_f32x4( ) -> __m256 { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_f32x4::(a, b); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -24284,8 +23455,7 @@ pub unsafe fn _mm512_maskz_shuffle_f64x2( ) -> __m512d { static_assert_uimm_bits!(MASK, 8); let r = _mm512_shuffle_f64x2::(a, b); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) } /// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst. @@ -24347,8 +23517,7 @@ pub unsafe fn _mm256_maskz_shuffle_f64x2( ) -> __m256d { static_assert_uimm_bits!(MASK, 8); let r = _mm256_shuffle_f64x2::(a, b); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst. @@ -24407,8 +23576,7 @@ pub unsafe fn _mm512_mask_extractf32x4_ps( pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512) -> __m128 { static_assert_uimm_bits!(IMM8, 2); let r = _mm512_extractf32x4_ps::(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) } /// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst. @@ -24465,8 +23633,7 @@ pub unsafe fn _mm256_mask_extractf32x4_ps( pub unsafe fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m256) -> __m128 { static_assert_uimm_bits!(IMM8, 1); let r = _mm256_extractf32x4_ps::(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, r.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO)) } /// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst. @@ -24483,8 +23650,8 @@ pub unsafe fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m2 pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i) -> __m256i { static_assert_uimm_bits!(IMM1, 1); match IMM1 { - 0 => simd_shuffle!(a, _mm512_set1_epi64(0), [0, 1, 2, 3]), - _ => simd_shuffle!(a, _mm512_set1_epi64(0), [4, 5, 6, 7]), + 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]), + _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]), } } @@ -24523,8 +23690,7 @@ pub unsafe fn _mm512_mask_extracti64x4_epi64( pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i) -> __m256i { static_assert_uimm_bits!(IMM1, 1); let r = _mm512_extracti64x4_epi64::(a); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) } /// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst. 
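A small observation on the `_mm512_extracti64x4_epi64` hunk above: the second `simd_shuffle!` operand is never read, because every index in `[0, 1, 2, 3]` / `[4, 5, 6, 7]` points into `a`; swapping `_mm512_set1_epi64(0)` for `_mm512_setzero_si512()` (and, in the hunks below, `undefined` for `ZERO`) therefore cannot change the result, it only removes the dependence on the set1/undefined helpers. The extract itself just copies one aligned chunk; a scalar sketch of the 32-bit, 128-bit-chunk case:

    // Model of `_mm512_extracti32x4_epi32::<IMM2>`: copy the IMM2-th group of
    // four 32-bit elements.
    fn extracti32x4(a: [i32; 16], imm2: usize) -> [i32; 4] {
        let base = (imm2 & 0b11) * 4;
        [a[base], a[base + 1], a[base + 2], a[base + 3]]
    }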
@@ -24581,8 +23747,7 @@ pub unsafe fn _mm512_mask_extractf64x4_pd( pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d) -> __m256d { static_assert_uimm_bits!(IMM8, 1); let r = _mm512_extractf64x4_pd::(a); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, r.as_f64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO)) } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst. @@ -24599,12 +23764,12 @@ pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m5 pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i) -> __m128i { static_assert_uimm_bits!(IMM2, 2); let a = a.as_i32x16(); - let undefined = _mm512_undefined_epi32().as_i32x16(); + let zero = i32x16::ZERO; let extract: i32x4 = match IMM2 { - 0 => simd_shuffle!(a, undefined, [0, 1, 2, 3]), - 1 => simd_shuffle!(a, undefined, [4, 5, 6, 7]), - 2 => simd_shuffle!(a, undefined, [8, 9, 10, 11]), - _ => simd_shuffle!(a, undefined, [12, 13, 14, 15]), + 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), + 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]), + 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]), + _ => simd_shuffle!(a, zero, [12, 13, 14, 15]), }; transmute(extract) } @@ -24644,8 +23809,7 @@ pub unsafe fn _mm512_mask_extracti32x4_epi32( pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i) -> __m128i { static_assert_uimm_bits!(IMM2, 2); let r = _mm512_extracti32x4_epi32::(a); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) } /// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst. @@ -24662,10 +23826,10 @@ pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: _ pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i) -> __m128i { static_assert_uimm_bits!(IMM1, 1); let a = a.as_i32x8(); - let undefined = _mm256_undefined_si256().as_i32x8(); + let zero = i32x8::ZERO; let extract: i32x4 = match IMM1 { - 0 => simd_shuffle!(a, undefined, [0, 1, 2, 3]), - _ => simd_shuffle!(a, undefined, [4, 5, 6, 7]), + 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]), + _ => simd_shuffle!(a, zero, [4, 5, 6, 7]), }; transmute(extract) } @@ -24705,8 +23869,7 @@ pub unsafe fn _mm256_mask_extracti32x4_epi32( pub unsafe fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: __m256i) -> __m128i { static_assert_uimm_bits!(IMM1, 1); let r = _mm256_extracti32x4_epi32::(a); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst. @@ -24742,8 +23905,7 @@ pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[cfg_attr(test, assert_instr(vmovsldup))] pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 { let mov: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -24767,8 +23929,7 @@ pub unsafe fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __ #[cfg_attr(test, assert_instr(vmovsldup))] pub unsafe fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 { let mov = _mm256_moveldup_ps(a); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, mov.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24792,8 +23953,7 @@ pub unsafe fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m12 #[cfg_attr(test, assert_instr(vmovsldup))] pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 { let mov = _mm_moveldup_ps(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, mov.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst. @@ -24829,8 +23989,7 @@ pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> _ #[cfg_attr(test, assert_instr(vmovshdup))] pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 { let mov: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f32x16::ZERO)) } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24854,8 +24013,7 @@ pub unsafe fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __ #[cfg_attr(test, assert_instr(vmovshdup))] pub unsafe fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 { let mov = _mm256_movehdup_ps(a); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, mov.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO)) } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24879,8 +24037,7 @@ pub unsafe fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m12 #[cfg_attr(test, assert_instr(vmovshdup))] pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 { let mov = _mm_movehdup_ps(a); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, mov.as_f32x4(), zero)) + transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO)) } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst. 
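The `moveldup`/`movehdup` hunks duplicate the even- respectively odd-indexed 32-bit elements into each adjacent pair, which is what the `[0, 0, 2, 2, ...]` and `[1, 1, 3, 3, ...]` shuffle index lists spell out. Per-lane sketch:

    // One 128-bit lane of `moveldup_ps` (even elements duplicated) and
    // `movehdup_ps` (odd elements duplicated).
    fn moveldup_lane(a: [f32; 4]) -> [f32; 4] {
        [a[0], a[0], a[2], a[2]]
    }

    fn movehdup_lane(a: [f32; 4]) -> [f32; 4] {
        [a[1], a[1], a[3], a[3]]
    }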
@@ -24916,8 +24073,7 @@ pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> _ #[cfg_attr(test, assert_instr(vmovddup))] pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d { let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, mov, zero)) + transmute(simd_select_bitmask(k, mov, f64x8::ZERO)) } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24941,8 +24097,7 @@ pub unsafe fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> _ #[cfg_attr(test, assert_instr(vmovddup))] pub unsafe fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d { let mov = _mm256_movedup_pd(a); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, mov.as_f64x4(), zero)) + transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO)) } /// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -24966,8 +24121,7 @@ pub unsafe fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m1 #[cfg_attr(test, assert_instr(vmovddup))] pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d { let mov = _mm_movedup_pd(a); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, mov.as_f64x2(), zero)) + transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8. @@ -25037,8 +24191,7 @@ pub unsafe fn _mm512_maskz_inserti32x4( ) -> __m512i { static_assert_uimm_bits!(IMM8, 2); let r = _mm512_inserti32x4::(a, b); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8. @@ -25103,8 +24256,7 @@ pub unsafe fn _mm256_maskz_inserti32x4( ) -> __m256i { static_assert_uimm_bits!(IMM8, 1); let r = _mm256_inserti32x4::(a, b); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) } /// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8. @@ -25158,8 +24310,7 @@ pub unsafe fn _mm512_maskz_inserti64x4( ) -> __m512i { static_assert_uimm_bits!(IMM8, 1); let r = _mm512_inserti64x4::(a, b); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8. 
@@ -25227,8 +24378,7 @@ pub unsafe fn _mm512_maskz_insertf32x4( ) -> __m512 { static_assert_uimm_bits!(IMM8, 2); let r = _mm512_insertf32x4::(a, b); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, r.as_f32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO)) } /// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8. @@ -25291,8 +24441,7 @@ pub unsafe fn _mm256_maskz_insertf32x4( ) -> __m256 { static_assert_uimm_bits!(IMM8, 1); let r = _mm256_insertf32x4::(a, b); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, r.as_f32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO)) } /// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8. @@ -25346,8 +24495,7 @@ pub unsafe fn _mm512_maskz_insertf64x4( ) -> __m512d { static_assert_uimm_bits!(IMM8, 1); let r = _mm512_insertf64x4::(a, b); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, r.as_f64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO)) } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -25397,8 +24545,7 @@ pub unsafe fn _mm512_mask_unpackhi_epi32( #[cfg_attr(test, assert_instr(vpunpckhdq))] pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO)) } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25427,8 +24574,7 @@ pub unsafe fn _mm256_mask_unpackhi_epi32( #[cfg_attr(test, assert_instr(vpunpckhdq))] pub unsafe fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO)) } /// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25457,8 +24603,7 @@ pub unsafe fn _mm_mask_unpackhi_epi32( #[cfg_attr(test, assert_instr(vpunpckhdq))] pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO)) } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst. 
@@ -25498,8 +24643,7 @@ pub unsafe fn _mm512_mask_unpackhi_epi64( #[cfg_attr(test, assert_instr(vpunpckhqdq))] pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO)) } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25528,8 +24672,7 @@ pub unsafe fn _mm256_mask_unpackhi_epi64( #[cfg_attr(test, assert_instr(vpunpckhqdq))] pub unsafe fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO)) } /// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25558,8 +24701,7 @@ pub unsafe fn _mm_mask_unpackhi_epi64( #[cfg_attr(test, assert_instr(vpunpckhqdq))] pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO)) } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -25601,8 +24743,7 @@ pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: _ #[cfg_attr(test, assert_instr(vunpckhps))] pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO)) } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25626,8 +24767,7 @@ pub unsafe fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __ #[cfg_attr(test, assert_instr(vunpckhps))] pub unsafe fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO)) } /// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25651,8 +24791,7 @@ pub unsafe fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m12 #[cfg_attr(test, assert_instr(vunpckhps))] pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO)) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst. @@ -25692,8 +24831,7 @@ pub unsafe fn _mm512_mask_unpackhi_pd( #[cfg_attr(test, assert_instr(vunpckhpd))] pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO)) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25722,8 +24860,7 @@ pub unsafe fn _mm256_mask_unpackhi_pd( #[cfg_attr(test, assert_instr(vunpckhpd))] pub unsafe fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO)) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25747,8 +24884,7 @@ pub unsafe fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m #[cfg_attr(test, assert_instr(vunpckhpd))] pub unsafe fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, unpackhi, zero)) + transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO)) } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -25798,8 +24934,7 @@ pub unsafe fn _mm512_mask_unpacklo_epi32( #[cfg_attr(test, assert_instr(vpunpckldq))] pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO)) } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -25828,8 +24963,7 @@ pub unsafe fn _mm256_mask_unpacklo_epi32( #[cfg_attr(test, assert_instr(vpunpckldq))] pub unsafe fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO)) } /// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25858,8 +24992,7 @@ pub unsafe fn _mm_mask_unpacklo_epi32( #[cfg_attr(test, assert_instr(vpunpckldq))] pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO)) } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -25899,8 +25032,7 @@ pub unsafe fn _mm512_mask_unpacklo_epi64( #[cfg_attr(test, assert_instr(vpunpcklqdq))] pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO)) } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25929,8 +25061,7 @@ pub unsafe fn _mm256_mask_unpacklo_epi64( #[cfg_attr(test, assert_instr(vpunpcklqdq))] pub unsafe fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO)) } /// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -25959,8 +25090,7 @@ pub unsafe fn _mm_mask_unpacklo_epi64( #[cfg_attr(test, assert_instr(vpunpcklqdq))] pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO)) } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst. 
@@ -26001,8 +25131,7 @@ pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: _ #[cfg_attr(test, assert_instr(vunpcklps))] pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO)) } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26026,8 +25155,7 @@ pub unsafe fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __ #[cfg_attr(test, assert_instr(vunpcklps))] pub unsafe fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 { let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO)) } /// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26051,8 +25179,7 @@ pub unsafe fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m12 #[cfg_attr(test, assert_instr(vunpcklps))] pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 { let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO)) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst. @@ -26092,8 +25219,7 @@ pub unsafe fn _mm512_mask_unpacklo_pd( #[cfg_attr(test, assert_instr(vunpcklpd))] pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO)) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26122,8 +25248,7 @@ pub unsafe fn _mm256_mask_unpacklo_pd( #[cfg_attr(test, assert_instr(vunpcklpd))] pub unsafe fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d { let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO)) } /// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -26147,8 +25272,7 @@ pub unsafe fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m #[cfg_attr(test, assert_instr(vunpcklpd))] pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - transmute(simd_select_bitmask(k, unpacklo, zero)) + transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO)) } /// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -26354,7 +25478,7 @@ pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { - simd_shuffle!(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -26364,7 +25488,7 @@ pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i { - simd_shuffle!(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -26472,8 +25596,7 @@ pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i { let broadcast = _mm512_broadcastd_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) } /// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26497,8 +25620,7 @@ pub unsafe fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd pub unsafe fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i { let broadcast = _mm256_broadcastd_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) } /// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
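// [Editor's note, not part of the diff] `_mm512_zextsi128_si512` above zero
// extends by shuffling the 128-bit input against an all-zero vector: indices 0
// and 1 pick the two i64 lanes of `a`, and every further index points into the
// zero operand, so `_mm_setzero_si128()` states the intent more directly than
// `_mm_set1_epi64x(0)`. A scalar model of that shuffle (illustrative helper,
// not the real `simd_shuffle!` macro):
fn zext_2_to_8(a: [i64; 2]) -> [i64; 8] {
    let zero = [0i64; 2];
    // concatenate `a` and `zero`, then pick lanes by index [0, 1, 2, 2, 2, 2, 2, 2]
    let concat = [a[0], a[1], zero[0], zero[1]];
    let idx = [0usize, 1, 2, 2, 2, 2, 2, 2];
    let mut out = [0i64; 8];
    for (o, &i) in out.iter_mut().zip(idx.iter()) {
        *o = concat[i];
    }
    out
}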
@@ -26522,8 +25644,7 @@ pub unsafe fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { let broadcast = _mm_broadcastd_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO)) } /// Broadcast the low packed 64-bit integer from a to all elements of dst. @@ -26558,8 +25679,7 @@ pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i { let broadcast = _mm512_broadcastq_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) } /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26583,8 +25703,7 @@ pub unsafe fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq pub unsafe fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i { let broadcast = _mm256_broadcastq_epi64(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO)) } /// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26608,8 +25727,7 @@ pub unsafe fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) - #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i { let broadcast = _mm_broadcastq_epi64(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO)) } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst. @@ -26644,8 +25762,7 @@ pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) - #[cfg_attr(test, assert_instr(vbroadcastss))] pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 { let broadcast = _mm512_broadcastss_ps(a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -26669,8 +25786,7 @@ pub unsafe fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> #[cfg_attr(test, assert_instr(vbroadcastss))] pub unsafe fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 { let broadcast = _mm256_broadcastss_ps(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) } /// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26694,8 +25810,7 @@ pub unsafe fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __ #[cfg_attr(test, assert_instr(vbroadcastss))] pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 { let broadcast = _mm_broadcastss_ps(a).as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO)) } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst. @@ -26730,8 +25845,7 @@ pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) #[cfg_attr(test, assert_instr(vbroadcastsd))] pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d { let broadcast = _mm512_broadcastsd_pd(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) } /// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -26755,8 +25869,7 @@ pub unsafe fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) #[cfg_attr(test, assert_instr(vbroadcastsd))] pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d { let broadcast = _mm256_broadcastsd_pd(a).as_f64x4(); - let zero = _mm256_setzero_pd().as_f64x4(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO)) } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst. @@ -26790,8 +25903,7 @@ pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i { let broadcast = _mm512_broadcast_i32x4(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO)) } /// Broadcast the 4 packed 32-bit integers from a to all elements of dst. @@ -26825,8 +25937,7 @@ pub unsafe fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i { let broadcast = _mm256_broadcast_i32x4(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO)) } /// Broadcast the 4 packed 64-bit integers from a to all elements of dst. 
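// [Editor's note, not part of the diff] The `_mm512_maskz_broadcast_i32x4`
// family above repeats one 128-bit block across the whole destination and then
// applies the same zeroing select. Sketch of the repetition step on plain
// arrays (illustrative only):
fn broadcast_4_to_16(block: [i32; 4]) -> [i32; 16] {
    let mut out = [0i32; 16];
    for (i, slot) in out.iter_mut().enumerate() {
        *slot = block[i % 4]; // lane pattern [0, 1, 2, 3] repeated four times
    }
    out
}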
@@ -26858,8 +25969,7 @@ pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i { let broadcast = _mm512_broadcast_i64x4(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO)) } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst. @@ -26891,8 +26001,7 @@ pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 { let broadcast = _mm512_broadcast_f32x4(a).as_f32x16(); - let zero = _mm512_setzero_ps().as_f32x16(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO)) } /// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst. @@ -26924,8 +26033,7 @@ pub unsafe fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 { let broadcast = _mm256_broadcast_f32x4(a).as_f32x8(); - let zero = _mm256_setzero_ps().as_f32x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO)) } /// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst. @@ -26957,8 +26065,7 @@ pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d { let broadcast = _mm512_broadcast_f64x4(a).as_f64x8(); - let zero = _mm512_setzero_pd().as_f64x8(); - transmute(simd_select_bitmask(k, broadcast, zero)) + transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO)) } /// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst. @@ -27201,8 +26308,7 @@ pub unsafe fn _mm512_maskz_alignr_epi32( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let r = _mm512_alignr_epi32::(a, b); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r.as_i32x16(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO)) } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst. @@ -27273,8 +26379,7 @@ pub unsafe fn _mm256_maskz_alignr_epi32( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = _mm256_alignr_epi32::(a, b); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r.as_i32x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO)) } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst. 
@@ -27337,8 +26442,7 @@ pub unsafe fn _mm_maskz_alignr_epi32( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = _mm_alignr_epi32::(a, b); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r.as_i32x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO)) } /// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst. @@ -27399,8 +26503,7 @@ pub unsafe fn _mm512_maskz_alignr_epi64( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let r = _mm512_alignr_epi64::(a, b); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r.as_i64x8(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO)) } /// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst. @@ -27461,8 +26564,7 @@ pub unsafe fn _mm256_maskz_alignr_epi64( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let r = _mm256_alignr_epi64::(a, b); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r.as_i64x4(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO)) } /// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst. @@ -27519,8 +26621,7 @@ pub unsafe fn _mm_maskz_alignr_epi64( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let r = _mm_alignr_epi64::(a, b); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, r.as_i64x2(), zero)) + transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO)) } /// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. @@ -27555,8 +26656,7 @@ pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpandd))] pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, i32x16::ZERO)) } /// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27580,8 +26680,7 @@ pub unsafe fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpandd))] pub unsafe fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let and = simd_and(a.as_i32x8(), b.as_i32x8()); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, i32x8::ZERO)) } /// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -27605,8 +26704,7 @@ pub unsafe fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpandd))] pub unsafe fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let and = simd_and(a.as_i32x4(), b.as_i32x4()); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, i32x4::ZERO)) } /// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. @@ -27641,8 +26739,7 @@ pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpandq))] pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let and = _mm512_and_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, i64x8::ZERO)) } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27666,8 +26763,7 @@ pub unsafe fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpandq))] pub unsafe fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let and = simd_and(a.as_i64x4(), b.as_i64x4()); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, i64x4::ZERO)) } /// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -27691,8 +26787,7 @@ pub unsafe fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpandq))] pub unsafe fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let and = simd_and(a.as_i64x2(), b.as_i64x2()); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, and, zero)) + transmute(simd_select_bitmask(k, and, i64x2::ZERO)) } /// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. @@ -27738,8 +26833,7 @@ pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpord))] pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, i32x16::ZERO)) } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. @@ -27774,8 +26868,7 @@ pub unsafe fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m #[cfg_attr(test, assert_instr(vpord))] pub unsafe fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let or = _mm256_or_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, i32x8::ZERO)) } /// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. 
@@ -27810,8 +26903,7 @@ pub unsafe fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128 #[cfg_attr(test, assert_instr(vpord))] pub unsafe fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let or = _mm_or_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, i32x4::ZERO)) } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. @@ -27846,8 +26938,7 @@ pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m #[cfg_attr(test, assert_instr(vporq))] pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let or = _mm512_or_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, i64x8::ZERO)) } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. @@ -27882,8 +26973,7 @@ pub unsafe fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m #[cfg_attr(test, assert_instr(vporq))] pub unsafe fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let or = _mm256_or_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, i64x4::ZERO)) } /// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. @@ -27918,8 +27008,7 @@ pub unsafe fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128 #[cfg_attr(test, assert_instr(vporq))] pub unsafe fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let or = _mm_or_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, or, zero)) + transmute(simd_select_bitmask(k, or, i64x2::ZERO)) } /// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst. @@ -27965,8 +27054,7 @@ pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: _ #[cfg_attr(test, assert_instr(vpxord))] pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, i32x16::ZERO)) } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. @@ -28001,8 +27089,7 @@ pub unsafe fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpxord))] pub unsafe fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let xor = _mm256_xor_epi32(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, i32x8::ZERO)) } /// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. 
@@ -28037,8 +27124,7 @@ pub unsafe fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpxord))] pub unsafe fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let xor = _mm_xor_epi32(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, i32x4::ZERO)) } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. @@ -28073,8 +27159,7 @@ pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __ #[cfg_attr(test, assert_instr(vpxorq))] pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let xor = _mm512_xor_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, i64x8::ZERO)) } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. @@ -28109,8 +27194,7 @@ pub unsafe fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __ #[cfg_attr(test, assert_instr(vpxorq))] pub unsafe fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let xor = _mm256_xor_epi64(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, i64x4::ZERO)) } /// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. @@ -28145,8 +27229,7 @@ pub unsafe fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m12 #[cfg_attr(test, assert_instr(vpxorq))] pub unsafe fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let xor = _mm_xor_epi64(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, xor, zero)) + transmute(simd_select_bitmask(k, xor, i64x2::ZERO)) } /// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst. @@ -28197,8 +27280,7 @@ pub unsafe fn _mm512_mask_andnot_epi32( #[cfg_attr(test, assert_instr(vpandnd))] pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { let andnot = _mm512_andnot_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, i32x16::ZERO)) } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -28229,8 +27311,7 @@ pub unsafe fn _mm256_mask_andnot_epi32( pub unsafe fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32)); let andnot = simd_and(not.as_i32x8(), b.as_i32x8()); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, i32x8::ZERO)) } /// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -28256,8 +27337,7 @@ pub unsafe fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ pub unsafe fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32)); let andnot = simd_and(not.as_i32x4(), b.as_i32x4()); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, i32x4::ZERO)) } /// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst. @@ -28297,8 +27377,7 @@ pub unsafe fn _mm512_mask_andnot_epi64( #[cfg_attr(test, assert_instr(vpandnq))] pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { let andnot = _mm512_andnot_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, i64x8::ZERO)) } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -28329,8 +27408,7 @@ pub unsafe fn _mm256_mask_andnot_epi64( pub unsafe fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i { let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64)); let andnot = simd_and(not.as_i64x4(), b.as_i64x4()); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, i64x4::ZERO)) } /// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -28356,8 +27434,7 @@ pub unsafe fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __ pub unsafe fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64)); let andnot = simd_and(not.as_i64x2(), b.as_i64x2()); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, andnot, zero)) + transmute(simd_select_bitmask(k, andnot, i64x2::ZERO)) } /// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst. @@ -29234,8 +28311,7 @@ pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m5 #[cfg_attr(test, assert_instr(vpbroadcastd))] pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i { let r = _mm512_set1_epi32(a).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -29259,8 +28335,7 @@ pub unsafe fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m25 #[cfg_attr(test, assert_instr(vpbroadcastd))] pub unsafe fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i { let r = _mm256_set1_epi32(a).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
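// [Editor's note, not part of the diff] The 256-/128-bit maskz `andnot`
// variants above have no dedicated helper, so they build NOT(a) by XORing with
// an all-ones vector and then AND the result with b before the zeroing select.
// The same identity on a scalar (illustrative only):
fn andnot32(a: u32, b: u32) -> u32 {
    // (a ^ u32::MAX) == !a, so this computes !a & b, matching vpandnd semantics
    (a ^ u32::MAX) & b
}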
@@ -29284,8 +28359,7 @@ pub unsafe fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i #[cfg_attr(test, assert_instr(vpbroadcastd))] pub unsafe fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i { let r = _mm_set1_epi32(a).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Broadcast 64-bit integer `a` to all elements of `dst`. @@ -29319,8 +28393,7 @@ pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m51 #[cfg_attr(test, assert_instr(vpbroadcastq))] pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i { let r = _mm512_set1_epi64(a).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x8::ZERO)) } /// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -29344,8 +28417,7 @@ pub unsafe fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m25 #[cfg_attr(test, assert_instr(vpbroadcastq))] pub unsafe fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i { let r = _mm256_set1_epi64x(a).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x4::ZERO)) } /// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -29369,8 +28441,7 @@ pub unsafe fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i #[cfg_attr(test, assert_instr(vpbroadcastq))] pub unsafe fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i { let r = _mm_set1_epi64x(a).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i64x2::ZERO)) } /// Set packed 64-bit integers in dst with the repeated 4 element sequence. 
@@ -30616,7 +29687,7 @@ pub unsafe fn _mm512_cmp_epu32_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i32x16::splat(0), + 3 => i32x16::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -30641,12 +29712,12 @@ pub unsafe fn _mm512_mask_cmp_epu32_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_u32x16(); let b = b.as_u32x16(); - let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::splat(0)); + let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x16::splat(0), + 3 => i32x16::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -30674,7 +29745,7 @@ pub unsafe fn _mm256_cmp_epu32_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i32x8::splat(0), + 3 => i32x8::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -30699,12 +29770,12 @@ pub unsafe fn _mm256_mask_cmp_epu32_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_u32x8(); let b = b.as_u32x8(); - let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::splat(0)); + let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x8::splat(0), + 3 => i32x8::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -30729,7 +29800,7 @@ pub unsafe fn _mm_cmp_epu32_mask(a: __m128i, b: __m 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i32x4::splat(0), + 3 => i32x4::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -30754,12 +29825,12 @@ pub unsafe fn _mm_mask_cmp_epu32_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_u32x4(); let b = b.as_u32x4(); - let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::splat(0)); + let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x4::splat(0), + 3 => i32x4::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -31183,7 +30254,7 @@ pub unsafe fn _mm512_cmp_epi32_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i32x16::splat(0), + 3 => i32x16::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -31208,12 +30279,12 @@ pub unsafe fn _mm512_mask_cmp_epi32_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_i32x16(); let b = b.as_i32x16(); - let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::splat(0)); + let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x16::splat(0), + 3 => i32x16::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -31241,7 +30312,7 @@ pub unsafe fn _mm256_cmp_epi32_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i32x8::splat(0), + 3 => i32x8::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -31266,12 +30337,12 @@ pub unsafe fn _mm256_mask_cmp_epi32_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_i32x8(); let b = b.as_i32x8(); - let k1 = simd_select_bitmask(k1, 
i32x8::splat(-1), i32x8::splat(0)); + let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x8::splat(0), + 3 => i32x8::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -31296,7 +30367,7 @@ pub unsafe fn _mm_cmp_epi32_mask(a: __m128i, b: __m 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i32x4::splat(0), + 3 => i32x4::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -31321,12 +30392,12 @@ pub unsafe fn _mm_mask_cmp_epi32_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_i32x4(); let b = b.as_i32x4(); - let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::splat(0)); + let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i32x4::splat(0), + 3 => i32x4::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -31750,7 +30821,7 @@ pub unsafe fn _mm512_cmp_epu64_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i64x8::splat(0), + 3 => i64x8::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -31775,12 +30846,12 @@ pub unsafe fn _mm512_mask_cmp_epu64_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_u64x8(); let b = b.as_u64x8(); - let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::splat(0)); + let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x8::splat(0), + 3 => i64x8::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -31808,7 +30879,7 @@ pub unsafe fn _mm256_cmp_epu64_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i64x4::splat(0), + 3 => i64x4::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -31833,12 +30904,12 @@ pub unsafe fn _mm256_mask_cmp_epu64_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_u64x4(); let b = b.as_u64x4(); - let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::splat(0)); + let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x4::splat(0), + 3 => i64x4::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -31863,7 +30934,7 @@ pub unsafe fn _mm_cmp_epu64_mask(a: __m128i, b: __m 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i64x2::splat(0), + 3 => i64x2::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -31888,12 +30959,12 @@ pub unsafe fn _mm_mask_cmp_epu64_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_u64x2(); let b = b.as_u64x2(); - let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::splat(0)); + let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x2::splat(0), + 3 => i64x2::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -32317,7 +31388,7 @@ pub unsafe fn 
_mm512_cmp_epi64_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i64x8::splat(0), + 3 => i64x8::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -32342,12 +31413,12 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_i64x8(); let b = b.as_i64x8(); - let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::splat(0)); + let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x8::splat(0), + 3 => i64x8::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -32375,7 +31446,7 @@ pub unsafe fn _mm256_cmp_epi64_mask( 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i64x4::splat(0), + 3 => i64x4::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -32400,12 +31471,12 @@ pub unsafe fn _mm256_mask_cmp_epi64_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_i64x4(); let b = b.as_i64x4(); - let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::splat(0)); + let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x4::splat(0), + 3 => i64x4::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -32430,7 +31501,7 @@ pub unsafe fn _mm_cmp_epi64_mask(a: __m128i, b: __m 0 => simd_eq(a, b), 1 => simd_lt(a, b), 2 => simd_le(a, b), - 3 => i64x2::splat(0), + 3 => i64x2::ZERO, 4 => simd_ne(a, b), 5 => simd_ge(a, b), 6 => simd_gt(a, b), @@ -32455,12 +31526,12 @@ pub unsafe fn _mm_mask_cmp_epi64_mask( static_assert_uimm_bits!(IMM3, 3); let a = a.as_i64x2(); let b = b.as_i64x2(); - let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::splat(0)); + let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO); let r = match IMM3 { 0 => simd_and(k1, simd_eq(a, b)), 1 => simd_and(k1, simd_lt(a, b)), 2 => simd_and(k1, simd_le(a, b)), - 3 => i64x2::splat(0), + 3 => i64x2::ZERO, 4 => simd_and(k1, simd_ne(a, b)), 5 => simd_and(k1, simd_ge(a, b)), 6 => simd_and(k1, simd_gt(a, b)), @@ -32486,11 +31557,7 @@ pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_add_unordered(simd_select_bitmask( - k, - a.as_i32x16(), - _mm512_setzero_si512().as_i32x16(), - )) + simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } /// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a. @@ -32510,11 +31577,7 @@ pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_add_unordered(simd_select_bitmask( - k, - a.as_i64x8(), - _mm512_setzero_si512().as_i64x8(), - )) + simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a. 
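// [Editor's note, not part of the diff] In the `_mm*_cmp_ep*_mask` hunks above,
// IMM3 predicate 3 is the always-false comparison, which is why its arm returns
// the all-zero vector (now spelled `ZERO`); predicate 7, not shown in these
// hunks, is always-true. A scalar model of the predicate table for one lane
// (illustrative only, mirroring the simd_* calls in the diff):
fn cmp_lane(imm3: u8, a: u32, b: u32) -> bool {
    match imm3 {
        0 => a == b, // simd_eq
        1 => a < b,  // simd_lt
        2 => a <= b, // simd_le
        3 => false,  // the ZERO arm in the diff
        4 => a != b, // simd_ne
        5 => a >= b, // simd_ge
        6 => a > b,  // simd_gt
        _ => true,   // always-true predicate
    }
}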
@@ -32730,11 +31793,7 @@ pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 { - simd_reduce_max(simd_select_bitmask( - k, - a.as_u32x16(), - _mm512_setzero_si512().as_u32x16(), - )) + simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) } /// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a. @@ -32754,11 +31813,7 @@ pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 { - simd_reduce_max(simd_select_bitmask( - k, - a.as_u64x8(), - _mm512_setzero_si512().as_u64x8(), - )) + simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) } /// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a. @@ -33008,11 +32063,7 @@ pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 { - simd_reduce_or(simd_select_bitmask( - k, - a.as_i32x16(), - _mm512_setzero_si512().as_i32x16(), - )) + simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) } /// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a. @@ -33032,11 +32083,7 @@ pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 { - simd_reduce_or(simd_select_bitmask( - k, - a.as_i64x8(), - _mm512_setzero_si512().as_i64x8(), - )) + simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) } /// Returns vector of type `__m512d` with indeterminate elements. @@ -33049,7 +32096,7 @@ pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. pub unsafe fn _mm512_undefined_pd() -> __m512d { - _mm512_set1_pd(0.0) + const { mem::zeroed() } } /// Returns vector of type `__m512` with indeterminate elements. @@ -33062,7 +32109,7 @@ pub unsafe fn _mm512_undefined_pd() -> __m512d { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. pub unsafe fn _mm512_undefined_ps() -> __m512 { - _mm512_set1_ps(0.0) + const { mem::zeroed() } } /// Return vector of type __m512i with indeterminate elements. @@ -33075,7 +32122,7 @@ pub unsafe fn _mm512_undefined_ps() -> __m512 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. pub unsafe fn _mm512_undefined_epi32() -> __m512i { - _mm512_set1_epi32(0) + const { mem::zeroed() } } /// Return vector of type __m512 with indeterminate elements. @@ -33088,7 +32135,7 @@ pub unsafe fn _mm512_undefined_epi32() -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] // This intrinsic has no corresponding instruction. 
pub unsafe fn _mm512_undefined() -> __m512 { - _mm512_set1_ps(0.0) + const { mem::zeroed() } } /// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary. @@ -35735,7 +34782,7 @@ pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(vmaxss( a.as_f32x4(), b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -35769,7 +34816,7 @@ pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { transmute(vmaxsd( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -35803,7 +34850,7 @@ pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(vminss( a.as_f32x4(), b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -35837,7 +34884,7 @@ pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { transmute(vminsd( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -35895,12 +34942,7 @@ pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ss))] pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 { - transmute(vrsqrt14ss( - a.as_f32x4(), - b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - 0b1, - )) + transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) } /// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35922,12 +34964,7 @@ pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14ss))] pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vrsqrt14ss( - a.as_f32x4(), - b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - k, - )) + transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35938,12 +34975,7 @@ pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14sd))] pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d { - transmute(vrsqrt14sd( - a.as_f64x2(), - b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - 0b1, - )) + transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) } /// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. 
The maximum relative error for this approximation is less than 2^-14. @@ -35965,12 +34997,7 @@ pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m1 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrsqrt14sd))] pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vrsqrt14sd( - a.as_f64x2(), - b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - k, - )) + transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -35981,12 +35008,7 @@ pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m12 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ss))] pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 { - transmute(vrcp14ss( - a.as_f32x4(), - b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - 0b1, - )) + transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) } /// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14. @@ -36008,12 +35030,7 @@ pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14ss))] pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { - transmute(vrcp14ss( - a.as_f32x4(), - b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), - k, - )) + transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) } /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. @@ -36024,12 +35041,7 @@ pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14sd))] pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d { - transmute(vrcp14sd( - a.as_f64x2(), - b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - 0b1, - )) + transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) } /// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14. 
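In the scalar (`_ss`/`_sd`) hunks above, the zero vector is only the pass-through `src` operand: the unmasked forms hard-wire the mask to 0b1, so `src` is never read, while the `maskz` forms zero the lower lane when the mask bit is clear. A rough scalar model of that calling convention, assuming (as the doc comments state) that the upper lanes are always copied from `a`; an exact reciprocal stands in for rcp14 purely for illustration:

// Sketch only: lane 0 comes from the operation when mask bit 0 is set,
// otherwise from `src`; upper lanes are copied from `a`.
fn model_mask_lower_op(a: [f32; 4], b: [f32; 4], src: [f32; 4], k: u8, op: fn(f32) -> f32) -> [f32; 4] {
    let mut dst = a;
    dst[0] = if k & 1 != 0 { op(b[0]) } else { src[0] };
    dst
}

fn main() {
    let a = [1.0, 2.0, 3.0, 4.0];
    let b = [4.0, 0.0, 0.0, 0.0];
    // Mask forced to 0b1: the all-zero `src` is never selected.
    let unmasked = model_mask_lower_op(a, b, [0.0; 4], 0b1, |x| 1.0 / x);
    // maskz form with the bit clear: lane 0 is zeroed.
    let maskz = model_mask_lower_op(a, b, [0.0; 4], 0b0, |x| 1.0 / x);
    assert_eq!(unmasked, [0.25, 2.0, 3.0, 4.0]);
    assert_eq!(maskz, [0.0, 2.0, 3.0, 4.0]);
}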
@@ -36051,12 +35063,7 @@ pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vrcp14sd))] pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d { - transmute(vrcp14sd( - a.as_f64x2(), - b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), - k, - )) + transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) } /// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element. @@ -36070,7 +35077,7 @@ pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 { transmute(vgetexpss( a.as_f32x4(), b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, 0b1, _MM_FROUND_NO_EXC, )) @@ -36104,7 +35111,7 @@ pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(vgetexpss( a.as_f32x4(), b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, k, _MM_FROUND_NO_EXC, )) @@ -36121,7 +35128,7 @@ pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d { transmute(vgetexpsd( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, 0b1, _MM_FROUND_NO_EXC, )) @@ -36155,7 +35162,7 @@ pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 transmute(vgetexpsd( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, k, _MM_FROUND_NO_EXC, )) @@ -36190,8 +35197,14 @@ pub unsafe fn _mm_getmant_ss< static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, zero, 0b1, _MM_FROUND_CUR_DIRECTION); + let r = vgetmantss( + a, + b, + SIGN << 2 | NORM, + f32x4::ZERO, + 0b1, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -36261,8 +35274,14 @@ pub unsafe fn _mm_maskz_getmant_ss< static_assert_uimm_bits!(SIGN, 2); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION); + let r = vgetmantss( + a, + b, + SIGN << 2 | NORM, + f32x4::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -36295,8 +35314,14 @@ pub unsafe fn _mm_getmant_sd< static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, zero, 0b1, _MM_FROUND_CUR_DIRECTION); + let r = vgetmantsd( + a, + b, + SIGN << 2 | NORM, + f64x2::ZERO, + 0b1, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -36366,8 +35391,14 @@ pub unsafe fn _mm_maskz_getmant_sd< static_assert_uimm_bits!(SIGN, 2); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, zero, k, _MM_FROUND_CUR_DIRECTION); + let r = vgetmantsd( + a, + b, + SIGN << 2 | NORM, + f64x2::ZERO, + k, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -36389,8 +35420,14 @@ pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128) -> __m128 static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vrndscaless(a, b, zero, 0b11111111, IMM8, _MM_FROUND_CUR_DIRECTION); + let r = vrndscaless( + a, + b, + 
f32x4::ZERO, + 0b11111111, + IMM8, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -36444,8 +35481,7 @@ pub unsafe fn _mm_maskz_roundscale_ss( static_assert_uimm_bits!(IMM8, 8); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vrndscaless(a, b, zero, k, IMM8, _MM_FROUND_CUR_DIRECTION); + let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -36467,8 +35503,14 @@ pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d) -> __m1 static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vrndscalesd(a, b, zero, 0b11111111, IMM8, _MM_FROUND_CUR_DIRECTION); + let r = vrndscalesd( + a, + b, + f64x2::ZERO, + 0b11111111, + IMM8, + _MM_FROUND_CUR_DIRECTION, + ); transmute(r) } @@ -36522,8 +35564,7 @@ pub unsafe fn _mm_maskz_roundscale_sd( static_assert_uimm_bits!(IMM8, 8); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vrndscalesd(a, b, zero, k, IMM8, _MM_FROUND_CUR_DIRECTION); + let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION); transmute(r) } @@ -36537,8 +35578,13 @@ pub unsafe fn _mm_maskz_roundscale_sd( pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 { let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - transmute(vscalefss(a, b, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION)) + transmute(vscalefss( + a, + b, + f32x4::ZERO, + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) } /// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
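A minimal scalar sketch of the scalef operation used above, assuming the usual definition dst = a * 2^floor(b) and ignoring the rounding-mode, exception and special-value handling of the real instruction:

// Sketch only: scale a by two raised to the floor of b.
fn scalef(a: f32, b: f32) -> f32 {
    a * 2f32.powi(b.floor() as i32)
}

fn main() {
    assert_eq!(scalef(1.5, 3.2), 12.0);
    assert_eq!(scalef(3.0, -1.0), 1.5);
}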
@@ -36566,7 +35612,7 @@ pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 { transmute(vscalefss( a.as_f32x4(), b.as_f32x4(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -36583,7 +35629,7 @@ pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d { transmute(vscalefsd( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -36617,7 +35663,7 @@ pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128 transmute(vscalefsd( a.as_f64x2(), b.as_f64x2(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -37082,8 +36128,7 @@ pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128) -> __m static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vaddss(a, b, zero, 0b1, ROUNDING); + let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37139,8 +36184,7 @@ pub unsafe fn _mm_maskz_add_round_ss( static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vaddss(a, b, zero, k, ROUNDING); + let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING); transmute(r) } @@ -37163,8 +36207,7 @@ pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d) -> _ static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vaddsd(a, b, zero, 0b1, ROUNDING); + let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37220,8 +36263,7 @@ pub unsafe fn _mm_maskz_add_round_sd( static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vaddsd(a, b, zero, k, ROUNDING); + let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING); transmute(r) } @@ -37244,8 +36286,7 @@ pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128) -> __m static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vsubss(a, b, zero, 0b1, ROUNDING); + let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37301,8 +36342,7 @@ pub unsafe fn _mm_maskz_sub_round_ss( static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vsubss(a, b, zero, k, ROUNDING); + let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING); transmute(r) } @@ -37325,8 +36365,7 @@ pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d) -> _ static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vsubsd(a, b, zero, 0b1, ROUNDING); + let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37382,8 +36421,7 @@ pub unsafe fn _mm_maskz_sub_round_sd( static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vsubsd(a, b, zero, k, ROUNDING); + let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING); transmute(r) } @@ -37406,8 +36444,7 @@ pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128) -> __m static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vmulss(a, b, zero, 0b1, ROUNDING); + let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37463,8 +36500,7 @@ pub unsafe fn _mm_maskz_mul_round_ss( static_assert_rounding!(ROUNDING); let a = 
a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vmulss(a, b, zero, k, ROUNDING); + let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING); transmute(r) } @@ -37487,8 +36523,7 @@ pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d) -> _ static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vmulsd(a, b, zero, 0b1, ROUNDING); + let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37544,8 +36579,7 @@ pub unsafe fn _mm_maskz_mul_round_sd( static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vmulsd(a, b, zero, k, ROUNDING); + let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING); transmute(r) } @@ -37568,8 +36602,7 @@ pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128) -> __m static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vdivss(a, b, zero, 0b1, ROUNDING); + let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37625,8 +36658,7 @@ pub unsafe fn _mm_maskz_div_round_ss( static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vdivss(a, b, zero, k, ROUNDING); + let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING); transmute(r) } @@ -37649,8 +36681,7 @@ pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d) -> _ static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vdivsd(a, b, zero, 0b1, ROUNDING); + let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING); transmute(r) } @@ -37706,8 +36737,7 @@ pub unsafe fn _mm_maskz_div_round_sd( static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vdivsd(a, b, zero, k, ROUNDING); + let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING); transmute(r) } @@ -37724,8 +36754,7 @@ pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128) -> __m128 { static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vmaxss(a, b, zero, 0b1, SAE); + let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE); transmute(r) } @@ -37765,8 +36794,7 @@ pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vmaxss(a, b, zero, k, SAE); + let r = vmaxss(a, b, f32x4::ZERO, k, SAE); transmute(r) } @@ -37783,8 +36811,7 @@ pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d) -> __m128 static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vmaxsd(a, b, zero, 0b1, SAE); + let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE); transmute(r) } @@ -37828,8 +36855,7 @@ pub unsafe fn _mm_maskz_max_round_sd( static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vmaxsd(a, b, zero, k, SAE); + let r = vmaxsd(a, b, f64x2::ZERO, k, SAE); transmute(r) } @@ -37846,8 +36872,7 @@ pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128) -> __m128 { static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vminss(a, b, zero, 0b1, SAE); + let r = vminss(a, b, f32x4::ZERO, 0b1, SAE); transmute(r) } @@ -37887,8 +36912,7 @@ pub unsafe fn _mm_maskz_min_round_ss(k: 
__mmask8, a: __m128, b: static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vminss(a, b, zero, k, SAE); + let r = vminss(a, b, f32x4::ZERO, k, SAE); transmute(r) } @@ -37905,8 +36929,7 @@ pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d) -> __m128 static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vminsd(a, b, zero, 0b1, SAE); + let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE); transmute(r) } @@ -37950,8 +36973,7 @@ pub unsafe fn _mm_maskz_min_round_sd( static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vminsd(a, b, zero, k, SAE); + let r = vminsd(a, b, f64x2::ZERO, k, SAE); transmute(r) } @@ -38106,8 +37128,7 @@ pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128) -> __m12 static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetexpss(a, b, zero, 0b1, SAE); + let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE); transmute(r) } @@ -38151,8 +37172,7 @@ pub unsafe fn _mm_maskz_getexp_round_ss( static_assert_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetexpss(a, b, zero, k, SAE); + let r = vgetexpss(a, b, f32x4::ZERO, k, SAE); transmute(r) } @@ -38169,8 +37189,7 @@ pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d) -> __m static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetexpsd(a, b, zero, 0b1, SAE); + let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE); transmute(r) } @@ -38214,8 +37233,7 @@ pub unsafe fn _mm_maskz_getexp_round_sd( static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetexpsd(a, b, zero, k, SAE); + let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE); transmute(r) } @@ -38250,8 +37268,7 @@ pub unsafe fn _mm_getmant_round_ss< static_assert_mantissas_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, zero, 0b1, SAE); + let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE); transmute(r) } @@ -38325,8 +37342,7 @@ pub unsafe fn _mm_maskz_getmant_round_ss< static_assert_mantissas_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vgetmantss(a, b, SIGN << 2 | NORM, zero, k, SAE); + let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE); transmute(r) } @@ -38361,8 +37377,7 @@ pub unsafe fn _mm_getmant_round_sd< static_assert_mantissas_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, zero, 0b1, SAE); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE); transmute(r) } @@ -38436,8 +37451,7 @@ pub unsafe fn _mm_maskz_getmant_round_sd< static_assert_mantissas_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vgetmantsd(a, b, SIGN << 2 | NORM, zero, k, SAE); + let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE); transmute(r) } @@ -38464,8 +37478,7 @@ pub unsafe fn _mm_roundscale_round_ss( static_assert_mantissas_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vrndscaless(a, b, zero, 0b11111111, IMM8, SAE); + let r = 
vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE); transmute(r) } @@ -38523,8 +37536,7 @@ pub unsafe fn _mm_maskz_roundscale_round_ss( static_assert_mantissas_sae!(SAE); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vrndscaless(a, b, zero, k, IMM8, SAE); + let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE); transmute(r) } @@ -38551,8 +37563,7 @@ pub unsafe fn _mm_roundscale_round_sd( static_assert_mantissas_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vrndscalesd(a, b, zero, 0b11111111, IMM8, SAE); + let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE); transmute(r) } @@ -38610,8 +37621,7 @@ pub unsafe fn _mm_maskz_roundscale_round_sd( static_assert_mantissas_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vrndscalesd(a, b, zero, k, IMM8, SAE); + let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE); transmute(r) } @@ -38634,8 +37644,7 @@ pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128) -> static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vscalefss(a, b, zero, 0b11111111, ROUNDING); + let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -38691,8 +37700,7 @@ pub unsafe fn _mm_maskz_scalef_round_ss( static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f32x4(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vscalefss(a, b, zero, k, ROUNDING); + let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING); transmute(r) } @@ -38715,8 +37723,7 @@ pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d) - static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vscalefsd(a, b, zero, 0b11111111, ROUNDING); + let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -38771,8 +37778,7 @@ pub unsafe fn _mm_maskz_scalef_round_sd( static_assert_rounding!(ROUNDING); let a = a.as_f64x2(); let b = b.as_f64x2(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vscalefsd(a, b, zero, k, ROUNDING); + let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING); transmute(r) } @@ -40084,7 +39090,7 @@ pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d transmute(vcvtss2sd( a.as_f64x2(), b.as_f32x4(), - _mm_setzero_pd().as_f64x2(), + f64x2::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -40118,7 +39124,7 @@ pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 { transmute(vcvtsd2ss( a.as_f32x4(), b.as_f64x2(), - _mm_setzero_ps().as_f32x4(), + f32x4::ZERO, k, _MM_FROUND_CUR_DIRECTION, )) @@ -40137,8 +39143,7 @@ pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128) -> __m12 static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f32x4(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vcvtss2sd(a, b, zero, 0b11111111, SAE); + let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE); transmute(r) } @@ -40182,8 +39187,7 @@ pub unsafe fn _mm_maskz_cvt_roundss_sd( static_assert_sae!(SAE); let a = a.as_f64x2(); let b = b.as_f32x4(); - let zero = _mm_setzero_pd().as_f64x2(); - let r = vcvtss2sd(a, b, zero, k, SAE); + let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE); transmute(r) } @@ -40205,8 +39209,7 @@ pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d) -> static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); - let zero = _mm_setzero_ps().as_f32x4(); - 
let r = vcvtsd2ss(a, b, zero, 0b11111111, ROUNDING); + let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING); transmute(r) } @@ -40260,8 +39263,7 @@ pub unsafe fn _mm_maskz_cvt_roundsd_ss( static_assert_rounding!(ROUNDING); let a = a.as_f32x4(); let b = b.as_f64x2(); - let zero = _mm_setzero_ps().as_f32x4(); - let r = vcvtsd2ss(a, b, zero, k, ROUNDING); + let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING); transmute(r) } diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs index 372b10f32d..73b0df5481 100644 --- a/crates/core_arch/src/x86/avx512fp16.rs +++ b/crates/core_arch/src/x86/avx512fp16.rs @@ -239,7 +239,7 @@ pub unsafe fn _mm512_setr_ph( #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm_setzero_ph() -> __m128h { - transmute(f16x8::splat(0.0)) + transmute(f16x8::ZERO) } /// Return vector of type __m256h with all elements set to zero. @@ -249,7 +249,7 @@ pub unsafe fn _mm_setzero_ph() -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm256_setzero_ph() -> __m256h { - transmute(f16x16::splat(0.0)) + transmute(f16x16::ZERO) } /// Return vector of type __m512h with all elements set to zero. @@ -259,7 +259,7 @@ pub unsafe fn _mm256_setzero_ph() -> __m256h { #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm512_setzero_ph() -> __m512h { - transmute(f16x32::splat(0.0)) + transmute(f16x32::ZERO) } /// Return vector of type `__m128h` with undefined elements. In practice, this returns the all-zero @@ -270,7 +270,7 @@ pub unsafe fn _mm512_setzero_ph() -> __m512h { #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm_undefined_ph() -> __m128h { - transmute(f16x8::splat(0.0)) + transmute(f16x8::ZERO) } /// Return vector of type `__m256h` with undefined elements. In practice, this returns the all-zero @@ -281,7 +281,7 @@ pub unsafe fn _mm_undefined_ph() -> __m128h { #[target_feature(enable = "avx512fp16,avx512vl")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm256_undefined_ph() -> __m256h { - transmute(f16x16::splat(0.0)) + transmute(f16x16::ZERO) } /// Return vector of type `__m512h` with undefined elements. In practice, this returns the all-zero @@ -292,7 +292,7 @@ pub unsafe fn _mm256_undefined_ph() -> __m256h { #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm512_undefined_ph() -> __m512h { - transmute(f16x32::splat(0.0)) + transmute(f16x32::ZERO) } /// Cast vector of type `__m128d` to type `__m128h`. 
This intrinsic is only used for compilation and @@ -15986,7 +15986,7 @@ pub unsafe fn _mm_cvtsi128_si16(a: __m128i) -> i16 { #[target_feature(enable = "avx512fp16")] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] pub unsafe fn _mm_cvtsi16_si128(a: i16) -> __m128i { - transmute(simd_insert!(i16x8::splat(0), 0, a)) + transmute(simd_insert!(i16x8::ZERO, 0, a)) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vbmi.rs b/crates/core_arch/src/x86/avx512vbmi.rs index f1e29963c4..3c16c9c424 100644 --- a/crates/core_arch/src/x86/avx512vbmi.rs +++ b/crates/core_arch/src/x86/avx512vbmi.rs @@ -46,8 +46,7 @@ pub unsafe fn _mm512_maskz_permutex2var_epi8( b: __m512i, ) -> __m512i { let permute = _mm512_permutex2var_epi8(a, idx, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -109,8 +108,7 @@ pub unsafe fn _mm256_maskz_permutex2var_epi8( b: __m256i, ) -> __m256i { let permute = _mm256_permutex2var_epi8(a, idx, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -172,8 +170,7 @@ pub unsafe fn _mm_maskz_permutex2var_epi8( b: __m128i, ) -> __m128i { let permute = _mm_permutex2var_epi8(a, idx, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) } /// Shuffle 8-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). @@ -230,8 +227,7 @@ pub unsafe fn _mm512_mask_permutexvar_epi8( #[cfg_attr(test, assert_instr(vpermb))] pub unsafe fn _mm512_maskz_permutexvar_epi8(k: __mmask64, idx: __m512i, a: __m512i) -> __m512i { let permute = _mm512_permutexvar_epi8(idx, a).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i8x64::ZERO)) } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. @@ -271,8 +267,7 @@ pub unsafe fn _mm256_mask_permutexvar_epi8( #[cfg_attr(test, assert_instr(vpermb))] pub unsafe fn _mm256_maskz_permutexvar_epi8(k: __mmask32, idx: __m256i, a: __m256i) -> __m256i { let permute = _mm256_permutexvar_epi8(idx, a).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i8x32::ZERO)) } /// Shuffle 8-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. 
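The vpermb hunks here all follow the same pattern; a plain-Rust sketch of the 16-byte case (the wider forms simply use more index bits), with cleared mask bits producing zeroed lanes exactly as the maskz intrinsics above do with their ZERO operand:

// Sketch only: each destination byte is picked from `a` by the low 4 bits
// of the matching index; inactive lanes stay zero.
fn maskz_permutexvar_epi8(k: u16, idx: [u8; 16], a: [u8; 16]) -> [u8; 16] {
    let mut dst = [0u8; 16];
    for i in 0..16 {
        if (k >> i) & 1 != 0 {
            dst[i] = a[(idx[i] & 0x0F) as usize];
        }
    }
    dst
}

fn main() {
    let a: [u8; 16] = core::array::from_fn(|i| i as u8 * 10);
    let rev: [u8; 16] = core::array::from_fn(|i| 15 - i as u8);
    // All lanes active: plain reversal; with the low half masked off, those lanes are 0.
    assert_eq!(maskz_permutexvar_epi8(0xFFFF, rev, a)[0], 150);
    assert_eq!(maskz_permutexvar_epi8(0xFF00, rev, a)[0], 0);
}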
@@ -312,8 +307,7 @@ pub unsafe fn _mm_mask_permutexvar_epi8( #[cfg_attr(test, assert_instr(vpermb))] pub unsafe fn _mm_maskz_permutexvar_epi8(k: __mmask16, idx: __m128i, a: __m128i) -> __m128i { let permute = _mm_permutexvar_epi8(idx, a).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, permute, zero)) + transmute(simd_select_bitmask(k, permute, i8x16::ZERO)) } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. @@ -353,8 +347,7 @@ pub unsafe fn _mm512_mask_multishift_epi64_epi8( #[cfg_attr(test, assert_instr(vpmultishiftqb))] pub unsafe fn _mm512_maskz_multishift_epi64_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let multishift = _mm512_multishift_epi64_epi8(a, b).as_i8x64(); - let zero = _mm512_setzero_si512().as_i8x64(); - transmute(simd_select_bitmask(k, multishift, zero)) + transmute(simd_select_bitmask(k, multishift, i8x64::ZERO)) } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. @@ -394,8 +387,7 @@ pub unsafe fn _mm256_mask_multishift_epi64_epi8( #[cfg_attr(test, assert_instr(vpmultishiftqb))] pub unsafe fn _mm256_maskz_multishift_epi64_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let multishift = _mm256_multishift_epi64_epi8(a, b).as_i8x32(); - let zero = _mm256_setzero_si256().as_i8x32(); - transmute(simd_select_bitmask(k, multishift, zero)) + transmute(simd_select_bitmask(k, multishift, i8x32::ZERO)) } /// For each 64-bit element in b, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of a, and store the 8 assembled bytes to the corresponding 64-bit element of dst. @@ -435,8 +427,7 @@ pub unsafe fn _mm_mask_multishift_epi64_epi8( #[cfg_attr(test, assert_instr(vpmultishiftqb))] pub unsafe fn _mm_maskz_multishift_epi64_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let multishift = _mm_multishift_epi64_epi8(a, b).as_i8x16(); - let zero = _mm_setzero_si128().as_i8x16(); - transmute(simd_select_bitmask(k, multishift, zero)) + transmute(simd_select_bitmask(k, multishift, i8x16::ZERO)) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs index b51720982d..a14d0d7081 100644 --- a/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/crates/core_arch/src/x86/avx512vbmi2.rs @@ -247,11 +247,7 @@ pub unsafe fn _mm512_mask_compress_epi16(src: __m512i, k: __mmask32, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] pub unsafe fn _mm512_maskz_compress_epi16(k: __mmask32, a: __m512i) -> __m512i { - transmute(vpcompressw( - a.as_i16x32(), - _mm512_setzero_si512().as_i16x32(), - k, - )) + transmute(vpcompressw(a.as_i16x32(), i16x32::ZERO, k)) } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
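A plain-Rust sketch of the compress semantics described above, shown for eight 16-bit lanes: active lanes are packed towards the low end, and the tail is passed through from `src`, which is why the maskz forms can simply hand the ZERO constant to the compress builtin:

// Sketch only: gather the active lanes of `a` contiguously, keep `src`
// for whatever is left over.
fn mask_compress_epi16(src: [i16; 8], k: u8, a: [i16; 8]) -> [i16; 8] {
    let mut dst = src;
    let mut out = 0;
    for i in 0..8 {
        if (k >> i) & 1 != 0 {
            dst[out] = a[i];
            out += 1;
        }
    }
    dst
}

fn main() {
    let a = [10, 11, 12, 13, 14, 15, 16, 17];
    // Lanes 1, 3 and 4 are active; they land in dst[0..3], the rest stays zero.
    assert_eq!(mask_compress_epi16([0; 8], 0b0001_1010, a), [11, 13, 14, 0, 0, 0, 0, 0]);
}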
@@ -273,11 +269,7 @@ pub unsafe fn _mm256_mask_compress_epi16(src: __m256i, k: __mmask16, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] pub unsafe fn _mm256_maskz_compress_epi16(k: __mmask16, a: __m256i) -> __m256i { - transmute(vpcompressw256( - a.as_i16x16(), - _mm256_setzero_si256().as_i16x16(), - k, - )) + transmute(vpcompressw256(a.as_i16x16(), i16x16::ZERO, k)) } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -299,11 +291,7 @@ pub unsafe fn _mm_mask_compress_epi16(src: __m128i, k: __mmask8, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] pub unsafe fn _mm_maskz_compress_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpcompressw128( - a.as_i16x8(), - _mm_setzero_si128().as_i16x8(), - k, - )) + transmute(vpcompressw128(a.as_i16x8(), i16x8::ZERO, k)) } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -325,11 +313,7 @@ pub unsafe fn _mm512_mask_compress_epi8(src: __m512i, k: __mmask64, a: __m512i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] pub unsafe fn _mm512_maskz_compress_epi8(k: __mmask64, a: __m512i) -> __m512i { - transmute(vpcompressb( - a.as_i8x64(), - _mm512_setzero_si512().as_i8x64(), - k, - )) + transmute(vpcompressb(a.as_i8x64(), i8x64::ZERO, k)) } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -351,11 +335,7 @@ pub unsafe fn _mm256_mask_compress_epi8(src: __m256i, k: __mmask32, a: __m256i) #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] pub unsafe fn _mm256_maskz_compress_epi8(k: __mmask32, a: __m256i) -> __m256i { - transmute(vpcompressb256( - a.as_i8x32(), - _mm256_setzero_si256().as_i8x32(), - k, - )) + transmute(vpcompressb256(a.as_i8x32(), i8x32::ZERO, k)) } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. @@ -377,11 +357,7 @@ pub unsafe fn _mm_mask_compress_epi8(src: __m128i, k: __mmask16, a: __m128i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] pub unsafe fn _mm_maskz_compress_epi8(k: __mmask16, a: __m128i) -> __m128i { - transmute(vpcompressb128( - a.as_i8x16(), - _mm_setzero_si128().as_i8x16(), - k, - )) + transmute(vpcompressb128(a.as_i8x16(), i8x16::ZERO, k)) } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
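Expand is the inverse pattern: consecutive low elements of `a` are scattered into the active lanes, and inactive lanes are taken from `src` (again all zeros in the maskz forms). A sketch for eight 16-bit lanes:

// Sketch only: the next unread element of `a` goes into each active lane.
fn mask_expand_epi16(src: [i16; 8], k: u8, a: [i16; 8]) -> [i16; 8] {
    let mut dst = src;
    let mut next = 0;
    for i in 0..8 {
        if (k >> i) & 1 != 0 {
            dst[i] = a[next];
            next += 1;
        }
    }
    dst
}

fn main() {
    let a = [10, 11, 12, 13, 14, 15, 16, 17];
    // a[0], a[1], a[2] land in the three active lanes 1, 3 and 4.
    assert_eq!(mask_expand_epi16([0; 8], 0b0001_1010, a), [0, 10, 0, 11, 12, 0, 0, 0]);
}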
@@ -403,11 +379,7 @@ pub unsafe fn _mm512_mask_expand_epi16(src: __m512i, k: __mmask32, a: __m512i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] pub unsafe fn _mm512_maskz_expand_epi16(k: __mmask32, a: __m512i) -> __m512i { - transmute(vpexpandw( - a.as_i16x32(), - _mm512_setzero_si512().as_i16x32(), - k, - )) + transmute(vpexpandw(a.as_i16x32(), i16x32::ZERO, k)) } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -429,11 +401,7 @@ pub unsafe fn _mm256_mask_expand_epi16(src: __m256i, k: __mmask16, a: __m256i) - #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] pub unsafe fn _mm256_maskz_expand_epi16(k: __mmask16, a: __m256i) -> __m256i { - transmute(vpexpandw256( - a.as_i16x16(), - _mm256_setzero_si256().as_i16x16(), - k, - )) + transmute(vpexpandw256(a.as_i16x16(), i16x16::ZERO, k)) } /// Load contiguous active 16-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -455,11 +423,7 @@ pub unsafe fn _mm_mask_expand_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandw))] pub unsafe fn _mm_maskz_expand_epi16(k: __mmask8, a: __m128i) -> __m128i { - transmute(vpexpandw128( - a.as_i16x8(), - _mm_setzero_si128().as_i16x8(), - k, - )) + transmute(vpexpandw128(a.as_i16x8(), i16x8::ZERO, k)) } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -481,11 +445,7 @@ pub unsafe fn _mm512_mask_expand_epi8(src: __m512i, k: __mmask64, a: __m512i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] pub unsafe fn _mm512_maskz_expand_epi8(k: __mmask64, a: __m512i) -> __m512i { - transmute(vpexpandb( - a.as_i8x64(), - _mm512_setzero_si512().as_i8x64(), - k, - )) + transmute(vpexpandb(a.as_i8x64(), i8x64::ZERO, k)) } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -507,11 +467,7 @@ pub unsafe fn _mm256_mask_expand_epi8(src: __m256i, k: __mmask32, a: __m256i) -> #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] pub unsafe fn _mm256_maskz_expand_epi8(k: __mmask32, a: __m256i) -> __m256i { - transmute(vpexpandb256( - a.as_i8x32(), - _mm256_setzero_si256().as_i8x32(), - k, - )) + transmute(vpexpandb256(a.as_i8x32(), i8x32::ZERO, k)) } /// Load contiguous active 8-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
@@ -533,11 +489,7 @@ pub unsafe fn _mm_mask_expand_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __ #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpexpandb))] pub unsafe fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i { - transmute(vpexpandb128( - a.as_i8x16(), - _mm_setzero_si128().as_i8x16(), - k, - )) + transmute(vpexpandb128(a.as_i8x16(), i8x16::ZERO, k)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. @@ -572,8 +524,7 @@ pub unsafe fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __ #[cfg_attr(test, assert_instr(vpshldvq))] pub unsafe fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { let shf = _mm512_shldv_epi64(a, b, c).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. @@ -608,8 +559,7 @@ pub unsafe fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __ #[cfg_attr(test, assert_instr(vpshldvq))] pub unsafe fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { let shf = _mm256_shldv_epi64(a, b, c).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 64-bits in dst. @@ -644,8 +594,7 @@ pub unsafe fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[cfg_attr(test, assert_instr(vpshldvq))] pub unsafe fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { let shf = _mm_shldv_epi64(a, b, c).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. @@ -685,8 +634,7 @@ pub unsafe fn _mm512_maskz_shldv_epi32( c: __m512i, ) -> __m512i { let shf = _mm512_shldv_epi32(a, b, c).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. 
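A single-lane, plain-Rust sketch of the variable funnel shift left described above (vpshldv), here for 32-bit lanes: `a` forms the upper half of the concatenation, `b` the lower half, the count is reduced modulo the lane width, and the upper half of the shifted value is kept. Masked-off lanes of the maskz forms become zero, as elsewhere.

// Sketch only: one lane of the left funnel shift.
fn shldv32(a: u32, b: u32, c: u32) -> u32 {
    let wide = ((a as u64) << 32) | (b as u64);
    ((wide << (c & 31)) >> 32) as u32
}

fn main() {
    // Shifting by 8 pulls the top 8 bits of b in behind a's low bits.
    assert_eq!(shldv32(0x1234_5678, 0xABCD_EF01, 8), 0x3456_78AB);
    // A count of 0 (after the modulo) leaves a unchanged.
    assert_eq!(shldv32(0x1234_5678, 0xABCD_EF01, 32), 0x1234_5678);
}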
@@ -721,8 +669,7 @@ pub unsafe fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __ #[cfg_attr(test, assert_instr(vpshldvd))] pub unsafe fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { let shf = _mm256_shldv_epi32(a, b, c).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 32-bits in dst. @@ -757,8 +704,7 @@ pub unsafe fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[cfg_attr(test, assert_instr(vpshldvd))] pub unsafe fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { let shf = _mm_shldv_epi32(a, b, c).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. @@ -798,8 +744,7 @@ pub unsafe fn _mm512_maskz_shldv_epi16( c: __m512i, ) -> __m512i { let shf = _mm512_shldv_epi16(a, b, c).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. @@ -839,8 +784,7 @@ pub unsafe fn _mm256_maskz_shldv_epi16( c: __m256i, ) -> __m256i { let shf = _mm256_shldv_epi16(a, b, c).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by the amount specified in the corresponding element of c, and store the upper 16-bits in dst. @@ -875,8 +819,7 @@ pub unsafe fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[cfg_attr(test, assert_instr(vpshldvw))] pub unsafe fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { let shf = _mm_shldv_epi16(a, b, c).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. @@ -911,8 +854,7 @@ pub unsafe fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __ #[cfg_attr(test, assert_instr(vpshrdvq))] pub unsafe fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i { let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. 
Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. @@ -947,8 +889,7 @@ pub unsafe fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __ #[cfg_attr(test, assert_instr(vpshrdvq))] pub unsafe fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 64-bits in dst. @@ -983,8 +924,7 @@ pub unsafe fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[cfg_attr(test, assert_instr(vpshrdvq))] pub unsafe fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { let shf = _mm_shrdv_epi64(a, b, c).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. @@ -1024,8 +964,7 @@ pub unsafe fn _mm512_maskz_shrdv_epi32( c: __m512i, ) -> __m512i { let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. @@ -1060,8 +999,7 @@ pub unsafe fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __ #[cfg_attr(test, assert_instr(vpshrdvd))] pub unsafe fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i { let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 32-bits in dst. @@ -1096,8 +1034,7 @@ pub unsafe fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[cfg_attr(test, assert_instr(vpshrdvd))] pub unsafe fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { let shf = _mm_shrdv_epi32(a, b, c).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. 
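The right-shifting counterpart (vpshrdv) swaps the roles: `b` supplies the upper half, `a` the lower half, and the low half of the shifted result is kept. A 16-bit, single-lane sketch:

// Sketch only: one lane of the right funnel shift.
fn shrdv16(a: u16, b: u16, c: u16) -> u16 {
    let wide = ((b as u32) << 16) | (a as u32);
    (wide >> (c & 15)) as u16
}

fn main() {
    // Shifting right by 4 drops a's low nibble and pulls b's low nibble in on top.
    assert_eq!(shrdv16(0x1234, 0xABCD, 4), 0xD123);
    assert_eq!(shrdv16(0x1234, 0xABCD, 16), 0x1234);
}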
@@ -1137,8 +1074,7 @@ pub unsafe fn _mm512_maskz_shrdv_epi16( c: __m512i, ) -> __m512i { let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. @@ -1178,8 +1114,7 @@ pub unsafe fn _mm256_maskz_shrdv_epi16( c: __m256i, ) -> __m256i { let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by the amount specified in the corresponding element of c, and store the lower 16-bits in dst. @@ -1214,8 +1149,7 @@ pub unsafe fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m12 #[cfg_attr(test, assert_instr(vpshrdvw))] pub unsafe fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i { let shf = _mm_shrdv_epi16(a, b, c).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). @@ -1265,8 +1199,7 @@ pub unsafe fn _mm512_maskz_shldi_epi64( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm512_shldi_epi64::(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). @@ -1316,8 +1249,7 @@ pub unsafe fn _mm256_maskz_shldi_epi64( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_shldi_epi64::(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Concatenate packed 64-bit integers in a and b producing an intermediate 128-bit result. Shift the result left by imm8 bits, and store the upper 64-bits in dst). @@ -1367,8 +1299,7 @@ pub unsafe fn _mm_maskz_shldi_epi64( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_shldi_epi64::(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. @@ -1418,8 +1349,7 @@ pub unsafe fn _mm512_maskz_shldi_epi32( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm512_shldi_epi32::(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. 
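These shldi/shrdi hunks differ from the variable forms only in taking the count as a const generic, which is what the static_assert_uimm_bits! calls guard. A sketch of the same 32-bit left funnel shift with the immediate checked at compile time; the inline const block stands in for the crate's macro and assumes a reasonably recent Rust:

// Sketch only: the "imm8 fits in 8 bits" check fails at compile time.
fn shldi32<const IMM8: i32>(a: u32, b: u32) -> u32 {
    const { assert!(IMM8 >= 0 && IMM8 < 256) };
    let wide = ((a as u64) << 32) | (b as u64);
    ((wide << (IMM8 as u32 & 31)) >> 32) as u32
}

fn main() {
    assert_eq!(shldi32::<8>(0x1234_5678, 0xABCD_EF01), 0x3456_78AB);
}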
@@ -1469,8 +1399,7 @@ pub unsafe fn _mm256_maskz_shldi_epi32( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_shldi_epi32::(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Concatenate packed 32-bit integers in a and b producing an intermediate 64-bit result. Shift the result left by imm8 bits, and store the upper 32-bits in dst. @@ -1520,8 +1449,7 @@ pub unsafe fn _mm_maskz_shldi_epi32( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_shldi_epi32::(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). @@ -1571,8 +1499,7 @@ pub unsafe fn _mm512_maskz_shldi_epi16( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm512_shldi_epi16::(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). @@ -1622,8 +1549,7 @@ pub unsafe fn _mm256_maskz_shldi_epi16( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_shldi_epi16::(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Concatenate packed 16-bit integers in a and b producing an intermediate 32-bit result. Shift the result left by imm8 bits, and store the upper 16-bits in dst). @@ -1673,8 +1599,7 @@ pub unsafe fn _mm_maskz_shldi_epi16( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_shldi_epi16::(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. @@ -1724,8 +1649,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi64( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm512_shrdi_epi64::(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x8::ZERO)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. @@ -1775,8 +1699,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi64( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_shrdi_epi64::(a, b).as_i64x4(); - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x4::ZERO)) } /// Concatenate packed 64-bit integers in b and a producing an intermediate 128-bit result. Shift the result right by imm8 bits, and store the lower 64-bits in dst. 
@@ -1826,8 +1749,7 @@ pub unsafe fn _mm_maskz_shrdi_epi64( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_shrdi_epi64::(a, b).as_i64x2(); - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i64x2::ZERO)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. @@ -1877,8 +1799,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi32( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm512_shrdi_epi32::(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x16::ZERO)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. @@ -1928,8 +1849,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi32( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_shrdi_epi32::(a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x8::ZERO)) } /// Concatenate packed 32-bit integers in b and a producing an intermediate 64-bit result. Shift the result right by imm8 bits, and store the lower 32-bits in dst. @@ -1979,8 +1899,7 @@ pub unsafe fn _mm_maskz_shrdi_epi32( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_shrdi_epi32::(a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i32x4::ZERO)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. @@ -2030,8 +1949,7 @@ pub unsafe fn _mm512_maskz_shrdi_epi16( ) -> __m512i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm512_shrdi_epi16::(a, b).as_i16x32(); - let zero = _mm512_setzero_si512().as_i16x32(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x32::ZERO)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. @@ -2081,8 +1999,7 @@ pub unsafe fn _mm256_maskz_shrdi_epi16( ) -> __m256i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm256_shrdi_epi16::(a, b).as_i16x16(); - let zero = _mm256_setzero_si256().as_i16x16(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x16::ZERO)) } /// Concatenate packed 16-bit integers in b and a producing an intermediate 32-bit result. Shift the result right by imm8 bits, and store the lower 16-bits in dst. 
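// Illustrative sketch (hypothetical helpers, not the stdarch code) of the
// immediate-count shldi/shrdi variants above: the shift amount is a const
// generic, which static_assert_uimm_bits!(IMM8, 8) bounds at compile time in
// the real intrinsics; a plain assert stands in for that check here.
fn shldi_u64_lane<const IMM8: u32>(a: u64, b: u64) -> u64 {
    assert!(IMM8 < 256, "imm8 must fit in 8 bits");
    // Concatenate a:b, shift left by IMM8 modulo 64, keep the upper 64 bits.
    let concat = ((a as u128) << 64) | (b as u128);
    ((concat << (IMM8 & 63)) >> 64) as u64
}

fn shrdi_u64_lane<const IMM8: u32>(a: u64, b: u64) -> u64 {
    assert!(IMM8 < 256, "imm8 must fit in 8 bits");
    // Concatenate b:a, shift right by IMM8 modulo 64, keep the lower 64 bits.
    let concat = ((b as u128) << 64) | (a as u128);
    (concat >> (IMM8 & 63)) as u64
}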
@@ -2132,8 +2049,7 @@ pub unsafe fn _mm_maskz_shrdi_epi16( ) -> __m128i { static_assert_uimm_bits!(IMM8, 8); let shf = _mm_shrdi_epi16::(a, b).as_i16x8(); - let zero = _mm_setzero_si128().as_i16x8(); - transmute(simd_select_bitmask(k, shf, zero)) + transmute(simd_select_bitmask(k, shf, i16x8::ZERO)) } #[allow(improper_ctypes)] diff --git a/crates/core_arch/src/x86/avx512vnni.rs b/crates/core_arch/src/x86/avx512vnni.rs index 2ed800d295..8d207d1638 100644 --- a/crates/core_arch/src/x86/avx512vnni.rs +++ b/crates/core_arch/src/x86/avx512vnni.rs @@ -46,8 +46,7 @@ pub unsafe fn _mm512_maskz_dpwssd_epi32( b: __m512i, ) -> __m512i { let r = _mm512_dpwssd_epi32(src, a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -106,8 +105,7 @@ pub unsafe fn _mm256_maskz_dpwssd_epi32( b: __m256i, ) -> __m256i { let r = _mm256_dpwssd_epi32(src, a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -156,8 +154,7 @@ pub unsafe fn _mm_mask_dpwssd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[cfg_attr(test, assert_instr(vpdpwssd))] pub unsafe fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { let r = _mm_dpwssd_epi32(src, a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -202,8 +199,7 @@ pub unsafe fn _mm512_maskz_dpwssds_epi32( b: __m512i, ) -> __m512i { let r = _mm512_dpwssds_epi32(src, a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -262,8 +258,7 @@ pub unsafe fn _mm256_maskz_dpwssds_epi32( b: __m256i, ) -> __m256i { let r = _mm256_dpwssds_epi32(src, a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. 
Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -317,8 +312,7 @@ pub unsafe fn _mm_maskz_dpwssds_epi32( b: __m128i, ) -> __m128i { let r = _mm_dpwssds_epi32(src, a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -363,8 +357,7 @@ pub unsafe fn _mm512_maskz_dpbusd_epi32( b: __m512i, ) -> __m512i { let r = _mm512_dpbusd_epi32(src, a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -423,8 +416,7 @@ pub unsafe fn _mm256_maskz_dpbusd_epi32( b: __m256i, ) -> __m256i { let r = _mm256_dpbusd_epi32(src, a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst. @@ -473,8 +465,7 @@ pub unsafe fn _mm_mask_dpbusd_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __ #[cfg_attr(test, assert_instr(vpdpbusd))] pub unsafe fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i) -> __m128i { let r = _mm_dpbusd_epi32(src, a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -519,8 +510,7 @@ pub unsafe fn _mm512_maskz_dpbusds_epi32( b: __m512i, ) -> __m512i { let r = _mm512_dpbusds_epi32(src, a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x16::ZERO)) } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. 
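// Scalar model (illustrative only; the function names are made up) of one
// 32-bit lane of the VNNI dot-product intrinsics above: two adjacent signed
// 16-bit products are added into the accumulator lane taken from src. The
// plain variant wraps on overflow, the "s" variant saturates.
fn dpwssd_lane(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
    // Each 16x16 product fits in an i32 (magnitude at most 2^30).
    let p0 = a[0] as i32 * b[0] as i32;
    let p1 = a[1] as i32 * b[1] as i32;
    src.wrapping_add(p0).wrapping_add(p1)
}

fn dpwssds_lane(src: i32, a: [i16; 2], b: [i16; 2]) -> i32 {
    // Compute the sum at wider precision, then clamp to the i32 range.
    let sum = src as i64 + a[0] as i64 * b[0] as i64 + a[1] as i64 * b[1] as i64;
    sum.clamp(i32::MIN as i64, i32::MAX as i64) as i32
}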
@@ -579,8 +569,7 @@ pub unsafe fn _mm256_maskz_dpbusds_epi32( b: __m256i, ) -> __m256i { let r = _mm256_dpbusds_epi32(src, a, b).as_i32x8(); - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x8::ZERO)) } /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst. @@ -634,8 +623,7 @@ pub unsafe fn _mm_maskz_dpbusds_epi32( b: __m128i, ) -> __m128i { let r = _mm_dpbusds_epi32(src, a, b).as_i32x4(); - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, r, zero)) + transmute(simd_select_bitmask(k, r, i32x4::ZERO)) } /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit diff --git a/crates/core_arch/src/x86/avx512vpopcntdq.rs b/crates/core_arch/src/x86/avx512vpopcntdq.rs index 316d9417e9..6dc5408fb1 100644 --- a/crates/core_arch/src/x86/avx512vpopcntdq.rs +++ b/crates/core_arch/src/x86/avx512vpopcntdq.rs @@ -7,14 +7,12 @@ //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +use crate::core_arch::simd::*; use crate::core_arch::x86::__m128i; use crate::core_arch::x86::__m256i; use crate::core_arch::x86::__m512i; use crate::core_arch::x86::__mmask16; use crate::core_arch::x86::__mmask8; -use crate::core_arch::x86::_mm256_setzero_si256; -use crate::core_arch::x86::_mm512_setzero_si512; -use crate::core_arch::x86::_mm_setzero_si128; use crate::core_arch::x86::m128iExt; use crate::core_arch::x86::m256iExt; use crate::core_arch::x86::m512iExt; @@ -46,8 +44,11 @@ pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i32x16()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x16()), + i32x16::ZERO, + )) } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -90,8 +91,11 @@ pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { - let zero = _mm256_setzero_si256().as_i32x8(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i32x8()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x8()), + i32x8::ZERO, + )) } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. @@ -134,8 +138,11 @@ pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntd))] pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { - let zero = _mm_setzero_si128().as_i32x4(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i32x4()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i32x4()), + i32x4::ZERO, + )) } /// For each packed 32-bit integer maps the value to the number of logical 1 bits. 
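// Scalar model (hypothetical helper, for illustration) of the maskz popcnt
// pattern above: count the one-bits in each 32-bit lane, then zero the lanes
// whose mask bit is clear — the role played by i32x4::ZERO in
// simd_select_bitmask(k, simd_ctpop(..), i32x4::ZERO).
fn maskz_popcnt_4x32(k: u8, a: [u32; 4]) -> [u32; 4] {
    let mut out = [0u32; 4];
    for i in 0..4 {
        if (k >> i) & 1 != 0 {
            out[i] = a[i].count_ones();
        }
    }
    out
}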
@@ -178,8 +185,11 @@ pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i64x8()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x8()), + i64x8::ZERO, + )) } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -222,8 +232,11 @@ pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { - let zero = _mm256_setzero_si256().as_i64x4(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i64x4()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x4()), + i64x4::ZERO, + )) } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. @@ -266,8 +279,11 @@ pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpopcntq))] pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { - let zero = _mm_setzero_si128().as_i64x2(); - transmute(simd_select_bitmask(k, simd_ctpop(a.as_i64x2()), zero)) + transmute(simd_select_bitmask( + k, + simd_ctpop(a.as_i64x2()), + i64x2::ZERO, + )) } /// For each packed 64-bit integer maps the value to the number of logical 1 bits. diff --git a/crates/core_arch/src/x86/gfni.rs b/crates/core_arch/src/x86/gfni.rs index 6b16996241..9c907d0e03 100644 --- a/crates/core_arch/src/x86/gfni.rs +++ b/crates/core_arch/src/x86/gfni.rs @@ -16,9 +16,6 @@ use crate::core_arch::x86::__m512i; use crate::core_arch::x86::__mmask16; use crate::core_arch::x86::__mmask32; use crate::core_arch::x86::__mmask64; -use crate::core_arch::x86::_mm256_setzero_si256; -use crate::core_arch::x86::_mm512_setzero_si512; -use crate::core_arch::x86::_mm_setzero_si128; use crate::core_arch::x86::m128iExt; use crate::core_arch::x86::m256iExt; use crate::core_arch::x86::m512iExt; @@ -110,7 +107,7 @@ pub unsafe fn _mm512_mask_gf2p8mul_epi8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { - let zero = _mm512_setzero_si512().as_i8x64(); + let zero = i8x64::ZERO; transmute(simd_select_bitmask( k, vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()), @@ -169,7 +166,7 @@ pub unsafe fn _mm256_mask_gf2p8mul_epi8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { - let zero = _mm256_setzero_si256().as_i8x32(); + let zero = i8x32::ZERO; transmute(simd_select_bitmask( k, vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()), @@ -228,7 +225,7 @@ pub unsafe fn _mm_mask_gf2p8mul_epi8( #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { - let zero = _mm_setzero_si128().as_i8x16(); + let zero = i8x16::ZERO; transmute(simd_select_bitmask( k, vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()), @@ -277,7 +274,7 @@ pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8( 
) -> __m512i { static_assert_uimm_bits!(B, 8); let b = B as u8; - let zero = _mm512_setzero_si512().as_i8x64(); + let zero = i8x64::ZERO; let x = x.as_i8x64(); let a = a.as_i8x64(); let r = vgf2p8affineqb_512(x, a, b); @@ -353,7 +350,7 @@ pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8( ) -> __m256i { static_assert_uimm_bits!(B, 8); let b = B as u8; - let zero = _mm256_setzero_si256().as_i8x32(); + let zero = i8x32::ZERO; let x = x.as_i8x32(); let a = a.as_i8x32(); let r = vgf2p8affineqb_256(x, a, b); @@ -429,7 +426,7 @@ pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8( ) -> __m128i { static_assert_uimm_bits!(B, 8); let b = B as u8; - let zero = _mm_setzero_si128().as_i8x16(); + let zero = i8x16::ZERO; let x = x.as_i8x16(); let a = a.as_i8x16(); let r = vgf2p8affineqb_128(x, a, b); @@ -509,7 +506,7 @@ pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8( ) -> __m512i { static_assert_uimm_bits!(B, 8); let b = B as u8; - let zero = _mm512_setzero_si512().as_i8x64(); + let zero = i8x64::ZERO; let x = x.as_i8x64(); let a = a.as_i8x64(); let r = vgf2p8affineinvqb_512(x, a, b); @@ -591,7 +588,7 @@ pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8( ) -> __m256i { static_assert_uimm_bits!(B, 8); let b = B as u8; - let zero = _mm256_setzero_si256().as_i8x32(); + let zero = i8x32::ZERO; let x = x.as_i8x32(); let a = a.as_i8x32(); let r = vgf2p8affineinvqb_256(x, a, b); @@ -673,7 +670,7 @@ pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8( ) -> __m128i { static_assert_uimm_bits!(B, 8); let b = B as u8; - let zero = _mm_setzero_si128().as_i8x16(); + let zero = i8x16::ZERO; let x = x.as_i8x16(); let a = a.as_i8x16(); let r = vgf2p8affineinvqb_128(x, a, b); diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 2843773eb0..358d551bdc 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -983,7 +983,7 @@ pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_ps() -> __m128 { - __m128([0.0, 0.0, 0.0, 0.0]) + const { mem::zeroed() } } /// A utility function for creating masks to use with Intel shuffle and @@ -1089,7 +1089,7 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. - let mask: i32x4 = simd_lt(transmute(a), i32x4::splat(0)); + let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO); simd_bitmask::(mask).into() } @@ -1881,7 +1881,7 @@ pub unsafe fn _mm_prefetch(p: *const i8) { #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_ps() -> __m128 { - _mm_set1_ps(0.0) + const { mem::zeroed() } } /// Transpose the 4x4 matrix formed by 4 rows of __m128 in place. 
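// Minimal stand-alone sketch of the zeroed-constant pattern used throughout
// this patch: on a toolchain where core::mem::zeroed is usable in const
// contexts, an all-zero SIMD-style value can be produced at compile time,
// which is what the new `ZERO` associated consts and the
// `const { mem::zeroed() }` bodies of _mm_setzero_ps / _mm_undefined_ps rely
// on. `F32x4` is a made-up wrapper type for the example, not a stdarch type.
use core::mem;

#[derive(Clone, Copy, PartialEq, Debug)]
#[repr(C)]
struct F32x4([f32; 4]);

impl F32x4 {
    // The all-zero bit pattern is a valid f32 lane value (+0.0), so zeroed()
    // is sound here.
    const ZERO: Self = unsafe { mem::zeroed() };
}

fn main() {
    assert_eq!(F32x4::ZERO, F32x4([0.0; 4]));
}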
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index c6273cfc13..784bee69db 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -455,9 +455,8 @@ unsafe fn _mm_slli_si128_impl(a: __m128i) -> __m128i { 16 - shift + i } } - let zero = _mm_set1_epi8(0).as_i8x16(); transmute::(simd_shuffle!( - zero, + i8x16::ZERO, a.as_i8x16(), [ mask(IMM8, 0), @@ -670,10 +669,9 @@ unsafe fn _mm_srli_si128_impl(a: __m128i) -> __m128i { i + (shift as u32) } } - let zero = _mm_set1_epi8(0).as_i8x16(); let x: i8x16 = simd_shuffle!( a.as_i8x16(), - zero, + i8x16::ZERO, [ mask(IMM8, 0), mask(IMM8, 1), @@ -1191,7 +1189,7 @@ pub unsafe fn _mm_setr_epi8( #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_si128() -> __m128i { - _mm_set1_epi64x(0) + const { mem::zeroed() } } /// Loads 64-bit integer from memory into first element of returned vector. @@ -1359,8 +1357,7 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { - let zero = _mm_setzero_si128(); - let r: i64x2 = simd_shuffle!(a.as_i64x2(), zero.as_i64x2(), [0, 2]); + let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]); transmute(r) } @@ -1434,7 +1431,7 @@ pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32) -> __m128i { #[cfg_attr(test, assert_instr(pmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { - let z = i8x16::splat(0); + let z = i8x16::ZERO; let m: i8x16 = simd_lt(a.as_i8x16(), z); simd_bitmask::<_, u16>(m) as u32 as i32 } @@ -2267,7 +2264,7 @@ pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { let r = simd_cast::<_, f32x2>(a.as_f64x2()); - let zero = f32x2::new(0.0, 0.0); + let zero = f32x2::ZERO; transmute::(simd_shuffle!(r, zero, [0, 1, 2, 3])) } @@ -2447,7 +2444,7 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { #[cfg_attr(test, assert_instr(xorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_pd() -> __m128d { - _mm_set_pd(0.0, 0.0) + const { mem::zeroed() } } /// Returns a mask of the most significant bit of each element in `a`. @@ -2463,7 +2460,7 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { // Propagate the highest bit to the rest, because simd_bitmask // requires all-1 or all-0. - let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0)); + let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO); simd_bitmask::(mask).into() } @@ -2902,7 +2899,7 @@ pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_pd() -> __m128d { - __m128d([0.0, 0.0]) + const { mem::zeroed() } } /// Returns vector of type __m128i with indeterminate elements. 
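// Illustrative scalar reading (made-up helpers) of two of the sse2 changes
// above: the byte-shift intrinsics are expressed as a shuffle across a zero
// vector and the data vector, and the movemask intrinsics gather per-lane sign
// bits after comparing against the all-zero vector.
fn slli_si128_bytes(a: [u8; 16], shift: usize) -> [u8; 16] {
    // Shifting the 128-bit value left by `shift` bytes zero-fills the low
    // bytes; in little-endian byte order, byte i comes from a[i - shift].
    let shift = shift.min(16);
    let mut out = [0u8; 16];
    for i in shift..16 {
        out[i] = a[i - shift];
    }
    out
}

fn movemask_epi8(a: [i8; 16]) -> i32 {
    // simd_lt(a, i8x16::ZERO) builds an all-ones/all-zeros lane mask; the
    // bitmask then collects one bit (the sign bit) per lane.
    let mut m = 0i32;
    for (i, &x) in a.iter().enumerate() {
        if x < 0 {
            m |= 1 << i;
        }
    }
    m
}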
@@ -2914,7 +2911,7 @@ pub unsafe fn _mm_undefined_pd() -> __m128d { #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_si128() -> __m128i { - __m128i([0, 0]) + const { mem::zeroed() } } /// The resulting `__m128d` element is composed by the low-order values of diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs index 279847b222..40adf2d804 100644 --- a/crates/core_arch/src/x86/sse41.rs +++ b/crates/core_arch/src/x86/sse41.rs @@ -60,7 +60,7 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI #[cfg_attr(test, assert_instr(pblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { - let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::splat(0)); + let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO); transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16())) } @@ -103,7 +103,7 @@ pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i) -> __m128 #[cfg_attr(test, assert_instr(blendvpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { - let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::splat(0)); + let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO); transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2())) } @@ -116,7 +116,7 @@ pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { #[cfg_attr(test, assert_instr(blendvps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { - let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::splat(0)); + let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO); transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4())) } diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs index 7efafced82..5a35d5cb3c 100644 --- a/crates/core_arch/src/x86/ssse3.rs +++ b/crates/core_arch/src/x86/ssse3.rs @@ -18,7 +18,7 @@ use stdarch_test::assert_instr; #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { let a = a.as_i8x16(); - let zero = i8x16::splat(0); + let zero = i8x16::ZERO; let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); transmute(r) } @@ -34,7 +34,7 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { let a = a.as_i16x8(); - let zero = i16x8::splat(0); + let zero = i16x8::ZERO; let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); transmute(r) } @@ -50,7 +50,7 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i { let a = a.as_i32x4(); - let zero = i32x4::splat(0); + let zero = i32x4::ZERO; let r = simd_select::(simd_lt(a, zero), simd_neg(a), a); transmute(r) } @@ -103,12 +103,12 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i) -> __m128 // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. if IMM8 > 32 { - return _mm_set1_epi8(0); + return _mm_setzero_si128(); } // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. let (a, b) = if IMM8 > 16 { - (_mm_set1_epi8(0), a) + (_mm_setzero_si128(), a) } else { (a, b) };
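// Scalar picture (example-only helpers) of the blendv and abs rewrites above:
// both boil down to a lane-wise select driven by a signed comparison against
// the all-zero vector that the new ZERO constants provide.
fn blendv_lane(a: f64, b: f64, mask_bits: i64) -> f64 {
    // blendvpd keys off the sign bit of the mask lane, i.e. mask < 0.
    if mask_bits < 0 { b } else { a }
}

fn abs_epi8_lane(a: i8) -> i8 {
    // pabsb selects the negated value where a < 0; negating i8::MIN wraps,
    // matching the instruction's behaviour (|-128| is reported as 0x80).
    if a < 0 { a.wrapping_neg() } else { a }
}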