diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 74a2c5ed68..e46086c087 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -15046,60 +15046,90 @@ pub fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m5 } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\ -/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i { +pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i { unsafe { - static_assert_sae!(SAE); + 
static_assert_extended_rounding!(ROUNDING); let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111); transmute(r) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ -/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>( +pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>( src: __m256i, k: __mmask16, a: __m512, ) -> __m256i { unsafe { - 
static_assert_sae!(SAE); + static_assert_extended_rounding!(ROUNDING); let a = a.as_f32x16(); let src = src.as_i16x16(); - let r = vcvtps2ph(a, SAE, src, k); + let r = vcvtps2ph(a, ROUNDING, src, k); transmute(r) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ -/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i { +pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i { unsafe { - 
static_assert_sae!(SAE); + static_assert_extended_rounding!(ROUNDING); let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k); transmute(r) } } @@ -15203,56 +15233,86 @@ pub fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128) -> __m1 } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\ -/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(1)] -pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i { +pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i { unsafe { - static_assert_sae!(SAE); + static_assert_extended_rounding!(ROUNDING); 
let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, 0b11111111_11111111); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111); transmute(r) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ -/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(3)] -pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i { +pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i { unsafe { - static_assert_sae!(SAE); + 
static_assert_extended_rounding!(ROUNDING); let a = a.as_f32x16(); let src = src.as_i16x16(); - let r = vcvtps2ph(a, SAE, src, k); + let r = vcvtps2ph(a, ROUNDING, src, k); transmute(r) } } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ -/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: +/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest +/// * [`_MM_FROUND_TO_NEG_INF`] // round down +/// * [`_MM_FROUND_TO_POS_INF`] // round up +/// * [`_MM_FROUND_TO_ZERO`] // truncate +/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`] +/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions +/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions +/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions +/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions +/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`] /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780) #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))] #[rustc_legacy_const_generics(2)] -pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i { +pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i { unsafe { - static_assert_sae!(SAE); + 
static_assert_extended_rounding!(ROUNDING); let a = a.as_f32x16(); - let r = vcvtps2ph(a, SAE, i16x16::ZERO, k); + let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k); transmute(r) } } @@ -42487,11 +42547,11 @@ unsafe extern "C" { fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16; #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"] - fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16; + fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16; #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"] - fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8; + fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8; #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"] - fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8; + fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8; #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"] fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16; diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs index b674875893..a5ebf2c98d 100644 --- a/crates/core_arch/src/x86/avx512fp16.rs +++ b/crates/core_arch/src/x86/avx512fp16.rs @@ -13711,74 +13711,56 @@ pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i { /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, /// and store the results in dst. 
/// -/// Rounding is done according to the rounding parameter, which can be one of: -/// -/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions -/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions -/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions -/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions -/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_epu16) #[inline] #[target_feature(enable = "avx512fp16")] -#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))] +#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub fn _mm512_cvt_roundph_epu16<const ROUNDING: i32>(a: __m512h) -> __m512i { - static_assert_rounding!(ROUNDING); - _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a) +pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a) } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, /// and store the results in dst using writemask k (elements are copied from src when the corresponding /// mask bit is not set). 
/// -/// Rounding is done according to the rounding parameter, which can be one of: -/// -/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions -/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions -/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions -/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions -/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_epu16) #[inline] #[target_feature(enable = "avx512fp16")] -#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))] +#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub fn _mm512_mask_cvt_roundph_epu16<const ROUNDING: i32>( +pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>( src: __m512i, k: __mmask32, a: __m512h, ) -> __m512i { unsafe { - static_assert_rounding!(ROUNDING); - transmute(vcvtph2uw_512(a, src.as_u16x32(), k, ROUNDING)) + static_assert_sae!(SAE); + transmute(vcvtph2uw_512(a, src.as_u16x32(), k, SAE)) } } /// Convert packed half-precision (16-bit) floating-point elements in a to packed unsigned 16-bit integers, /// and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// -/// Rounding is done according to the rounding parameter, which can be one of: -/// -/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions -/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions -/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions -/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions -/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_epu16) #[inline] #[target_feature(enable = "avx512fp16")] -#[cfg_attr(test, assert_instr(vcvtph2uw, ROUNDING = 8))] +#[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub fn _mm512_maskz_cvt_roundph_epu16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i { - static_assert_rounding!(ROUNDING); - _mm512_mask_cvt_roundph_epu16::<ROUNDING>(_mm512_setzero_si512(), k, a) +pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i { + static_assert_sae!(SAE); + _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a) } /// Convert packed half-precision (16-bit) floating-point elements in a to packed 16-bit integers with @@ -14560,24 +14542,18 @@ pub fn _mm_cvtsh_u32(a: __m128h) -> u32 { /// Convert the lower half-precision (16-bit) floating-point element in a to a 32-bit unsigned integer, and store /// the result in dst. 
/// -/// Rounding is done according to the rounding parameter, which can be one of: -/// -/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions -/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions -/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions -/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions -/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`] +/// Exceptions can be suppressed by passing [`_MM_FROUND_NO_EXC`] in the sae parameter. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsh_u32) #[inline] #[target_feature(enable = "avx512fp16")] -#[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))] +#[cfg_attr(test, assert_instr(vcvtsh2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] -pub fn _mm_cvt_roundsh_u32<const ROUNDING: i32>(a: __m128h) -> u32 { +pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 { unsafe { - static_assert_rounding!(ROUNDING); - vcvtsh2usi32(a, ROUNDING) + static_assert_sae!(SAE); + vcvtsh2usi32(a, SAE) } } @@ -16548,7 +16524,7 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.256"] fn vcvtph2uw_256(a: __m256h, src: u16x16, k: __mmask16) -> u16x16; #[link_name = "llvm.x86.avx512fp16.mask.vcvtph2uw.512"] - fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, rounding: i32) -> u16x32; + fn vcvtph2uw_512(a: __m512h, src: u16x32, k: __mmask32, sae: i32) -> u16x32; #[link_name = "llvm.x86.avx512fp16.mask.vcvttph2w.128"] fn vcvttph2w_128(a: __m128h, src: i16x8, k: __mmask8) -> i16x8; diff --git a/crates/core_arch/src/x86/macros.rs b/crates/core_arch/src/x86/macros.rs index ddf38aa506..9b9c24a447 100644 --- a/crates/core_arch/src/x86/macros.rs +++ b/crates/core_arch/src/x86/macros.rs @@ -21,6 +21,15 @@ macro_rules! 
static_assert_sae { }; } +// Helper macro used to trigger const eval errors when the const generic immediate value `imm` is +// not an extended rounding number (low three bits below 5, no bits set above bit 3). +#[allow(unused)] +macro_rules! static_assert_extended_rounding { + ($imm: ident) => { + static_assert!(($imm & !15) == 0 && ($imm & 7) <= 4, "Invalid IMM value") + }; +} + // Helper macro used to trigger const eval errors when the const generic immediate value `imm` is // not a mantissas sae number. #[allow(unused)]