Skip to content

Commit c2f5948

Browse files
Feature-flag fused mul-add to block libcalls
1 parent 6d3d07a commit c2f5948

File tree

3 files changed

+25
-22
lines changed

3 files changed

+25
-22
lines changed

crates/core_simd/src/intrinsics.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,6 @@ extern "platform-intrinsic" {
4646
/// fabs
4747
pub(crate) fn simd_fabs<T>(x: T) -> T;
4848

49-
/// fsqrt
50-
#[cfg(feature = "std")]
51-
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
52-
53-
/// fma
54-
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
55-
5649
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
5750
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
5851
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -110,6 +103,12 @@ mod std {
110103

111104
// trunc
112105
pub(crate) fn simd_trunc<T>(x: T) -> T;
106+
107+
// fsqrt
108+
pub(crate) fn simd_fsqrt<T>(x: T) -> T;
109+
110+
// fma
111+
pub(crate) fn simd_fma<T>(x: T, y: T, z: T) -> T;
113112
}
114113
}
115114

crates/core_simd/src/vector/float.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ macro_rules! impl_float_vector {
4242
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
4343
/// true, and will be heavily dependent on designing algorithms with specific target
4444
/// hardware in mind.
45+
#[cfg(feature = "std")]
4546
#[inline]
4647
pub fn mul_add(self, a: Self, b: Self) -> Self {
4748
unsafe { intrinsics::simd_fma(self, a, b) }

crates/core_simd/tests/ops_macros.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -437,14 +437,6 @@ macro_rules! impl_float_tests {
437437
)
438438
}
439439

440-
fn mul_add<const LANES: usize>() {
441-
test_helpers::test_ternary_elementwise(
442-
&Vector::<LANES>::mul_add,
443-
&Scalar::mul_add,
444-
&|_, _, _| true,
445-
)
446-
}
447-
448440
fn recip<const LANES: usize>() {
449441
test_helpers::test_unary_elementwise(
450442
&Vector::<LANES>::recip,
@@ -601,13 +593,24 @@ macro_rules! impl_float_tests {
601593
}
602594

603595
#[cfg(feature = "std")]
604-
test_helpers::test_lanes! {
605-
fn sqrt<const LANES: usize>() {
606-
test_helpers::test_unary_elementwise(
607-
&Vector::<LANES>::sqrt,
608-
&Scalar::sqrt,
609-
&|_| true,
610-
)
596+
mod std {
597+
use super::*;
598+
test_helpers::test_lanes! {
599+
fn sqrt<const LANES: usize>() {
600+
test_helpers::test_unary_elementwise(
601+
&Vector::<LANES>::sqrt,
602+
&Scalar::sqrt,
603+
&|_| true,
604+
)
605+
}
606+
607+
fn mul_add<const LANES: usize>() {
608+
test_helpers::test_ternary_elementwise(
609+
&Vector::<LANES>::mul_add,
610+
&Scalar::mul_add,
611+
&|_, _, _| true,
612+
)
613+
}
611614
}
612615
}
613616
}

0 commit comments

Comments
 (0)