From e1cbda54ed66ac3ade945db0086188101f1183a9 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Apr 2025 20:50:58 +0000 Subject: [PATCH 1/2] Move `fma` implementations to `mod generic` This will not build correctly, the move is done as a separate step from the rest of refactoring so git's history is cleaner. --- libm/src/math/{ => generic}/fma.rs | 0 libm/src/math/{ => generic}/fma_wide.rs | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename libm/src/math/{ => generic}/fma.rs (100%) rename libm/src/math/{ => generic}/fma_wide.rs (100%) diff --git a/libm/src/math/fma.rs b/libm/src/math/generic/fma.rs similarity index 100% rename from libm/src/math/fma.rs rename to libm/src/math/generic/fma.rs diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/generic/fma_wide.rs similarity index 100% rename from libm/src/math/fma_wide.rs rename to libm/src/math/generic/fma_wide.rs From 7aac021d4a5c516b144932aaf6e624316c0c027a Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Tue, 29 Apr 2025 20:52:54 +0000 Subject: [PATCH 2/2] Refactor the fma modules Move implementations to `generic/` like the other functions. This also allows us to combine the `fma` and `fma_wide` modules. --- etc/function-definitions.json | 2 +- libm/src/math/fma.rs | 165 ++++++++++++++++++++++++++++++ libm/src/math/generic/fma.rs | 133 +----------------------- libm/src/math/generic/fma_wide.rs | 44 +------- libm/src/math/generic/mod.rs | 4 + libm/src/math/mod.rs | 6 +- 6 files changed, 179 insertions(+), 175 deletions(-) create mode 100644 libm/src/math/fma.rs diff --git a/etc/function-definitions.json b/etc/function-definitions.json index 3e33343c4..9e5774eaf 100644 --- a/etc/function-definitions.json +++ b/etc/function-definitions.json @@ -350,7 +350,7 @@ "fmaf": { "sources": [ "libm/src/math/arch/aarch64.rs", - "libm/src/math/fma_wide.rs" + "libm/src/math/fma.rs" ], "type": "f32" }, diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs new file mode 100644 index 000000000..78f0f8992 --- /dev/null +++ b/libm/src/math/fma.rs @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: MIT */ +/* origin: musl src/math/fma.c, fmaf.c Ported to generic Rust algorithm in 2025, TG. */ + +use super::generic; +use crate::support::Round; + +// Placeholder so we can have `fmaf16` in the `Float` trait. +#[allow(unused)] +#[cfg(f16_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { + unimplemented!() +} + +/// Floating multiply add (f32) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { + select_implementation! { + name: fmaf, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + generic::fma_wide_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f64) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fma(x: f64, y: f64, z: f64) -> f64 { + select_implementation! { + name: fma, + use_arch: all(target_arch = "aarch64", target_feature = "neon"), + args: x, y, z, + } + + generic::fma_round(x, y, z, Round::Nearest).val +} + +/// Fused multiply add (f128) +/// +/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). +#[cfg(f128_enabled)] +#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] +pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { + generic::fma_round(x, y, z, Round::Nearest).val +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status}; + + /// Test the generic `fma_round` algorithm for a given float. + fn spec_test(f: impl Fn(F, F, F) -> F) + where + F: Float, + F: CastFrom, + F: CastFrom, + F::Int: HInt, + u32: CastInto, + { + let x = F::from_bits(F::Int::ONE); + let y = F::from_bits(F::Int::ONE); + let z = F::ZERO; + + // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of + // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the + // exact result" + assert_biteq!(f(x, y, z), F::ZERO); + assert_biteq!(f(x, -y, z), F::NEG_ZERO); + assert_biteq!(f(-x, y, z), F::NEG_ZERO); + assert_biteq!(f(-x, -y, z), F::ZERO); + } + + #[test] + fn spec_test_f32() { + spec_test::(fmaf); + + // Also do a small check that the non-widening version works for f32 (this should ideally + // get tested some more). + spec_test::(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val); + } + + #[test] + fn spec_test_f64() { + spec_test::(fma); + + let expect_underflow = [ + ( + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("0x1.ffffffffffffp-1023"), + hf64!("0x0.ffffffffffff8p-1022"), + ), + ( + // FIXME: we raise underflow but this should only be inexact (based on C and + // `rustc_apfloat`). + hf64!("0x1.0p-1070"), + hf64!("0x1.0p-1070"), + hf64!("-0x1.0p-1022"), + hf64!("-0x1.0p-1022"), + ), + ]; + + for (x, y, z, res) in expect_underflow { + let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest); + assert_biteq!(val, res); + assert_eq!(status, Status::UNDERFLOW); + } + } + + #[test] + #[cfg(f128_enabled)] + fn spec_test_f128() { + spec_test::(fmaf128); + } + + #[test] + fn issue_263() { + let a = f32::from_bits(1266679807); + let b = f32::from_bits(1300234242); + let c = f32::from_bits(1115553792); + let expected = f32::from_bits(1501560833); + assert_eq!(fmaf(a, b, c), expected); + } + + #[test] + fn fma_segfault() { + // These two inputs cause fma to segfault on release due to overflow: + assert_eq!( + fma( + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313, + -0.0000000000000002220446049250313 + ), + -0.00000000000000022204460492503126, + ); + + let result = fma(-0.992, -0.992, -0.992); + //force rounding to storage format on x87 to prevent superious errors. + #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] + let result = force_eval!(result); + assert_eq!(result, -0.007936000000000007,); + } + + #[test] + fn fma_sbb() { + assert_eq!( + fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), + -3991680619069439e277 + ); + } + + #[test] + fn fma_underflow() { + assert_eq!( + fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), + 0.0, + ); + } +} diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs index 8856e63f5..aaf459d1b 100644 --- a/libm/src/math/generic/fma.rs +++ b/libm/src/math/generic/fma.rs @@ -1,31 +1,9 @@ /* SPDX-License-Identifier: MIT */ /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */ -use super::support::{DInt, FpResult, HInt, IntTy, Round, Status}; -use super::{CastFrom, CastInto, Float, Int, MinInt}; - -/// Fused multiply add (f64) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fma(x: f64, y: f64, z: f64) -> f64 { - select_implementation! { - name: fma, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), - args: x, y, z, - } - - fma_round(x, y, z, Round::Nearest).val -} - -/// Fused multiply add (f128) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg(f128_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 { - fma_round(x, y, z, Round::Nearest).val -} +use crate::support::{ + CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, +}; /// Fused multiply-add that works when there is not a larger float size available. Computes /// `(x * y) + z`. @@ -234,7 +212,7 @@ where } // Use our exponent to scale the final value. - FpResult::new(super::generic::scalbn(r, e), status) + FpResult::new(super::scalbn(r, e), status) } /// Representation of `F` that has handled subnormals. @@ -298,106 +276,3 @@ impl Norm { self.e > Self::ZERO_INF_NAN as i32 } } - -#[cfg(test)] -mod tests { - use super::*; - - /// Test the generic `fma_round` algorithm for a given float. - fn spec_test() - where - F: Float, - F: CastFrom, - F: CastFrom, - F::Int: HInt, - u32: CastInto, - { - let x = F::from_bits(F::Int::ONE); - let y = F::from_bits(F::Int::ONE); - let z = F::ZERO; - - let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val; - - // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of - // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the - // exact result" - assert_biteq!(fma(x, y, z), F::ZERO); - assert_biteq!(fma(x, -y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, y, z), F::NEG_ZERO); - assert_biteq!(fma(-x, -y, z), F::ZERO); - } - - #[test] - fn spec_test_f32() { - spec_test::(); - } - - #[test] - fn spec_test_f64() { - spec_test::(); - - let expect_underflow = [ - ( - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("0x1.ffffffffffffp-1023"), - hf64!("0x0.ffffffffffff8p-1022"), - ), - ( - // FIXME: we raise underflow but this should only be inexact (based on C and - // `rustc_apfloat`). - hf64!("0x1.0p-1070"), - hf64!("0x1.0p-1070"), - hf64!("-0x1.0p-1022"), - hf64!("-0x1.0p-1022"), - ), - ]; - - for (x, y, z, res) in expect_underflow { - let FpResult { val, status } = fma_round(x, y, z, Round::Nearest); - assert_biteq!(val, res); - assert_eq!(status, Status::UNDERFLOW); - } - } - - #[test] - #[cfg(f128_enabled)] - fn spec_test_f128() { - spec_test::(); - } - - #[test] - fn fma_segfault() { - // These two inputs cause fma to segfault on release due to overflow: - assert_eq!( - fma( - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313, - -0.0000000000000002220446049250313 - ), - -0.00000000000000022204460492503126, - ); - - let result = fma(-0.992, -0.992, -0.992); - //force rounding to storage format on x87 to prevent superious errors. - #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] - let result = force_eval!(result); - assert_eq!(result, -0.007936000000000007,); - } - - #[test] - fn fma_sbb() { - assert_eq!( - fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), - -3991680619069439e277 - ); - } - - #[test] - fn fma_underflow() { - assert_eq!( - fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), - 0.0, - ); - } -} diff --git a/libm/src/math/generic/fma_wide.rs b/libm/src/math/generic/fma_wide.rs index f268c2f14..a2ef59d3e 100644 --- a/libm/src/math/generic/fma_wide.rs +++ b/libm/src/math/generic/fma_wide.rs @@ -1,30 +1,6 @@ -/* SPDX-License-Identifier: MIT */ -/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */ - -use super::support::{FpResult, IntTy, Round, Status}; -use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt}; - -// Placeholder so we can have `fmaf16` in the `Float` trait. -#[allow(unused)] -#[cfg(f16_enabled)] -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 { - unimplemented!() -} - -/// Floating multiply add (f32) -/// -/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision). -#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] -pub fn fmaf(x: f32, y: f32, z: f32) -> f32 { - select_implementation! { - name: fmaf, - use_arch: all(target_arch = "aarch64", target_feature = "neon"), - args: x, y, z, - } - - fma_wide_round(x, y, z, Round::Nearest).val -} +use crate::support::{ + CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status, +}; /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`, /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding. @@ -95,17 +71,3 @@ where FpResult::ok(B::from_bits(ui).narrow()) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn issue_263() { - let a = f32::from_bits(1266679807); - let b = f32::from_bits(1300234242); - let c = f32::from_bits(1115553792); - let expected = f32::from_bits(1501560833); - assert_eq!(fmaf(a, b, c), expected); - } -} diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs index 35846351a..9d497a03f 100644 --- a/libm/src/math/generic/mod.rs +++ b/libm/src/math/generic/mod.rs @@ -6,6 +6,8 @@ mod copysign; mod fabs; mod fdim; mod floor; +mod fma; +mod fma_wide; mod fmax; mod fmaximum; mod fmaximum_num; @@ -24,6 +26,8 @@ pub use copysign::copysign; pub use fabs::fabs; pub use fdim::fdim; pub use floor::floor; +pub use fma::fma_round; +pub use fma_wide::fma_wide_round; pub use fmax::fmax; pub use fmaximum::fmaximum; pub use fmaximum_num::fmaximum_num; diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs index 949c18b40..ce9b8fc58 100644 --- a/libm/src/math/mod.rs +++ b/libm/src/math/mod.rs @@ -159,7 +159,6 @@ mod fabs; mod fdim; mod floor; mod fma; -mod fma_wide; mod fmin_fmax; mod fminimum_fmaximum; mod fminimum_fmaximum_num; @@ -254,8 +253,7 @@ pub use self::expm1f::expm1f; pub use self::fabs::{fabs, fabsf}; pub use self::fdim::{fdim, fdimf}; pub use self::floor::{floor, floorf}; -pub use self::fma::fma; -pub use self::fma_wide::fmaf; +pub use self::fma::{fma, fmaf}; pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf}; pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf}; pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf}; @@ -336,7 +334,7 @@ cfg_if! { // verify-sorted-end #[allow(unused_imports)] - pub(crate) use self::fma_wide::fmaf16; + pub(crate) use self::fma::fmaf16; } }