diff --git a/etc/function-definitions.json b/etc/function-definitions.json
index 3e33343c4..9e5774eaf 100644
--- a/etc/function-definitions.json
+++ b/etc/function-definitions.json
@@ -350,7 +350,7 @@
   "fmaf": {
     "sources": [
      "libm/src/math/arch/aarch64.rs",
-      "libm/src/math/fma_wide.rs"
+      "libm/src/math/fma.rs"
     ],
     "type": "f32"
   },
diff --git a/libm/src/math/fma.rs b/libm/src/math/fma.rs
index 8856e63f5..78f0f8992 100644
--- a/libm/src/math/fma.rs
+++ b/libm/src/math/fma.rs
@@ -1,8 +1,30 @@
 /* SPDX-License-Identifier: MIT */
-/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
+/* origin: musl src/math/fma.c, fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
 
-use super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
-use super::{CastFrom, CastInto, Float, Int, MinInt};
+use super::generic;
+use crate::support::Round;
+
+// Placeholder so we can have `fmaf16` in the `Float` trait.
+#[allow(unused)]
+#[cfg(f16_enabled)]
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
+    unimplemented!()
+}
+
+/// Fused multiply add (f32)
+///
+/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
+#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
+pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
+    select_implementation! {
+        name: fmaf,
+        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
+        args: x, y, z,
+    }
+
+    generic::fma_wide_round(x, y, z, Round::Nearest).val
+}
 
 /// Fused multiply add (f64)
 ///
@@ -15,7 +37,7 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
         args: x, y, z,
     }
 
-    fma_round(x, y, z, Round::Nearest).val
+    generic::fma_round(x, y, z, Round::Nearest).val
 }
 
 /// Fused multiply add (f128)
 ///
@@ -24,287 +46,16 @@ pub fn fma(x: f64, y: f64, z: f64) -> f64 {
 #[cfg(f128_enabled)]
 #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
 pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
-    fma_round(x, y, z, Round::Nearest).val
-}
-
-/// Fused multiply-add that works when there is not a larger float size available. Computes
-/// `(x * y) + z`.
-#[inline]
-pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
-where
-    F: Float,
-    F: CastFrom<F::SignedInt>,
-    F: CastFrom<i8>,
-    F::Int: HInt,
-    u32: CastInto<F::Int>,
-{
-    let one = IntTy::<F>::ONE;
-    let zero = IntTy::<F>::ZERO;
-
-    // Normalize such that the top of the mantissa is zero and we have a guard bit.
-    let nx = Norm::from_float(x);
-    let ny = Norm::from_float(y);
-    let nz = Norm::from_float(z);
-
-    if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
-        // Value will overflow, defer to non-fused operations.
-        return FpResult::ok(x * y + z);
-    }
-
-    if nz.is_zero_nan_inf() {
-        if nz.is_zero() {
-            // Empty add component means we only need to multiply.
-            return FpResult::ok(x * y);
-        }
-        // `z` is NaN or infinity, which sets the result.
-        return FpResult::ok(z);
-    }
-
-    // multiply: r = x * y
-    let zhi: F::Int;
-    let zlo: F::Int;
-    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
-
-    // Exponent result of multiplication
-    let mut e: i32 = nx.e + ny.e;
-    // Needed shift to align `z` to the multiplication result
-    let mut d: i32 = nz.e - e;
-    let sbits = F::BITS as i32;
-
-    // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
-    if d > 0 {
-        // The magnitude of `z` is larger than `x * y`
-        if d < sbits {
-            // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift
-            // it into `(zhi, zlo)`. No exponent adjustment necessary.
-            zlo = nz.m << d;
-            zhi = nz.m >> (sbits - d);
-        } else {
-            // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
-            // as a shift by `sbits`).
-            zlo = zero;
-            zhi = nz.m;
-            d -= sbits;
-
-            // `z`'s exponent is large enough that it now needs to be taken into account.
-            e = nz.e - sbits;
-
-            if d == 0 {
-                // Exactly `sbits`, nothing to do
-            } else if d < sbits {
-                // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y`
-                rlo = (rhi << (sbits - d)) | (rlo >> d);
-                // Set the sticky bit
-                rlo |= IntTy::<F>::from((rlo << (sbits - d)) != zero);
-                rhi = rhi >> d;
-            } else {
-                // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set
-                // the sticky bit.
-                rlo = one;
-                rhi = zero;
-            }
-        }
-    } else {
-        // `z`'s magnitude once shifted fits entirely within `zlo`
-        zhi = zero;
-        d = -d;
-        if d == 0 {
-            // No shift needed
-            zlo = nz.m;
-        } else if d < sbits {
-            // Shift s.t. `nz.m` fits into `zlo`
-            let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
-            zlo = (nz.m >> d) | sticky;
-        } else {
-            // Would be entirely shifted out, only set the sticky bit
-            zlo = one;
-        }
-    }
-
-    /* addition */
-
-    let mut neg = nx.neg ^ ny.neg;
-    let samesign: bool = !neg ^ nz.neg;
-    let mut rhi_nonzero = true;
-
-    if samesign {
-        // r += z
-        rlo = rlo.wrapping_add(zlo);
-        rhi += zhi + IntTy::<F>::from(rlo < zlo);
-    } else {
-        // r -= z
-        let (res, borrow) = rlo.overflowing_sub(zlo);
-        rlo = res;
-        rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
-        if (rhi >> (F::BITS - 1)) != zero {
-            rlo = rlo.signed().wrapping_neg().unsigned();
-            rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
-            neg = !neg;
-        }
-        rhi_nonzero = rhi != zero;
-    }
-
-    /* Construct result */
-
-    // Shift result into `rhi`, left-aligned. Last bit is sticky
-    if rhi_nonzero {
-        // `d` > 0, need to shift both `rhi` and `rlo` into result
-        e += sbits;
-        d = rhi.leading_zeros() as i32 - 1;
-        rhi = (rhi << d) | (rlo >> (sbits - d));
-        // Update sticky
-        rhi |= IntTy::<F>::from((rlo << d) != zero);
-    } else if rlo != zero {
-        // `rhi` is zero, `rlo` is the entire result and needs to be shifted
-        d = rlo.leading_zeros() as i32 - 1;
-        if d < 0 {
-            // Shift and set sticky
-            rhi = (rlo >> 1) | (rlo & one);
-        } else {
-            rhi = rlo << d;
-        }
-    } else {
-        // exact +/- 0.0
-        return FpResult::ok(x * y + z);
-    }
-
-    e -= d;
-
-    // Use int->float conversion to populate the significand.
-    // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1]
-    let mut i: F::SignedInt = rhi.signed();
-
-    if neg {
-        i = -i;
-    }
-
-    // `|r|` is in `[0x1p62,0x1p63]` for `f64`
-    let mut r: F = F::cast_from_lossy(i);
-
-    /* Account for subnormal and rounding */
-
-    // Unbiased exponent for the maximum value of `r`
-    let max_pow = F::BITS - 1 + F::EXP_BIAS;
-
-    let mut status = Status::OK;
-
-    if e < -(max_pow as i32 - 2) {
-        // Result is subnormal before rounding
-        if e == -(max_pow as i32 - 1) {
-            let mut c = F::from_parts(false, max_pow, zero);
-            if neg {
-                c = -c;
-            }
-
-            if r == c {
-                // Min normal after rounding,
-                status.set_underflow(true);
-                r = F::MIN_POSITIVE_NORMAL.copysign(r);
-                return FpResult::new(r, status);
-            }
-
-            if (rhi << (F::SIG_BITS + 1)) != zero {
-                // Account for truncated bits. One bit will be lost in the `scalbn` call, add
-                // another top bit to avoid double rounding if inexact.
-                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
-                i = iu.signed();
-
-                if neg {
-                    i = -i;
-                }
-
-                r = F::cast_from_lossy(i);
-
-                // Remove the top bit
-                r = F::cast_from(2i8) * r - c;
-                status.set_underflow(true);
-            }
-        } else {
-            // Only round once when scaled
-            d = F::EXP_BITS as i32 - 1;
-            let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
-            i = (((rhi >> d) | sticky) << d).signed();
-
-            if neg {
-                i = -i;
-            }
-
-            r = F::cast_from_lossy(i);
-        }
-    }
-
-    // Use our exponent to scale the final value.
-    FpResult::new(super::generic::scalbn(r, e), status)
-}
-
-/// Representation of `F` that has handled subnormals.
-#[derive(Clone, Copy, Debug)]
-struct Norm<F: Float> {
-    /// Normalized significand with one guard bit, unsigned.
-    m: F::Int,
-    /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa
-    /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`.
-    e: i32,
-    neg: bool,
-}
-
-impl<F: Float> Norm<F> {
-    /// Unbias the exponent and account for the mantissa's precision, including the guard bit.
-    const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;
-
-    /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we
-    /// adjusted the exponent such that it exceeds this threashold.
-    const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
-
-    fn from_float(x: F) -> Self {
-        let mut ix = x.to_bits();
-        let mut e = x.ex() as i32;
-        let neg = x.is_sign_negative();
-        if e == 0 {
-            // Normalize subnormals by multiplication
-            let scale_i = F::BITS - 1;
-            let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
-            let scaled = x * scale_f;
-            ix = scaled.to_bits();
-            e = scaled.ex() as i32;
-            e = if e == 0 {
-                // If the exponent is still zero, the input was zero. Artifically set this value
-                // such that the final `e` will exceed `ZERO_INF_NAN`.
-                1 << F::EXP_BITS
-            } else {
-                // Otherwise, account for the scaling we just did.
-                e - scale_i as i32
-            };
-        }
-
-        e -= Self::EXP_UNBIAS as i32;
-
-        // Absolute value, set the implicit bit, and shift to create a guard bit
-        ix &= F::SIG_MASK;
-        ix |= F::IMPLICIT_BIT;
-        ix <<= 1;
-
-        Self { m: ix, e, neg }
-    }
-
-    /// True if the value was zero, infinity, or NaN.
-    fn is_zero_nan_inf(self) -> bool {
-        self.e >= Self::ZERO_INF_NAN as i32
-    }
-
-    /// The only value we have
-    fn is_zero(self) -> bool {
-        // The only exponent that strictly exceeds this value is our sentinel value for zero.
-        self.e > Self::ZERO_INF_NAN as i32
-    }
-}
+    generic::fma_round(x, y, z, Round::Nearest).val
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::support::{CastFrom, CastInto, Float, FpResult, HInt, MinInt, Round, Status};
 
     /// Test the generic `fma_round` algorithm for a given float.
-    fn spec_test<F>()
+    fn spec_test<F>(f: impl Fn(F, F, F) -> F)
     where
         F: Float,
         F: CastFrom<F::SignedInt>,
@@ -316,25 +67,27 @@ mod tests {
         let y = F::from_bits(F::Int::ONE);
         let z = F::ZERO;
 
-        let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
-
         // 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
         // fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
         // exact result"
-        assert_biteq!(fma(x, y, z), F::ZERO);
-        assert_biteq!(fma(x, -y, z), F::NEG_ZERO);
-        assert_biteq!(fma(-x, y, z), F::NEG_ZERO);
-        assert_biteq!(fma(-x, -y, z), F::ZERO);
+        assert_biteq!(f(x, y, z), F::ZERO);
+        assert_biteq!(f(x, -y, z), F::NEG_ZERO);
+        assert_biteq!(f(-x, y, z), F::NEG_ZERO);
+        assert_biteq!(f(-x, -y, z), F::ZERO);
     }
 
     #[test]
     fn spec_test_f32() {
-        spec_test::<f32>();
+        spec_test::<f32>(fmaf);
+
+        // Also do a small check that the non-widening version works for f32 (this should ideally
+        // get tested some more).
+        spec_test::<f32>(|x, y, z| generic::fma_round(x, y, z, Round::Nearest).val);
     }
 
     #[test]
     fn spec_test_f64() {
-        spec_test::<f64>();
+        spec_test::<f64>(fma);
 
         let expect_underflow = [
             (
@@ -354,7 +107,7 @@
             ),
         ];
 
         for (x, y, z, res) in expect_underflow {
-            let FpResult { val, status } = fma_round(x, y, z, Round::Nearest);
+            let FpResult { val, status } = generic::fma_round(x, y, z, Round::Nearest);
             assert_biteq!(val, res);
             assert_eq!(status, Status::UNDERFLOW);
         }
     }
 
@@ -363,7 +116,16 @@
     #[test]
     #[cfg(f128_enabled)]
     fn spec_test_f128() {
-        spec_test::<f128>();
+        spec_test::<f128>(fmaf128);
+    }
+
+    #[test]
+    fn issue_263() {
+        let a = f32::from_bits(1266679807);
+        let b = f32::from_bits(1300234242);
+        let c = f32::from_bits(1115553792);
+        let expected = f32::from_bits(1501560833);
+        assert_eq!(fmaf(a, b, c), expected);
     }
 
     #[test]
diff --git a/libm/src/math/generic/fma.rs b/libm/src/math/generic/fma.rs
new file mode 100644
index 000000000..aaf459d1b
--- /dev/null
+++ b/libm/src/math/generic/fma.rs
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
+
+use crate::support::{
+    CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status,
+};
+
+/// Fused multiply-add that works when there is not a larger float size available. Computes
+/// `(x * y) + z`.
+#[inline]
+pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
+where
+    F: Float,
+    F: CastFrom<F::SignedInt>,
+    F: CastFrom<i8>,
+    F::Int: HInt,
+    u32: CastInto<F::Int>,
+{
+    let one = IntTy::<F>::ONE;
+    let zero = IntTy::<F>::ZERO;
+
+    // Normalize such that the top of the mantissa is zero and we have a guard bit.
+    let nx = Norm::from_float(x);
+    let ny = Norm::from_float(y);
+    let nz = Norm::from_float(z);
+
+    if nx.is_zero_nan_inf() || ny.is_zero_nan_inf() {
+        // Value will overflow, defer to non-fused operations.
+        return FpResult::ok(x * y + z);
+    }
+
+    if nz.is_zero_nan_inf() {
+        if nz.is_zero() {
+            // Empty add component means we only need to multiply.
+            return FpResult::ok(x * y);
+        }
+        // `z` is NaN or infinity, which sets the result.
+        return FpResult::ok(z);
+    }
+
+    // multiply: r = x * y
+    let zhi: F::Int;
+    let zlo: F::Int;
+    let (mut rlo, mut rhi) = nx.m.widen_mul(ny.m).lo_hi();
+
+    // Exponent result of multiplication
+    let mut e: i32 = nx.e + ny.e;
+    // Needed shift to align `z` to the multiplication result
+    let mut d: i32 = nz.e - e;
+    let sbits = F::BITS as i32;
+
+    // Scale `z`. Shift `z <<= kz`, `r >>= kr`, so `kz+kr == d`, set `e = e+kr` (== ez-kz)
+    if d > 0 {
+        // The magnitude of `z` is larger than `x * y`
+        if d < sbits {
+            // Maximum shift of one `F::BITS` means shifted `z` will fit into `2 * F::BITS`. Shift
+            // it into `(zhi, zlo)`. No exponent adjustment necessary.
+            zlo = nz.m << d;
+            zhi = nz.m >> (sbits - d);
+        } else {
+            // Shift larger than `sbits`, `z` only needs the top half `zhi`. Place it there (acts
+            // as a shift by `sbits`).
+            zlo = zero;
+            zhi = nz.m;
+            d -= sbits;
+
+            // `z`'s exponent is large enough that it now needs to be taken into account.
+            e = nz.e - sbits;
+
+            if d == 0 {
+                // Exactly `sbits`, nothing to do
+            } else if d < sbits {
+                // Remaining shift fits within `sbits`. Leave `z` in place, shift `x * y`
+                rlo = (rhi << (sbits - d)) | (rlo >> d);
+                // Set the sticky bit
+                rlo |= IntTy::<F>::from((rlo << (sbits - d)) != zero);
+                rhi = rhi >> d;
+            } else {
+                // `z`'s magnitude is enough that `x * y` is irrelevant. It was nonzero, so set
+                // the sticky bit.
+                rlo = one;
+                rhi = zero;
+            }
+        }
+    } else {
+        // `z`'s magnitude once shifted fits entirely within `zlo`
+        zhi = zero;
+        d = -d;
+        if d == 0 {
+            // No shift needed
+            zlo = nz.m;
+        } else if d < sbits {
+            // Shift s.t. `nz.m` fits into `zlo`
+            let sticky = IntTy::<F>::from((nz.m << (sbits - d)) != zero);
+            zlo = (nz.m >> d) | sticky;
+        } else {
+            // Would be entirely shifted out, only set the sticky bit
+            zlo = one;
+        }
+    }
+
+    /* addition */
+
+    let mut neg = nx.neg ^ ny.neg;
+    let samesign: bool = !neg ^ nz.neg;
+    let mut rhi_nonzero = true;
+
+    if samesign {
+        // r += z
+        rlo = rlo.wrapping_add(zlo);
+        rhi += zhi + IntTy::<F>::from(rlo < zlo);
+    } else {
+        // r -= z
+        let (res, borrow) = rlo.overflowing_sub(zlo);
+        rlo = res;
+        rhi = rhi.wrapping_sub(zhi.wrapping_add(IntTy::<F>::from(borrow)));
+        if (rhi >> (F::BITS - 1)) != zero {
+            rlo = rlo.signed().wrapping_neg().unsigned();
+            rhi = rhi.signed().wrapping_neg().unsigned() - IntTy::<F>::from(rlo != zero);
+            neg = !neg;
+        }
+        rhi_nonzero = rhi != zero;
+    }
+
+    /* Construct result */
+
+    // Shift result into `rhi`, left-aligned. Last bit is sticky
+    if rhi_nonzero {
+        // `d` > 0, need to shift both `rhi` and `rlo` into result
+        e += sbits;
+        d = rhi.leading_zeros() as i32 - 1;
+        rhi = (rhi << d) | (rlo >> (sbits - d));
+        // Update sticky
+        rhi |= IntTy::<F>::from((rlo << d) != zero);
+    } else if rlo != zero {
+        // `rhi` is zero, `rlo` is the entire result and needs to be shifted
+        d = rlo.leading_zeros() as i32 - 1;
+        if d < 0 {
+            // Shift and set sticky
+            rhi = (rlo >> 1) | (rlo & one);
+        } else {
+            rhi = rlo << d;
+        }
+    } else {
+        // exact +/- 0.0
+        return FpResult::ok(x * y + z);
+    }
+
+    e -= d;
+
+    // Use int->float conversion to populate the significand.
+    // i is in [1 << (BITS - 2), (1 << (BITS - 1)) - 1]
+    let mut i: F::SignedInt = rhi.signed();
+
+    if neg {
+        i = -i;
+    }
+
+    // `|r|` is in `[0x1p62,0x1p63]` for `f64`
+    let mut r: F = F::cast_from_lossy(i);
+
+    /* Account for subnormal and rounding */
+
+    // Unbiased exponent for the maximum value of `r`
+    let max_pow = F::BITS - 1 + F::EXP_BIAS;
+
+    let mut status = Status::OK;
+
+    if e < -(max_pow as i32 - 2) {
+        // Result is subnormal before rounding
+        if e == -(max_pow as i32 - 1) {
+            let mut c = F::from_parts(false, max_pow, zero);
+            if neg {
+                c = -c;
+            }
+
+            if r == c {
+                // Min normal after rounding.
+                status.set_underflow(true);
+                r = F::MIN_POSITIVE_NORMAL.copysign(r);
+                return FpResult::new(r, status);
+            }
+
+            if (rhi << (F::SIG_BITS + 1)) != zero {
+                // Account for truncated bits. One bit will be lost in the `scalbn` call, add
+                // another top bit to avoid double rounding if inexact.
+                let iu: F::Int = (rhi >> 1) | (rhi & one) | (one << (F::BITS - 2));
+                i = iu.signed();
+
+                if neg {
+                    i = -i;
+                }
+
+                r = F::cast_from_lossy(i);
+
+                // Remove the top bit
+                r = F::cast_from(2i8) * r - c;
+                status.set_underflow(true);
+            }
+        } else {
+            // Only round once when scaled
+            d = F::EXP_BITS as i32 - 1;
+            let sticky = IntTy::<F>::from(rhi << (F::BITS as i32 - d) != zero);
+            i = (((rhi >> d) | sticky) << d).signed();
+
+            if neg {
+                i = -i;
+            }
+
+            r = F::cast_from_lossy(i);
+        }
+    }
+
+    // Use our exponent to scale the final value.
+    FpResult::new(super::scalbn(r, e), status)
+}
+
+/// Representation of `F` that has handled subnormals.
+#[derive(Clone, Copy, Debug)]
+struct Norm<F: Float> {
+    /// Normalized significand with one guard bit, unsigned.
+    m: F::Int,
+    /// Exponent of the mantissa such that `m * 2^e = x`. Accounts for the shift in the mantissa
+    /// and the guard bit; that is, 1.0 will normalize as `m = 1 << 53` and `e = -53`.
+    e: i32,
+    neg: bool,
+}
+
+impl<F: Float> Norm<F> {
+    /// Unbias the exponent and account for the mantissa's precision, including the guard bit.
+    const EXP_UNBIAS: u32 = F::EXP_BIAS + F::SIG_BITS + 1;
+
+    /// Values greater than this had a saturated exponent (infinity or NaN), OR were zero and we
+    /// adjusted the exponent such that it exceeds this threshold.
+    const ZERO_INF_NAN: u32 = F::EXP_SAT - Self::EXP_UNBIAS;
+
+    fn from_float(x: F) -> Self {
+        let mut ix = x.to_bits();
+        let mut e = x.ex() as i32;
+        let neg = x.is_sign_negative();
+        if e == 0 {
+            // Normalize subnormals by multiplication
+            let scale_i = F::BITS - 1;
+            let scale_f = F::from_parts(false, scale_i + F::EXP_BIAS, F::Int::ZERO);
+            let scaled = x * scale_f;
+            ix = scaled.to_bits();
+            e = scaled.ex() as i32;
+            e = if e == 0 {
+                // If the exponent is still zero, the input was zero. Artificially set this value
+                // such that the final `e` will exceed `ZERO_INF_NAN`.
+                1 << F::EXP_BITS
+            } else {
+                // Otherwise, account for the scaling we just did.
+                e - scale_i as i32
+            };
+        }
+
+        e -= Self::EXP_UNBIAS as i32;
+
+        // Absolute value, set the implicit bit, and shift to create a guard bit
+        ix &= F::SIG_MASK;
+        ix |= F::IMPLICIT_BIT;
+        ix <<= 1;
+
+        Self { m: ix, e, neg }
+    }
+
+    /// True if the value was zero, infinity, or NaN.
+    fn is_zero_nan_inf(self) -> bool {
+        self.e >= Self::ZERO_INF_NAN as i32
+    }
+
+    /// True if the value was exactly zero.
+    fn is_zero(self) -> bool {
+        // The only exponent that strictly exceeds this value is our sentinel value for zero.
+        self.e > Self::ZERO_INF_NAN as i32
+    }
+}
diff --git a/libm/src/math/fma_wide.rs b/libm/src/math/generic/fma_wide.rs
similarity index 63%
rename from libm/src/math/fma_wide.rs
rename to libm/src/math/generic/fma_wide.rs
index f268c2f14..a2ef59d3e 100644
--- a/libm/src/math/fma_wide.rs
+++ b/libm/src/math/generic/fma_wide.rs
@@ -1,30 +1,6 @@
-/* SPDX-License-Identifier: MIT */
-/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
-
-use super::support::{FpResult, IntTy, Round, Status};
-use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
-
-// Placeholder so we can have `fmaf16` in the `Float` trait.
-#[allow(unused)]
-#[cfg(f16_enabled)]
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
-    unimplemented!()
-}
-
-/// Floating multiply add (f32)
-///
-/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
-#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
-pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
-    select_implementation! {
-        name: fmaf,
-        use_arch: all(target_arch = "aarch64", target_feature = "neon"),
-        args: x, y, z,
-    }
-
-    fma_wide_round(x, y, z, Round::Nearest).val
-}
+use crate::support::{
+    CastFrom, CastInto, DFloat, Float, FpResult, HFloat, IntTy, MinInt, Round, Status,
+};
 
 /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
 /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
@@ -95,17 +71,3 @@ where
 
     FpResult::ok(B::from_bits(ui).narrow())
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn issue_263() {
-        let a = f32::from_bits(1266679807);
-        let b = f32::from_bits(1300234242);
-        let c = f32::from_bits(1115553792);
-        let expected = f32::from_bits(1501560833);
-        assert_eq!(fmaf(a, b, c), expected);
-    }
-}
diff --git a/libm/src/math/generic/mod.rs b/libm/src/math/generic/mod.rs
index 35846351a..9d497a03f 100644
--- a/libm/src/math/generic/mod.rs
+++ b/libm/src/math/generic/mod.rs
@@ -6,6 +6,8 @@ mod copysign;
 mod fabs;
 mod fdim;
 mod floor;
+mod fma;
+mod fma_wide;
 mod fmax;
 mod fmaximum;
 mod fmaximum_num;
@@ -24,6 +26,8 @@ pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
 pub use floor::floor;
+pub use fma::fma_round;
+pub use fma_wide::fma_wide_round;
 pub use fmax::fmax;
 pub use fmaximum::fmaximum;
 pub use fmaximum_num::fmaximum_num;
diff --git a/libm/src/math/mod.rs b/libm/src/math/mod.rs
index 949c18b40..ce9b8fc58 100644
--- a/libm/src/math/mod.rs
+++ b/libm/src/math/mod.rs
@@ -159,7 +159,6 @@ mod fabs;
 mod fdim;
 mod floor;
 mod fma;
-mod fma_wide;
 mod fmin_fmax;
 mod fminimum_fmaximum;
 mod fminimum_fmaximum_num;
@@ -254,8 +253,7 @@ pub use self::expm1f::expm1f;
 pub use self::fabs::{fabs, fabsf};
 pub use self::fdim::{fdim, fdimf};
 pub use self::floor::{floor, floorf};
-pub use self::fma::fma;
-pub use self::fma_wide::fmaf;
+pub use self::fma::{fma, fmaf};
 pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
 pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
 pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
@@ -336,7 +334,7 @@ cfg_if! {
     // verify-sorted-end
 
     #[allow(unused_imports)]
-    pub(crate) use self::fma_wide::fmaf16;
+    pub(crate) use self::fma::fmaf16;
     }
 }
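Note (editorial, not part of the patch): a minimal sketch of how the relocated entry points are exercised after this change. It assumes only the public `libm::fma`/`libm::fmaf` re-exports visible in `libm/src/math/mod.rs` above; the `f32` bit patterns are the ones from the `issue_263` regression test.

    // Sketch: `fmaf` now routes through `generic::fma_wide_round` (widening to
    // `f64` internally), while `fma` uses the self-contained `generic::fma_round`
    // unless an arch-specific implementation is selected. Both round the product
    // and sum as a single operation, as if computed with infinite precision.
    fn main() {
        let a = f32::from_bits(1266679807);
        let b = f32::from_bits(1300234242);
        let c = f32::from_bits(1115553792);

        // The single-rounding result checked by the `issue_263` test above.
        assert_eq!(libm::fmaf(a, b, c).to_bits(), 1501560833);

        // An exact case for the f64 entry point.
        assert_eq!(libm::fma(2.0, 3.0, 4.0), 10.0);
    }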