Skip to content

Commit 7672bd0

Browse files
committed
fma refactor 3/3: combine fma public API with its implementation
Similar to other recent changes, just put public API in the same file as its generic implementation. To keep things slightly cleaner, split the default implementation from the `_wide` implementation. Also introduces a stub `fmaf16`.
1 parent c1ea5dc commit 7672bd0

File tree

7 files changed

+161
-125
lines changed

7 files changed

+161
-125
lines changed

libm/etc/function-definitions.json

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -343,22 +343,19 @@
343343
},
344344
"fma": {
345345
"sources": [
346-
"src/math/fma.rs",
347-
"src/math/generic/fma.rs"
346+
"src/math/fma.rs"
348347
],
349348
"type": "f64"
350349
},
351350
"fmaf": {
352351
"sources": [
353-
"src/math/fmaf.rs",
354-
"src/math/generic/fma.rs"
352+
"src/math/fma_wide.rs"
355353
],
356354
"type": "f32"
357355
},
358356
"fmaf128": {
359357
"sources": [
360-
"src/math/fmaf128.rs",
361-
"src/math/generic/fma.rs"
358+
"src/math/fma.rs"
362359
],
363360
"type": "f128"
364361
},

libm/src/math/fma.rs

Lines changed: 55 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,28 @@
11
/* SPDX-License-Identifier: MIT */
2-
/* origin: musl src/math/{fma,fmaf}.c. Ported to generic Rust algorithm in 2025, TG. */
2+
/* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
33

44
use super::super::support::{DInt, FpResult, HInt, IntTy, Round, Status};
5-
use super::super::{CastFrom, CastInto, DFloat, Float, HFloat, Int, MinInt};
5+
use super::{CastFrom, CastInto, Float, Int, MinInt};
66

7-
/// Fused multiply-add that works when there is not a larger float size available. Currently this
8-
/// is still specialized only for `f64`. Computes `(x * y) + z`.
7+
/// Fused multiply add (f64)
8+
///
9+
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
910
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
10-
pub fn fma<F>(x: F, y: F, z: F) -> F
11-
where
12-
F: Float,
13-
F: CastFrom<F::SignedInt>,
14-
F: CastFrom<i8>,
15-
F::Int: HInt,
16-
u32: CastInto<F::Int>,
17-
{
11+
pub fn fma(x: f64, y: f64, z: f64) -> f64 {
12+
fma_round(x, y, z, Round::Nearest).val
13+
}
14+
15+
/// Fused multiply add (f128)
16+
///
17+
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
18+
#[cfg(f128_enabled)]
19+
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
20+
pub fn fmaf128(x: f128, y: f128, z: f128) -> f128 {
1821
fma_round(x, y, z, Round::Nearest).val
1922
}
2023

24+
/// Fused multiply-add that works when there is not a larger float size available. Computes
25+
/// `(x * y) + z`.
2126
pub fn fma_round<F>(x: F, y: F, z: F, _round: Round) -> FpResult<F>
2227
where
2328
F: Float,
@@ -222,79 +227,7 @@ where
222227
}
223228

224229
// Use our exponent to scale the final value.
225-
FpResult::new(super::scalbn(r, e), status)
226-
}
227-
228-
/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
229-
/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
230-
pub fn fma_wide<F, B>(x: F, y: F, z: F) -> F
231-
where
232-
F: Float + HFloat<D = B>,
233-
B: Float + DFloat<H = F>,
234-
B::Int: CastInto<i32>,
235-
i32: CastFrom<i32>,
236-
{
237-
fma_wide_round(x, y, z, Round::Nearest).val
238-
}
239-
240-
pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
241-
where
242-
F: Float + HFloat<D = B>,
243-
B: Float + DFloat<H = F>,
244-
B::Int: CastInto<i32>,
245-
i32: CastFrom<i32>,
246-
{
247-
let one = IntTy::<B>::ONE;
248-
249-
let xy: B = x.widen() * y.widen();
250-
let mut result: B = xy + z.widen();
251-
let mut ui: B::Int = result.to_bits();
252-
let re = result.ex();
253-
let zb: B = z.widen();
254-
255-
let prec_diff = B::SIG_BITS - F::SIG_BITS;
256-
let excess_prec = ui & ((one << prec_diff) - one);
257-
let halfway = one << (prec_diff - 1);
258-
259-
// Common case: the larger precision is fine if...
260-
// This is not a halfway case
261-
if excess_prec != halfway
262-
// Or the result is NaN
263-
|| re == B::EXP_SAT
264-
// Or the result is exact
265-
|| (result - xy == zb && result - zb == xy)
266-
// Or the mode is something other than round to nearest
267-
|| round != Round::Nearest
268-
{
269-
let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
270-
let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
271-
272-
let mut status = Status::OK;
273-
274-
if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
275-
// This branch is never hit; requires previous operations to set a status
276-
status.set_inexact(false);
277-
278-
result = xy + z.widen();
279-
if status.inexact() {
280-
status.set_underflow(true);
281-
} else {
282-
status.set_inexact(true);
283-
}
284-
}
285-
286-
return FpResult { val: result.narrow(), status };
287-
}
288-
289-
let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
290-
let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
291-
if neg == (err < B::ZERO) {
292-
ui += one;
293-
} else {
294-
ui -= one;
295-
}
296-
297-
FpResult::ok(B::from_bits(ui).narrow())
230+
FpResult::new(super::generic::scalbn(r, e), status)
298231
}
299232

300233
/// Representation of `F` that has handled subnormals.
@@ -363,6 +296,7 @@ impl<F: Float> Norm<F> {
363296
mod tests {
364297
use super::*;
365298

299+
/// Test the generic `fma_round` algorithm for a given float.
366300
fn spec_test<F>()
367301
where
368302
F: Float,
@@ -375,6 +309,8 @@ mod tests {
375309
let y = F::from_bits(F::Int::ONE);
376310
let z = F::ZERO;
377311

312+
let fma = |x, y, z| fma_round(x, y, z, Round::Nearest).val;
313+
378314
// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
379315
// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
380316
// exact result"
@@ -384,6 +320,11 @@ mod tests {
384320
assert_biteq!(fma(-x, -y, z), F::ZERO);
385321
}
386322

323+
#[test]
324+
fn spec_test_f32() {
325+
spec_test::<f32>();
326+
}
327+
387328
#[test]
388329
fn spec_test_f64() {
389330
spec_test::<f64>();
@@ -417,4 +358,33 @@ mod tests {
417358
fn spec_test_f128() {
418359
spec_test::<f128>();
419360
}
361+
362+
#[test]
363+
fn fma_segfault() {
364+
// These two inputs cause fma to segfault on release due to overflow:
365+
assert_eq!(
366+
fma(
367+
-0.0000000000000002220446049250313,
368+
-0.0000000000000002220446049250313,
369+
-0.0000000000000002220446049250313
370+
),
371+
-0.00000000000000022204460492503126,
372+
);
373+
374+
let result = fma(-0.992, -0.992, -0.992);
375+
// Force rounding to storage format on x87 to prevent spurious errors.
376+
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
377+
let result = force_eval!(result);
378+
assert_eq!(result, -0.007936000000000007,);
379+
}
380+
381+
#[test]
382+
fn fma_sbb() {
383+
assert_eq!(fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277);
384+
}
385+
386+
#[test]
387+
fn fma_underflow() {
388+
assert_eq!(fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0,);
389+
}
420390
}

libm/src/math/fma_wide.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/* SPDX-License-Identifier: MIT */
2+
/* origin: musl src/math/fmaf.c. Ported to generic Rust algorithm in 2025, TG. */
3+
4+
use super::super::support::{FpResult, IntTy, Round, Status};
5+
use super::{CastFrom, CastInto, DFloat, Float, HFloat, MinInt};
6+
7+
// Placeholder so we can have `fmaf16` in the `Float` trait.
8+
#[allow(unused)]
9+
#[cfg(f16_enabled)]
10+
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
11+
pub(crate) fn fmaf16(_x: f16, _y: f16, _z: f16) -> f16 {
12+
unimplemented!()
13+
}
14+
15+
/// Fused multiply add (f32)
16+
///
17+
/// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
18+
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
19+
pub fn fmaf(x: f32, y: f32, z: f32) -> f32 {
20+
fma_wide_round(x, y, z, Round::Nearest).val
21+
}
22+
23+
/// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
24+
/// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
25+
pub fn fma_wide_round<F, B>(x: F, y: F, z: F, round: Round) -> FpResult<F>
26+
where
27+
F: Float + HFloat<D = B>,
28+
B: Float + DFloat<H = F>,
29+
B::Int: CastInto<i32>,
30+
i32: CastFrom<i32>,
31+
{
32+
let one = IntTy::<B>::ONE;
33+
34+
let xy: B = x.widen() * y.widen();
35+
let mut result: B = xy + z.widen();
36+
let mut ui: B::Int = result.to_bits();
37+
let re = result.ex();
38+
let zb: B = z.widen();
39+
40+
let prec_diff = B::SIG_BITS - F::SIG_BITS;
41+
let excess_prec = ui & ((one << prec_diff) - one);
42+
let halfway = one << (prec_diff - 1);
43+
44+
// Common case: the larger precision is fine if...
45+
// This is not a halfway case
46+
if excess_prec != halfway
47+
// Or the result is NaN
48+
|| re == B::EXP_SAT
49+
// Or the result is exact
50+
|| (result - xy == zb && result - zb == xy)
51+
// Or the mode is something other than round to nearest
52+
|| round != Round::Nearest
53+
{
54+
let min_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN_SUBNORM) as u32;
55+
let max_inexact_exp = (B::EXP_BIAS as i32 + F::EXP_MIN) as u32;
56+
57+
let mut status = Status::OK;
58+
59+
if (min_inexact_exp..max_inexact_exp).contains(&re) && status.inexact() {
60+
// This branch is never hit; requires previous operations to set a status
61+
status.set_inexact(false);
62+
63+
result = xy + z.widen();
64+
if status.inexact() {
65+
status.set_underflow(true);
66+
} else {
67+
status.set_inexact(true);
68+
}
69+
}
70+
71+
return FpResult { val: result.narrow(), status };
72+
}
73+
74+
let neg = ui >> (B::BITS - 1) != IntTy::<B>::ZERO;
75+
let err = if neg == (zb > xy) { xy - result + zb } else { zb - result + xy };
76+
if neg == (err < B::ZERO) {
77+
ui += one;
78+
} else {
79+
ui -= one;
80+
}
81+
82+
FpResult::ok(B::from_bits(ui).narrow())
83+
}
84+
85+
#[cfg(test)]
86+
mod tests {
87+
use super::*;
88+
89+
#[test]
90+
fn issue_263() {
91+
let a = f32::from_bits(1266679807);
92+
let b = f32::from_bits(1300234242);
93+
let c = f32::from_bits(1115553792);
94+
let expected = f32::from_bits(1501560833);
95+
assert_eq!(fmaf(a, b, c), expected);
96+
}
97+
}

libm/src/math/fmaf.rs

Lines changed: 0 additions & 21 deletions
This file was deleted.

libm/src/math/fmaf128.rs

Lines changed: 0 additions & 7 deletions
This file was deleted.

libm/src/math/generic/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ mod copysign;
33
mod fabs;
44
mod fdim;
55
mod floor;
6-
mod fma;
76
mod fmax;
87
mod fmaximum;
98
mod fmaximum_num;
@@ -22,7 +21,6 @@ pub use copysign::copysign;
2221
pub use fabs::fabs;
2322
pub use fdim::fdim;
2423
pub use floor::floor;
25-
pub use fma::{fma, fma_wide};
2624
pub use fmax::fmax;
2725
pub use fmaximum::fmaximum;
2826
pub use fmaximum_num::fmaximum_num;

libm/src/math/mod.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ mod fdimf;
164164
mod floor;
165165
mod floorf;
166166
mod fma;
167-
mod fmaf;
167+
mod fma_wide;
168168
mod fmin_fmax;
169169
mod fminimum_fmaximum;
170170
mod fminimum_fmaximum_num;
@@ -271,7 +271,7 @@ pub use self::fdimf::fdimf;
271271
pub use self::floor::floor;
272272
pub use self::floorf::floorf;
273273
pub use self::fma::fma;
274-
pub use self::fmaf::fmaf;
274+
pub use self::fma_wide::fmaf;
275275
pub use self::fmin_fmax::{fmax, fmaxf, fmin, fminf};
276276
pub use self::fminimum_fmaximum::{fmaximum, fmaximumf, fminimum, fminimumf};
277277
pub use self::fminimum_fmaximum_num::{fmaximum_num, fmaximum_numf, fminimum_num, fminimum_numf};
@@ -370,6 +370,9 @@ cfg_if! {
370370
pub use self::sqrtf16::sqrtf16;
371371
pub use self::truncf16::truncf16;
372372
// verify-sorted-end
373+
374+
#[allow(unused_imports)]
375+
pub(crate) use self::fma_wide::fmaf16;
373376
}
374377
}
375378

@@ -381,7 +384,6 @@ cfg_if! {
381384
mod fabsf128;
382385
mod fdimf128;
383386
mod floorf128;
384-
mod fmaf128;
385387
mod fmodf128;
386388
mod ldexpf128;
387389
mod roundf128;
@@ -396,7 +398,7 @@ cfg_if! {
396398
pub use self::fabsf128::fabsf128;
397399
pub use self::fdimf128::fdimf128;
398400
pub use self::floorf128::floorf128;
399-
pub use self::fmaf128::fmaf128;
401+
pub use self::fma::fmaf128;
400402
pub use self::fmin_fmax::{fmaxf128, fminf128};
401403
pub use self::fminimum_fmaximum::{fmaximumf128, fminimumf128};
402404
pub use self::fminimum_fmaximum_num::{fmaximum_numf128, fminimum_numf128};

0 commit comments

Comments
 (0)