1
1
/* SPDX-License-Identifier: MIT */
2
- /* origin: musl src/math/{ fma,fmaf} .c. Ported to generic Rust algorithm in 2025, TG. */
2
+ /* origin: musl src/math/fma.c. Ported to generic Rust algorithm in 2025, TG. */
3
3
4
4
use super :: super :: support:: { DInt , FpResult , HInt , IntTy , Round , Status } ;
5
- use super :: super :: { CastFrom , CastInto , DFloat , Float , HFloat , Int , MinInt } ;
5
+ use super :: { CastFrom , CastInto , Float , Int , MinInt } ;
6
6
7
- /// Fused multiply-add that works when there is not a larger float size available. Currently this
8
- /// is still specialized only for `f64`. Computes `(x * y) + z`.
7
+ /// Fused multiply add (f64)
8
+ ///
9
+ /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
9
10
#[ cfg_attr( all( test, assert_no_panic) , no_panic:: no_panic) ]
10
- pub fn fma < F > ( x : F , y : F , z : F ) -> F
11
- where
12
- F : Float ,
13
- F : CastFrom < F :: SignedInt > ,
14
- F : CastFrom < i8 > ,
15
- F :: Int : HInt ,
16
- u32 : CastInto < F :: Int > ,
17
- {
11
+ pub fn fma ( x : f64 , y : f64 , z : f64 ) -> f64 {
12
+ fma_round ( x, y, z, Round :: Nearest ) . val
13
+ }
14
+
15
+ /// Fused multiply add (f128)
16
+ ///
17
+ /// Computes `(x*y)+z`, rounded as one ternary operation (i.e. calculated with infinite precision).
18
+ #[ cfg( f128_enabled) ]
19
+ #[ cfg_attr( all( test, assert_no_panic) , no_panic:: no_panic) ]
20
+ pub fn fmaf128 ( x : f128 , y : f128 , z : f128 ) -> f128 {
18
21
fma_round ( x, y, z, Round :: Nearest ) . val
19
22
}
20
23
24
+ /// Fused multiply-add that works when there is not a larger float size available. Computes
25
+ /// `(x * y) + z`.
21
26
pub fn fma_round < F > ( x : F , y : F , z : F , _round : Round ) -> FpResult < F >
22
27
where
23
28
F : Float ,
@@ -222,79 +227,7 @@ where
222
227
}
223
228
224
229
// Use our exponent to scale the final value.
225
- FpResult :: new ( super :: scalbn ( r, e) , status)
226
- }
227
-
228
- /// Fma implementation when a hardware-backed larger float type is available. For `f32` and `f64`,
229
- /// `f64` has enough precision to represent the `f32` in its entirety, except for double rounding.
230
- pub fn fma_wide < F , B > ( x : F , y : F , z : F ) -> F
231
- where
232
- F : Float + HFloat < D = B > ,
233
- B : Float + DFloat < H = F > ,
234
- B :: Int : CastInto < i32 > ,
235
- i32 : CastFrom < i32 > ,
236
- {
237
- fma_wide_round ( x, y, z, Round :: Nearest ) . val
238
- }
239
-
240
- pub fn fma_wide_round < F , B > ( x : F , y : F , z : F , round : Round ) -> FpResult < F >
241
- where
242
- F : Float + HFloat < D = B > ,
243
- B : Float + DFloat < H = F > ,
244
- B :: Int : CastInto < i32 > ,
245
- i32 : CastFrom < i32 > ,
246
- {
247
- let one = IntTy :: < B > :: ONE ;
248
-
249
- let xy: B = x. widen ( ) * y. widen ( ) ;
250
- let mut result: B = xy + z. widen ( ) ;
251
- let mut ui: B :: Int = result. to_bits ( ) ;
252
- let re = result. ex ( ) ;
253
- let zb: B = z. widen ( ) ;
254
-
255
- let prec_diff = B :: SIG_BITS - F :: SIG_BITS ;
256
- let excess_prec = ui & ( ( one << prec_diff) - one) ;
257
- let halfway = one << ( prec_diff - 1 ) ;
258
-
259
- // Common case: the larger precision is fine if...
260
- // This is not a halfway case
261
- if excess_prec != halfway
262
- // Or the result is NaN
263
- || re == B :: EXP_SAT
264
- // Or the result is exact
265
- || ( result - xy == zb && result - zb == xy)
266
- // Or the mode is something other than round to nearest
267
- || round != Round :: Nearest
268
- {
269
- let min_inexact_exp = ( B :: EXP_BIAS as i32 + F :: EXP_MIN_SUBNORM ) as u32 ;
270
- let max_inexact_exp = ( B :: EXP_BIAS as i32 + F :: EXP_MIN ) as u32 ;
271
-
272
- let mut status = Status :: OK ;
273
-
274
- if ( min_inexact_exp..max_inexact_exp) . contains ( & re) && status. inexact ( ) {
275
- // This branch is never hit; requires previous operations to set a status
276
- status. set_inexact ( false ) ;
277
-
278
- result = xy + z. widen ( ) ;
279
- if status. inexact ( ) {
280
- status. set_underflow ( true ) ;
281
- } else {
282
- status. set_inexact ( true ) ;
283
- }
284
- }
285
-
286
- return FpResult { val : result. narrow ( ) , status } ;
287
- }
288
-
289
- let neg = ui >> ( B :: BITS - 1 ) != IntTy :: < B > :: ZERO ;
290
- let err = if neg == ( zb > xy) { xy - result + zb } else { zb - result + xy } ;
291
- if neg == ( err < B :: ZERO ) {
292
- ui += one;
293
- } else {
294
- ui -= one;
295
- }
296
-
297
- FpResult :: ok ( B :: from_bits ( ui) . narrow ( ) )
230
+ FpResult :: new ( super :: generic:: scalbn ( r, e) , status)
298
231
}
299
232
300
233
/// Representation of `F` that has handled subnormals.
@@ -363,6 +296,7 @@ impl<F: Float> Norm<F> {
363
296
mod tests {
364
297
use super :: * ;
365
298
299
+ /// Test the generic `fma_round` algorithm for a given float.
366
300
fn spec_test < F > ( )
367
301
where
368
302
F : Float ,
@@ -375,6 +309,8 @@ mod tests {
375
309
let y = F :: from_bits ( F :: Int :: ONE ) ;
376
310
let z = F :: ZERO ;
377
311
312
+ let fma = |x, y, z| fma_round ( x, y, z, Round :: Nearest ) . val ;
313
+
378
314
// 754-2020 says "When the exact result of (a × b) + c is non-zero yet the result of
379
315
// fusedMultiplyAdd is zero because of rounding, the zero result takes the sign of the
380
316
// exact result"
@@ -384,6 +320,11 @@ mod tests {
384
320
assert_biteq ! ( fma( -x, -y, z) , F :: ZERO ) ;
385
321
}
386
322
323
+ #[ test]
324
+ fn spec_test_f32 ( ) {
325
+ spec_test :: < f32 > ( ) ;
326
+ }
327
+
387
328
#[ test]
388
329
fn spec_test_f64 ( ) {
389
330
spec_test :: < f64 > ( ) ;
@@ -417,4 +358,33 @@ mod tests {
417
358
fn spec_test_f128 ( ) {
418
359
spec_test :: < f128 > ( ) ;
419
360
}
361
+
362
+ #[ test]
363
+ fn fma_segfault ( ) {
364
+ // These two inputs cause fma to segfault on release due to overflow:
365
+ assert_eq ! (
366
+ fma(
367
+ -0.0000000000000002220446049250313 ,
368
+ -0.0000000000000002220446049250313 ,
369
+ -0.0000000000000002220446049250313
370
+ ) ,
371
+ -0.00000000000000022204460492503126 ,
372
+ ) ;
373
+
374
+ let result = fma ( -0.992 , -0.992 , -0.992 ) ;
375
+ //force rounding to storage format on x87 to prevent superious errors.
376
+ #[ cfg( all( target_arch = "x86" , not( target_feature = "sse2" ) ) ) ]
377
+ let result = force_eval ! ( result) ;
378
+ assert_eq ! ( result, -0.007936000000000007 , ) ;
379
+ }
380
+
381
+ #[ test]
382
+ fn fma_sbb ( ) {
383
+ assert_eq ! ( fma( -( 1.0 - f64 :: EPSILON ) , f64 :: MIN , f64 :: MIN ) , -3991680619069439e277 ) ;
384
+ }
385
+
386
+ #[ test]
387
+ fn fma_underflow ( ) {
388
+ assert_eq ! ( fma( 1.1102230246251565e-16 , -9.812526705433188e-305 , 1.0894e-320 ) , 0.0 , ) ;
389
+ }
420
390
}
0 commit comments