@@ -276,6 +276,9 @@ significantly more expensive than `x*y+z`. `fma` is used to improve accuracy in
276276algorithms. See [`muladd`](@ref).
277277"""
278278function fma end
279+ function fma_emulated(a::Float16, b::Float16, c::Float16)
280+     Float16(muladd(Float32(a), Float32(b), Float32(c))) # don't use fma if the hardware doesn't have it.
281+ end
279282 function fma_emulated(a::Float32, b::Float32, c::Float32)::Float32
280283     ab = Float64(a) * b
281284     res = ab + c
@@ -348,19 +351,14 @@ function fma_emulated(a::Float64, b::Float64,c::Float64)
348351     s = (abs(abhi) > abs(c)) ? (abhi - r + c + ablo) : (c - r + abhi + ablo)
349352     return r + s
350353 end
351- fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z)
352- fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z)
353354
354355# Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back
355356# onto a broken system libm; if so, use a software emulated fma
356- @assume_effects :consistent fma(x::Float32, y::Float32, z::Float32) = Core.Intrinsics.have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
357- @assume_effects :consistent fma(x::Float64, y::Float64, z::Float64) = Core.Intrinsics.have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
358-
359- function fma(a::Float16, b::Float16, c::Float16)
360-     Float16(muladd(Float32(a), Float32(b), Float32(c))) # don't use fma if the hardware doesn't have it.
357+ @assume_effects :consistent function fma(x::T, y::T, z::T) where {T<:IEEEFloat}
358+     Core.Intrinsics.have_fma(T) ? fma_float(x,y,z) : fma_emulated(x,y,z)
361359 end
362360
363- # This is necessary at least on 32-bit Intel Linux, since fma_llvm may
361+ # This is necessary at least on 32-bit Intel Linux, since fma_float may
364362# have called glibc, and some broken glibc fma implementations don't
365363# properly restore the rounding mode
366364 Rounding.setrounding_raw(Float32, Rounding.JL_FE_TONEAREST)
0 commit comments