@@ -100,6 +100,26 @@ LIBC_INLINE NumberPair<T> exact_mult(const NumberPair<T> &as, T a, T b) {
100100 return r;
101101}
102102
103+ // The templated exact multiplication needs a template version of the
104+ // LIBC_TARGET_CPU_HAS_FMA_* macros to correctly select the implementation.
105+ // These can be moved to "src/__support/macros/properties/cpu_features.h" if
106+ // other parts of libc need them.
107+ template <typename T> struct TargetHasFmaInstruction { // Primary template: compile-time FMA flag for type T.
108+ static constexpr bool VALUE = false ; // Default for any T without an explicit specialization.
109+ };
110+
111+ #ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
112+ template <> struct TargetHasFmaInstruction <float > { // Only compiled when LIBC_TARGET_CPU_HAS_FMA_FLOAT is defined.
113+ static constexpr bool VALUE = true ; // Overrides the primary template's `false` for float.
114+ };
115+ #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
116+
117+ #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE
118+ template <> struct TargetHasFmaInstruction <double > { // Only compiled when LIBC_TARGET_CPU_HAS_FMA_DOUBLE is defined.
119+ static constexpr bool VALUE = true ; // Overrides the primary template's `false` for double.
120+ };
121+ #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
122+
103123// Note: When FMA instruction is not available, the `exact_mult` function is
104124// only correct for round-to-nearest mode. See:
105125// Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
@@ -111,15 +131,15 @@ template <typename T = double, size_t SPLIT_B = DefaultSplit<T>::VALUE>
111131LIBC_INLINE NumberPair<T> exact_mult (T a, T b) { // Returns the product of a and b as a (hi, lo) NumberPair<T>.
112132 NumberPair<T> r{0.0 , 0.0 };
113133
114- # ifdef LIBC_TARGET_CPU_HAS_FMA
115- r.hi = a * b;
116- r.lo = fputil::multiply_add (a, b, -r.hi );
117- # else
118- // Dekker's Product.
119- NumberPair<T> as = split (a);
134+ if constexpr (TargetHasFmaInstruction<T>::VALUE) { // Branch is selected at compile time, per type T.
135+ r.hi = a * b;
136+ r.lo = fputil::multiply_add (a, b, -r.hi ); // Presumably a * b - r.hi, i.e. the rounding error of r.hi -- confirm against fputil::multiply_add.
137+ } else {
138+ // Dekker's Product.
139+ NumberPair<T> as = split (a);
120140
121- r = exact_mult<T, SPLIT_B>(as, a, b);
122- # endif // LIBC_TARGET_CPU_HAS_FMA
141+ r = exact_mult<T, SPLIT_B>(as, a, b); // Delegates to the overload that takes the pre-split `as`.
142+ }
123143
124144 return r;
125145}
0 commit comments