@@ -126,7 +126,7 @@ template <size_t Bits> struct DyadicFloat {
126126 shift >= MantissaType::BITS ? MantissaType (0 ) : mantissa >> shift;
127127
128128 T d_hi = FPBits<T>::create_value (
129- sign, exp_hi,
129+ sign, static_cast < output_bits_t >( exp_hi) ,
130130 (static_cast <output_bits_t >(m_hi) & FPBits<T>::SIG_MASK) |
131131 IMPLICIT_MASK)
132132 .get_val ();
@@ -143,33 +143,41 @@ template <size_t Bits> struct DyadicFloat {
143143
144144 if (LIBC_UNLIKELY (exp_lo <= 0 )) {
145145 // d_lo is denormal, but the output is normal.
146- int scale_up_exponent = 2 * PRECISION ;
146+ int scale_up_exponent = 1 - exp_lo ;
147147 T scale_up_factor =
148- FPBits<T>::create_value (sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
148+ FPBits<T>::create_value (sign,
149+ static_cast <output_bits_t >(
150+ FPBits<T>::EXP_BIAS + scale_up_exponent),
149151 IMPLICIT_MASK)
150152 .get_val ();
151153 T scale_down_factor =
152- FPBits<T>::create_value (sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
154+ FPBits<T>::create_value (sign,
155+ static_cast <output_bits_t >(
156+ FPBits<T>::EXP_BIAS - scale_up_exponent),
153157 IMPLICIT_MASK)
154158 .get_val ();
155159
156- d_lo = FPBits<T>::create_value (sign, exp_lo + scale_up_exponent,
157- IMPLICIT_MASK)
160+ d_lo = FPBits<T>::create_value (
161+ sign, static_cast <output_bits_t >(exp_lo + scale_up_exponent),
162+ IMPLICIT_MASK)
158163 .get_val ();
159164
160165 return multiply_add (d_lo, T (round_and_sticky), d_hi * scale_up_factor) *
161166 scale_down_factor;
162167 }
163168
164- d_lo = FPBits<T>::create_value (sign, exp_lo, IMPLICIT_MASK).get_val ();
169+ d_lo = FPBits<T>::create_value (sign, static_cast <output_bits_t >(exp_lo),
170+ IMPLICIT_MASK)
171+ .get_val ();
165172
166173 // Still correct without FMA instructions if `d_lo` is not underflow.
167174 T r = multiply_add (d_lo, T (round_and_sticky), d_hi);
168175
169176 if (LIBC_UNLIKELY (denorm)) {
170177 // Exponent before rounding is in denormal range, simply clear the
171178 // exponent field.
172- output_bits_t clear_exp = (output_bits_t (exp_hi) << FPBits<T>::SIG_LEN);
179+ output_bits_t clear_exp = static_cast <output_bits_t >(
180+ output_bits_t (exp_hi) << FPBits<T>::SIG_LEN);
173181 output_bits_t r_bits = FPBits<T>(r).uintval () - clear_exp;
174182 if (!(r_bits & FPBits<T>::EXP_MASK)) {
175183 // Output is denormal after rounding, clear the implicit bit for 80-bit
0 commit comments