@@ -77,8 +77,6 @@ eisel_lemire(ExpandedFloat<T> init_num,
7777 UIntType mantissa = init_num.mantissa ;
7878 int32_t exp10 = init_num.exponent ;
7979
80- constexpr uint32_t BITS_IN_MANTISSA = sizeof (mantissa) * 8 ;
81-
8280 if (sizeof (T) > 8 ) { // This algorithm cannot handle anything longer than a
8381 // double, so we skip straight to the fallback.
8482 return cpp::nullopt ;
@@ -94,8 +92,8 @@ eisel_lemire(ExpandedFloat<T> init_num,
9492 uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
9593 mantissa <<= clz;
9694
97- int32_t exp2 =
98- exp10_to_exp2 (exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
95+ int32_t exp2 = exp10_to_exp2 (exp10) + FloatProp::UINTTYPE_BITS +
96+ FloatProp::EXPONENT_BIAS - clz;
9997
10098 // Multiplication
10199 const uint64_t *power_of_ten =
@@ -112,7 +110,9 @@ eisel_lemire(ExpandedFloat<T> init_num,
112110 // accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
113111 // it's 6 bits for floats in this case.
114112 const uint64_t halfway_constant =
115- (uint64_t (1 ) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ))) - 1 ;
113+ (uint64_t (1 ) << (FloatProp::UINTTYPE_BITS -
114+ (FloatProp::MANTISSA_WIDTH + 3 ))) -
115+ 1 ;
116116 if ((high64 (first_approx) & halfway_constant) == halfway_constant &&
117117 low64 (first_approx) + mantissa < mantissa) {
118118 UInt128 low_bits =
@@ -131,11 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
131131 }
132132
133133 // Shifting to 54 bits for doubles and 25 bits for floats
134- UIntType msb =
135- static_cast <UIntType>( high64 (final_approx) >> (BITS_IN_MANTISSA - 1 ));
134+ UIntType msb = static_cast <UIntType>( high64 (final_approx) >>
135+ (FloatProp::UINTTYPE_BITS - 1 ));
136136 UIntType final_mantissa = static_cast <UIntType>(
137137 high64 (final_approx) >>
138- (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 )));
138+ (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3 )));
139139 exp2 -= static_cast <uint32_t >(1 ^ msb); // same as !msb
140140
141141 if (round == RoundDirection::Nearest) {
@@ -190,8 +190,6 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
190190 UIntType mantissa = init_num.mantissa ;
191191 int32_t exp10 = init_num.exponent ;
192192
193- constexpr uint32_t BITS_IN_MANTISSA = sizeof (mantissa) * 8 ;
194-
195193 // Exp10 Range
196194 // This doesn't reach very far into the range for long doubles, since it's
197195 // sized for doubles and their 11 exponent bits, and not for long doubles and
@@ -211,8 +209,8 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
211209 uint32_t clz = cpp::countl_zero<UIntType>(mantissa);
212210 mantissa <<= clz;
213211
214- int32_t exp2 =
215- exp10_to_exp2 (exp10) + BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
212+ int32_t exp2 = exp10_to_exp2 (exp10) + FloatProp::UINTTYPE_BITS +
213+ FloatProp::EXPONENT_BIAS - clz;
216214
217215 // Multiplication
218216 const uint64_t *power_of_ten =
@@ -249,19 +247,21 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
249247 // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
250248 // it's 12 bits for 128 bit floats in this case.
251249 constexpr UInt128 HALFWAY_CONSTANT =
252- (UInt128 (1 ) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ))) - 1 ;
250+ (UInt128 (1 ) << (FloatProp::UINTTYPE_BITS -
251+ (FloatProp::MANTISSA_WIDTH + 3 ))) -
252+ 1 ;
253253
254254 if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
255255 final_approx_lower + mantissa < mantissa) {
256256 return cpp::nullopt ;
257257 }
258258
259259 // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
260- uint32_t msb =
261- static_cast < uint32_t >(final_approx_upper >> (BITS_IN_MANTISSA - 1 ));
260+ uint32_t msb = static_cast < uint32_t >(final_approx_upper >>
261+ (FloatProp::UINTTYPE_BITS - 1 ));
262262 UIntType final_mantissa =
263263 final_approx_upper >>
264- (msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ));
264+ (msb + FloatProp::UINTTYPE_BITS - (FloatProp::MANTISSA_WIDTH + 3 ));
265265 exp2 -= static_cast <uint32_t >(1 ^ msb); // same as !msb
266266
267267 if (round == RoundDirection::Nearest) {
@@ -622,9 +622,10 @@ template <> constexpr int32_t get_upper_bound<double>() { return 309; }
622622// other out, and subnormal numbers allow for the result to be at the very low
623623// end of the final mantissa.
// Compile-time lower bound for type T: the negation of
// (EXPONENT_BIAS + MANTISSA_WIDTH + bit width) integer-divided by 3.
// NOTE(review): this block is a diff hunk; the "-" lines appear to be the
// previous form (bit width spelled as sizeof(T) * 8) and the "+" lines the
// replacement (bit width taken from FloatProp::UINTTYPE_BITS).
624624template <typename T> constexpr int32_t get_lower_bound () {
625- return -((fputil::FloatProperties<T>::EXPONENT_BIAS +
626- static_cast <int32_t >(fputil::FloatProperties<T>::MANTISSA_WIDTH +
627- (sizeof (T) * 8 ))) /
// Replacement form: a local alias shortens the repeated
// fputil::FloatProperties<T> qualifier; the shape of the expression is
// otherwise the same as above.
625+ using FloatProp = typename fputil::FloatProperties<T>;
626+ return -((FloatProp::EXPONENT_BIAS +
627+ static_cast <int32_t >(FloatProp::MANTISSA_WIDTH +
628+ FloatProp::UINTTYPE_BITS)) /
628629 3 );
629630}
630631
@@ -733,7 +734,6 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
733734
734735 // This is the number of leading zeroes a properly normalized float of type T
735736 // should have.
736- constexpr int32_t NUMBITS = sizeof (UIntType) * 8 ;
737737 constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1 ;
738738
739739 // Normalization step 1: Bring the leading bit to the highest bit of UIntType.
@@ -743,8 +743,9 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
743743 // Keep exp2 representing the exponent of the lowest bit of UIntType.
744744 exp2 -= amount_to_shift_left;
745745
746- // biasedExponent represents the biased exponent of the most significant bit.
747- int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1 ;
746+ // biased_exponent represents the biased exponent of the most significant bit.
747+ int32_t biased_exponent =
748+ exp2 + FloatProp::UINTTYPE_BITS + FPBits::EXPONENT_BIAS - 1 ;
748749
749750 // Handle numbers that're too large and get squashed to inf
750751 if (biased_exponent >= INF_EXP) {
@@ -754,14 +755,15 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
754755 return output;
755756 }
756757
757- uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1 ;
758+ uint32_t amount_to_shift_right =
759+ FloatProp::UINTTYPE_BITS - FloatProp::MANTISSA_WIDTH - 1 ;
758760
759761 // Handle subnormals.
760762 if (biased_exponent <= 0 ) {
761763 amount_to_shift_right += 1 - biased_exponent;
762764 biased_exponent = 0 ;
763765
764- if (amount_to_shift_right > NUMBITS ) {
766+ if (amount_to_shift_right > FloatProp::UINTTYPE_BITS ) {
765767 // Return 0 if the exponent is too small.
766768 output.num = {0 , 0 };
767769 output.error = ERANGE;
@@ -774,7 +776,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
774776 bool round_bit = static_cast <bool >(mantissa & round_bit_mask);
775777 bool sticky_bit = static_cast <bool >(mantissa & sticky_mask) || truncated;
776778
777- if (amount_to_shift_right < NUMBITS ) {
779+ if (amount_to_shift_right < FloatProp::UINTTYPE_BITS ) {
778780 // Shift the mantissa and clear the implicit bit.
779781 mantissa >>= amount_to_shift_right;
780782 mantissa &= FloatProp::MANTISSA_MASK;
0 commit comments