reduce register pressure.

IRainman · IRainman · commit b261492ae742 · 2025-04-08T01:22:14.000+03:00
diff --git a/include/fast_float/digit_comparison.h b/include/fast_float/digit_comparison.h
@@ -40,7 +40,7 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
 // to slow down performance for faster algorithms, and this is still fast.
 template <typename UC>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int16_t
-scientific_exponent(const parsed_number_string_t<UC> &num) noexcept {
+scientific_exponent(parsed_number_string_t<UC> const &num) noexcept {
   uint64_t mantissa = num.mantissa;
   int16_t exponent = num.exponent;
   while (mantissa >= 10000) {
@@ -61,7 +61,7 @@ scientific_exponent(const parsed_number_string_t<UC> &num) noexcept {
 // this converts a native floating-point number to an extended-precision float.
 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-to_extended(T value) noexcept {
+to_extended(T const &value) noexcept {
   using equiv_uint = equiv_uint_t<T>;
   constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
   constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
@@ -96,7 +96,7 @@ to_extended(T value) noexcept {
 // halfway between b and b+u.
 template <typename T>
 fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-to_extended_halfway(T value) noexcept {
+to_extended_halfway(T const &value) noexcept {
   adjusted_mantissa am = to_extended(value);
   am.mantissa <<= 1;
   am.mantissa += 1;
@@ -341,26 +341,24 @@ parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num,
 }
 
 template <typename T>
-inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
+inline FASTFLOAT_CONSTEXPR20 void
+positive_digit_comp(bigint &bigmant, adjusted_mantissa &am,
+                    int32_t const exponent) noexcept {
   FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
-  adjusted_mantissa answer;
   bool truncated;
-  answer.mantissa = bigmant.hi64(truncated);
-  int bias = binary_format<T>::mantissa_explicit_bits() -
+  am.mantissa = bigmant.hi64(truncated);
+  int32_t bias = binary_format<T>::mantissa_explicit_bits() -
              binary_format<T>::minimum_exponent();
-  answer.power2 = bigmant.bit_length() - 64 + bias;
+  am.power2 = bigmant.bit_length() - 64 + bias;
 
-  round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
+  round<T>(am, [truncated](adjusted_mantissa &a, int32_t shift) {
     round_nearest_tie_even(
         a, shift,
         [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
           return is_above || (is_halfway && truncated) ||
                  (is_odd && is_halfway);
         });
   });
-
-  return answer;
 }
 
 // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
@@ -369,24 +367,26 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
 // we then need to scale by `2^(f- e)`, and then the two significant digits
 // are of the same magnitude.
 template <typename T>
-inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
-negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
-                    const int32_t exponent) noexcept {
+inline FASTFLOAT_CONSTEXPR20 void
+negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
+                    int32_t const exponent) noexcept {
   bigint &real_digits = bigmant;
   const int32_t &real_exp = exponent;
 
-  // get the value of `b`, rounded down, and get a bigint representation of b+h
-  adjusted_mantissa am_b = am;
-  // gcc7 buf: use a lambda to remove the noexcept qualifier bug with
-  // -Wnoexcept-type.
-  round<T>(am_b,
-           [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
   T b;
-  to_float(
+  {
+    // get the value of `b`, rounded down, and get a bigint representation of b+h
+    adjusted_mantissa am_b = am;
+    // gcc7 bug: use a lambda to remove the noexcept qualifier bug with
+    // -Wnoexcept-type.
+    round<T>(am_b,
+             [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
+    to_float(
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
-      false,
+            false,
 #endif
-      am_b, b);
+            am_b, b);
+  }
   adjusted_mantissa theor = to_extended_halfway(b);
   bigint theor_digits(theor.mantissa);
   int32_t theor_exp = theor.power2;
@@ -405,8 +405,7 @@ negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
 
   // compare digits, and use it to direct rounding
   int ord = real_digits.compare(theor_digits);
-  adjusted_mantissa answer = am;
-  round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
+  round<T>(am, [ord](adjusted_mantissa &a, int32_t shift) {
     round_nearest_tie_even(
         a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
           (void)_;  // not needed, since we've done our comparison
@@ -420,8 +419,6 @@ negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
           }
         });
   });
-
-  return answer;
 }
 
 // parse the significant digits as a big integer to unambiguously round the
@@ -438,8 +435,8 @@ negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
 // the actual digits. we then compare the big integer representations
 // of both, and use that to direct rounding.
 template <typename T, typename UC>
-inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
-    const parsed_number_string_t<UC> &num, adjusted_mantissa &am) noexcept {
+inline FASTFLOAT_CONSTEXPR20 void digit_comp(
+    parsed_number_string_t<UC> const &num, adjusted_mantissa &am) noexcept {
   // remove the invalid exponent bias
   am.power2 -= invalid_am_bias;
 
@@ -451,9 +448,9 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
   // can't underflow, since digits is at most max_digits.
   int16_t exponent = sci_exp + 1 - digits;
   if (exponent >= 0) {
-    return positive_digit_comp<T>(bigmant, exponent);
+    positive_digit_comp<T>(bigmant, am, exponent);
   } else {
-    return negative_digit_comp<T>(bigmant, am, exponent);
+    negative_digit_comp<T>(bigmant, am, exponent);
   }
 }
 
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
@@ -772,7 +772,7 @@ inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {
 }
 
 template <>
-inline constexpr unsigned int binary_format<std::float16_t>::max_digits() {
+inline constexpr size_t binary_format<std::float16_t>::max_digits() {
   return 22;
 }
 #endif // __STDCPP_FLOAT16_T__
@@ -899,7 +899,7 @@ inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {
 }
 
 template <>
-inline constexpr unsigned int binary_format<std::bfloat16_t>::max_digits() {
+inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {
   return 98;
 }
 #endif // __STDCPP_BFLOAT16_T__
@@ -1009,7 +1009,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void to_float(
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
     bool const negative,
 #endif
-    adjusted_mantissa const am, T &value) noexcept {
+    adjusted_mantissa const &am, T &value) noexcept {
   using equiv_uint = equiv_uint_t<T>;
   equiv_uint word = equiv_uint(am.mantissa);
   word = equiv_uint(word | equiv_uint(am.power2)
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
@@ -283,7 +283,7 @@ from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
   // and we have an invalid power (am.power2 < 0), then we need to go the long
   // way around again. This is very uncommon.
   if (am.power2 < 0) {
-    am = digit_comp<T>(pns, am);
+    digit_comp<T>(pns, am);
   }
   to_float(
 #ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN

Original file line number	Diff line number	Diff line change
`@@ -772,7 +772,7 @@ inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {`
`772`	`772`	`}`
`773`	`773`
`774`	`774`	`template <>`
`775`		`-inline constexpr unsigned int binary_format<std::float16_t>::max_digits() {`
	`775`	`+inline constexpr size_t binary_format<std::float16_t>::max_digits() {`
`776`	`776`	`return 22;`
`777`	`777`	`}`
`778`	`778`	`#endif // __STDCPP_FLOAT16_T__`
`@@ -899,7 +899,7 @@ inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {`
`899`	`899`	`}`
`900`	`900`
`901`	`901`	`template <>`
`902`		`-inline constexpr unsigned int binary_format<std::bfloat16_t>::max_digits() {`
	`902`	`+inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {`
`903`	`903`	`return 98;`
`904`	`904`	`}`
`905`	`905`	`#endif // __STDCPP_BFLOAT16_T__`
`@@ -1009,7 +1009,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void to_float(`
`1009`	`1009`	`#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN`
`1010`	`1010`	`bool const negative,`
`1011`	`1011`	`#endif`
`1012`		`- adjusted_mantissa const am, T &value) noexcept {`
	`1012`	`+ adjusted_mantissa const &am, T &value) noexcept {`
`1013`	`1013`	`using equiv_uint = equiv_uint_t<T>;`
`1014`	`1014`	`equiv_uint word = equiv_uint(am.mantissa);`
`1015`	`1015`	`word = equiv_uint(word \| equiv_uint(am.power2)`
Original file line number	Diff line number	Diff line change
`@@ -283,7 +283,7 @@ from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {`
`283`	`283`	`// and we have an invalid power (am.power2 < 0), then we need to go the long`
`284`	`284`	`// way around again. This is very uncommon.`
`285`	`285`	`if (am.power2 < 0) {`
`286`		`- am = digit_comp<T>(pns, am);`
	`286`	`+ digit_comp<T>(pns, am);`
`287`	`287`	`}`
`288`	`288`	`to_float(`
`289`	`289`	`#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN`