Skip to content

Commit b261492

Browse files
committed
reduce register pressure.
1 parent 8212e9e commit b261492

File tree

3 files changed

+33
-36
lines changed

3 files changed

+33
-36
lines changed

include/fast_float/digit_comparison.h

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ constexpr static uint64_t powers_of_ten_uint64[] = {1UL,
4040
// to slow down performance for faster algorithms, and this is still fast.
4141
template <typename UC>
4242
fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int16_t
43-
scientific_exponent(const parsed_number_string_t<UC> &num) noexcept {
43+
scientific_exponent(parsed_number_string_t<UC> const &num) noexcept {
4444
uint64_t mantissa = num.mantissa;
4545
int16_t exponent = num.exponent;
4646
while (mantissa >= 10000) {
@@ -61,7 +61,7 @@ scientific_exponent(const parsed_number_string_t<UC> &num) noexcept {
6161
// this converts a native floating-point number to an extended-precision float.
6262
template <typename T>
6363
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
64-
to_extended(T value) noexcept {
64+
to_extended(T const &value) noexcept {
6565
using equiv_uint = equiv_uint_t<T>;
6666
constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
6767
constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
@@ -96,7 +96,7 @@ to_extended(T value) noexcept {
9696
// halfway between b and b+u.
9797
template <typename T>
9898
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
99-
to_extended_halfway(T value) noexcept {
99+
to_extended_halfway(T const &value) noexcept {
100100
adjusted_mantissa am = to_extended(value);
101101
am.mantissa <<= 1;
102102
am.mantissa += 1;
@@ -341,26 +341,24 @@ parse_mantissa(bigint &result, const parsed_number_string_t<UC> &num,
341341
}
342342

343343
template <typename T>
344-
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
345-
positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
344+
inline FASTFLOAT_CONSTEXPR20 void
345+
positive_digit_comp(bigint &bigmant, adjusted_mantissa &am,
346+
int32_t const exponent) noexcept {
346347
FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
347-
adjusted_mantissa answer;
348348
bool truncated;
349-
answer.mantissa = bigmant.hi64(truncated);
350-
int bias = binary_format<T>::mantissa_explicit_bits() -
349+
am.mantissa = bigmant.hi64(truncated);
350+
int32_t bias = binary_format<T>::mantissa_explicit_bits() -
351351
binary_format<T>::minimum_exponent();
352-
answer.power2 = bigmant.bit_length() - 64 + bias;
352+
am.power2 = bigmant.bit_length() - 64 + bias;
353353

354-
round<T>(answer, [truncated](adjusted_mantissa &a, int32_t shift) {
354+
round<T>(am, [truncated](adjusted_mantissa &a, int32_t shift) {
355355
round_nearest_tie_even(
356356
a, shift,
357357
[truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
358358
return is_above || (is_halfway && truncated) ||
359359
(is_odd && is_halfway);
360360
});
361361
});
362-
363-
return answer;
364362
}
365363

366364
// the scaling here is quite simple: we have, for the real digits `m * 10^e`,
@@ -369,24 +367,26 @@ positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept {
369367
// we then need to scale by `2^(f- e)`, and then the two significant digits
370368
// are of the same magnitude.
371369
template <typename T>
372-
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa
373-
negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
374-
const int32_t exponent) noexcept {
370+
inline FASTFLOAT_CONSTEXPR20 void
371+
negative_digit_comp(bigint &bigmant, adjusted_mantissa &am,
372+
int32_t const exponent) noexcept {
375373
bigint &real_digits = bigmant;
376374
const int32_t &real_exp = exponent;
377375

378-
// get the value of `b`, rounded down, and get a bigint representation of b+h
379-
adjusted_mantissa am_b = am;
380-
// gcc7 buf: use a lambda to remove the noexcept qualifier bug with
381-
// -Wnoexcept-type.
382-
round<T>(am_b,
383-
[](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
384376
T b;
385-
to_float(
377+
{
378+
// get the value of `b`, rounded down, and get a bigint representation of b+h
379+
adjusted_mantissa am_b = am;
380+
// gcc7 bug: use a lambda to remove the noexcept qualifier bug with
381+
// -Wnoexcept-type.
382+
round<T>(am_b,
383+
[](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); });
384+
to_float(
386385
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
387-
false,
386+
false,
388387
#endif
389-
am_b, b);
388+
am_b, b);
389+
}
390390
adjusted_mantissa theor = to_extended_halfway(b);
391391
bigint theor_digits(theor.mantissa);
392392
int32_t theor_exp = theor.power2;
@@ -405,8 +405,7 @@ negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
405405

406406
// compare digits, and use the result to direct rounding
407407
int ord = real_digits.compare(theor_digits);
408-
adjusted_mantissa answer = am;
409-
round<T>(answer, [ord](adjusted_mantissa &a, int32_t shift) {
408+
round<T>(am, [ord](adjusted_mantissa &a, int32_t shift) {
410409
round_nearest_tie_even(
411410
a, shift, [ord](bool is_odd, bool _, bool __) -> bool {
412411
(void)_; // not needed, since we've done our comparison
@@ -420,8 +419,6 @@ negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
420419
}
421420
});
422421
});
423-
424-
return answer;
425422
}
426423

427424
// parse the significant digits as a big integer to unambiguously round the
@@ -438,8 +435,8 @@ negative_digit_comp(bigint &bigmant, const adjusted_mantissa am,
438435
// the actual digits. we then compare the big integer representations
439436
// of both, and use that to direct rounding.
440437
template <typename T, typename UC>
441-
inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
442-
const parsed_number_string_t<UC> &num, adjusted_mantissa &am) noexcept {
438+
inline FASTFLOAT_CONSTEXPR20 void digit_comp(
439+
parsed_number_string_t<UC> const &num, adjusted_mantissa &am) noexcept {
443440
// remove the invalid exponent bias
444441
am.power2 -= invalid_am_bias;
445442

@@ -451,9 +448,9 @@ inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa digit_comp(
451448
// can't underflow, since digits is at most max_digits.
452449
int16_t exponent = sci_exp + 1 - digits;
453450
if (exponent >= 0) {
454-
return positive_digit_comp<T>(bigmant, exponent);
451+
positive_digit_comp<T>(bigmant, am, exponent);
455452
} else {
456-
return negative_digit_comp<T>(bigmant, am, exponent);
453+
negative_digit_comp<T>(bigmant, am, exponent);
457454
}
458455
}
459456

include/fast_float/float_common.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ inline constexpr int binary_format<std::float16_t>::smallest_power_of_ten() {
772772
}
773773

774774
template <>
775-
inline constexpr unsigned int binary_format<std::float16_t>::max_digits() {
775+
inline constexpr size_t binary_format<std::float16_t>::max_digits() {
776776
return 22;
777777
}
778778
#endif // __STDCPP_FLOAT16_T__
@@ -899,7 +899,7 @@ inline constexpr int binary_format<std::bfloat16_t>::smallest_power_of_ten() {
899899
}
900900

901901
template <>
902-
inline constexpr unsigned int binary_format<std::bfloat16_t>::max_digits() {
902+
inline constexpr size_t binary_format<std::bfloat16_t>::max_digits() {
903903
return 98;
904904
}
905905
#endif // __STDCPP_BFLOAT16_T__
@@ -1009,7 +1009,7 @@ fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void to_float(
10091009
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN
10101010
bool const negative,
10111011
#endif
1012-
adjusted_mantissa const am, T &value) noexcept {
1012+
adjusted_mantissa const &am, T &value) noexcept {
10131013
using equiv_uint = equiv_uint_t<T>;
10141014
equiv_uint word = equiv_uint(am.mantissa);
10151015
word = equiv_uint(word | equiv_uint(am.power2)

include/fast_float/parse_number.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ from_chars_advanced(parsed_number_string_t<UC> const &pns, T &value) noexcept {
283283
// and we have an invalid power (am.power2 < 0), then we need to go the long
284284
// way around again. This is very uncommon.
285285
if (am.power2 < 0) {
286-
am = digit_comp<T>(pns, am);
286+
digit_comp<T>(pns, am);
287287
}
288288
to_float(
289289
#ifndef FASTFLOAT_ONLY_POSITIVE_C_NUMBER_WO_INF_NAN

0 commit comments

Comments
 (0)