Skip to content

Commit d22ce8e

Browse files
committed
chore: use uint128 for FromBigEndian
1 parent c84431e commit d22ce8e

File tree

1 file changed

+18
-108
lines changed

1 file changed

+18
-108
lines changed

src/iceberg/util/decimal.cc

Lines changed: 18 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,6 @@ namespace iceberg {
4848

4949
namespace {
5050

51-
// Signed left shift with well-defined behaviour on negative numbers or overflow
52-
template <typename SignedInt, typename Shift>
53-
requires std::is_signed_v<SignedInt> && std::is_integral_v<Shift>
54-
constexpr SignedInt SafeLeftShift(SignedInt u, Shift bits) {
55-
using UnsignedInt = std::make_unsigned_t<SignedInt>;
56-
return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << bits);
57-
}
58-
5951
struct DecimalComponents {
6052
std::string_view while_digits;
6153
std::string_view fractional_digits;
@@ -205,7 +197,9 @@ inline void ShiftAndAdd(std::string_view input, uint128_t& out) {
205197
const uint64_t multiple = kUInt64PowersOfTen[group_size];
206198
uint64_t value = 0;
207199

208-
std::from_chars(input.data() + pos, input.data() + pos + group_size, value);
200+
auto [_, ec] =
201+
std::from_chars(input.data() + pos, input.data() + pos + group_size, value);
202+
ICEBERG_DCHECK(ec == std::errc(), "Failed to parse digits in ShiftAndAdd");
209203

210204
out = out * multiple + value;
211205
pos += group_size;
@@ -471,73 +465,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
471465

472466
namespace {
473467

474-
constexpr float kFloatInf = std::numeric_limits<float>::infinity();
475-
476-
// Attention: these pre-computed constants might not exactly represent their
477-
// decimal counterparts:
478-
// >>> int(1e38)  # Python REPL: exact integer value of the double 1e38
479-
// 99999999999999997748809823456034029568
480-
481-
constexpr int32_t kPrecomputedPowersOfTen = 76;
482-
483-
constexpr std::array<float, 2 * kPrecomputedPowersOfTen + 1> kFloatPowersOfTen = {
484-
0, 0, 0, 0, 0, 0, 0,
485-
0, 0, 0, 0, 0, 0, 0,
486-
0, 0, 0, 0, 0, 0, 0,
487-
0, 0, 0, 0, 0, 0, 0,
488-
0, 0, 0, 1e-45f, 1e-44f, 1e-43f, 1e-42f,
489-
1e-41f, 1e-40f, 1e-39f, 1e-38f, 1e-37f, 1e-36f, 1e-35f,
490-
1e-34f, 1e-33f, 1e-32f, 1e-31f, 1e-30f, 1e-29f, 1e-28f,
491-
1e-27f, 1e-26f, 1e-25f, 1e-24f, 1e-23f, 1e-22f, 1e-21f,
492-
1e-20f, 1e-19f, 1e-18f, 1e-17f, 1e-16f, 1e-15f, 1e-14f,
493-
1e-13f, 1e-12f, 1e-11f, 1e-10f, 1e-9f, 1e-8f, 1e-7f,
494-
1e-6f, 1e-5f, 1e-4f, 1e-3f, 1e-2f, 1e-1f, 1e0f,
495-
1e1f, 1e2f, 1e3f, 1e4f, 1e5f, 1e6f, 1e7f,
496-
1e8f, 1e9f, 1e10f, 1e11f, 1e12f, 1e13f, 1e14f,
497-
1e15f, 1e16f, 1e17f, 1e18f, 1e19f, 1e20f, 1e21f,
498-
1e22f, 1e23f, 1e24f, 1e25f, 1e26f, 1e27f, 1e28f,
499-
1e29f, 1e30f, 1e31f, 1e32f, 1e33f, 1e34f, 1e35f,
500-
1e36f, 1e37f, 1e38f, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
501-
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
502-
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
503-
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
504-
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
505-
kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
506-
507-
constexpr std::array<double, 2 * kPrecomputedPowersOfTen + 1> kDoublePowersOfTen = {
508-
1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
509-
1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
510-
1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
511-
1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
512-
1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
513-
1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5,
514-
1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
515-
1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
516-
1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
517-
1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42, 1e43,
518-
1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
519-
1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67,
520-
1e68, 1e69, 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76};
521-
522-
// Helper function used by Decimal::FromBigEndian
523-
static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
524-
// We don't bounds check the length here because this is called by
525-
// FromBigEndian, which has a Decimal128 as its out parameter, and
526-
// that function is already checking the length of the bytes and only
527-
// passes lengths between zero and eight.
528-
uint64_t result = 0;
529-
// Using memcpy instead of special casing for length
530-
// and doing the conversion in 16, 32 parts, which could
531-
// possibly create unaligned memory access on certain platforms
532-
std::memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
533-
534-
if constexpr (std::endian::native == std::endian::little) {
535-
return std::byteswap(result);
536-
} else {
537-
return result;
538-
}
539-
}
540-
541468
static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
542469
const Decimal& multiplier, Decimal* result) {
543470
if (delta_scale < 0) {
@@ -569,41 +496,24 @@ Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
569496
// sign bit.
570497
const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
571498

572-
// 1. Extract the high bytes
573-
// Stop byte of the high bytes
574-
const int32_t high_bits_offset = std::max(0, length - 8);
575-
const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
499+
uint128_t result = 0;
500+
std::memcpy(reinterpret_cast<uint8_t*>(&result) + 16 - length, bytes, length);
576501

577-
if (high_bits_offset == 8) {
578-
// Avoid undefined shift by 64 below
579-
high = high_bits;
580-
} else {
581-
high = -1 * (is_negative && length < kMaxDecimalBytes);
582-
// Shift left enough bits to make room for the incoming int64_t
583-
high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
584-
// Preserve the upper bits by inplace OR-ing the int64_t
585-
high |= high_bits;
586-
}
587-
588-
// 2. Extract the low bytes
589-
// Stop byte of the low bytes
590-
const int32_t low_bits_offset = std::min(length, 8);
591-
const auto low_bits =
592-
UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
593-
594-
if (low_bits_offset == 8) {
595-
// Avoid undefined shift by 64 below
596-
low = low_bits;
597-
} else {
598-
// Sign extend the low bits if necessary
599-
low = -1 * (is_negative && length < 8);
600-
// Shift left enough bits to make room for the incoming int64_t
601-
low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
602-
// Preserve the upper bits by inplace OR-ing the int64_t
603-
low |= low_bits;
502+
if constexpr (std::endian::native == std::endian::little) {
503+
auto high = static_cast<uint64_t>(result >> 64);
504+
auto low = static_cast<uint64_t>(result);
505+
high = std::byteswap(high);
506+
low = std::byteswap(low);
507+
// A full 128-bit byte swap must also exchange the two byte-swapped 64-bit halves
508+
result = (static_cast<uint128_t>(low) << 64) | high;
509+
}
510+
511+
if (is_negative && length < kMaxDecimalBytes) {
512+
// Sign extend the high bits
513+
result |= (static_cast<uint128_t>(-1) << (length * CHAR_BIT));
604514
}
605515

606-
return Decimal(high, static_cast<uint64_t>(low));
516+
return Decimal(static_cast<int128_t>(result));
607517
}
608518

609519
Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {

0 commit comments

Comments
 (0)