From 5a15ef2a88073af4afc9f293c652763dfc92b719 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sun, 17 Aug 2025 22:50:05 +0800
Subject: [PATCH 01/20] feat: add decimal value representation

---
 src/iceberg/CMakeLists.txt        |   1 +
 src/iceberg/expression/decimal.cc | 826 ++++++++++++++++++++++++++++++
 src/iceberg/expression/decimal.h  | 211 ++++++++
 src/iceberg/result.h              |   6 +
 src/iceberg/util/port.h           |  45 ++
 test/CMakeLists.txt               |   6 +-
 test/decimal_test.cc              | 153 ++++++
 7 files changed, 1247 insertions(+), 1 deletion(-)
 create mode 100644 src/iceberg/expression/decimal.cc
 create mode 100644 src/iceberg/expression/decimal.h
 create mode 100644 src/iceberg/util/port.h
 create mode 100644 test/decimal_test.cc
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index ae282a3af..882c5a681 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -19,6 +19,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
                      "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")
 set(ICEBERG_SOURCES
     catalog/in_memory_catalog.cc
+    expression/decimal.cc
     expression/expression.cc
     expression/literal.cc
     file_reader.cc
diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
new file mode 100644
index 000000000..e5ac492ef
--- /dev/null
+++ b/src/iceberg/expression/decimal.cc
@@ -0,0 +1,826 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/expression/decimal.h"
+
+#include <array>
+#include <bit>
+#include <cassert>
+#include <charconv>
+#include <cstddef>
+#include <cstdint>
+#include <format>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <string_view>
+
+#include "iceberg/result.h"
+#include "iceberg/util/macros.h"
+
+namespace iceberg {
+
+namespace {
+
+static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
+
+// Convenience wrapper type over 128 bit unsigned integers. This class merely provides the
+// minimum necessary set of functions to perform 128 bit multiplication operations.
+struct Uint128 {
+  Uint128() = default;
+  Uint128(uint64_t high, uint64_t low)
+      : val_((static_cast<uint128_t>(high) << 64) | low) {}
+
+  explicit Uint128(uint64_t value) : val_(value) {}
+  explicit Uint128(const Decimal& decimal)
+      : val_((static_cast<uint128_t>(decimal.high()) << 64) | decimal.low()) {}
+
+  uint64_t high() const { return static_cast<uint64_t>(val_ >> 64); }
+  uint64_t low() const { return static_cast<uint64_t>(val_ & kInt64Mask); }
+
+  Uint128& operator+=(const Uint128& other) {
+    val_ += other.val_;
+    return *this;
+  }
+
+  Uint128& operator*=(const Uint128& other) {
+    val_ *= other.val_;
+    return *this;
+  }
+
+  uint128_t val_{};
+};
+
+// Signed addition with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+  requires std::is_signed_v<SignedInt>
+constexpr SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
+  using UnsignedInt = std::make_unsigned_t<SignedInt>;
+  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) +
+                                static_cast<UnsignedInt>(v));
+}
+
+// Signed subtraction with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+  requires std::is_signed_v<SignedInt>
+constexpr SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
+  using UnsignedInt = std::make_unsigned_t<SignedInt>;
+  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
+                                static_cast<UnsignedInt>(v));
+}
+
+// Signed negation with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+  requires std::is_signed_v<SignedInt>
+constexpr SignedInt SafeSignedNegate(SignedInt u) {
+  using UnsignedInt = std::make_unsigned_t<SignedInt>;
+  return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
+}
+
+// Signed left shift with well-defined behaviour on negative numbers or overflow
+template <typename SignedInt, typename Shift>
+  requires std::is_signed_v<SignedInt> && std::is_integral_v<Shift>
+constexpr SignedInt SafeLeftShift(SignedInt u, Shift shift) {
+  using UnsignedInt = std::make_unsigned_t<SignedInt>;
+  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
+}
+
+/// \brief Expands the given value into a big endian array of ints so that we can work on
+/// it. The array will be converted to an absolute value. The array will remove leading
+/// zeros from the value.
+/// \param array a big endian array of length 4 to set with the value
+/// \result the output length of the array
+static inline int64_t FillInArray(const Decimal& value, uint32_t* array) {
+  Decimal abs_value = Decimal::Abs(value);
+  auto high = static_cast<uint64_t>(abs_value.high());
+  auto low = abs_value.low();
+
+  if (high != 0) {
+    if (high > std::numeric_limits<uint32_t>::max()) {
+      array[0] = static_cast<uint32_t>(high >> 32);
+      array[1] = static_cast<uint32_t>(high);
+      array[2] = static_cast<uint32_t>(low >> 32);
+      array[3] = static_cast<uint32_t>(low);
+      return 4;
+    }
+
+    array[0] = static_cast<uint32_t>(high);
+    array[1] = static_cast<uint32_t>(low >> 32);
+    array[2] = static_cast<uint32_t>(low);
+    return 3;
+  }
+
+  if (low > std::numeric_limits<uint32_t>::max()) {
+    array[0] = static_cast<uint32_t>(low >> 32);
+    array[1] = static_cast<uint32_t>(low);
+    return 2;
+  }
+
+  if (low == 0) {
+    return 0;
+  }
+
+  array[0] = static_cast<uint32_t>(low);
+  return 1;
+}
+
+/// \brief Shift the number in the array left by bits positions.
+static inline void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
+  if (length > 0 && bits > 0) {
+    for (int i = 0; i < length - 1; ++i) {
+      array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
+    }
+    array[length - 1] <<= bits;
+  }
+}
+
+/// \brief Shift the number in the array right by bits positions.
+static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
+  if (length > 0 && bits > 0) {
+    for (int i = length - 1; i > 0; --i) {
+      array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
+    }
+    array[0] >>= bits;
+  }
+}
+
+/// \brief Fix the signs of the result and remainder after a division operation.
+static inline void FixDivisionSigns(Decimal* result, Decimal* remainder,
+                                    bool dividend_negative, bool divisor_negative) {
+  if (dividend_negative != divisor_negative) {
+    result->Negate();
+  }
+  if (dividend_negative) {
+    remainder->Negate();
+  }
+}
+
+/// \brief Build a Decimal from a big-endian array of uint32_t.
+static Status BuildFromArray(Decimal* result, const uint32_t* array, int64_t length) {
+  // result_array[0] is low, result_array[1] is high
+  std::array<uint64_t, 2> result_array = {0, 0};
+  for (int64_t i = length - 2 * 2 - 1; i >= 0; i--) {
+    if (array[i] != 0) {
+      return Overflow("Decimal division overflow");
+    }
+  }
+
+  // Construct the array in reverse order
+  int64_t next_index = length - 1;
+  for (size_t i = 0; i < 2 && next_index >= 0; i++) {
+    uint64_t lower_bits = array[next_index--];
+    result_array[i] = (next_index < 0)
+                          ? lower_bits
+                          : (static_cast<uint64_t>(lower_bits) << 32) | lower_bits;
+  }
+
+  *result = Decimal(result_array[1], result_array[0]);
+  return {};
+}
+
+/// \brief Do a division where the divisor fits into a single 32-bit integer.
+static inline Status SingleDivide(const uint32_t* dividend, int64_t dividend_length,
+                                  uint32_t divisor, bool dividend_negative,
+                                  bool divisor_negative, Decimal* result,
+                                  Decimal* remainder) {
+  uint64_t r = 0;
+  constexpr int64_t kDecimalArrayLength = Decimal::kBitWidth / sizeof(uint32_t) + 1;
+  std::array<uint32_t, kDecimalArrayLength> quotient;
+  for (int64_t i = 0; i < dividend_length; ++i) {
+    r <<= 32;
+    r |= dividend[i];
+    if (r < divisor) {
+      quotient[i] = 0;
+    } else {
+      quotient[i] = static_cast<uint32_t>(r / divisor);
+      r -= static_cast<uint64_t>(quotient[i]) * divisor;
+    }
+  }
+
+  ICEBERG_RETURN_UNEXPECTED(BuildFromArray(result, quotient.data(), dividend_length));
+
+  *remainder = static_cast<int64_t>(r);
+  FixDivisionSigns(result, remainder, dividend_negative, divisor_negative);
+  return {};
+}
+
+/// \brief Divide two Decimal values and return the result and remainder.
+static inline Status DecimalDivide(const Decimal& dividend, const Decimal& divisor,
+                                   Decimal* result, Decimal* remainder) {
+  constexpr int64_t kDecimalArrayLength = Decimal::kBitWidth / sizeof(uint32_t);
+  // Split the dividend and divisor into 32-bit integer pieces so that we can work on
+  // them.
+  std::array<uint32_t, kDecimalArrayLength + 1> dividend_array;
+  std::array<uint32_t, kDecimalArrayLength> divisor_array;
+  bool dividend_negative = dividend.high() < 0;
+  bool divisor_negative = divisor.high() < 0;
+
+  // leave an extra zero before the dividend
+  dividend_array[0] = 0;
+  int64_t dividend_length = FillInArray(dividend, dividend_array.data() + 1) + 1;
+  int64_t divisor_length = FillInArray(divisor, divisor_array.data());
+
+  if (dividend_length <= divisor_length) {
+    *remainder = dividend;
+    *result = 0;
+    return {};
+  }
+
+  if (divisor_length == 0) {
+    return DivideByZero("Cannot divide by zero in DecimalDivide");
+  }
+
+  if (divisor_length == 1) {
+    ICEBERG_RETURN_UNEXPECTED(SingleDivide(dividend_array.data(), dividend_length,
+                                           divisor_array[0], dividend_negative,
+                                           divisor_negative, result, remainder));
+    return {};
+  }
+
+  int64_t result_length = dividend_length - divisor_length;
+  std::array<uint32_t, kDecimalArrayLength> result_array;
+
+  assert(result_length < kDecimalArrayLength);
+
+  // Normalize by shifting both by a multiple of 2 so that the digit guessing is easier.
+  // The requirement is that divisor_array[0] is greater than 2**31.
+  int64_t normalize_bits = std::countl_zero(divisor_array[0]);
+  ShiftArrayLeft(divisor_array.data(), divisor_length, normalize_bits);
+  ShiftArrayLeft(dividend_array.data(), dividend_length, normalize_bits);
+
+  // compute each digit in the result
+  for (int64_t i = 0; i < result_length; ++i) {
+    // Guess the next digit. At worst it is two too large
+    uint32_t guess = std::numeric_limits<uint32_t>::max();
+    const auto high_dividend =
+        static_cast<uint64_t>(dividend_array[i]) << 32 | dividend_array[i + 1];
+    if (dividend_array[i] != divisor_array[0]) {
+      guess = static_cast<uint32_t>(high_dividend / divisor_array[0]);
+    }
+
+    // catch all the cases where the guess is two too large and most of the cases where it
+    // is one too large
+    auto rhat = static_cast<uint32_t>(high_dividend -
+                                      guess * static_cast<uint64_t>(divisor_array[0]));
+    while (static_cast<uint64_t>(divisor_array[1]) * guess >
+           (static_cast<uint64_t>(rhat) << 32) + dividend_array[i + 2]) {
+      guess--;
+      rhat += divisor_array[0];
+      if (static_cast<uint64_t>(rhat) < divisor_array[1]) {
+        break;
+      }
+    }
+
+    // substract off the guess * divisor from the dividend
+    uint64_t mult = 0;
+    for (int64_t j = divisor_length - 1; j >= 0; --j) {
+      mult += static_cast<uint64_t>(guess) * divisor_array[j];
+      uint32_t prev = dividend_array[i + j + 1];
+      dividend_array[i + j + 1] -= static_cast<uint32_t>(mult);
+      mult >>= 32;
+      if (dividend_array[i + j + 1] > prev) {
+        ++mult;
+      }
+    }
+    uint32_t prev = dividend_array[i];
+    dividend_array[i] -= static_cast<uint32_t>(mult);
+
+    // if guess was too big,  we add back divisor
+    if (dividend_array[i] > prev) {
+      guess--;
+      uint32_t carry = 0;
+      for (int64_t j = divisor_length - 1; j >= 0; --j) {
+        const auto sum =
+            static_cast<uint64_t>(divisor_array[j]) + dividend_array[i + j + 1] + carry;
+        dividend_array[i + j + 1] = static_cast<uint32_t>(sum);
+        carry = static_cast<uint32_t>(sum >> 32);
+      }
+      dividend_array[i] += carry;
+    }
+
+    result_array[i] = guess;
+  }
+
+  // denormalize the remainder
+  ShiftArrayRight(dividend_array.data(), dividend_length, normalize_bits);
+
+  // return result and remainder
+  ICEBERG_RETURN_UNEXPECTED(BuildFromArray(result, result_array.data(), result_length));
+  ICEBERG_RETURN_UNEXPECTED(
+      BuildFromArray(remainder, dividend_array.data(), dividend_length));
+
+  FixDivisionSigns(result, remainder, dividend_negative, divisor_negative);
+  return {};
+}
+
+/// \brief Powers of ten for Decimal with scale from 0 to 38.
+constexpr std::array<Decimal, Decimal::kMaxScale + 1> kDecimal128PowersOfTen = {
+    Decimal(1LL),
+    Decimal(10LL),
+    Decimal(100LL),
+    Decimal(1000LL),
+    Decimal(10000LL),
+    Decimal(100000LL),
+    Decimal(1000000LL),
+    Decimal(10000000LL),
+    Decimal(100000000LL),
+    Decimal(1000000000LL),
+    Decimal(10000000000LL),
+    Decimal(100000000000LL),
+    Decimal(1000000000000LL),
+    Decimal(10000000000000LL),
+    Decimal(100000000000000LL),
+    Decimal(1000000000000000LL),
+    Decimal(10000000000000000LL),
+    Decimal(100000000000000000LL),
+    Decimal(1000000000000000000LL),
+    Decimal(0LL, 10000000000000000000ULL),
+    Decimal(5LL, 7766279631452241920ULL),
+    Decimal(54LL, 3875820019684212736ULL),
+    Decimal(542LL, 1864712049423024128ULL),
+    Decimal(5421LL, 200376420520689664ULL),
+    Decimal(54210LL, 2003764205206896640ULL),
+    Decimal(542101LL, 1590897978359414784ULL),
+    Decimal(5421010LL, 15908979783594147840ULL),
+    Decimal(54210108LL, 11515845246265065472ULL),
+    Decimal(542101086LL, 4477988020393345024ULL),
+    Decimal(5421010862LL, 7886392056514347008ULL),
+    Decimal(54210108624LL, 5076944270305263616ULL),
+    Decimal(542101086242LL, 13875954555633532928ULL),
+    Decimal(5421010862427LL, 9632337040368467968ULL),
+    Decimal(54210108624275LL, 4089650035136921600ULL),
+    Decimal(542101086242752LL, 4003012203950112768ULL),
+    Decimal(5421010862427522LL, 3136633892082024448ULL),
+    Decimal(54210108624275221LL, 12919594847110692864ULL),
+    Decimal(542101086242752217LL, 68739955140067328ULL),
+    Decimal(5421010862427522170LL, 687399551400673280ULL)};
+}  // namespace
+
+Decimal::Decimal(std::string_view str) {
+  auto result = Decimal::FromString(str);
+  if (!result) {
+    throw std::runtime_error(
+        std::format("Failed to parse Decimal from string: {}, error: {}", str,
+                    result.error().message));
+  }
+  *this = std::move(result.value());
+}
+
+Decimal& Decimal::Negate() {
+  uint64_t result_low = ~low() + 1;
+  int64_t result_high = ~high();
+  if (result_low == 0) {
+    result_high = SafeSignedAdd<int64_t>(result_high, 1);
+  }
+  *this = Decimal(result_high, result_low);
+  return *this;
+}
+
+Decimal& Decimal::Abs() { return *this < 0 ? Negate() : *this; }
+
+Decimal Decimal::Abs(const Decimal& value) {
+  Decimal result(value);
+  return result.Abs();
+}
+
+Decimal& Decimal::operator+=(const Decimal& other) {
+  int64_t result_high = SafeSignedAdd(high(), other.high());
+  uint64_t result_low = low() + other.low();
+  result_high = SafeSignedAdd<int64_t>(result_high, result_low < low());
+  *this = Decimal(result_high, result_low);
+  return *this;
+}
+
+Decimal& Decimal::operator-=(const Decimal& other) {
+  int64_t result_high = SafeSignedSubtract(high(), other.high());
+  uint64_t result_low = low() - other.low();
+  result_high = SafeSignedSubtract<int64_t>(result_high, result_low > low());
+  *this = Decimal(result_high, result_low);
+  return *this;
+}
+
+Decimal& Decimal::operator*=(const Decimal& other) {
+  // Since the max value of Decimal is supposed to be 1e38 - 1 and the min the
+  // negation taking the abosolute values here should aways be safe.
+  const bool negate = Sign() != other.Sign();
+  Decimal x = Decimal::Abs(*this);
+  Decimal y = Decimal::Abs(other);
+
+  Uint128 r(x);
+  r *= Uint128(y);
+
+  *this = Decimal(static_cast<int64_t>(r.high()), r.low());
+  if (negate) {
+    Negate();
+  }
+  return *this;
+}
+
+Result<std::pair<Decimal, Decimal>> Decimal::Divide(const Decimal& divisor) const {
+  std::pair<Decimal, Decimal> result;
+  ICEBERG_RETURN_UNEXPECTED(DecimalDivide(*this, divisor, &result.first, &result.second));
+  return result;
+}
+
+Decimal& Decimal::operator/=(const Decimal& other) {
+  Decimal remainder;
+  auto s = DecimalDivide(*this, other, this, &remainder);
+  assert(s);
+  return *this;
+}
+
+Decimal& Decimal::operator|=(const Decimal& other) {
+  data_[0] |= other.data_[0];
+  data_[1] |= other.data_[1];
+  return *this;
+}
+
+Decimal& Decimal::operator&=(const Decimal& other) {
+  data_[0] &= other.data_[0];
+  data_[1] &= other.data_[1];
+  return *this;
+}
+
+Decimal& Decimal::operator<<=(uint32_t shift) {
+  if (shift != 0) {
+    uint64_t low_;
+    int64_t high_;
+    if (shift < 64) {
+      high_ = SafeLeftShift<int64_t>(high(), shift);
+      high_ |= low() >> (64 - shift);
+      low_ = low() << shift;
+    } else if (shift < 128) {
+      high_ = static_cast<int64_t>(low() << (shift - 64));
+      low_ = 0;
+    } else {
+      high_ = 0;
+      low_ = 0;
+    }
+    *this = Decimal(high_, low_);
+  }
+
+  return *this;
+}
+
+Decimal& Decimal::operator>>=(uint32_t shift) {
+  if (shift != 0) {
+    uint64_t low_;
+    int64_t high_;
+    if (shift < 64) {
+      low_ = low() >> shift;
+      low_ |= static_cast<uint64_t>(high()) << (64 - shift);
+      high_ = high() >> shift;
+    } else if (shift < 128) {
+      low_ = static_cast<uint64_t>(high() >> (shift - 64));
+      high_ = high() >> 63;
+    } else {
+      high_ = high() >> 63;
+      low_ = static_cast<uint64_t>(high_);
+    }
+    *this = Decimal(high_, low_);
+  }
+
+  return *this;
+}
+
+namespace {
+
+struct DecimalComponents {
+  std::string_view while_digits;
+  std::string_view fractional_digits;
+  int32_t exponent;
+  char sign{0};
+  bool has_exponent{false};
+};
+
+inline bool IsSign(char c) { return c == '+' || c == '-'; }
+
+inline bool IsDigit(char c) { return c >= '0' && c <= '9'; }
+
+inline bool IsDot(char c) { return c == '.'; }
+
+inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; }
+
+inline size_t ParseDigitsRun(std::string_view str, size_t pos, std::string_view* out) {
+  size_t start = pos;
+  while (pos < str.size() && IsDigit(str[pos])) {
+    ++pos;
+  }
+  *out = str.substr(start, pos - start);
+  return pos;
+}
+
+bool ParseDecimalComponents(std::string_view str, DecimalComponents* out) {
+  size_t pos = 0;
+
+  if (str.empty()) {
+    return false;
+  }
+
+  // Sign of the number
+  if (IsSign(str[pos])) {
+    out->sign = str[pos++];
+  }
+  // First run of digits
+  pos = ParseDigitsRun(str, pos, &out->while_digits);
+  if (pos == str.size()) {
+    return !out->while_digits.empty();
+  }
+
+  // Optional dot
+  if (IsDot(str[pos])) {
+    ++pos;
+    // Second run of digits after the dot
+    pos = ParseDigitsRun(str, pos, &out->fractional_digits);
+  }
+  if (out->fractional_digits.empty() && out->while_digits.empty()) {
+    // Need at least some digits (whole or fractional)
+    return false;
+  }
+  if (pos == str.size()) {
+    return true;
+  }
+
+  // Optional exponent part
+  if (StartsExponent(str[pos])) {
+    ++pos;
+    // Skip '+' sign, '-' sign will be handled by from_chars
+    if (pos < str.size() && str[pos] == '+') {
+      ++pos;
+    }
+    out->has_exponent = true;
+    auto [ptr, ec] =
+        std::from_chars(str.data() + pos, str.data() + str.size(), out->exponent);
+    if (ec != std::errc()) {
+      return false;  // Failed to parse exponent
+    }
+    pos = ptr - str.data();
+  }
+
+  return pos == str.size();
+}
+
+constexpr auto kInt64DecimalDigits =
+    static_cast<size_t>(std::numeric_limits<int64_t>::digits10);
+
+constexpr std::array<uint64_t, kInt64DecimalDigits + 1> kUInt64PowersOfTen = {
+    // clang-format off
+    1ULL,
+    10ULL,
+    100ULL,
+    1000ULL,
+    10000ULL,
+    100000ULL,
+    1000000ULL,
+    10000000ULL,
+    100000000ULL,
+    1000000000ULL,
+    10000000000ULL,
+    100000000000ULL,
+    1000000000000ULL,
+    10000000000000ULL,
+    100000000000000ULL,
+    1000000000000000ULL,
+    10000000000000000ULL,
+    100000000000000000ULL,
+    1000000000000000000ULL
+    // clang-format on
+};
+
+static inline void ShiftAndAdd(std::string_view input, std::array<uint64_t, 2>& out) {
+  for (size_t pos = 0; pos < input.size();) {
+    const size_t group_size = std::min(kInt64DecimalDigits, input.size() - pos);
+    const uint64_t multiple = kUInt64PowersOfTen[group_size];
+    uint64_t value = 0;
+
+    std::from_chars(input.data() + pos, input.data() + pos + group_size, value);
+
+    for (size_t i = 0; i < 2; ++i) {
+      Uint128 tmp(out[i]);
+      tmp *= Uint128(multiple);
+      tmp += Uint128(value);
+      out[i] = tmp.low();
+      value = tmp.high();
+    }
+    pos += group_size;
+  }
+}
+
+static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array,
+                                            std::string* out) {
+  // TODO(zhjwpku): Implementation this.
+}
+
+}  // namespace
+
+Result<std::string> Decimal::ToString(int32_t scale) const {
+  if (scale < -kMaxScale || scale > kMaxScale) {
+    return InvalidArgument(
+        "Decimal::ToString: scale must be in the range [-{}, {}], was {}", kMaxScale,
+        kMaxScale, scale);
+  }
+  return NotImplemented("Decimal::ToString is not implemented yet");
+}
+
+std::string Decimal::ToIntegerString() const {
+  std::string result;
+  if (high() < 0) {
+    result.push_back('-');
+    Decimal abs = *this;
+    abs.Negate();
+    AppendLittleEndianArrayToString({abs.low(), static_cast<uint64_t>(abs.high())},
+                                    &result);
+  } else {
+    AppendLittleEndianArrayToString({low(), static_cast<uint64_t>(high())}, &result);
+  }
+  return result;
+}
+
+Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
+                                    int32_t* scale) {
+  if (str.empty()) {
+    return InvalidArgument("Decimal::FromString: empty string is not a valid Decimal");
+  }
+  DecimalComponents dec;
+  if (!ParseDecimalComponents(str, &dec)) {
+    return InvalidArgument("Decimal::FromString: invalid decimal string '{}'", str);
+  }
+
+  // Count number of significant digits (without leading zeros)
+  size_t first_non_zero = dec.while_digits.find_first_not_of('0');
+  size_t significant_digits = dec.fractional_digits.size();
+  if (first_non_zero != std::string_view::npos) {
+    significant_digits += dec.while_digits.size() - first_non_zero;
+  }
+
+  auto parsed_precision = static_cast<int32_t>(significant_digits);
+
+  int32_t parsed_scale = 0;
+  if (dec.has_exponent) {
+    auto adjusted_exponent = dec.exponent;
+    parsed_scale = static_cast<int32_t>(dec.fractional_digits.size()) - adjusted_exponent;
+  } else {
+    parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
+  }
+
+  // Index 1 is the high part, index 0 is the low part
+  std::array<uint64_t, 2> result_array = {0, 0};
+  ShiftAndAdd(dec.while_digits, result_array);
+  ShiftAndAdd(dec.fractional_digits, result_array);
+  Decimal result(static_cast<int64_t>(result_array[1]), result_array[0]);
+
+  if (dec.sign == '-') {
+    result.Negate();
+  }
+
+  std::cout << result_array[1] << " " << result_array[0] << std::endl;
+
+  if (parsed_scale < 0) {
+    // For the scale to 0, to avoid negative scales (due to compatibility issues with
+    // external systems such as databases)
+    if (parsed_scale < -kMaxScale) {
+      return InvalidArgument(
+          "Decimal::FromString: scale must be in the range [-{}, {}], was {}", kMaxScale,
+          kMaxScale, parsed_scale);
+    }
+
+    result *= kDecimal128PowersOfTen[-parsed_scale];
+    parsed_precision -= parsed_scale;
+    parsed_scale = 0;
+  }
+
+  if (precision != nullptr) {
+    *precision = parsed_precision;
+  }
+  if (scale != nullptr) {
+    *scale = parsed_scale;
+  }
+
+  return result;
+}
+
+template <typename T>
+  requires std::is_floating_point_v<T>
+Result<Decimal> Decimal::FromReal(T value, int32_t precision, int32_t scale) {
+  if (precision < 1 || precision > kMaxPrecision) {
+    return InvalidArgument(
+        "Decimal::FromReal: precision must be in the range [1, {}], "
+        "was {}",
+        kMaxPrecision, precision);
+  }
+  if (scale > precision) {
+    return InvalidArgument(
+        "Decimal::FromReal: scale must be less than or equal to "
+        "precision, was scale = {}, precision = {}",
+        scale, precision);
+  }
+
+  return NotImplemented("Decimal::FromReal is not implemented yet");
+}
+
+Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
+  if (orig_scale < 0 || orig_scale > kMaxScale) {
+    return InvalidArgument(
+        "Decimal::Rescale: original scale must be in the range [0, {}], "
+        "was {}",
+        kMaxScale, orig_scale);
+  }
+  if (new_scale < 0 || new_scale > kMaxScale) {
+    return InvalidArgument(
+        "Decimal::Rescale: new scale must be in the range [0, {}], "
+        "was {}",
+        kMaxScale, new_scale);
+  }
+
+  if (orig_scale == new_scale) {
+    return *this;
+  }
+
+  return NotImplemented("Decimal::Rescale is not implemented yet");
+}
+
+template <typename T>
+  requires std::is_floating_point_v<T>
+T Decimal::ToReal(int32_t scale) const {
+  if (scale < -kMaxScale || scale > kMaxScale) {
+    throw InvalidArgument("Decimal::ToReal: scale must be in the range [-{}, {}], was {}",
+                          kMaxScale, kMaxScale, scale);
+  }
+
+  // Convert Decimal to floating-point value with the given scale.
+  // This is a placeholder implementation.
+  return T(0);  // Replace with actual conversion logic.
+}
+
+// Unary operators
+ICEBERG_EXPORT Decimal operator-(const Decimal& operand) {
+  Decimal result(operand.high(), operand.low());
+  return result.Negate();
+}
+
+ICEBERG_EXPORT Decimal operator~(const Decimal& operand) {
+  return {~operand.high(), ~operand.low()};
+}
+
+// Binary operators
+ICEBERG_EXPORT Decimal operator+(const Decimal& lhs, const Decimal& rhs) {
+  Decimal result(lhs);
+  result += rhs;
+  return result;
+}
+
+ICEBERG_EXPORT Decimal operator-(const Decimal& lhs, const Decimal& rhs) {
+  Decimal result(lhs);
+  result -= rhs;
+  return result;
+}
+
+ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs) {
+  Decimal result(lhs);
+  result *= rhs;
+  return result;
+}
+
+ICEBERG_EXPORT Decimal operator/(const Decimal& lhs, const Decimal& rhs) {
+  auto [result, _] = lhs.Divide(rhs).value();
+  return result;
+}
+
+ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
+  auto [_, remainder] = lhs.Divide(rhs).value();
+  return remainder;
+}
+
+ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs) {
+  return (lhs.high() < rhs.high()) || (lhs.high() == rhs.high() && lhs.low() < rhs.low());
+}
+
+ICEBERG_EXPORT bool operator<=(const Decimal& lhs, const Decimal& rhs) {
+  return !operator>(lhs, rhs);
+}
+
+ICEBERG_EXPORT bool operator>(const Decimal& lhs, const Decimal& rhs) {
+  return operator<(rhs, lhs);
+}
+
+ICEBERG_EXPORT bool operator>=(const Decimal& lhs, const Decimal& rhs) {
+  return !operator<(lhs, rhs);
+}
+
+}  // namespace iceberg
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
new file mode 100644
index 000000000..c4672bbde
--- /dev/null
+++ b/src/iceberg/expression/decimal.h
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <array>
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <type_traits>
+
+#include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/util/port.h"
+
+namespace iceberg {
+
+/// \brief Represents 128-bit fixed-point decimal numbers.
+/// The max decimal precision that can be safely represented is
+/// 38 significant digits.
+class ICEBERG_EXPORT Decimal {
+ public:
+  static constexpr int kBitWidth = 128;
+  static constexpr int kByteWidth = kBitWidth / 8;
+  static constexpr int kMaxPrecision = 38;
+  static constexpr int kMaxScale = 38;
+
+  /// \brief Default constructor initializes to zero.
+  constexpr Decimal() noexcept : data_{0, 0} {}
+
+  /// \brief Construct a Decimal from an array of 2 64-bit integers.
+  explicit Decimal(std::array<uint64_t, 2> data) noexcept : data_(std::move(data)) {}
+
+  /// \brief Create a Decimal from any integer not wider than 64 bits.
+  template <typename T>
+    requires(std::is_integral_v<T> && (sizeof(T) <= sizeof(uint64_t)))
+  constexpr Decimal(T value) noexcept  // NOLINT
+  {
+    if (value < T{}) {
+      data_[kHighIndex] = ~static_cast<uint64_t>(0);
+    } else {
+      data_[kHighIndex] = 0;
+    }
+    data_[kLowIndex] = static_cast<uint64_t>(value);
+  }
+
+  /// \brief Parse a Decimal from a string representation.
+  explicit Decimal(std::string_view str);
+
+/// \brief Create a Decimal from two 64-bit integers.
+#if ICEBERG_LITTLE_ENDIAN
+  constexpr Decimal(int64_t high, uint64_t low) noexcept
+      : data_{low, static_cast<uint64_t>(high)} {}
+#else
+  constexpr Decimal(int64_t high, uint64_t low) noexcept
+      : data_{static_cast<uint64_t>(high), low} {}
+#endif
+
+  /// \brief Negate the current Decimal value (in place)
+  Decimal& Negate();
+
+  /// \brief Absolute value of the current Decimal value (in place)
+  Decimal& Abs();
+
+  /// \brief Absolute value of the current Decimal value
+  static Decimal Abs(const Decimal& value);
+
+  /// \brief Add a number to this one. The result is truncated to 128 bits.
+  Decimal& operator+=(const Decimal& other);
+
+  /// \brief Subtract a number from this one. The result is truncated to 128 bits.
+  Decimal& operator-=(const Decimal& other);
+
+  /// \brief Multiply this number by another. The result is truncated to 128 bits.
+  Decimal& operator*=(const Decimal& other);
+
+  /// \brief Divide this number by another.
+  ///
+  /// The operation does not modify the current Decimal value.
+  /// The answer rounds towards zero. Signs work like:
+  ///   21 /  5 ->  4,  1
+  ///  -21 /  5 -> -4, -1
+  ///   21 / -5 -> -4,  1
+  ///  -21 / -5 ->  4, -1
+  /// \param[in] divisor the number to divide by
+  /// \return the pair of the quotient and the remainder
+  Result<std::pair<Decimal, Decimal>> Divide(const Decimal& divisor) const;
+
+  /// \brief In place division.
+  Decimal& operator/=(const Decimal& other);
+
+  /// \brief Bitwise OR operation.
+  Decimal& operator|=(const Decimal& other);
+
+  /// \brief Bitwise AND operation.
+  Decimal& operator&=(const Decimal& other);
+
+  /// \brief Shift left by the given number of bits (in place).
+  Decimal& operator<<=(uint32_t shift);
+
+  /// \brief Shift left by the given number of bits.
+  Decimal operator<<(uint32_t shift) const {
+    Decimal result(*this);
+    result <<= shift;
+    return result;
+  }
+
+  /// \brief Shift right by the given number of bits (in place).
+  Decimal& operator>>=(uint32_t shift);
+
+  /// \brief Shift right by the given number of bits.
+  Decimal operator>>(uint32_t shift) const {
+    Decimal result(*this);
+    result >>= shift;
+    return result;
+  }
+
+  /// \brief Get the high bits of the two's complement representation of the number.
+  constexpr int64_t high() const { return static_cast<int64_t>(data_[kHighIndex]); }
+
+  /// \brief Get the low bits of the two's complement representation of the number.
+  constexpr uint64_t low() const { return data_[kLowIndex]; }
+
+  /// \brief Convert the Decimal value to a base 10 decimal string with the given scale.
+  /// \param scale The scale to use for the string representation.
+  /// \return The string representation of the Decimal value.
+  Result<std::string> ToString(int32_t scale = 0) const;
+
+  /// \brief Convert the Decimal value to an integer string.
+  std::string ToIntegerString() const;
+
+  /// \brief Convert the decimal string to a Decimal value, optionally including precision
+  /// and scale if they are provided not null.
+  static Result<Decimal> FromString(std::string_view str, int32_t* precision = nullptr,
+                                    int32_t* scale = nullptr);
+
+  /// \brief Convert the floating-point value to a Decimal value with the given
+  /// precision and scale.
+  template <typename T>
+    requires std::is_floating_point_v<T>
+  static Result<Decimal> FromReal(T value, int32_t precision, int32_t scale);
+
+  /// \brief Convert Decimal from one scale to another.
+  Result<Decimal> Rescale(int32_t orig_scale, int32_t new_scale) const;
+
+  /// \brief Convert the Decimal value to a floating-point value with the given scale.
+  /// \param scale The scale to use for the conversion.
+  /// \return The floating-point value.
+  template <typename T>
+    requires std::is_floating_point_v<T>
+  T ToReal(int32_t scale = 0) const;
+
+  /// \brief Returns 1 if positive or zero, -1 if strictly negative.
+  int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[kHighIndex]) >> 63); }
+
+  /// \brief Check if the Decimal value is negative.
+  bool IsNegative() const { return static_cast<int64_t>(data_[kHighIndex]) < 0; }
+
+  friend bool operator==(const Decimal& lhs, const Decimal& rhs) {
+    return lhs.data_ == rhs.data_;
+  }
+
+  friend bool operator!=(const Decimal& lhs, const Decimal& rhs) {
+    return lhs.data_ != rhs.data_;
+  }
+
+ private:
+#if ICEBERG_LITTLE_ENDIAN
+  static constexpr int kHighIndex = 1;
+  static constexpr int kLowIndex = 0;
+#else
+  static constexpr int kHighIndex = 0;
+  static constexpr int kLowIndex = 1;
+#endif
+
+  std::array<uint64_t, 2> data_;
+};
+
+// Unary operators
+ICEBERG_EXPORT Decimal operator-(const Decimal& operand);
+ICEBERG_EXPORT Decimal operator~(const Decimal& operand);
+
+// Binary operators
+ICEBERG_EXPORT bool operator<=(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT bool operator>=(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT bool operator>(const Decimal& lhs, const Decimal& rhs);
+
+ICEBERG_EXPORT Decimal operator+(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT Decimal operator-(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT Decimal operator/(const Decimal& lhs, const Decimal& rhs);
+ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs);
+
+}  // namespace iceberg
diff --git a/src/iceberg/result.h b/src/iceberg/result.h
index d1aa4cedd..a70ebac5a 100644
--- a/src/iceberg/result.h
+++ b/src/iceberg/result.h
@@ -32,6 +32,7 @@ enum class ErrorKind {
   kAlreadyExists,
   kCommitStateUnknown,
   kDecompressError,
+  kDivideByZero,
   kInvalidArgument,
   kInvalidArrowData,
   kInvalidExpression,
@@ -46,6 +47,8 @@ enum class ErrorKind {
   kNotFound,
   kNotImplemented,
   kNotSupported,
+  kOverflow,
+  kRescaleDataLoss,
   kUnknownError,
 };
 
@@ -79,6 +82,7 @@ using Status = Result<void>;
 DEFINE_ERROR_FUNCTION(AlreadyExists)
 DEFINE_ERROR_FUNCTION(CommitStateUnknown)
 DEFINE_ERROR_FUNCTION(DecompressError)
+DEFINE_ERROR_FUNCTION(DivideByZero)
 DEFINE_ERROR_FUNCTION(InvalidArgument)
 DEFINE_ERROR_FUNCTION(InvalidArrowData)
 DEFINE_ERROR_FUNCTION(InvalidExpression)
@@ -93,6 +97,8 @@ DEFINE_ERROR_FUNCTION(NotAllowed)
 DEFINE_ERROR_FUNCTION(NotFound)
 DEFINE_ERROR_FUNCTION(NotImplemented)
 DEFINE_ERROR_FUNCTION(NotSupported)
+DEFINE_ERROR_FUNCTION(Overflow)
+DEFINE_ERROR_FUNCTION(RescaleDataLoss)
 DEFINE_ERROR_FUNCTION(UnknownError)
 
 #undef DEFINE_ERROR_FUNCTION
diff --git a/src/iceberg/util/port.h b/src/iceberg/util/port.h
new file mode 100644
index 000000000..bdd65520b
--- /dev/null
+++ b/src/iceberg/util/port.h
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/// \file iceberg/util/port.h
+/// \brief Portability macros and definitions for Iceberg C++ library
+
+#pragma once
+
+#if defined(_WIN32) /* Windows is always little endian */ \
+    || defined(__LITTLE_ENDIAN__) ||                      \
+    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#  define ICEBERG_LITTLE_ENDIAN 1
+#elif defined(__BIG_ENDIAN__) || \
+    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#  define ICEBERG_LITTLE_ENDIAN 0
+#else
+#  error "Unsupported or unknown endianness"
+#endif
+
+#if defined(_MSC_VER)
+#  include <__msvc_int128.hpp>
+using int128_t = std::_Signed128;
+using uint128_t = std::_Unsigned128;
+#elif defined(__GNUC__) || defined(__clang__)
+using int128_t = __int128;
+using uint128_t = unsigned __int128;
+#else
+#  error "128-bit integer type is not supported on this platform"
+#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 42ad13209..e0583468b 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -75,7 +75,11 @@ add_iceberg_test(table_test
                  table_test.cc
                  schema_json_test.cc)
 
-add_iceberg_test(expression_test SOURCES expression_test.cc literal_test.cc)
+add_iceberg_test(expression_test
+                 SOURCES
+                 decimal_test.cc
+                 expression_test.cc
+                 literal_test.cc)
 
 add_iceberg_test(json_serde_test
                  SOURCES
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
new file mode 100644
index 000000000..41c78896c
--- /dev/null
+++ b/test/decimal_test.cc
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "iceberg/expression/decimal.h"
+
+#include <gtest/gtest.h>
+
+#include "gmock/gmock.h"
+#include "matchers.h"
+
+namespace iceberg {
+
+namespace {
+
+void AssertDecimalFromString(const std::string& s, const Decimal& expected,
+                             int32_t expected_precision, int32_t expected_scale) {
+  int32_t precision = 0;
+  int32_t scale = 0;
+  auto result = Decimal::FromString(s, &precision, &scale);
+  EXPECT_THAT(result, IsOk());
+  const Decimal& actual = result.value();
+  EXPECT_EQ(expected, actual);
+  EXPECT_EQ(expected_precision, precision);
+  EXPECT_EQ(expected_scale, scale);
+}
+
+}  // namespace
+
+TEST(DecimalTest, Basics) {
+  AssertDecimalFromString("234.23445", Decimal(23423445), 8, 5);
+
+  std::string string_value("-23049223942343532412");
+  Decimal result(string_value);
+  Decimal expected(static_cast<int64_t>(-230492239423435324));
+  ASSERT_EQ(result, expected * 100 - 12);
+  ASSERT_NE(result.high(), 0);
+
+  result = Decimal("-23049223942343.532412");
+  ASSERT_EQ(result, expected * 100 - 12);
+  ASSERT_NE(result.high(), 0);
+}
+
+TEST(DecimalTest, StringStartingWithSign) {
+  AssertDecimalFromString("+234.567", Decimal(234567), 6, 3);
+  AssertDecimalFromString("+2342394230592.232349023094",
+                          Decimal("2342394230592232349023094"), 25, 12);
+  AssertDecimalFromString("-234.567", Decimal("-234567"), 6, 3);
+  AssertDecimalFromString("-2342394230592.232349023094",
+                          Decimal("-2342394230592232349023094"), 25, 12);
+}
+
+TEST(DecimalTest, StringWithLeadingZeros) {
+  AssertDecimalFromString("0000000000000000000000000000000.234", Decimal(234), 3, 3);
+  AssertDecimalFromString("0000000000000000000000000000000.23400", Decimal(23400), 5, 5);
+  AssertDecimalFromString("234.00", Decimal(23400), 5, 2);
+  AssertDecimalFromString("234.0", Decimal(2340), 4, 1);
+  AssertDecimalFromString("0000000", Decimal(0), 0, 0);
+  AssertDecimalFromString("000.0000", Decimal(0), 4, 4);
+  AssertDecimalFromString(".00000", Decimal(0), 5, 5);
+}
+
+TEST(DecimalTest, DecimalWithExponent) {
+  AssertDecimalFromString("1E1", Decimal(10), 2, 0);
+  AssertDecimalFromString("234.23445e2", Decimal(23423445), 8, 3);
+  AssertDecimalFromString("234.23445e-2", Decimal(23423445), 8, 7);
+  AssertDecimalFromString("234.23445E2", Decimal(23423445), 8, 3);
+  AssertDecimalFromString("234.23445E-2", Decimal(23423445), 8, 7);
+  AssertDecimalFromString("1.23E-8", Decimal(123), 3, 10);
+}
+
+TEST(DecimalTest, SmallValues) {
+  struct TestValue {
+    std::string s;
+    int64_t expected;
+    int32_t expected_precision;
+    int32_t expected_scale;
+  };
+
+  for (const auto& tv : std::vector<TestValue>{
+           {.s = "12.3", .expected = 123LL, .expected_precision = 3, .expected_scale = 1},
+           {.s = "0.00123",
+            .expected = 123LL,
+            .expected_precision = 5,
+            .expected_scale = 5},
+           {.s = "1.23E-8",
+            .expected = 123LL,
+            .expected_precision = 3,
+            .expected_scale = 10},
+           {.s = "-1.23E-8",
+            .expected = -123LL,
+            .expected_precision = 3,
+            .expected_scale = 10},
+           {.s = "1.23E+3",
+            .expected = 1230LL,
+            .expected_precision = 4,
+            .expected_scale = 0},
+           {.s = "-1.23E+3",
+            .expected = -1230LL,
+            .expected_precision = 4,
+            .expected_scale = 0},
+           {.s = "1.23E+5",
+            .expected = 123000LL,
+            .expected_precision = 6,
+            .expected_scale = 0},
+           {.s = "1.2345E+7",
+            .expected = 12345000LL,
+            .expected_precision = 8,
+            .expected_scale = 0},
+           {.s = "1.23e-8",
+            .expected = 123LL,
+            .expected_precision = 3,
+            .expected_scale = 10},
+           {.s = "-1.23e-8",
+            .expected = -123LL,
+            .expected_precision = 3,
+            .expected_scale = 10},
+           {.s = "1.23e+3",
+            .expected = 1230LL,
+            .expected_precision = 4,
+            .expected_scale = 0},
+           {.s = "-1.23e+3",
+            .expected = -1230LL,
+            .expected_precision = 4,
+            .expected_scale = 0},
+           {.s = "1.23e+5",
+            .expected = 123000LL,
+            .expected_precision = 6,
+            .expected_scale = 0},
+           {.s = "1.2345e+7",
+            .expected = 12345000LL,
+            .expected_precision = 8,
+            .expected_scale = 0}}) {
+    AssertDecimalFromString(tv.s, Decimal(tv.expected), tv.expected_precision,
+                            tv.expected_scale);
+  }
+}
+
+}  // namespace iceberg

From f43023d1d340442fc993a4005be7b96a81b429c9 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Mon, 18 Aug 2025 23:44:25 +0800
Subject: [PATCH 02/20] feat: add ToIntegerString

---
 src/iceberg/expression/decimal.cc | 73 ++++++++++++++++++++++++++++---
 test/decimal_test.cc              | 14 ++++++
 2 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index e5ac492ef..a79a957e5 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -19,6 +19,7 @@
 
 #include "iceberg/expression/decimal.h"
 
+#include <algorithm>
 #include <array>
 #include <bit>
 #include <cassert>
@@ -26,8 +27,10 @@
 #include <cstddef>
 #include <cstdint>
 #include <format>
+#include <iomanip>
 #include <iostream>
 #include <limits>
+#include <sstream>
 #include <string>
 #include <string_view>
 
@@ -622,9 +625,69 @@ static inline void ShiftAndAdd(std::string_view input, std::array<uint64_t, 2>&
   }
 }
 
+// Returns a mask for the bit_index lower order bits.
+// Only valid for bit_index in the range [0, 64).
+constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
+  return (static_cast<uint64_t>(1) << bit_index) - 1;
+}
+
 static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array,
                                             std::string* out) {
-  // TODO(zhjwpku): Implementation this.
+  const auto most_significant_non_zero = std::ranges::find_if(
+      array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
+  if (most_significant_non_zero == array.rend()) {
+    out->push_back('0');
+    return;
+  }
+
+  size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
+  std::array<uint64_t, 2> copy = array;
+  constexpr uint32_t k1e9 = 1000000000U;
+  constexpr size_t kNumBits = 128;
+  // Segments will contain the array split into groups that map to decimal digits, in
+  // little endian order. Each segment will hold at most 9 decimal digits. For example, if
+  // the input represents 9876543210123456789, then segments will be [123456789,
+  // 876543210, 9].
+  // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
+  // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
+  std::array<uint32_t, (kNumBits + 28) / 29> segments;
+  size_t num_segments = 0;
+  uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
+
+  std::cout << copy[1] << " " << copy[0] << std::endl;
+  do {
+    // Compute remainder = copy % 1e9 and copy = copy / 1e9.
+    uint32_t remainder = 0;
+    uint64_t* elem = most_significant_elem;
+    do {
+      // Compute dividend = (remainder << 32) | *elem  (a virtual 96-bit integer);
+      // *elem = dividend / 1e9;
+      // remainder = dividend % 1e9.
+      auto hi = static_cast<uint32_t>(*elem >> 32);
+      auto lo = static_cast<uint32_t>(*elem & LeastSignificantBitMask(32));
+      uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
+      uint64_t quotient_hi = dividend_hi / k1e9;
+      remainder = static_cast<uint32_t>(dividend_hi % k1e9);
+      uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
+      uint64_t quotient_lo = dividend_lo / k1e9;
+      remainder = static_cast<uint32_t>(dividend_lo % k1e9);
+      *elem = (quotient_hi << 32) | quotient_lo;
+    } while (elem-- != copy.data());
+
+    segments[num_segments++] = remainder;
+  } while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());
+
+  const uint32_t* segment = &segments[num_segments - 1];
+  std::stringstream oss;
+  // First segment is formatted as-is.
+  oss << *segment;
+  // Remaining segments are formatted with leading zeros to fill 9 digits. e.g. 123 is
+  // formatted as "000000123"
+  while (segment != segments.data()) {
+    --segment;
+    oss << std::setfill('0') << std::setw(9) << *segment;
+  }
+  out->append(oss.str());
 }
 
 }  // namespace
@@ -689,8 +752,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
     result.Negate();
   }
 
-  std::cout << result_array[1] << " " << result_array[0] << std::endl;
-
   if (parsed_scale < 0) {
     // For the scale to 0, to avoid negative scales (due to compatibility issues with
     // external systems such as databases)
@@ -798,13 +859,11 @@ ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs) {
 }
 
 ICEBERG_EXPORT Decimal operator/(const Decimal& lhs, const Decimal& rhs) {
-  auto [result, _] = lhs.Divide(rhs).value();
-  return result;
+  return lhs.Divide(rhs).value().first;
 }
 
 ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
-  auto [_, remainder] = lhs.Divide(rhs).value();
-  return remainder;
+  return lhs.Divide(rhs).value().second;
 }
 
 ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs) {
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 41c78896c..5b140c4c5 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -150,4 +150,18 @@ TEST(DecimalTest, SmallValues) {
   }
 }
 
+TEST(DecimalTest, LargeValues) {
+  const std::array<std::string, 4> string_values = {
+      "99999999999999999999999999999999999999", "-99999999999999999999999999999999999999",
+      "170141183460469231731687303715884105727",  // maximum positive value
+      "-170141183460469231731687303715884105728"  // minimum negative value
+  };
+
+  for (const auto& s : string_values) {
+    const Decimal value(s);
+    const std::string printed_value = value.ToIntegerString();
+    EXPECT_EQ(printed_value, s) << "Expected: " << s << ", but got: " << printed_value;
+  }
+}
+
 }  // namespace iceberg

From 4ebb2812f9bc08f695c73f2b4e24ecac1a617038 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 20 Aug 2025 00:36:44 +0800
Subject: [PATCH 03/20] feat: add ToString

---
 src/iceberg/expression/decimal.cc |  77 ++++++++-
 test/decimal_test.cc              | 251 ++++++++++++++++++++++++++++++
 2 files changed, 323 insertions(+), 5 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index a79a957e5..eadab243a 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -188,9 +188,10 @@ static Status BuildFromArray(Decimal* result, const uint32_t* array, int64_t len
   int64_t next_index = length - 1;
   for (size_t i = 0; i < 2 && next_index >= 0; i++) {
     uint64_t lower_bits = array[next_index--];
-    result_array[i] = (next_index < 0)
-                          ? lower_bits
-                          : (static_cast<uint64_t>(lower_bits) << 32) | lower_bits;
+    result_array[i] =
+        (next_index < 0)
+            ? lower_bits
+            : (static_cast<uint64_t>(array[next_index--]) << 32) | lower_bits;
   }
 
   *result = Decimal(result_array[1], result_array[0]);
@@ -654,7 +655,6 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array
   size_t num_segments = 0;
   uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
 
-  std::cout << copy[1] << " " << copy[0] << std::endl;
   do {
     // Compute remainder = copy % 1e9 and copy = copy / 1e9.
     uint32_t remainder = 0;
@@ -690,6 +690,71 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array
   out->append(oss.str());
 }
 
+static void AdjustIntegerStringWithScale(std::string* str, int32_t scale) {
+  if (scale == 0) {
+    return;
+  }
+  assert(str != nullptr);
+  assert(!str->empty());
+  const bool is_negative = str->front() == '-';
+  const auto is_negative_offset = static_cast<int32_t>(is_negative);
+  const auto len = static_cast<int32_t>(str->size());
+  const int32_t num_digits = len - is_negative_offset;
+  const int32_t adjusted_exponent = num_digits - 1 - scale;
+
+  // Note that the -6 is taken from the Java BigDecimal documentation.
+  if (scale < 0 || adjusted_exponent < -6) {
+    // Example 1:
+    // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
+    //               adjusted_exponent = 4
+    // After inserting decimal point: *str = "1.23"
+    // After appending exponent: *str = "1.23E+4"
+    // Example 2:
+    // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
+    //               adjusted_exponent = -7
+    // After inserting decimal point: *str = "-1.23"
+    // After appending exponent: *str = "-1.23E-7"
+    // Example 3:
+    // Precondition: *str = "0", is_negative_offset = 0, num_digits = 1, scale = -1,
+    //               adjusted_exponent = 1
+    // After inserting decimal point: *str = "0" // Not inserted
+    // After appending exponent: *str = "0E+1"
+    if (num_digits > 1) {
+      str->insert(str->begin() + 1 + is_negative_offset, '.');
+    }
+    str->push_back('E');
+    if (adjusted_exponent >= 0) {
+      str->push_back('+');
+    }
+    // Append the adjusted exponent as a string.
+    str->append(std::to_string(adjusted_exponent));
+    return;
+  }
+
+  if (num_digits > scale) {
+    const auto n = static_cast<size_t>(len - scale);
+    // Example 1:
+    // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
+    // After inserting decimal point: *str = "12.3"
+    // Example 2:
+    // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
+    // After inserting decimal point: *str = "-12.3"
+    str->insert(str->begin() + n, '.');
+    return;
+  }
+
+  // Example 1:
+  // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
+  // After insert: *str = "000123"
+  // After setting decimal point: *str = "0.0123"
+  // Example 2:
+  // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
+  // After insert: *str = "-000123"
+  // After setting decimal point: *str = "-0.0123"
+  str->insert(is_negative_offset, scale - num_digits + 2, '0');
+  str->at(is_negative_offset + 1) = '.';
+}
+
 }  // namespace
 
 Result<std::string> Decimal::ToString(int32_t scale) const {
@@ -698,7 +763,9 @@ Result<std::string> Decimal::ToString(int32_t scale) const {
         "Decimal::ToString: scale must be in the range [-{}, {}], was {}", kMaxScale,
         kMaxScale, scale);
   }
-  return NotImplemented("Decimal::ToString is not implemented yet");
+  std::string str(ToIntegerString());
+  AdjustIntegerStringWithScale(&str, scale);
+  return str;
 }
 
 std::string Decimal::ToIntegerString() const {
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 5b140c4c5..dd28a0c21 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -18,7 +18,11 @@
  */
 #include "iceberg/expression/decimal.h"
 
+#include <array>
+#include <cstdint>
+
 #include <gtest/gtest.h>
+#include <sys/types.h>
 
 #include "gmock/gmock.h"
 #include "matchers.h"
@@ -164,4 +168,251 @@ TEST(DecimalTest, LargeValues) {
   }
 }
 
+TEST(DecimalTest, TestStringRoundTrip) {
+  static constexpr std::array<uint64_t, 11> kTestBits = {
+      0,
+      1,
+      999,
+      1000,
+      std::numeric_limits<int32_t>::max(),
+      (1ull << 31),
+      std::numeric_limits<uint32_t>::max(),
+      (1ull << 32),
+      std::numeric_limits<int64_t>::max(),
+      (1ull << 63),
+      std::numeric_limits<uint64_t>::max(),
+  };
+  static constexpr std::array<int32_t, 3> kScales = {0, 1, 10};
+  for (uint64_t high : kTestBits) {
+    for (uint64_t low : kTestBits) {
+      Decimal value(high, low);
+      for (int32_t scale : kScales) {
+        auto result = value.ToString(scale);
+
+        ASSERT_THAT(result, IsOk())
+            << "Failed to convert Decimal to string: " << value.ToIntegerString()
+            << ", scale: " << scale;
+
+        auto round_trip = Decimal::FromString(result.value());
+        ASSERT_THAT(round_trip, IsOk())
+            << "Failed to convert string back to Decimal: " << result.value();
+
+        EXPECT_EQ(value, round_trip.value())
+            << "Round trip failed for value: " << value.ToIntegerString()
+            << ", scale: " << scale;
+      }
+    }
+  }
+}
+
+TEST(DecimalTest, FromStringLimits) {
+  AssertDecimalFromString("1e37", Decimal(542101086242752217ULL, 68739955140067328ULL),
+                          38, 0);
+
+  AssertDecimalFromString(
+      "-1e37", Decimal(17904642987466799398ULL, 18378004118569484288ULL), 38, 0);
+  AssertDecimalFromString(
+      "9.87e37", Decimal(5350537721215964381ULL, 15251391175463010304ULL), 38, 0);
+  AssertDecimalFromString(
+      "-9.87e37", Decimal(13096206352493587234ULL, 3195352898246541312ULL), 38, 0);
+  AssertDecimalFromString("12345678901234567890123456789012345678",
+                          Decimal(669260594276348691ULL, 14143994781733811022ULL), 38, 0);
+  AssertDecimalFromString("-12345678901234567890123456789012345678",
+                          Decimal(17777483479433202924ULL, 4302749291975740594ULL), 38,
+                          0);
+
+  // "9..9" (38 times)
+  const auto dec38times9pos = Decimal(5421010862427522170ULL, 687399551400673279ULL);
+  // "-9..9" (38 times)
+  const auto dec38times9neg = Decimal(13025733211282029445ULL, 17759344522308878337ULL);
+
+  AssertDecimalFromString("99999999999999999999999999999999999999", dec38times9pos, 38,
+                          0);
+  AssertDecimalFromString("-99999999999999999999999999999999999999", dec38times9neg, 38,
+                          0);
+  AssertDecimalFromString("9.9999999999999999999999999999999999999e37", dec38times9pos,
+                          38, 0);
+  AssertDecimalFromString("-9.9999999999999999999999999999999999999e37", dec38times9neg,
+                          38, 0);
+
+  // No exponent, many fractional digits
+  AssertDecimalFromString("9.9999999999999999999999999999999999999", dec38times9pos, 38,
+                          37);
+  AssertDecimalFromString("-9.9999999999999999999999999999999999999", dec38times9neg, 38,
+                          37);
+  AssertDecimalFromString("0.99999999999999999999999999999999999999", dec38times9pos, 38,
+                          38);
+  AssertDecimalFromString("-0.99999999999999999999999999999999999999", dec38times9neg, 38,
+                          38);
+
+  // Negative exponent
+  AssertDecimalFromString("1e-38", Decimal(0, 1), 1, 38);
+  AssertDecimalFromString(
+      "-1e-38", Decimal(18446744073709551615ULL, 18446744073709551615ULL), 1, 38);
+  AssertDecimalFromString("9.99e-36", Decimal(0, 999), 3, 38);
+  AssertDecimalFromString(
+      "-9.99e-36", Decimal(18446744073709551615ULL, 18446744073709550617ULL), 3, 38);
+  AssertDecimalFromString("987e-38", Decimal(0, 987), 3, 38);
+  AssertDecimalFromString(
+      "-987e-38", Decimal(18446744073709551615ULL, 18446744073709550629ULL), 3, 38);
+  AssertDecimalFromString("99999999999999999999999999999999999999e-37", dec38times9pos,
+                          38, 37);
+  AssertDecimalFromString("-99999999999999999999999999999999999999e-37", dec38times9neg,
+                          38, 37);
+  AssertDecimalFromString("99999999999999999999999999999999999999e-38", dec38times9pos,
+                          38, 38);
+  AssertDecimalFromString("-99999999999999999999999999999999999999e-38", dec38times9neg,
+                          38, 38);
+}
+
+TEST(DecimalTest, FromStringInvalid) {
+  // Empty string
+  auto result = Decimal::FromString("");
+  ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+  ASSERT_THAT(result, HasErrorMessage(
+                          "Decimal::FromString: empty string is not a valid Decimal"));
+  for (const auto& invalid_string :
+       std::vector<std::string>{"-", "0.0.0", "0-13-32", "a", "-23092.235-",
+                                "-+23092.235", "+-23092.235", "00a", "1e1a", "0.00123D/3",
+                                "1.23eA8", "1.23E+3A", "-1.23E--5", "1.2345E+++07"}) {
+    auto result = Decimal::FromString(invalid_string);
+    ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+    ASSERT_THAT(result, HasErrorMessage("Decimal::FromString: invalid decimal string"));
+  }
+
+  for (const auto& invalid_string :
+       std::vector<std::string>{"1e39", "-1e39", "9e39", "-9e39", "9.9e40", "-9.9e40"}) {
+    auto result = Decimal::FromString(invalid_string);
+    ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+    ASSERT_THAT(result,
+                HasErrorMessage("Decimal::FromString: scale must be in the range"));
+  }
+}
+
+TEST(DecimalTest, Division) {
+  const std::string expected_string_value("-23923094039234029");
+  const Decimal value(expected_string_value);
+  const Decimal result(value / 3);
+  const Decimal expected_value("-7974364679744676");
+  ASSERT_EQ(expected_value, result);
+}
+
+TEST(DecimalTest, ToString) {
+  struct ToStringCase {
+    int64_t test_value;
+    int32_t scale;
+    const char* expected_string;
+  };
+
+  for (const auto& t : std::vector<ToStringCase>{
+           {.test_value = 0, .scale = -1, .expected_string = "0E+1"},
+           {.test_value = 0, .scale = 0, .expected_string = "0"},
+           {.test_value = 0, .scale = 1, .expected_string = "0.0"},
+           {.test_value = 0, .scale = 6, .expected_string = "0.000000"},
+           {.test_value = 2, .scale = 7, .expected_string = "2E-7"},
+           {.test_value = 2, .scale = -1, .expected_string = "2E+1"},
+           {.test_value = 2, .scale = 0, .expected_string = "2"},
+           {.test_value = 2, .scale = 1, .expected_string = "0.2"},
+           {.test_value = 2, .scale = 6, .expected_string = "0.000002"},
+           {.test_value = -2, .scale = 7, .expected_string = "-2E-7"},
+           {.test_value = -2, .scale = 7, .expected_string = "-2E-7"},
+           {.test_value = -2, .scale = -1, .expected_string = "-2E+1"},
+           {.test_value = -2, .scale = 0, .expected_string = "-2"},
+           {.test_value = -2, .scale = 1, .expected_string = "-0.2"},
+           {.test_value = -2, .scale = 6, .expected_string = "-0.000002"},
+           {.test_value = -2, .scale = 7, .expected_string = "-2E-7"},
+           {.test_value = 123, .scale = -3, .expected_string = "1.23E+5"},
+           {.test_value = 123, .scale = -1, .expected_string = "1.23E+3"},
+           {.test_value = 123, .scale = 1, .expected_string = "12.3"},
+           {.test_value = 123, .scale = 0, .expected_string = "123"},
+           {.test_value = 123, .scale = 5, .expected_string = "0.00123"},
+           {.test_value = 123, .scale = 8, .expected_string = "0.00000123"},
+           {.test_value = 123, .scale = 9, .expected_string = "1.23E-7"},
+           {.test_value = 123, .scale = 10, .expected_string = "1.23E-8"},
+           {.test_value = -123, .scale = -3, .expected_string = "-1.23E+5"},
+           {.test_value = -123, .scale = -1, .expected_string = "-1.23E+3"},
+           {.test_value = -123, .scale = 1, .expected_string = "-12.3"},
+           {.test_value = -123, .scale = 0, .expected_string = "-123"},
+           {.test_value = -123, .scale = 5, .expected_string = "-0.00123"},
+           {.test_value = -123, .scale = 8, .expected_string = "-0.00000123"},
+           {.test_value = -123, .scale = 9, .expected_string = "-1.23E-7"},
+           {.test_value = -123, .scale = 10, .expected_string = "-1.23E-8"},
+           {.test_value = 1000000000, .scale = -3, .expected_string = "1.000000000E+12"},
+           {.test_value = 1000000000, .scale = -1, .expected_string = "1.000000000E+10"},
+           {.test_value = 1000000000, .scale = 0, .expected_string = "1000000000"},
+           {.test_value = 1000000000, .scale = 1, .expected_string = "100000000.0"},
+           {.test_value = 1000000000, .scale = 5, .expected_string = "10000.00000"},
+           {.test_value = 1000000000,
+            .scale = 15,
+            .expected_string = "0.000001000000000"},
+           {.test_value = 1000000000, .scale = 16, .expected_string = "1.000000000E-7"},
+           {.test_value = 1000000000, .scale = 17, .expected_string = "1.000000000E-8"},
+           {.test_value = -1000000000,
+            .scale = -3,
+            .expected_string = "-1.000000000E+12"},
+           {.test_value = -1000000000,
+            .scale = -1,
+            .expected_string = "-1.000000000E+10"},
+           {.test_value = -1000000000, .scale = 0, .expected_string = "-1000000000"},
+           {.test_value = -1000000000, .scale = 1, .expected_string = "-100000000.0"},
+           {.test_value = -1000000000, .scale = 5, .expected_string = "-10000.00000"},
+           {.test_value = -1000000000,
+            .scale = 15,
+            .expected_string = "-0.000001000000000"},
+           {.test_value = -1000000000, .scale = 16, .expected_string = "-1.000000000E-7"},
+           {.test_value = -1000000000, .scale = 17, .expected_string = "-1.000000000E-8"},
+           {.test_value = 1234567890123456789LL,
+            .scale = -3,
+            .expected_string = "1.234567890123456789E+21"},
+           {.test_value = 1234567890123456789LL,
+            .scale = -1,
+            .expected_string = "1.234567890123456789E+19"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 0,
+            .expected_string = "1234567890123456789"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 1,
+            .expected_string = "123456789012345678.9"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 5,
+            .expected_string = "12345678901234.56789"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 24,
+            .expected_string = "0.000001234567890123456789"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 25,
+            .expected_string = "1.234567890123456789E-7"},
+           {.test_value = -1234567890123456789LL,
+            .scale = -3,
+            .expected_string = "-1.234567890123456789E+21"},
+           {.test_value = -1234567890123456789LL,
+            .scale = -1,
+            .expected_string = "-1.234567890123456789E+19"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 0,
+            .expected_string = "-1234567890123456789"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 1,
+            .expected_string = "-123456789012345678.9"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 5,
+            .expected_string = "-12345678901234.56789"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 24,
+            .expected_string = "-0.000001234567890123456789"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 25,
+            .expected_string = "-1.234567890123456789E-7"},
+       }) {
+    const Decimal value(t.test_value);
+    auto result = value.ToString(t.scale);
+    ASSERT_THAT(result, IsOk())
+        << "Failed to convert Decimal to string: " << value.ToIntegerString()
+        << ", scale: " << t.scale;
+
+    EXPECT_EQ(result.value(), t.expected_string)
+        << "Expected: " << t.expected_string << ", but got: " << result.value();
+  }
+}
+
 }  // namespace iceberg

From d93b6503acc00fff4cae5ec409e32e7e5e1407f2 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Thu, 21 Aug 2025 00:54:15 +0800
Subject: [PATCH 04/20] feat: add FromReal and ToReal

---
 src/iceberg/expression/decimal.cc | 504 ++++++++++++++++++++++++++----
 src/iceberg/expression/decimal.h  |  44 ++-
 test/decimal_test.cc              |  94 ++++++
 3 files changed, 563 insertions(+), 79 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index eadab243a..cd7acaa25 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -146,7 +146,7 @@ static inline int64_t FillInArray(const Decimal& value, uint32_t* array) {
 /// \brief Shift the number in the array left by bits positions.
 static inline void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
   if (length > 0 && bits > 0) {
-    for (int i = 0; i < length - 1; ++i) {
+    for (int32_t i = 0; i < length - 1; ++i) {
       array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
     }
     array[length - 1] <<= bits;
@@ -156,7 +156,7 @@ static inline void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits)
 /// \brief Shift the number in the array right by bits positions.
 static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
   if (length > 0 && bits > 0) {
-    for (int i = length - 1; i > 0; --i) {
+    for (int32_t i = length - 1; i > 0; --i) {
       array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
     }
     array[0] >>= bits;
@@ -333,47 +333,6 @@ static inline Status DecimalDivide(const Decimal& dividend, const Decimal& divis
   return {};
 }
 
-/// \brief Powers of ten for Decimal with scale from 0 to 38.
-constexpr std::array<Decimal, Decimal::kMaxScale + 1> kDecimal128PowersOfTen = {
-    Decimal(1LL),
-    Decimal(10LL),
-    Decimal(100LL),
-    Decimal(1000LL),
-    Decimal(10000LL),
-    Decimal(100000LL),
-    Decimal(1000000LL),
-    Decimal(10000000LL),
-    Decimal(100000000LL),
-    Decimal(1000000000LL),
-    Decimal(10000000000LL),
-    Decimal(100000000000LL),
-    Decimal(1000000000000LL),
-    Decimal(10000000000000LL),
-    Decimal(100000000000000LL),
-    Decimal(1000000000000000LL),
-    Decimal(10000000000000000LL),
-    Decimal(100000000000000000LL),
-    Decimal(1000000000000000000LL),
-    Decimal(0LL, 10000000000000000000ULL),
-    Decimal(5LL, 7766279631452241920ULL),
-    Decimal(54LL, 3875820019684212736ULL),
-    Decimal(542LL, 1864712049423024128ULL),
-    Decimal(5421LL, 200376420520689664ULL),
-    Decimal(54210LL, 2003764205206896640ULL),
-    Decimal(542101LL, 1590897978359414784ULL),
-    Decimal(5421010LL, 15908979783594147840ULL),
-    Decimal(54210108LL, 11515845246265065472ULL),
-    Decimal(542101086LL, 4477988020393345024ULL),
-    Decimal(5421010862LL, 7886392056514347008ULL),
-    Decimal(54210108624LL, 5076944270305263616ULL),
-    Decimal(542101086242LL, 13875954555633532928ULL),
-    Decimal(5421010862427LL, 9632337040368467968ULL),
-    Decimal(54210108624275LL, 4089650035136921600ULL),
-    Decimal(542101086242752LL, 4003012203950112768ULL),
-    Decimal(5421010862427522LL, 3136633892082024448ULL),
-    Decimal(54210108624275221LL, 12919594847110692864ULL),
-    Decimal(542101086242752217LL, 68739955140067328ULL),
-    Decimal(5421010862427522170LL, 687399551400673280ULL)};
 }  // namespace
 
 Decimal::Decimal(std::string_view str) {
@@ -607,6 +566,48 @@ constexpr std::array<uint64_t, kInt64DecimalDigits + 1> kUInt64PowersOfTen = {
     // clang-format on
 };
 
+/// \brief Powers of ten for Decimal with scale from 0 to 38.
+constexpr std::array<Decimal, Decimal::kMaxScale + 1> kDecimal128PowersOfTen = {
+    Decimal(1LL),
+    Decimal(10LL),
+    Decimal(100LL),
+    Decimal(1000LL),
+    Decimal(10000LL),
+    Decimal(100000LL),
+    Decimal(1000000LL),
+    Decimal(10000000LL),
+    Decimal(100000000LL),
+    Decimal(1000000000LL),
+    Decimal(10000000000LL),
+    Decimal(100000000000LL),
+    Decimal(1000000000000LL),
+    Decimal(10000000000000LL),
+    Decimal(100000000000000LL),
+    Decimal(1000000000000000LL),
+    Decimal(10000000000000000LL),
+    Decimal(100000000000000000LL),
+    Decimal(1000000000000000000LL),
+    Decimal(0LL, 10000000000000000000ULL),
+    Decimal(5LL, 7766279631452241920ULL),
+    Decimal(54LL, 3875820019684212736ULL),
+    Decimal(542LL, 1864712049423024128ULL),
+    Decimal(5421LL, 200376420520689664ULL),
+    Decimal(54210LL, 2003764205206896640ULL),
+    Decimal(542101LL, 1590897978359414784ULL),
+    Decimal(5421010LL, 15908979783594147840ULL),
+    Decimal(54210108LL, 11515845246265065472ULL),
+    Decimal(542101086LL, 4477988020393345024ULL),
+    Decimal(5421010862LL, 7886392056514347008ULL),
+    Decimal(54210108624LL, 5076944270305263616ULL),
+    Decimal(542101086242LL, 13875954555633532928ULL),
+    Decimal(5421010862427LL, 9632337040368467968ULL),
+    Decimal(54210108624275LL, 4089650035136921600ULL),
+    Decimal(542101086242752LL, 4003012203950112768ULL),
+    Decimal(5421010862427522LL, 3136633892082024448ULL),
+    Decimal(54210108624275221LL, 12919594847110692864ULL),
+    Decimal(542101086242752217LL, 68739955140067328ULL),
+    Decimal(5421010862427522170LL, 687399551400673280ULL)};
+
 static inline void ShiftAndAdd(std::string_view input, std::array<uint64_t, 2>& out) {
   for (size_t pos = 0; pos < input.size();) {
     const size_t group_size = std::min(kInt64DecimalDigits, input.size() - pos);
@@ -843,23 +844,392 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
   return result;
 }
 
-template <typename T>
-  requires std::is_floating_point_v<T>
-Result<Decimal> Decimal::FromReal(T value, int32_t precision, int32_t scale) {
-  if (precision < 1 || precision > kMaxPrecision) {
-    return InvalidArgument(
-        "Decimal::FromReal: precision must be in the range [1, {}], "
-        "was {}",
-        kMaxPrecision, precision);
+namespace {
+
+constexpr float kFloatInf = std::numeric_limits<float>::infinity();
+
+// Attention: these pre-computed constants might not exactly represent their
+// decimal counterparts:
+//   >>> int32_t(1e38)
+//   99999999999999997748809823456034029568
+
+constexpr int32_t kPrecomputedPowersOfTen = 76;
+
+constexpr std::array<float, 2 * kPrecomputedPowersOfTen + 1> kFloatPowersOfTen = {
+    0,         0,         0,         0,         0,         0,         0,
+    0,         0,         0,         0,         0,         0,         0,
+    0,         0,         0,         0,         0,         0,         0,
+    0,         0,         0,         0,         0,         0,         0,
+    0,         0,         0,         1e-45f,    1e-44f,    1e-43f,    1e-42f,
+    1e-41f,    1e-40f,    1e-39f,    1e-38f,    1e-37f,    1e-36f,    1e-35f,
+    1e-34f,    1e-33f,    1e-32f,    1e-31f,    1e-30f,    1e-29f,    1e-28f,
+    1e-27f,    1e-26f,    1e-25f,    1e-24f,    1e-23f,    1e-22f,    1e-21f,
+    1e-20f,    1e-19f,    1e-18f,    1e-17f,    1e-16f,    1e-15f,    1e-14f,
+    1e-13f,    1e-12f,    1e-11f,    1e-10f,    1e-9f,     1e-8f,     1e-7f,
+    1e-6f,     1e-5f,     1e-4f,     1e-3f,     1e-2f,     1e-1f,     1e0f,
+    1e1f,      1e2f,      1e3f,      1e4f,      1e5f,      1e6f,      1e7f,
+    1e8f,      1e9f,      1e10f,     1e11f,     1e12f,     1e13f,     1e14f,
+    1e15f,     1e16f,     1e17f,     1e18f,     1e19f,     1e20f,     1e21f,
+    1e22f,     1e23f,     1e24f,     1e25f,     1e26f,     1e27f,     1e28f,
+    1e29f,     1e30f,     1e31f,     1e32f,     1e33f,     1e34f,     1e35f,
+    1e36f,     1e37f,     1e38f,     kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
+    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
+
+constexpr std::array<double, 2 * kPrecomputedPowersOfTen + 1> kDoublePowersOfTen = {
+    1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
+    1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
+    1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
+    1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
+    1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
+    1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,  1e-5,
+    1e-4,  1e-3,  1e-2,  1e-1,  1e0,   1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,
+    1e8,   1e9,   1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,  1e17,  1e18,  1e19,
+    1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,  1e28,  1e29,  1e30,  1e31,
+    1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38,  1e39,  1e40,  1e41,  1e42,  1e43,
+    1e44,  1e45,  1e46,  1e47,  1e48,  1e49,  1e50,  1e51,  1e52,  1e53,  1e54,  1e55,
+    1e56,  1e57,  1e58,  1e59,  1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,
+    1e68,  1e69,  1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76};
+
+// ceil(log2(10 ^ k)) for k in [0...76]
+constexpr std::array<int32_t, 76 + 1> kCeilLog2PowersOfTen = {
+    0,   4,   7,   10,  14,  17,  20,  24,  27,  30,  34,  37,  40,  44,  47,  50,
+    54,  57,  60,  64,  67,  70,  74,  77,  80,  84,  87,  90,  94,  97,  100, 103,
+    107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157,
+    160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210,
+    213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253};
+
+template <typename Real>
+struct RealTraits {};
+
+template <>
+struct RealTraits<float> {
+  static constexpr const float* powers_of_ten() { return kFloatPowersOfTen.data(); }
+
+  static constexpr float two_to_64(float x) { return x * 1.8446744e+19f; }
+  static constexpr float two_to_128(float x) { return x == 0 ? 0 : kFloatInf; }
+  static constexpr float two_to_192(float x) { return x == 0 ? 0 : kFloatInf; }
+
+  static constexpr int32_t kMantissaBits = 24;
+  // ceil(log10(2 ^ kMantissaBits))
+  static constexpr int32_t kMantissaDigits = 8;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
+};
+
+template <>
+struct RealTraits<double> {
+  static constexpr const double* powers_of_ten() { return kDoublePowersOfTen.data(); }
+
+  static constexpr double two_to_64(double x) { return x * 1.8446744073709552e+19; }
+  static constexpr double two_to_128(double x) { return x * 3.402823669209385e+38; }
+  static constexpr double two_to_192(double x) { return x * 6.277101735386681e+57; }
+
+  static constexpr int32_t kMantissaBits = 53;
+  // ceil(log10(2 ^ kMantissaBits))
+  static constexpr int32_t kMantissaDigits = 16;
+  // Integers between zero and kMaxPreciseInteger can be precisely represented
+  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
+};
+
+struct DecimalRealConversion {
+  // Return 10**exp, with a fast lookup, assuming `exp` is within bounds
+  template <typename Real>
+  static Real PowerOfTen(int32_t exp) {
+    constexpr int32_t N = kPrecomputedPowersOfTen;
+    assert(exp >= -N && exp <= N);
+    return RealTraits<Real>::powers_of_ten()[N + exp];
   }
-  if (scale > precision) {
-    return InvalidArgument(
-        "Decimal::FromReal: scale must be less than or equal to "
-        "precision, was scale = {}, precision = {}",
-        scale, precision);
+
+  // Return 10**exp, with a fast lookup if possible
+  template <typename Real>
+  static Real LargePowerOfTen(int32_t exp) {
+    constexpr int32_t N = kPrecomputedPowersOfTen;
+    if (exp >= -N && exp <= N) {
+      return RealTraits<Real>::powers_of_ten()[N + exp];
+    } else {
+      return std::pow(static_cast<Real>(10.0), static_cast<Real>(exp));
+    }
+  }
+
+  static constexpr int32_t kMaxPrecision = Decimal::kMaxPrecision;
+  static constexpr int32_t kMaxScale = Decimal::kMaxScale;
+
+  static constexpr auto& DecimalPowerOfTen(int32_t exp) {
+    assert(exp >= 0 && exp <= kMaxPrecision);
+    return kDecimal128PowersOfTen[exp];
+  }
+
+  // Right shift positive `x` by positive `bits`, rounded half to even
+  static Decimal RoundedRightShift(const Decimal& x, int32_t bits) {
+    if (bits == 0) {
+      return x;
+    }
+    int64_t result_hi = x.high();
+    uint64_t result_lo = x.low();
+    uint64_t shifted = 0;
+    while (bits >= 64) {
+      // Retain the information that set bits were shifted right.
+      // This is important to detect an exact half.
+      shifted = result_lo | (shifted > 0);
+      result_lo = result_hi;
+      result_hi >>= 63;  // for sign
+      bits -= 64;
+    }
+    if (bits > 0) {
+      shifted = (result_lo << (64 - bits)) | (shifted > 0);
+      result_lo >>= bits;
+      result_lo |= static_cast<uint64_t>(result_hi) << (64 - bits);
+      result_hi >>= bits;
+    }
+    // We almost have our result, but now do the rounding.
+    constexpr uint64_t kHalf = 0x8000000000000000ULL;
+    if (shifted > kHalf) {
+      // Strictly more than half => round up
+      result_lo += 1;
+      result_hi += (result_lo == 0);
+    } else if (shifted == kHalf) {
+      // Exactly half => round to even
+      if ((result_lo & 1) != 0) {
+        result_lo += 1;
+        result_hi += (result_lo == 0);
+      }
+    } else {
+      // Strictly less than half => round down
+    }
+    return Decimal{result_hi, result_lo};
+  }
+
+  template <typename Real>
+  static Result<Decimal> FromPositiveApprox(Real real, int32_t precision, int32_t scale) {
+    // Approximate algorithm that operates in the FP domain (thus subject
+    // to precision loss).
+    const auto x = std::nearbyint(real * PowerOfTen<double>(scale));
+    const auto max_abs = PowerOfTen<double>(precision);
+    if (x <= -max_abs || x >= max_abs) {
+      return Overflow(
+          "Cannot convert {} to Decimal(precision = {}, scale = {}): overflow", real,
+          precision, scale);
+    }
+
+    // Extract high and low bits
+    const auto high = std::floor(std::ldexp(x, -64));
+    const auto low = x - std::ldexp(high, 64);
+
+    assert(high >= 0);
+    assert(high < 9.223372036854776e+18);  // 2**63
+    assert(low >= 0);
+    assert(low < 1.8446744073709552e+19);  // 2**64
+    return Decimal(static_cast<int64_t>(high), static_cast<uint64_t>(low));
+  }
+
+  template <typename Real>
+  static Result<Decimal> FromPositiveReal(Real real, int32_t precision, int32_t scale) {
+    constexpr int32_t kMantissaBits = RealTraits<Real>::kMantissaBits;
+    constexpr int32_t kMantissaDigits = RealTraits<Real>::kMantissaDigits;
+
+    // Problem statement: construct the Decimal with the value
+    // closest to `real * 10^scale`.
+    if (scale < 0) {
+      // Negative scales are not handled below, fall back to approx algorithm
+      return FromPositiveApprox(real, precision, scale);
+    }
+
+    // 1. Check that `real` is within acceptable bounds.
+    const Real limit = PowerOfTen<Real>(precision - scale);
+    if (real > limit) {
+      // Checking the limit early helps ensure the computations below do not
+      // overflow.
+      // NOTE: `limit` is allowed here as rounding can make it smaller than
+      // the theoretical limit (for example, 1.0e23 < 10^23).
+      return Overflow(
+          "Cannot convert {} to Decimal(precision = {}, scale = {}): overflow", real,
+          precision, scale);
+    }
+
+    // 2. Losslessly convert `real` to `mant * 2**k`
+    int32_t binary_exp = 0;
+    const Real real_mant = std::frexp(real, &binary_exp);
+    // `real_mant` is within 0.5 and 1 and has M bits of precision.
+    // Multiply it by 2^M to get an exact integer.
+    const auto mant = static_cast<uint64_t>(std::ldexp(real_mant, kMantissaBits));
+    const int32_t k = binary_exp - kMantissaBits;
+    // (note that `real = mant * 2^k`)
+
+    // 3. Start with `mant`.
+    // We want to end up with `real * 10^scale` i.e. `mant * 2^k * 10^scale`.
+    Decimal x(mant);
+
+    if (k < 0) {
+      // k < 0 (i.e. binary_exp < kMantissaBits), is probably the common case
+      // when converting to decimal. It implies right-shifting by -k bits,
+      // while multiplying by 10^scale. We also must avoid overflow (losing
+      // bits on the left) and precision loss (losing bits on the right).
+      int32_t right_shift_by = -k;
+      int32_t mul_by_ten_to = scale;
+
+      // At this point, `x` has kMantissaDigits significant digits but it can
+      // fit kMaxPrecision (excluding sign). We can therefore multiply by up
+      // to 10^(kMaxPrecision - kMantissaDigits).
+      constexpr int32_t kSafeMulByTenTo = kMaxPrecision - kMantissaDigits;
+
+      if (mul_by_ten_to <= kSafeMulByTenTo) {
+        // Scale is small enough, so we can do it all at once.
+        x *= DecimalPowerOfTen(mul_by_ten_to);
+        x = RoundedRightShift(x, right_shift_by);
+      } else {
+        // Scale is too large, we cannot multiply at once without overflow.
+        // We use an iterative algorithm which alternately shifts left by
+        // multiplying by a power of ten, and shifts right by a number of bits.
+
+        // First multiply `x` by as large a power of ten as possible
+        // without overflowing.
+        x *= DecimalPowerOfTen(kSafeMulByTenTo);
+        mul_by_ten_to -= kSafeMulByTenTo;
+
+        // `x` now has full precision. However, we know we'll only
+        // keep `precision` digits at the end. Extraneous bits/digits
+        // on the right can be safely shifted away, before multiplying
+        // again.
+        // NOTE: if `precision` is the full precision then the algorithm will
+        // lose the last digit. If `precision` is almost the full precision,
+        // there can be an off-by-one error due to rounding.
+        const int32_t mul_step = std::max(1, kMaxPrecision - precision);
+
+        // The running exponent, useful to compute by how much we must
+        // shift right to make place on the left before the next multiply.
+        int32_t total_exp = 0;
+        int32_t total_shift = 0;
+        while (mul_by_ten_to > 0 && right_shift_by > 0) {
+          const int32_t exp = std::min(mul_by_ten_to, mul_step);
+          total_exp += exp;
+          // The supplementary right shift required so that
+          // `x * 10^total_exp / 2^total_shift` fits in the decimal.
+          assert(static_cast<size_t>(total_exp) < sizeof(kCeilLog2PowersOfTen));
+          const int32_t bits =
+              std::min(right_shift_by, kCeilLog2PowersOfTen[total_exp] - total_shift);
+          total_shift += bits;
+          // Right shift to make place on the left, then multiply
+          x = RoundedRightShift(x, bits);
+          right_shift_by -= bits;
+          // Should not overflow thanks to the precautions taken
+          x *= DecimalPowerOfTen(exp);
+          mul_by_ten_to -= exp;
+        }
+        if (mul_by_ten_to > 0) {
+          x *= DecimalPowerOfTen(mul_by_ten_to);
+        }
+        if (right_shift_by > 0) {
+          x = RoundedRightShift(x, right_shift_by);
+        }
+      }
+    } else {
+      // k >= 0 implies left-shifting by k bits and multiplying by 10^scale.
+      // The order of these operations therefore doesn't matter. We know
+      // we won't overflow because of the limit check above, and we also
+      // won't lose any significant bits on the right.
+      x *= DecimalPowerOfTen(scale);
+      x <<= k;
+    }
+
+    // Rounding might have pushed `x` just above the max precision, check again
+    if (!x.FitsInPrecision(precision)) {
+      return Overflow(
+          "Cannot convert {} to Decimal(precision = {}, scale = {}): overflow", real,
+          precision, scale);
+    }
+    return x;
+  }
+
+  template <typename Real>
+  static Real ToRealPositiveNoSplit(const Decimal& decimal, int32_t scale) {
+    Real x = RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high()));
+    x += static_cast<Real>(decimal.low());
+    x *= LargePowerOfTen<Real>(-scale);
+    return x;
+  }
+
+  /// An approximate conversion from Decimal128 to Real that guarantees:
+  /// 1. If the decimal is an integer, the conversion is exact.
+  /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
+  ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
+  ///    value.
+  /// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
+  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
+  /// Here "exact value" means the closest representable value by Real.
+  template <typename Real>
+  static Real ToRealPositive(const Decimal& decimal, int32_t scale) {
+    if (scale <= 0 ||
+        (decimal.high() == 0 && decimal.low() <= RealTraits<Real>::kMaxPreciseInteger)) {
+      // No need to split the decimal if it is already an integer (scale <= 0) or if it
+      // can be precisely represented by Real
+      return ToRealPositiveNoSplit<Real>(decimal, scale);
+    }
+
+    // Split decimal into whole and fractional parts to avoid precision loss
+    auto s = decimal.GetWholeAndFraction(scale);
+    assert(s);
+
+    Real whole = ToRealPositiveNoSplit<Real>(s->first, 0);
+    Real fraction = ToRealPositiveNoSplit<Real>(s->second, scale);
+
+    return whole + fraction;
+  }
+
+  template <typename Real>
+  static Result<Decimal> FromReal(Real value, int32_t precision, int32_t scale) {
+    assert(precision >= 1 && precision <= kMaxPrecision);
+    assert(scale >= -kMaxScale && scale <= kMaxScale);
+
+    if (!std::isfinite(value)) {
+      return InvalidArgument("Cannot convert {} to Decimal", value);
+    }
+
+    if (value == 0) {
+      return Decimal{};
+    }
+
+    if (value < 0) {
+      ICEBERG_ASSIGN_OR_RAISE(auto decimal, FromPositiveReal(-value, precision, scale));
+      return decimal.Negate();
+    } else {
+      return FromPositiveReal(value, precision, scale);
+    }
+  }
+
+  template <typename Real>
+  static Real ToReal(const Decimal& decimal, int32_t scale) {
+    assert(scale >= -kMaxScale && scale <= kMaxScale);
+
+    if (decimal.IsNegative()) {
+      // Convert the absolute value to avoid precision loss
+      auto abs = decimal;
+      abs.Negate();
+      return -ToRealPositive<Real>(abs, scale);
+    } else {
+      return ToRealPositive<Real>(decimal, scale);
+    }
   }
+};
+
+}  // namespace
 
-  return NotImplemented("Decimal::FromReal is not implemented yet");
+Result<Decimal> Decimal::FromReal(float x, int32_t precision, int32_t scale) {
+  return DecimalRealConversion::FromReal(x, precision, scale);
+}
+
+Result<Decimal> Decimal::FromReal(double x, int32_t precision, int32_t scale) {
+  return DecimalRealConversion::FromReal(x, precision, scale);
+}
+
+Result<std::pair<Decimal, Decimal>> Decimal::GetWholeAndFraction(int32_t scale) const {
+  assert(scale >= 0 && scale <= kMaxScale);
+
+  Decimal multiplier(kDecimal128PowersOfTen[scale]);
+  return Divide(multiplier);
 }
 
 Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
@@ -883,17 +1253,17 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
   return NotImplemented("Decimal::Rescale is not implemented yet");
 }
 
-template <typename T>
-  requires std::is_floating_point_v<T>
-T Decimal::ToReal(int32_t scale) const {
-  if (scale < -kMaxScale || scale > kMaxScale) {
-    throw InvalidArgument("Decimal::ToReal: scale must be in the range [-{}, {}], was {}",
-                          kMaxScale, kMaxScale, scale);
-  }
+bool Decimal::FitsInPrecision(int32_t precision) const {
+  assert(precision >= 1 && precision <= kMaxPrecision);
+  return Decimal::Abs(*this) < kDecimal128PowersOfTen[precision];
+}
+
+float Decimal::ToFloat(int32_t scale) const {
+  return DecimalRealConversion::ToReal<float>(*this, scale);
+}
 
-  // Convert Decimal to floating-point value with the given scale.
-  // This is a placeholder implementation.
-  return T(0);  // Replace with actual conversion logic.
+double Decimal::ToDouble(int32_t scale) const {
+  return DecimalRealConversion::ToReal<double>(*this, scale);
 }
 
 // Unary operators
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
index c4672bbde..7591fd6db 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/expression/decimal.h
@@ -36,10 +36,10 @@ namespace iceberg {
 /// 38 significant digits.
 class ICEBERG_EXPORT Decimal {
  public:
-  static constexpr int kBitWidth = 128;
-  static constexpr int kByteWidth = kBitWidth / 8;
-  static constexpr int kMaxPrecision = 38;
-  static constexpr int kMaxScale = 38;
+  static constexpr int32_t kBitWidth = 128;
+  static constexpr int32_t kByteWidth = kBitWidth / 8;
+  static constexpr int32_t kMaxPrecision = 38;
+  static constexpr int32_t kMaxScale = 38;
 
   /// \brief Default constructor initializes to zero.
   constexpr Decimal() noexcept : data_{0, 0} {}
@@ -152,19 +152,37 @@ class ICEBERG_EXPORT Decimal {
 
   /// \brief Convert the floating-point value to a Decimal value with the given
   /// precision and scale.
-  template <typename T>
-    requires std::is_floating_point_v<T>
-  static Result<Decimal> FromReal(T value, int32_t precision, int32_t scale);
+  static Result<Decimal> FromReal(double real, int32_t precision, int32_t scale);
+  static Result<Decimal> FromReal(float real, int32_t precision, int32_t scale);
+
+  /// \brief separate the integer and fractional parts for the given scale.
+  Result<std::pair<Decimal, Decimal>> GetWholeAndFraction(int32_t scale) const;
 
   /// \brief Convert Decimal from one scale to another.
   Result<Decimal> Rescale(int32_t orig_scale, int32_t new_scale) const;
 
+  /// \brief Whether this number fits in the given precision
+  ///
+  /// Return true if the number of significant digits is less or equal to `precision`.
+  bool FitsInPrecision(int32_t precision) const;
+
+  /// \brief Convert to a floating-point number (scaled)
+  float ToFloat(int32_t scale) const;
+  /// \brief Convert to a floating-point number (scaled)
+  double ToDouble(int32_t scale) const;
+
   /// \brief Convert the Decimal value to a floating-point value with the given scale.
   /// \param scale The scale to use for the conversion.
   /// \return The floating-point value.
   template <typename T>
     requires std::is_floating_point_v<T>
-  T ToReal(int32_t scale = 0) const;
+  T ToReal(int32_t scale) const {
+    if constexpr (std::is_same_v<T, float>) {
+      return ToFloat(scale);
+    } else {
+      return ToDouble(scale);
+    }
+  }
 
   /// \brief Returns 1 if positive or zero, -1 if strictly negative.
   int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[kHighIndex]) >> 63); }
@@ -172,6 +190,8 @@ class ICEBERG_EXPORT Decimal {
   /// \brief Check if the Decimal value is negative.
   bool IsNegative() const { return static_cast<int64_t>(data_[kHighIndex]) < 0; }
 
+  explicit operator bool() const { return data_ != std::array<uint64_t, 2>{0, 0}; }
+
   friend bool operator==(const Decimal& lhs, const Decimal& rhs) {
     return lhs.data_ == rhs.data_;
   }
@@ -182,11 +202,11 @@ class ICEBERG_EXPORT Decimal {
 
  private:
 #if ICEBERG_LITTLE_ENDIAN
-  static constexpr int kHighIndex = 1;
-  static constexpr int kLowIndex = 0;
+  static constexpr int32_t kHighIndex = 1;
+  static constexpr int32_t kLowIndex = 0;
 #else
-  static constexpr int kHighIndex = 0;
-  static constexpr int kLowIndex = 1;
+  static constexpr int32_t kHighIndex = 0;
+  static constexpr int32_t kLowIndex = 1;
 #endif
 
   std::array<uint64_t, 2> data_;
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index dd28a0c21..cc641db4e 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -415,4 +415,98 @@ TEST(DecimalTest, ToString) {
   }
 }
 
+template <typename Real>
+struct FromRealTestParam {
+  Real real;
+  int32_t precision;
+  int32_t scale;
+  const char* expected_string;
+};
+
+template <typename Real>
+void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale,
+                          const char* expected_string) {
+  auto result = Decimal::FromReal(real, precision, scale);
+  ASSERT_THAT(result, IsOk()) << "Failed to convert real to Decimal: " << real
+                              << ", precision: " << precision << ", scale: " << scale;
+  const Decimal& decimal = result.value();
+  EXPECT_EQ(decimal.ToString(scale).value(), expected_string);
+  const std::string expected_neg =
+      (decimal) ? "-" + std::string(expected_string) : expected_string;
+  auto neg_result = Decimal::FromReal(-real, precision, scale);
+  ASSERT_THAT(neg_result, IsOk())
+      << "Failed to convert negative real to Decimal: " << -real
+      << ", precision: " << precision << ", scale: " << scale;
+  const Decimal& neg_decimal = neg_result.value();
+  EXPECT_EQ(neg_decimal.ToString(scale).value(), expected_neg);
+}
+
+using FromFloatTestParam = FromRealTestParam<float>;
+using FromDoubleTestParam = FromRealTestParam<double>;
+
+template <typename Real>
+class TestDecimalFromReal : public ::testing::Test {
+ public:
+  using ParamType = FromRealTestParam<Real>;
+
+  void TestSuccess() {
+    const std::vector<ParamType> params{
+        // clang-format off
+        {0.0f, 1, 0, "0"},
+        {0.0f, 19, 4, "0.0000"},
+        {123.0f, 7, 4, "123.0000"},
+        {456.78f, 7, 4, "456.7800"},
+        {456.784f, 5, 2, "456.78"},
+        {456.786f, 5, 2, "456.79"},
+        {999.99f, 5, 2, "999.99"},
+        {123.0f, 19, 0, "123"},
+        {123.4f, 19, 0, "123"},
+        {123.6f, 19, 0, "124"},
+        // 2**62
+        {4.6116860184273879e+18, 19, 0, "4611686018427387904"},
+        // 2**63
+        {9.2233720368547758e+18, 19, 0, "9223372036854775808"},
+        // 2**64
+        {1.8446744073709552e+19, 20, 0, "18446744073709551616"},
+        // clang-format on
+    };
+
+    for (const ParamType& param : params) {
+      CheckDecimalFromReal(param.real, param.precision, param.scale,
+                           param.expected_string);
+    }
+  }
+
+  void TestErrors() {
+    const std::vector<ParamType> params{
+        {std::numeric_limits<Real>::infinity(), Decimal::kMaxPrecision / 2, 4, ""},
+        {-std::numeric_limits<Real>::infinity(), Decimal::kMaxPrecision / 2, 4, ""},
+        {std::numeric_limits<Real>::quiet_NaN(), Decimal::kMaxPrecision / 2, 4, ""},
+        {-std::numeric_limits<Real>::quiet_NaN(), Decimal::kMaxPrecision / 2, 4, ""},
+    };
+
+    for (const ParamType& param : params) {
+      auto result = Decimal::FromReal(param.real, param.precision, param.scale);
+      ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+    }
+
+    const std::vector<ParamType> overflow_params{
+        // Overflow errors
+        {1000.0, 3, 0, ""},  {-1000.0, 3, 0, ""}, {1000.0, 5, 2, ""},
+        {-1000.0, 5, 2, ""}, {999.996, 5, 2, ""}, {-999.996, 5, 2, ""},
+    };
+    for (const ParamType& param : overflow_params) {
+      auto result = Decimal::FromReal(param.real, param.precision, param.scale);
+      ASSERT_THAT(result, IsError(ErrorKind::kOverflow));
+    }
+  }
+};
+
+using RealTypes = ::testing::Types<float, double>;
+TYPED_TEST_SUITE(TestDecimalFromReal, RealTypes);
+
+TYPED_TEST(TestDecimalFromReal, TestSuccess) { this->TestSuccess(); }
+
+TYPED_TEST(TestDecimalFromReal, TestErrors) { this->TestErrors(); }
+
 }  // namespace iceberg

From 640f8c2b9f9f25f64ecc461581cfb9f04b439913 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Thu, 21 Aug 2025 22:13:13 +0800
Subject: [PATCH 05/20] feat: add more ToReal tests

---
 src/iceberg/expression/decimal.cc |   1 +
 test/decimal_test.cc              | 390 ++++++++++++++++++++++++++++++
 2 files changed, 391 insertions(+)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index cd7acaa25..b48b08143 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -24,6 +24,7 @@
 #include <bit>
 #include <cassert>
 #include <charconv>
+#include <cmath>
 #include <cstddef>
 #include <cstdint>
 #include <format>
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index cc641db4e..213c5fbf4 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -19,12 +19,15 @@
 #include "iceberg/expression/decimal.h"
 
 #include <array>
+#include <cmath>
 #include <cstdint>
+#include <vector>
 
 #include <gtest/gtest.h>
 #include <sys/types.h>
 
 #include "gmock/gmock.h"
+#include "gtest/gtest.h"
 #include "matchers.h"
 
 namespace iceberg {
@@ -441,6 +444,24 @@ void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale,
   EXPECT_EQ(neg_decimal.ToString(scale).value(), expected_neg);
 }
 
+template <typename Real>
+void CheckDecimalFromRealIntegerString(Real real, int32_t precision, int32_t scale,
+                                       const char* expected_string) {
+  auto result = Decimal::FromReal(real, precision, scale);
+  ASSERT_THAT(result, IsOk()) << "Failed to convert real to Decimal: " << real
+                              << ", precision: " << precision << ", scale: " << scale;
+  const Decimal& decimal = result.value();
+  EXPECT_EQ(decimal.ToIntegerString(), expected_string);
+  const std::string expected_neg =
+      (decimal) ? "-" + std::string(expected_string) : expected_string;
+  auto neg_result = Decimal::FromReal(-real, precision, scale);
+  ASSERT_THAT(neg_result, IsOk())
+      << "Failed to convert negative real to Decimal: " << -real
+      << ", precision: " << precision << ", scale: " << scale;
+  const Decimal& neg_decimal = neg_result.value();
+  EXPECT_EQ(neg_decimal.ToIntegerString(), expected_neg);
+}
+
 using FromFloatTestParam = FromRealTestParam<float>;
 using FromDoubleTestParam = FromRealTestParam<double>;
 
@@ -509,4 +530,373 @@ TYPED_TEST(TestDecimalFromReal, TestSuccess) { this->TestSuccess(); }
 
 TYPED_TEST(TestDecimalFromReal, TestErrors) { this->TestErrors(); }
 
+TEST(TestDecimalFromReal, FromFloat) {
+  const std::vector<FromFloatTestParam> params{
+      // -- Stress the 24 bits of precision of a float
+      // 2**63 + 2**40
+      FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"},
+      // 2**64 - 2**40
+      FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"},
+      // 2**64 + 2**41
+      FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"},
+      // 2**14 - 2**-10
+      FromFloatTestParam{16383.999f, 8, 3, "16383.999"},
+      FromFloatTestParam{16383.999f, 19, 3, "16383.999"},
+      // 1 - 2**-24
+      FromFloatTestParam{0.99999994f, 10, 10, "0.9999999404"},
+      FromFloatTestParam{0.99999994f, 15, 15, "0.999999940395355"},
+      FromFloatTestParam{0.99999994f, 20, 20, "0.99999994039535522461"},
+      FromFloatTestParam{0.99999994f, 21, 21, "0.999999940395355224609"},
+      FromFloatTestParam{0.99999994f, 38, 38, "0.99999994039535522460937500000000000000"},
+      // -- Other cases
+      // 10**38 - 2**103
+      FromFloatTestParam{9.999999e+37f, 38, 0, "99999986661652122824821048795547566080"},
+  };
+
+  for (const auto& param : params) {
+    CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected_string);
+  }
+}
+
+TEST(TestDecimalFromReal, FromFloatLargeValues) {
+  for (int32_t scale = -38; scale <= 38; ++scale) {
+    float real = std::pow(10.0f, static_cast<float>(scale));
+    CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
+  }
+
+  for (int32_t scale = -37; scale <= 36; ++scale) {
+    float real = 123.f * std::pow(10.f, static_cast<float>(scale));
+    CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12");
+    CheckDecimalFromRealIntegerString(real, 3, -scale, "123");
+    CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230");
+  }
+}
+
+TEST(TestDecimalFromReal, FromDouble) {
+  const std::vector<FromDoubleTestParam> params{
+      // -- Stress the 53 bits of precision of a double
+      // 2**63 + 2**11
+      FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"},
+      // 2**64 - 2**11
+      FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"},
+      // 2**64 + 2**11
+      FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"},
+      // 2**126
+      FromDoubleTestParam{8.507059173023462e+37, 38, 0,
+                          "85070591730234615865843651857942052864"},
+      // 2**126 - 2**74
+      FromDoubleTestParam{8.50705917302346e+37, 38, 0,
+                          "85070591730234596976377720379361198080"},
+      // 2**36 - 2**-16
+      FromDoubleTestParam{68719476735.999985, 11, 0, "68719476736"},
+      FromDoubleTestParam{68719476735.999985, 38, 27,
+                          "68719476735.999984741210937500000000000"},
+      // -- Other cases
+      // Almost 10**38 (minus 2**73)
+      FromDoubleTestParam{9.999999999999998e+37, 38, 0,
+                          "99999999999999978859343891977453174784"},
+      FromDoubleTestParam{9.999999999999998e+27, 38, 10,
+                          "9999999999999997384096481280.0000000000"},
+      // 10**N (sometimes fits in N digits)
+      FromDoubleTestParam{1e23, 23, 0, "99999999999999991611392"},
+      FromDoubleTestParam{1e23, 24, 1, "99999999999999991611392.0"},
+      FromDoubleTestParam{1e36, 37, 0, "1000000000000000042420637374017961984"},
+      FromDoubleTestParam{1e36, 38, 1, "1000000000000000042420637374017961984.0"},
+      FromDoubleTestParam{1e37, 37, 0, "9999999999999999538762658202121142272"},
+      FromDoubleTestParam{1e37, 38, 1, "9999999999999999538762658202121142272.0"},
+      FromDoubleTestParam{1e38, 38, 0, "99999999999999997748809823456034029568"},
+      // Hand-picked test cases that can involve precision issues.
+      // More comprehensive testing is done in the PyArrow test suite.
+      FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777527"},
+      FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5000"},
+      FromDoubleTestParam{999999999999999.0, 16, 1, "999999999999999.0"},
+      FromDoubleTestParam{9999999999999998.0, 17, 1, "9999999999999998.0"},
+      FromDoubleTestParam{999999999999999.9, 16, 1, "999999999999999.9"},
+      FromDoubleTestParam{9999999987., 38, 22, "9999999987.0000000000000000000000"},
+      FromDoubleTestParam{9999999987., 38, 28, "9999999987.0000000000000000000000000000"},
+      // 1 - 2**-52
+      // XXX the result should be 0.99999999999999977795539507496869191527
+      // but our algorithm loses the right digit.
+      FromDoubleTestParam{0.9999999999999998, 38, 38,
+                          "0.99999999999999977795539507496869191520"},
+      FromDoubleTestParam{0.9999999999999998, 20, 20, "0.99999999999999977796"},
+      FromDoubleTestParam{0.9999999999999998, 16, 16, "0.9999999999999998"},
+  };
+
+  for (const auto& param : params) {
+    CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected_string);
+  }
+}
+
+TEST(TestDecimalFromReal, FromDoubleLargeValues) {
+  constexpr auto kMaxScale = Decimal::kMaxScale;
+  for (int32_t scale = -kMaxScale; scale <= kMaxScale; ++scale) {
+    if (std::abs(1 - scale) < kMaxScale) {
+      double real = std::pow(10.0, static_cast<double>(scale));
+      CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
+    }
+  }
+
+  for (int32_t scale = -kMaxScale + 1; scale <= kMaxScale - 1; ++scale) {
+    if (std::abs(4 - scale) < kMaxScale) {
+      double real = 123. * std::pow(10.0, static_cast<double>(scale));
+      CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12");
+      CheckDecimalFromRealIntegerString(real, 3, -scale, "123");
+      CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230");
+    }
+  }
+}
+
+template <typename Real>
+struct ToRealTestParam {
+  std::string decimal_value;
+  int32_t scale;
+  Real expected;
+};
+
+using ToFloatTestParam = ToRealTestParam<float>;
+using ToDoubleTestParam = ToRealTestParam<double>;
+
+template <typename Real>
+void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real expected) {
+  auto result = Decimal::FromString(decimal_value);
+  ASSERT_THAT(result, IsOk()) << "Failed to convert string to Decimal: " << decimal_value;
+
+  const Decimal& decimal = result.value();
+  auto real_result = decimal.ToReal<Real>(scale);
+
+  EXPECT_EQ(real_result, expected)
+      << "Expected: " << expected << ", but got: " << real_result;
+}
+
+template <typename Real>
+void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale,
+                                    Real expected) {
+  Decimal dec(decimal_value);
+  Real actual = dec.template ToReal<Real>(scale);
+  ASSERT_TRUE(actual == expected || actual == std::nextafter(expected, expected + 1) ||
+              actual == std::nextafter(expected, expected - 1))
+      << "Decimal value: " << decimal_value << ", scale: " << scale
+      << ", expected: " << expected << ", actual: " << actual;
+}
+
+template <typename Real>
+void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale,
+                                     Real epsilon, Real expected) {
+  Decimal dec(decimal_value);
+  Real actual = dec.template ToReal<Real>(scale);
+  ASSERT_LE(std::abs(actual - expected), epsilon)
+      << "Decimal value: " << decimal_value << ", scale: " << scale
+      << ", expected: " << expected << ", actual: " << actual;
+}
+
+void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
+                              float expected) {
+  Decimal dec(decimal_value);
+  auto actual = dec.template ToReal<float>(scale);
+  ASSERT_FLOAT_EQ(actual, expected)
+      << "Decimal value: " << decimal_value << ", scale: " << scale
+      << ", expected: " << expected << ", actual: " << actual;
+}
+
+void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
+                              double expected) {
+  Decimal dec(decimal_value);
+  auto actual = dec.template ToReal<double>(scale);
+  ASSERT_DOUBLE_EQ(actual, expected)
+      << "Decimal value: " << decimal_value << ", scale: " << scale
+      << ", expected: " << expected << ", actual: " << actual;
+}
+
+template <typename Real>
+class TestDecimalToReal : public ::testing::Test {
+ public:
+  using ParamType = ToRealTestParam<Real>;
+
+  Real Pow2(int exp) { return std::pow(static_cast<Real>(2), static_cast<Real>(exp)); }
+
+  Real Pow10(int exp) { return std::pow(static_cast<Real>(10), static_cast<Real>(exp)); }
+
+  void TestSuccess() {
+    const std::vector<ParamType> params{
+        // clang-format off
+        {"0", 0, 0.0f},
+        {"0", 10, 0.0f},
+        {"0", -10, 0.0f},
+        {"1", 0, 1.0f},
+        {"12345", 0, 12345.f},
+#ifndef __MINGW32__  // MinGW has precision issues
+        {"12345", 1, 1234.5f},
+#endif
+        {"12345", -3, 12345000.f},
+        // 2**62
+        {"4611686018427387904", 0, Pow2(62)},
+        // 2**63 + 2**62
+        {"13835058055282163712", 0, Pow2(63) + Pow2(62)},
+        // 2**64 + 2**62
+        {"23058430092136939520", 0, Pow2(64) + Pow2(62)},
+        // 10**38 - 2**103
+#ifndef __MINGW32__  // MinGW has precision issues
+        {"99999989858795198174164788026374356992", 0, Pow10(38) - Pow2(103)},
+#endif
+        // clang-format on
+    };
+
+    for (const ParamType& param : params) {
+      CheckDecimalToReal(param.decimal_value, param.scale, param.expected);
+      if (param.decimal_value != "0") {
+        // Check negative values as well
+        CheckDecimalToReal("-" + param.decimal_value, param.scale, -param.expected);
+      }
+    }
+  }
+};
+
+TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
+
+TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
+
+// Custom test for Decimal::ToReal<float>
+TEST(TestDecimalToReal, ToFloatLargeValues) {
+  constexpr auto max_scale = Decimal::kMaxScale;
+
+  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
+#ifdef _WIN32
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
+#endif
+    CheckDecimalToRealApprox(
+        "1", scale, std::pow(static_cast<float>(10), static_cast<float>(-scale)));
+  }
+  for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
+#ifdef _WIN32
+    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
+    if (scale == 45) continue;
+#endif
+    const auto factor = static_cast<float>(123);
+    CheckDecimalToRealApprox(
+        "123", scale,
+        factor * std::pow(static_cast<float>(10), static_cast<float>(-scale)));
+  }
+}
+
+TEST(TestDecimalToReal, ToFloatPrecision) {
+  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
+  CheckDecimalToReal<float>("9223373136366403584", 0, 9.223373e+18f);
+  CheckDecimalToReal<float>("-9223373136366403584", 0, -9.223373e+18f);
+
+  // 2**64 + 2**41 (exactly representable in a float)
+  CheckDecimalToReal<float>("18446746272732807168", 0, 1.8446746e+19f);
+  CheckDecimalToReal<float>("-18446746272732807168", 0, -1.8446746e+19f);
+
+  // Integers are always exact
+  auto scale = Decimal::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');  // pad with trailing zeros
+  CheckDecimalToReal<float>(seven, scale, 7.0f);
+  CheckDecimalToReal<float>("-" + seven, scale, -7.0f);
+
+  CheckDecimalToReal<float>("99999999999999999999.0000000000000000", 16,
+                            99999999999999999999.0f);
+  CheckDecimalToReal<float>("-99999999999999999999.0000000000000000", 16,
+                            -99999999999999999999.0f);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<float>("9999999.9", 1, 9999999.9f);
+  CheckDecimalToRealWithinOneULP<float>("-9999999.9", 1, -9999999.9f);
+
+  CheckDecimalToRealWithinOneULP<float>("9999999.999999", 6, 9999999.999999f);
+  CheckDecimalToRealWithinOneULP<float>("-9999999.999999", 6, -9999999.999999f);
+
+  // Large fractions are within 2^-23
+  constexpr float epsilon = 1.1920928955078125e-07f;  // 2^-23
+  CheckDecimalToRealWithinEpsilon<float>("112334829348925.99070703983306884765625", 23,
+                                         epsilon,
+                                         112334829348925.99070703983306884765625f);
+  CheckDecimalToRealWithinEpsilon<float>("1.987748987892758765582589910934859345", 36,
+                                         epsilon,
+                                         1.987748987892758765582589910934859345f);
+}
+
+// ToReal<double> tests are disabled on MinGW because of precision issues in results
+#ifndef __MINGW32__
+
+TEST(TestDecimalToReal, ToDoubleLargeValues) {
+  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
+  // Nevertheless, power-of-ten factors are not all exactly representable
+  // in binary floating point.
+  constexpr auto max_scale = Decimal::kMaxScale;
+
+  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
+    CheckDecimalToRealApprox(
+        "1", scale, std::pow(static_cast<double>(10), static_cast<double>(-scale)));
+  }
+  for (int32_t scale = -max_scale + 1; scale <= max_scale - 1; scale++) {
+    const auto factor = static_cast<double>(123);
+    CheckDecimalToRealApprox(
+        "123", scale,
+        factor * std::pow(static_cast<double>(10), static_cast<double>(-scale)));
+  }
+}
+
+TEST(TestDecimalToReal, ToDoublePrecision) {
+  // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision)
+  CheckDecimalToReal<double>("9223372036854777856", 0, 9.223372036854778e+18);
+  CheckDecimalToReal<double>("-9223372036854777856", 0, -9.223372036854778e+18);
+  // 2**64 - 2**11 (exactly representable in a double)
+  CheckDecimalToReal<double>("18446744073709549568", 0, 1.844674407370955e+19);
+  CheckDecimalToReal<double>("-18446744073709549568", 0, -1.844674407370955e+19);
+  // 2**64 + 2**11 (exactly representable in a double)
+  CheckDecimalToReal<double>("18446744073709555712", 0, 1.8446744073709556e+19);
+  CheckDecimalToReal<double>("-18446744073709555712", 0, -1.8446744073709556e+19);
+
+  // Almost 10**38 (minus 2**73)
+  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 0,
+                             9.999999999999998e+37);
+  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 0,
+                             -9.999999999999998e+37);
+  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 10,
+                             9.999999999999998e+27);
+  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 10,
+                             -9.999999999999998e+27);
+  CheckDecimalToReal<double>("99999999999999978859343891977453174784", -10,
+                             9.999999999999998e+47);
+  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", -10,
+                             -9.999999999999998e+47);
+
+  // Integers are always exact
+  auto scale = Decimal::kMaxScale - 1;
+  std::string seven = "7.";
+  seven.append(scale, '0');
+  CheckDecimalToReal<double>(seven, scale, 7.0);
+  CheckDecimalToReal<double>("-" + seven, scale, -7.0);
+
+  CheckDecimalToReal<double>("99999999999999999999.0000000000000000", 16,
+                             99999999999999999999.0);
+  CheckDecimalToReal<double>("-99999999999999999999.0000000000000000", 16,
+                             -99999999999999999999.0);
+
+  // Small fractions are within one ULP
+  CheckDecimalToRealWithinOneULP<double>("9999999.9", 1, 9999999.9);
+  CheckDecimalToRealWithinOneULP<double>("-9999999.9", 1, -9999999.9);
+
+  CheckDecimalToRealWithinOneULP<double>("9999999.999999999999999", 15,
+                                         9999999.999999999999999);
+  CheckDecimalToRealWithinOneULP<double>("-9999999.999999999999999", 15,
+                                         -9999999.999999999999999);
+  // Large fractions are within 2^-52
+  constexpr double epsilon = 2.220446049250313080847263336181640625e-16;  // 2^-52
+  CheckDecimalToRealWithinEpsilon<double>("112334829348925.99070703983306884765625", 23,
+                                          epsilon,
+                                          112334829348925.99070703983306884765625);
+  CheckDecimalToRealWithinEpsilon<double>("1.987748987892758765582589910934859345", 36,
+                                          epsilon,
+                                          1.987748987892758765582589910934859345);
+}
+
+#endif  // __MINGW32__
+
 }  // namespace iceberg

From 51cf0481efb81814bf4e1d5728a25abcbd5b3ae2 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Thu, 21 Aug 2025 22:53:25 +0800
Subject: [PATCH 06/20] feat: add FromBigEndian

---
 src/iceberg/expression/decimal.cc | 71 +++++++++++++++++++++++++++++
 src/iceberg/expression/decimal.h  | 16 +++++++
 test/decimal_test.cc              | 74 +++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index b48b08143..b378a91f5 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -1216,6 +1216,24 @@ struct DecimalRealConversion {
   }
 };
 
+// Helper function used by Decimal::FromBigEndian
+static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
+  // We don't bounds check the length here because this is called by
+  // FromBigEndian that has a Decimal128 as its out parameters and
+  // that function is already checking the length of the bytes and only
+  // passes lengths between zero and eight.
+  uint64_t result = 0;
+  // Using memcpy instead of special casing for length
+  // and doing the conversion in 16, 32 parts, which could
+  // possibly create unaligned memory access on certain platforms
+  memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
+#if ICEBERG_LITTLE_ENDIAN
+  return std::byteswap(result);
+#else
+  return result;
+#endif
+}
+
 }  // namespace
 
 Result<Decimal> Decimal::FromReal(float x, int32_t precision, int32_t scale) {
@@ -1233,6 +1251,59 @@ Result<std::pair<Decimal, Decimal>> Decimal::GetWholeAndFraction(int32_t scale)
   return Divide(multiplier);
 }
 
+Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
+  static constexpr int32_t kMinDecimalBytes = 1;
+  static constexpr int32_t kMaxDecimalBytes = 16;
+
+  int64_t high, low;
+
+  if (length < kMinDecimalBytes || length > kMaxDecimalBytes) {
+    return InvalidArgument(
+        "Decimal::FromBigEndian: length must be in the range [{}, {}], was {}",
+        kMinDecimalBytes, kMaxDecimalBytes, length);
+  }
+
+  // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the
+  // sign bit.
+  const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
+
+  // 1. Extract the high bytes
+  // Stop byte of the high bytes
+  const int32_t high_bits_offset = std::max(0, length - 8);
+  const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
+
+  if (high_bits_offset == 8) {
+    // Avoid undefined shift by 64 below
+    high = high_bits;
+  } else {
+    high = -1 * (is_negative && length < kMaxDecimalBytes);
+    // Shift left enough bits to make room for the incoming int64_t
+    high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
+    // Preserve the upper bits by inplace OR-ing the int64_t
+    high |= high_bits;
+  }
+
+  // 2. Extract the low bytes
+  // Stop byte of the low bytes
+  const int32_t low_bits_offset = std::min(length, 8);
+  const auto low_bits =
+      UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
+
+  if (low_bits_offset == 8) {
+    // Avoid undefined shift by 64 below
+    low = low_bits;
+  } else {
+    // Sign extend the low bits if necessary
+    low = -1 * (is_negative && length < 8);
+    // Shift left enough bits to make room for the incoming int64_t
+    low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
+    // Preserve the upper bits by inplace OR-ing the int64_t
+    low |= low_bits;
+  }
+
+  return Decimal(high, static_cast<uint64_t>(low));
+}
+
 Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
   if (orig_scale < 0 || orig_scale > kMaxScale) {
     return InvalidArgument(
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
index 7591fd6db..b6781393a 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/expression/decimal.h
@@ -155,6 +155,11 @@ class ICEBERG_EXPORT Decimal {
   static Result<Decimal> FromReal(double real, int32_t precision, int32_t scale);
   static Result<Decimal> FromReal(float real, int32_t precision, int32_t scale);
 
+  /// \brief Convert from a big-endian byte representation. The length must be
+  ///        between 1 and 16.
+  /// \return error status if the length is an invalid value
+  static Result<Decimal> FromBigEndian(const uint8_t* data, int32_t length);
+
   /// \brief separate the integer and fractional parts for the given scale.
   Result<std::pair<Decimal, Decimal>> GetWholeAndFraction(int32_t scale) const;
 
@@ -184,6 +189,17 @@ class ICEBERG_EXPORT Decimal {
     }
   }
 
+  const uint8_t* native_endian_bytes() const {
+    return reinterpret_cast<const uint8_t*>(data_.data());
+  }
+
+  /// \brief Return the raw bytes of the value in native-endian byte order.
+  std::array<uint8_t, kByteWidth> ToBytes() const {
+    std::array<uint8_t, kByteWidth> out{{0}};
+    memcpy(out.data(), data_.data(), kByteWidth);
+    return out;
+  }
+
   /// \brief Returns 1 if positive or zero, -1 if strictly negative.
   int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[kHighIndex]) >> 63); }
 
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 213c5fbf4..ee19e4930 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -18,6 +18,7 @@
  */
 #include "iceberg/expression/decimal.h"
 
+#include <algorithm>
 #include <array>
 #include <cmath>
 #include <cstdint>
@@ -899,4 +900,77 @@ TEST(TestDecimalToReal, ToDoublePrecision) {
 
 #endif  // __MINGW32__
 
+TEST(DecimalTest, FromBigEndian) {
+  // We test out a variety of scenarios:
+  //
+  // * Positive values that are left shifted
+  //   and filled in with the same bit pattern
+  // * Negated of the positive values
+  // * Complement of the positive values
+  //
+  // For the positive values, we can call FromBigEndian
+  // with a length that is less than 16, whereas we must
+  // pass all 16 bytes for the negative and complement.
+  //
+  // We use a number of bit patterns to increase the coverage
+  // of scenarios
+  constexpr int WidthMinusOne = Decimal::kByteWidth - 1;
+
+  for (int32_t start : {1, 15, /* 00001111 */
+                        85,    /* 01010101 */
+                        127 /* 01111111 */}) {
+    Decimal value(start);
+    for (int ii = 0; ii < Decimal::kByteWidth; ++ii) {
+      auto native_endian = value.ToBytes();
+#if ICEBERG_LITTLE_ENDIAN
+      std::ranges::reverse(native_endian);
+#endif
+      // Limit the number of bytes we are passing to make
+      // sure that it works correctly. That's why all of the
+      // 'start' values don't have a 1 in the most significant
+      // bit place
+      auto result =
+          Decimal::FromBigEndian(native_endian.data() + WidthMinusOne - ii, ii + 1);
+      ASSERT_THAT(result, IsOk());
+      const Decimal& decimal = result.value();
+      EXPECT_EQ(decimal, value);
+
+      // Negate it
+      auto negated = -value;
+      native_endian = negated.ToBytes();
+
+#if ICEBERG_LITTLE_ENDIAN
+      // convert to big endian
+      std::ranges::reverse(native_endian);
+#endif
+      result = Decimal::FromBigEndian(native_endian.data() + WidthMinusOne - ii, ii + 1);
+      ASSERT_THAT(result, IsOk());
+      const Decimal& negated_decimal = result.value();
+      EXPECT_EQ(negated_decimal, negated);
+
+      // Take the complement
+      auto complement = ~value;
+      native_endian = complement.ToBytes();
+
+#if ICEBERG_LITTLE_ENDIAN
+      // convert to big endian
+      std::ranges::reverse(native_endian);
+#endif
+      result = Decimal::FromBigEndian(native_endian.data(), Decimal::kByteWidth);
+      ASSERT_THAT(result, IsOk());
+      const Decimal& complement_decimal = result.value();
+      EXPECT_EQ(complement_decimal, complement);
+
+      value <<= 2;
+      value += Decimal(start);
+    }
+  }
+}
+
+TEST(DecimalTest, FromBigEndianInvalid) {
+  ASSERT_THAT(Decimal::FromBigEndian(nullptr, -1), IsError(ErrorKind::kInvalidArgument));
+  ASSERT_THAT(Decimal::FromBigEndian(nullptr, Decimal::kByteWidth + 1),
+              IsError(ErrorKind::kInvalidArgument));
+}
+
 }  // namespace iceberg

From 83487db4ddb85fc788d7a4762e4fa6c1dc01f460 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Thu, 21 Aug 2025 23:11:09 +0800
Subject: [PATCH 07/20] fix: include missing headers

---
 src/iceberg/expression/decimal.cc |  8 ++++++++
 src/iceberg/expression/decimal.h  | 10 +++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index b378a91f5..3868c877a 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -24,9 +24,11 @@
 #include <bit>
 #include <cassert>
 #include <charconv>
+#include <climits>
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
+#include <cstring>
 #include <format>
 #include <iomanip>
 #include <iostream>
@@ -1338,6 +1340,12 @@ double Decimal::ToDouble(int32_t scale) const {
   return DecimalRealConversion::ToReal<double>(*this, scale);
 }
 
+std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
+  std::array<uint8_t, kByteWidth> out{{0}};
+  memcpy(out.data(), data_.data(), kByteWidth);
+  return out;
+}
+
 // Unary operators
 ICEBERG_EXPORT Decimal operator-(const Decimal& operand) {
   Decimal result(operand.high(), operand.low());
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
index b6781393a..44b161620 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/expression/decimal.h
@@ -168,7 +168,7 @@ class ICEBERG_EXPORT Decimal {
 
   /// \brief Whether this number fits in the given precision
   ///
-  /// Return true if the number of significant digits is less or equal to `precision`.
+  /// Returns true if the number of significant digits is less or equal to `precision`.
   bool FitsInPrecision(int32_t precision) const;
 
   /// \brief Convert to a floating-point number (scaled)
@@ -193,12 +193,8 @@ class ICEBERG_EXPORT Decimal {
     return reinterpret_cast<const uint8_t*>(data_.data());
   }
 
-  /// \brief Return the raw bytes of the value in native-endian byte order.
-  std::array<uint8_t, kByteWidth> ToBytes() const {
-    std::array<uint8_t, kByteWidth> out{{0}};
-    memcpy(out.data(), data_.data(), kByteWidth);
-    return out;
-  }
+  /// \brief Returns the raw bytes of the value in native-endian byte order.
+  std::array<uint8_t, kByteWidth> ToBytes() const;
 
   /// \brief Returns 1 if positive or zero, -1 if strictly negative.
   int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[kHighIndex]) >> 63); }

From aaafbfefb65e2314bb17a1e14cefb942c12c4af9 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 22 Aug 2025 20:51:59 +0800
Subject: [PATCH 08/20] feat: add more tests

---
 src/iceberg/expression/decimal.cc |  25 +-
 src/iceberg/expression/decimal.h  |  25 ++
 src/iceberg/result.h              |   8 +-
 test/decimal_test.cc              | 408 ++++++++++++++++++++++++++----
 4 files changed, 404 insertions(+), 62 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index 3868c877a..3cef87f32 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -33,6 +33,7 @@
 #include <iomanip>
 #include <iostream>
 #include <limits>
+#include <ostream>
 #include <sstream>
 #include <string>
 #include <string_view>
@@ -183,7 +184,7 @@ static Status BuildFromArray(Decimal* result, const uint32_t* array, int64_t len
   std::array<uint64_t, 2> result_array = {0, 0};
   for (int64_t i = length - 2 * 2 - 1; i >= 0; i--) {
     if (array[i] != 0) {
-      return Overflow("Decimal division overflow");
+      return Invalid("Decimal division overflow");
     }
   }
 
@@ -250,7 +251,7 @@ static inline Status DecimalDivide(const Decimal& dividend, const Decimal& divis
   }
 
   if (divisor_length == 0) {
-    return DivideByZero("Cannot divide by zero in DecimalDivide");
+    return Invalid("Cannot divide by zero in DecimalDivide");
   }
 
   if (divisor_length == 1) {
@@ -1013,9 +1014,8 @@ struct DecimalRealConversion {
     const auto x = std::nearbyint(real * PowerOfTen<double>(scale));
     const auto max_abs = PowerOfTen<double>(precision);
     if (x <= -max_abs || x >= max_abs) {
-      return Overflow(
-          "Cannot convert {} to Decimal(precision = {}, scale = {}): overflow", real,
-          precision, scale);
+      return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
+                     real, precision, scale);
     }
 
     // Extract high and low bits
@@ -1048,9 +1048,8 @@ struct DecimalRealConversion {
       // overflow.
       // NOTE: `limit` is allowed here as rounding can make it smaller than
       // the theoretical limit (for example, 1.0e23 < 10^23).
-      return Overflow(
-          "Cannot convert {} to Decimal(precision = {}, scale = {}): overflow", real,
-          precision, scale);
+      return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
+                     real, precision, scale);
     }
 
     // 2. Losslessly convert `real` to `mant * 2**k`
@@ -1140,9 +1139,8 @@ struct DecimalRealConversion {
 
     // Rounding might have pushed `x` just above the max precision, check again
     if (!x.FitsInPrecision(precision)) {
-      return Overflow(
-          "Cannot convert {} to Decimal(precision = {}, scale = {}): overflow", real,
-          precision, scale);
+      return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
+                     real, precision, scale);
     }
     return x;
   }
@@ -1346,6 +1344,11 @@ std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
   return out;
 }
 
+ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal) {
+  os << decimal.ToIntegerString();
+  return os;
+}
+
 // Unary operators
 ICEBERG_EXPORT Decimal operator-(const Decimal& operand) {
   Decimal result(operand.high(), operand.low());
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
index 44b161620..0d497fbfe 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/expression/decimal.h
@@ -27,6 +27,7 @@
 
 #include "iceberg/iceberg_export.h"
 #include "iceberg/result.h"
+#include "iceberg/util/macros.h"
 #include "iceberg/util/port.h"
 
 namespace iceberg {
@@ -171,6 +172,28 @@ class ICEBERG_EXPORT Decimal {
   /// Returns true if the number of significant digits is less or equal to `precision`.
   bool FitsInPrecision(int32_t precision) const;
 
+  /// \brief Convert to a signed integer
+  template <typename T>
+    requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
+  Result<T> ToInteger() const {
+    constexpr auto min_value = std::numeric_limits<T>::min();
+    constexpr auto max_value = std::numeric_limits<T>::max();
+    const auto& self = *this;
+    if (self < min_value || self > max_value) {
+      return Invalid("Invalid cast from Decimal to {} byte integer", sizeof(T));
+    }
+    return static_cast<T>(low());
+  }
+
+  /// \brief Convert to a signed integer
+  template <typename T>
+    requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
+  Status ToInteger(T* out) const {
+    ICEBERG_ASSIGN_OR_RAISE(auto result, ToInteger<T>());
+    *out = result;
+    return {};
+  }
+
   /// \brief Convert to a floating-point number (scaled)
   float ToFloat(int32_t scale) const;
   /// \brief Convert to a floating-point number (scaled)
@@ -212,6 +235,8 @@ class ICEBERG_EXPORT Decimal {
     return lhs.data_ != rhs.data_;
   }
 
+  friend std::ostream& operator<<(std::ostream& os, const Decimal& decimal);
+
  private:
 #if ICEBERG_LITTLE_ENDIAN
   static constexpr int32_t kHighIndex = 1;
diff --git a/src/iceberg/result.h b/src/iceberg/result.h
index a70ebac5a..79dd52b93 100644
--- a/src/iceberg/result.h
+++ b/src/iceberg/result.h
@@ -32,7 +32,7 @@ enum class ErrorKind {
   kAlreadyExists,
   kCommitStateUnknown,
   kDecompressError,
-  kDivideByZero,
+  kInvalid,  // For general invalid errors
   kInvalidArgument,
   kInvalidArrowData,
   kInvalidExpression,
@@ -47,8 +47,6 @@ enum class ErrorKind {
   kNotFound,
   kNotImplemented,
   kNotSupported,
-  kOverflow,
-  kRescaleDataLoss,
   kUnknownError,
 };
 
@@ -82,7 +80,7 @@ using Status = Result<void>;
 DEFINE_ERROR_FUNCTION(AlreadyExists)
 DEFINE_ERROR_FUNCTION(CommitStateUnknown)
 DEFINE_ERROR_FUNCTION(DecompressError)
-DEFINE_ERROR_FUNCTION(DivideByZero)
+DEFINE_ERROR_FUNCTION(Invalid)
 DEFINE_ERROR_FUNCTION(InvalidArgument)
 DEFINE_ERROR_FUNCTION(InvalidArrowData)
 DEFINE_ERROR_FUNCTION(InvalidExpression)
@@ -97,8 +95,6 @@ DEFINE_ERROR_FUNCTION(NotAllowed)
 DEFINE_ERROR_FUNCTION(NotFound)
 DEFINE_ERROR_FUNCTION(NotImplemented)
 DEFINE_ERROR_FUNCTION(NotSupported)
-DEFINE_ERROR_FUNCTION(Overflow)
-DEFINE_ERROR_FUNCTION(RescaleDataLoss)
 DEFINE_ERROR_FUNCTION(UnknownError)
 
 #undef DEFINE_ERROR_FUNCTION
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index ee19e4930..61119d7b7 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -24,11 +24,12 @@
 #include <cstdint>
 #include <vector>
 
+#include <gmock/gmock.h>
 #include <gtest/gtest.h>
 #include <sys/types.h>
 
 #include "gmock/gmock.h"
-#include "gtest/gtest.h"
+#include "iceberg/util/port.h"
 #include "matchers.h"
 
 namespace iceberg {
@@ -47,6 +48,11 @@ void AssertDecimalFromString(const std::string& s, const Decimal& expected,
   EXPECT_EQ(expected_scale, scale);
 }
 
+Decimal DecimalFromInt128(int128_t value) {
+  return {static_cast<int64_t>(value >> 64),
+          static_cast<uint64_t>(value & 0xFFFFFFFFFFFFFFFFULL)};
+}
+
 }  // namespace
 
 TEST(DecimalTest, Basics) {
@@ -519,7 +525,7 @@ class TestDecimalFromReal : public ::testing::Test {
     };
     for (const ParamType& param : overflow_params) {
       auto result = Decimal::FromReal(param.real, param.precision, param.scale);
-      ASSERT_THAT(result, IsError(ErrorKind::kOverflow));
+      ASSERT_THAT(result, IsError(ErrorKind::kInvalid));
     }
   }
 };
@@ -535,23 +541,52 @@ TEST(TestDecimalFromReal, FromFloat) {
   const std::vector<FromFloatTestParam> params{
       // -- Stress the 24 bits of precision of a float
       // 2**63 + 2**40
-      FromFloatTestParam{9.223373e+18f, 19, 0, "9223373136366403584"},
+      FromFloatTestParam{.real = 9.223373e+18f,
+                         .precision = 19,
+                         .scale = 0,
+                         .expected_string = "9223373136366403584"},
       // 2**64 - 2**40
-      FromFloatTestParam{1.8446743e+19f, 20, 0, "18446742974197923840"},
+      FromFloatTestParam{.real = 1.8446743e+19f,
+                         .precision = 20,
+                         .scale = 0,
+                         .expected_string = "18446742974197923840"},
       // 2**64 + 2**41
-      FromFloatTestParam{1.8446746e+19f, 20, 0, "18446746272732807168"},
+      FromFloatTestParam{.real = 1.8446746e+19f,
+                         .precision = 20,
+                         .scale = 0,
+                         .expected_string = "18446746272732807168"},
       // 2**14 - 2**-10
-      FromFloatTestParam{16383.999f, 8, 3, "16383.999"},
-      FromFloatTestParam{16383.999f, 19, 3, "16383.999"},
+      FromFloatTestParam{
+          .real = 16383.999f, .precision = 8, .scale = 3, .expected_string = "16383.999"},
+      FromFloatTestParam{16383.999f, .precision = 19, .scale = 3,
+                         .expected_string = "16383.999"},
       // 1 - 2**-24
-      FromFloatTestParam{0.99999994f, 10, 10, "0.9999999404"},
-      FromFloatTestParam{0.99999994f, 15, 15, "0.999999940395355"},
-      FromFloatTestParam{0.99999994f, 20, 20, "0.99999994039535522461"},
-      FromFloatTestParam{0.99999994f, 21, 21, "0.999999940395355224609"},
-      FromFloatTestParam{0.99999994f, 38, 38, "0.99999994039535522460937500000000000000"},
+      FromFloatTestParam{.real = 0.99999994f,
+                         .precision = 10,
+                         .scale = 10,
+                         .expected_string = "0.9999999404"},
+      FromFloatTestParam{.real = 0.99999994f,
+                         .precision = 15,
+                         .scale = 15,
+                         .expected_string = "0.999999940395355"},
+      FromFloatTestParam{.real = 0.99999994f,
+                         .precision = 20,
+                         .scale = 20,
+                         .expected_string = "0.99999994039535522461"},
+      FromFloatTestParam{.real = 0.99999994f,
+                         .precision = 21,
+                         .scale = 21,
+                         .expected_string = "0.999999940395355224609"},
+      FromFloatTestParam{.real = 0.99999994f,
+                         .precision = 38,
+                         .scale = 38,
+                         .expected_string = "0.99999994039535522460937500000000000000"},
       // -- Other cases
       // 10**38 - 2**103
-      FromFloatTestParam{9.999999e+37f, 38, 0, "99999986661652122824821048795547566080"},
+      FromFloatTestParam{.real = 9.999999e+37f,
+                         .precision = 38,
+                         .scale = 0,
+                         .expected_string = "99999986661652122824821048795547566080"},
   };
 
   for (const auto& param : params) {
@@ -577,51 +612,123 @@ TEST(TestDecimalFromReal, FromDouble) {
   const std::vector<FromDoubleTestParam> params{
       // -- Stress the 53 bits of precision of a double
       // 2**63 + 2**11
-      FromDoubleTestParam{9.223372036854778e+18, 19, 0, "9223372036854777856"},
+      FromDoubleTestParam{.real = 9.223372036854778e+18,
+                          .precision = 19,
+                          .scale = 0,
+                          .expected_string = "9223372036854777856"},
       // 2**64 - 2**11
-      FromDoubleTestParam{1.844674407370955e+19, 20, 0, "18446744073709549568"},
+      FromDoubleTestParam{.real = 1.844674407370955e+19,
+                          .precision = 20,
+                          .scale = 0,
+                          .expected_string = "18446744073709549568"},
       // 2**64 + 2**11
-      FromDoubleTestParam{1.8446744073709556e+19, 20, 0, "18446744073709555712"},
+      FromDoubleTestParam{.real = 1.8446744073709556e+19,
+                          .precision = 20,
+                          .scale = 0,
+                          .expected_string = "18446744073709555712"},
       // 2**126
-      FromDoubleTestParam{8.507059173023462e+37, 38, 0,
-                          "85070591730234615865843651857942052864"},
+      FromDoubleTestParam{.real = 8.507059173023462e+37,
+                          .precision = 38,
+                          .scale = 0,
+                          .expected_string = "85070591730234615865843651857942052864"},
       // 2**126 - 2**74
-      FromDoubleTestParam{8.50705917302346e+37, 38, 0,
-                          "85070591730234596976377720379361198080"},
+      FromDoubleTestParam{.real = 8.50705917302346e+37,
+                          .precision = 38,
+                          .scale = 0,
+                          .expected_string = "85070591730234596976377720379361198080"},
       // 2**36 - 2**-16
-      FromDoubleTestParam{68719476735.999985, 11, 0, "68719476736"},
-      FromDoubleTestParam{68719476735.999985, 38, 27,
-                          "68719476735.999984741210937500000000000"},
+      FromDoubleTestParam{.real = 68719476735.999985,
+                          .precision = 11,
+                          .scale = 0,
+                          .expected_string = "68719476736"},
+      FromDoubleTestParam{.real = 68719476735.999985,
+                          .precision = 38,
+                          .scale = 27,
+                          .expected_string = "68719476735.999984741210937500000000000"},
       // -- Other cases
       // Almost 10**38 (minus 2**73)
-      FromDoubleTestParam{9.999999999999998e+37, 38, 0,
-                          "99999999999999978859343891977453174784"},
-      FromDoubleTestParam{9.999999999999998e+27, 38, 10,
-                          "9999999999999997384096481280.0000000000"},
+      FromDoubleTestParam{.real = 9.999999999999998e+37,
+                          .precision = 38,
+                          .scale = 0,
+                          .expected_string = "99999999999999978859343891977453174784"},
+      FromDoubleTestParam{.real = 9.999999999999998e+27,
+                          .precision = 38,
+                          .scale = 10,
+                          .expected_string = "9999999999999997384096481280.0000000000"},
       // 10**N (sometimes fits in N digits)
-      FromDoubleTestParam{1e23, 23, 0, "99999999999999991611392"},
-      FromDoubleTestParam{1e23, 24, 1, "99999999999999991611392.0"},
-      FromDoubleTestParam{1e36, 37, 0, "1000000000000000042420637374017961984"},
-      FromDoubleTestParam{1e36, 38, 1, "1000000000000000042420637374017961984.0"},
-      FromDoubleTestParam{1e37, 37, 0, "9999999999999999538762658202121142272"},
-      FromDoubleTestParam{1e37, 38, 1, "9999999999999999538762658202121142272.0"},
-      FromDoubleTestParam{1e38, 38, 0, "99999999999999997748809823456034029568"},
+      FromDoubleTestParam{.real = 1e23,
+                          .precision = 23,
+                          .scale = 0,
+                          .expected_string = "99999999999999991611392"},
+      FromDoubleTestParam{.real = 1e23,
+                          .precision = 24,
+                          .scale = 1,
+                          .expected_string = "99999999999999991611392.0"},
+      FromDoubleTestParam{.real = 1e36,
+                          .precision = 37,
+                          .scale = 0,
+                          .expected_string = "1000000000000000042420637374017961984"},
+      FromDoubleTestParam{.real = 1e36,
+                          .precision = 38,
+                          .scale = 1,
+                          .expected_string = "1000000000000000042420637374017961984.0"},
+      FromDoubleTestParam{.real = 1e37,
+                          .precision = 37,
+                          .scale = 0,
+                          .expected_string = "9999999999999999538762658202121142272"},
+      FromDoubleTestParam{.real = 1e37,
+                          .precision = 38,
+                          .scale = 1,
+                          .expected_string = "9999999999999999538762658202121142272.0"},
+      FromDoubleTestParam{.real = 1e38,
+                          .precision = 38,
+                          .scale = 0,
+                          .expected_string = "99999999999999997748809823456034029568"},
       // Hand-picked test cases that can involve precision issues.
       // More comprehensive testing is done in the PyArrow test suite.
-      FromDoubleTestParam{9.223372036854778e+10, 19, 8, "92233720368.54777527"},
-      FromDoubleTestParam{1.8446744073709556e+15, 20, 4, "1844674407370955.5000"},
-      FromDoubleTestParam{999999999999999.0, 16, 1, "999999999999999.0"},
-      FromDoubleTestParam{9999999999999998.0, 17, 1, "9999999999999998.0"},
-      FromDoubleTestParam{999999999999999.9, 16, 1, "999999999999999.9"},
-      FromDoubleTestParam{9999999987., 38, 22, "9999999987.0000000000000000000000"},
-      FromDoubleTestParam{9999999987., 38, 28, "9999999987.0000000000000000000000000000"},
+      FromDoubleTestParam{.real = 9.223372036854778e+10,
+                          .precision = 19,
+                          .scale = 8,
+                          .expected_string = "92233720368.54777527"},
+      FromDoubleTestParam{.real = 1.8446744073709556e+15,
+                          .precision = 20,
+                          .scale = 4,
+                          .expected_string = "1844674407370955.5000"},
+      FromDoubleTestParam{.real = 999999999999999.0,
+                          .precision = 16,
+                          .scale = 1,
+                          .expected_string = "999999999999999.0"},
+      FromDoubleTestParam{.real = 9999999999999998.0,
+                          .precision = 17,
+                          .scale = 1,
+                          .expected_string = "9999999999999998.0"},
+      FromDoubleTestParam{.real = 999999999999999.9,
+                          .precision = 16,
+                          .scale = 1,
+                          .expected_string = "999999999999999.9"},
+      FromDoubleTestParam{.real = 9999999987.,
+                          .precision = 38,
+                          .scale = 22,
+                          .expected_string = "9999999987.0000000000000000000000"},
+      FromDoubleTestParam{.real = 9999999987.,
+                          .precision = 38,
+                          .scale = 28,
+                          .expected_string = "9999999987.0000000000000000000000000000"},
       // 1 - 2**-52
       // XXX the result should be 0.99999999999999977795539507496869191527
       // but our algorithm loses the right digit.
-      FromDoubleTestParam{0.9999999999999998, 38, 38,
-                          "0.99999999999999977795539507496869191520"},
-      FromDoubleTestParam{0.9999999999999998, 20, 20, "0.99999999999999977796"},
-      FromDoubleTestParam{0.9999999999999998, 16, 16, "0.9999999999999998"},
+      FromDoubleTestParam{.real = 0.9999999999999998,
+                          .precision = 38,
+                          .scale = 38,
+                          .expected_string = "0.99999999999999977795539507496869191520"},
+      FromDoubleTestParam{.real = 0.9999999999999998,
+                          .precision = 20,
+                          .scale = 20,
+                          .expected_string = "0.99999999999999977796"},
+      FromDoubleTestParam{.real = 0.9999999999999998,
+                          .precision = 16,
+                          .scale = 16,
+                          .expected_string = "0.9999999999999998"},
   };
 
   for (const auto& param : params) {
@@ -973,4 +1080,215 @@ TEST(DecimalTest, FromBigEndianInvalid) {
               IsError(ErrorKind::kInvalidArgument));
 }
 
+TEST(DecimalTestFunctionality, Multiply) {
+  ASSERT_EQ(Decimal(60501), Decimal(301) * Decimal(201));
+  ASSERT_EQ(Decimal(-60501), Decimal(-301) * Decimal(201));
+  ASSERT_EQ(Decimal(-60501), Decimal(301) * Decimal(-201));
+  ASSERT_EQ(Decimal(60501), Decimal(-301) * Decimal(-201));
+
+  // Edge cases
+  for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) {
+    for (auto y :
+         std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 0, 1, 2, 32, INT32_MAX}) {
+      Decimal decimal_x = DecimalFromInt128(x);
+      Decimal decimal_y = DecimalFromInt128(y);
+      Decimal result = decimal_x * decimal_y;
+      EXPECT_EQ(DecimalFromInt128(x * y), result)
+          << " x: " << decimal_x << " y: " << decimal_y;
+    }
+  }
+}
+
+TEST(DecimalTestFunctionality, Divide) {
+  ASSERT_EQ(Decimal(66), Decimal(20100) / Decimal(301));
+  ASSERT_EQ(Decimal(-66), Decimal(-20100) / Decimal(301));
+  ASSERT_EQ(Decimal(-66), Decimal(20100) / Decimal(-301));
+  ASSERT_EQ(Decimal(66), Decimal(-20100) / Decimal(-301));
+
+  for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) {
+    for (auto y : std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 1, 2, 32, INT32_MAX}) {
+      Decimal decimal_x = DecimalFromInt128(x);
+      Decimal decimal_y = DecimalFromInt128(y);
+      Decimal result = decimal_x / decimal_y;
+      EXPECT_EQ(DecimalFromInt128(x / y), result)
+          << " x: " << decimal_x << " y: " << decimal_y;
+    }
+  }
+}
+
+TEST(DecimalTestFunctionality, Modulo) {
+  ASSERT_EQ(Decimal(234), Decimal(20100) % Decimal(301));
+  ASSERT_EQ(Decimal(-234), Decimal(-20100) % Decimal(301));
+  ASSERT_EQ(Decimal(234), Decimal(20100) % Decimal(-301));
+  ASSERT_EQ(Decimal(-234), Decimal(-20100) % Decimal(-301));
+
+  // Test some edge cases
+  for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) {
+    for (auto y : std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 1, 2, 32, INT32_MAX}) {
+      Decimal decimal_x = DecimalFromInt128(x);
+      Decimal decimal_y = DecimalFromInt128(y);
+      Decimal result = decimal_x % decimal_y;
+      EXPECT_EQ(DecimalFromInt128(x % y), result)
+          << " x: " << decimal_x << " y: " << decimal_y;
+    }
+  }
+}
+
+TEST(DecimalTestFunctionality, Sign) {
+  ASSERT_EQ(1, Decimal(999999).Sign());
+  ASSERT_EQ(-1, Decimal(-999999).Sign());
+  ASSERT_EQ(1, Decimal(0).Sign());
+}
+
+TEST(DecimalTestFunctionality, GetWholeAndFraction) {
+  Decimal value("123456");
+
+  auto check = [value](int32_t scale, std::pair<int32_t, int32_t> expected) {
+    int32_t out;
+    auto result = value.GetWholeAndFraction(scale);
+    ASSERT_THAT(result->first.ToInteger(&out), IsOk());
+    ASSERT_EQ(expected.first, out);
+    ASSERT_THAT(result->second.ToInteger(&out), IsOk());
+    ASSERT_EQ(expected.second, out);
+  };
+
+  check(0, {123456, 0});
+  check(1, {12345, 6});
+  check(5, {1, 23456});
+  check(7, {0, 123456});
+}
+
+TEST(DecimalTestFunctionality, GetWholeAndFractionNegative) {
+  Decimal value("-123456");
+
+  auto check = [value](int32_t scale, std::pair<int32_t, int32_t> expected) {
+    int32_t out;
+    auto result = value.GetWholeAndFraction(scale);
+    ASSERT_THAT(result->first.ToInteger(&out), IsOk());
+    ASSERT_EQ(expected.first, out);
+    ASSERT_THAT(result->second.ToInteger(&out), IsOk());
+    ASSERT_EQ(expected.second, out);
+  };
+
+  check(0, {-123456, 0});
+  check(1, {-12345, -6});
+  check(5, {-1, -23456});
+  check(7, {0, -123456});
+}
+
+TEST(DecimalTestFunctionality, FitsInPrecision) {
+  ASSERT_TRUE(Decimal("0").FitsInPrecision(1));
+  ASSERT_TRUE(Decimal("9").FitsInPrecision(1));
+  ASSERT_TRUE(Decimal("-9").FitsInPrecision(1));
+  ASSERT_FALSE(Decimal("10").FitsInPrecision(1));
+  ASSERT_FALSE(Decimal("-10").FitsInPrecision(1));
+
+  ASSERT_TRUE(Decimal("0").FitsInPrecision(2));
+  ASSERT_TRUE(Decimal("10").FitsInPrecision(2));
+  ASSERT_TRUE(Decimal("-10").FitsInPrecision(2));
+  ASSERT_TRUE(Decimal("99").FitsInPrecision(2));
+  ASSERT_TRUE(Decimal("-99").FitsInPrecision(2));
+  ASSERT_FALSE(Decimal("100").FitsInPrecision(2));
+  ASSERT_FALSE(Decimal("-100").FitsInPrecision(2));
+
+  std::string max_nines(Decimal::kMaxPrecision, '9');
+  ASSERT_TRUE(Decimal(max_nines).FitsInPrecision(Decimal::kMaxPrecision));
+  ASSERT_TRUE(Decimal("-" + max_nines).FitsInPrecision(Decimal::kMaxPrecision));
+
+  std::string max_zeros(Decimal::kMaxPrecision, '0');
+  ASSERT_FALSE(Decimal("1" + max_zeros).FitsInPrecision(Decimal::kMaxPrecision));
+  ASSERT_FALSE(Decimal("-1" + max_zeros).FitsInPrecision(Decimal::kMaxPrecision));
+}
+
+TEST(DecimalTest, LeftShift) {
+  auto check = [](int128_t x, uint32_t bits) {
+    auto expected = DecimalFromInt128(x << bits);
+    auto actual = DecimalFromInt128(x) << bits;
+    ASSERT_EQ(actual.low(), expected.low());
+    ASSERT_EQ(actual.high(), expected.high());
+  };
+
+  ASSERT_EQ(Decimal("0"), Decimal("0") << 0);
+  ASSERT_EQ(Decimal("0"), Decimal("0") << 1);
+  ASSERT_EQ(Decimal("0"), Decimal("0") << 63);
+  ASSERT_EQ(Decimal("0"), Decimal("0") << 127);
+
+  check(123, 0);
+  check(123, 1);
+  check(123, 63);
+  check(123, 64);
+  check(123, 120);
+
+  ASSERT_EQ(Decimal("199999999999998"), Decimal("99999999999999") << 1);
+  ASSERT_EQ(Decimal("3435973836799965640261632"), Decimal("99999999999999") << 35);
+  ASSERT_EQ(Decimal("120892581961461708544797985370825293824"), Decimal("99999999999999")
+                                                                    << 80);
+
+  ASSERT_EQ(Decimal("1234567890123456789012"), Decimal("1234567890123456789012") << 0);
+  ASSERT_EQ(Decimal("2469135780246913578024"), Decimal("1234567890123456789012") << 1);
+  ASSERT_EQ(Decimal("88959991838777271103427858320412639232"),
+            Decimal("1234567890123456789012") << 56);
+
+  check(-123, 0);
+  check(-123, 1);
+  check(-123, 63);
+  check(-123, 64);
+  check(-123, 120);
+
+  ASSERT_EQ(Decimal("-199999999999998"), Decimal("-99999999999999") << 1);
+  ASSERT_EQ(Decimal("-3435973836799965640261632"), Decimal("-99999999999999") << 35);
+  ASSERT_EQ(Decimal("-120892581961461708544797985370825293824"),
+            Decimal("-99999999999999") << 80);
+
+  ASSERT_EQ(Decimal("-1234567890123456789012"), Decimal("-1234567890123456789012") << 0);
+  ASSERT_EQ(Decimal("-2469135780246913578024"), Decimal("-1234567890123456789012") << 1);
+  ASSERT_EQ(Decimal("-88959991838777271103427858320412639232"),
+            Decimal("-1234567890123456789012") << 56);
+}
+
+TEST(DecimalTest, RightShift) {
+  ASSERT_EQ(Decimal("0"), Decimal("0") >> 0);
+  ASSERT_EQ(Decimal("0"), Decimal("0") >> 1);
+  ASSERT_EQ(Decimal("0"), Decimal("0") >> 63);
+  ASSERT_EQ(Decimal("0"), Decimal("0") >> 127);
+
+  ASSERT_EQ(Decimal("1"), Decimal("1") >> 0);
+  ASSERT_EQ(Decimal("0"), Decimal("1") >> 1);
+  ASSERT_EQ(Decimal("0"), Decimal("1") >> 63);
+  ASSERT_EQ(Decimal("0"), Decimal("1") >> 127);
+
+  ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 0);
+  ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 1);
+  ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 63);
+  ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 127);
+
+  ASSERT_EQ(Decimal("1096516"), Decimal("1234567890123456789012") >> 50);
+  ASSERT_EQ(Decimal("66"), Decimal("1234567890123456789012") >> 64);
+  ASSERT_EQ(Decimal("2"), Decimal("1234567890123456789012") >> 69);
+  ASSERT_EQ(Decimal("0"), Decimal("1234567890123456789012") >> 71);
+  ASSERT_EQ(Decimal("0"), Decimal("1234567890123456789012") >> 127);
+
+  ASSERT_EQ(Decimal("-1096517"), Decimal("-1234567890123456789012") >> 50);
+  ASSERT_EQ(Decimal("-67"), Decimal("-1234567890123456789012") >> 64);
+  ASSERT_EQ(Decimal("-3"), Decimal("-1234567890123456789012") >> 69);
+  ASSERT_EQ(Decimal("-1"), Decimal("-1234567890123456789012") >> 71);
+  ASSERT_EQ(Decimal("-1"), Decimal("-1234567890123456789012") >> 127);
+}
+
+TEST(DecimalTest, Negate) {
+  auto check = [](Decimal pos, Decimal neg) {
+    EXPECT_EQ(-pos, neg);
+    EXPECT_EQ(-neg, pos);
+  };
+
+  check(Decimal(0, 0), Decimal(0, 0));
+  check(Decimal(0, 1), Decimal(-1, 0xFFFFFFFFFFFFFFFFULL));
+  check(Decimal(0, 2), Decimal(-1, 0xFFFFFFFFFFFFFFFEULL));
+  check(Decimal(0, 0x8000000000000000ULL), Decimal(-1, 0x8000000000000000ULL));
+  check(Decimal(0, 0xFFFFFFFFFFFFFFFFULL), Decimal(-1, 1));
+  check(Decimal(12, 0), Decimal(-12, 0));
+  check(Decimal(12, 1), Decimal(-13, 0xFFFFFFFFFFFFFFFFULL));
+  check(Decimal(12, 0xFFFFFFFFFFFFFFFFULL), Decimal(-13, 1));
+}
+
 }  // namespace iceberg

From 00dac135201a9baa17ae7eb00341ca5fb58ca94f Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 22 Aug 2025 21:41:13 +0800
Subject: [PATCH 09/20] feat: add rescale

---
 src/iceberg/expression/decimal.cc | 31 ++++++++++++++++++++++++++++++-
 test/decimal_test.cc              | 14 ++++++++++++--
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index 3cef87f32..c658d2f69 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -1234,6 +1234,19 @@ static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length)
 #endif
 }
 
+static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
+                                      const Decimal& multiplier, Decimal* result) {
+  if (delta_scale < 0) {
+    auto res = value.Divide(multiplier);
+    assert(res);
+    *result = res->first;
+    return res->second != 0;
+  }
+
+  *result = value * multiplier;
+  return (value < 0) ? *result > value : *result < value;
+}
+
 }  // namespace
 
 Result<Decimal> Decimal::FromReal(float x, int32_t precision, int32_t scale) {
@@ -1322,7 +1335,23 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
     return *this;
   }
 
-  return NotImplemented("Decimal::Rescale is not implemented yet");
+  const int32_t delta_scale = new_scale - orig_scale;
+  const int32_t abs_delta_scale = std::abs(delta_scale);
+  Decimal out;
+
+  assert(abs_delta_scale <= kMaxScale);
+
+  auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale];
+
+  const bool rescale_would_cause_data_loss =
+      RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out);
+
+  if (rescale_would_cause_data_loss) {
+    return Invalid("Rescale would cause data loss: {} -> {}", ToIntegerString(),
+                   out.ToIntegerString());
+  }
+
+  return out;
 }
 
 bool Decimal::FitsInPrecision(int32_t precision) const {
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 61119d7b7..7e9d9ae3b 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -28,7 +28,6 @@
 #include <gtest/gtest.h>
 #include <sys/types.h>
 
-#include "gmock/gmock.h"
 #include "iceberg/util/port.h"
 #include "matchers.h"
 
@@ -558,7 +557,9 @@ TEST(TestDecimalFromReal, FromFloat) {
       // 2**14 - 2**-10
       FromFloatTestParam{
           .real = 16383.999f, .precision = 8, .scale = 3, .expected_string = "16383.999"},
-      FromFloatTestParam{16383.999f, .precision = 19, .scale = 3,
+      FromFloatTestParam{.real = 16383.999f,
+                         .precision = 19,
+                         .scale = 3,
                          .expected_string = "16383.999"},
       // 1 - 2**-24
       FromFloatTestParam{.real = 0.99999994f,
@@ -1291,4 +1292,13 @@ TEST(DecimalTest, Negate) {
   check(Decimal(12, 0xFFFFFFFFFFFFFFFFULL), Decimal(-13, 1));
 }
 
+TEST(DecimalTest, Rescale) {
+  ASSERT_EQ(Decimal(11100), Decimal(111).Rescale(0, 2).value());
+  ASSERT_EQ(Decimal(111), Decimal(11100).Rescale(2, 0).value());
+  ASSERT_EQ(Decimal(5), Decimal(500000).Rescale(6, 1).value());
+  ASSERT_EQ(Decimal(500000), Decimal(5).Rescale(1, 6).value());
+
+  ASSERT_THAT(Decimal(5555555).Rescale(6, 1), IsError(ErrorKind::kInvalid));
+}
+
 }  // namespace iceberg

From 74742e74fc9693504686975dd2cd575ae9812b71 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 22 Aug 2025 22:55:13 +0800
Subject: [PATCH 10/20] fix: windows compiling error

---
 src/iceberg/expression/decimal.cc | 2 +-
 src/iceberg/expression/decimal.h  | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index c658d2f69..ecf72640d 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -1373,7 +1373,7 @@ std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
   return out;
 }
 
-ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal) {
+std::ostream& operator<<(std::ostream& os, const Decimal& decimal) {
   os << decimal.ToIntegerString();
   return os;
 }
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
index 0d497fbfe..80d26f33f 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/expression/decimal.h
@@ -21,6 +21,7 @@
 
 #include <array>
 #include <cstdint>
+#include <iosfwd>
 #include <string>
 #include <string_view>
 #include <type_traits>
@@ -235,8 +236,6 @@ class ICEBERG_EXPORT Decimal {
     return lhs.data_ != rhs.data_;
   }
 
-  friend std::ostream& operator<<(std::ostream& os, const Decimal& decimal);
-
  private:
 #if ICEBERG_LITTLE_ENDIAN
   static constexpr int32_t kHighIndex = 1;
@@ -249,6 +248,8 @@ class ICEBERG_EXPORT Decimal {
   std::array<uint64_t, 2> data_;
 };
 
+ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal);
+
 // Unary operators
 ICEBERG_EXPORT Decimal operator-(const Decimal& operand);
 ICEBERG_EXPORT Decimal operator~(const Decimal& operand);

From feff5bdf1f737399f23c4d5298e248d8944cf36f Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 22 Aug 2025 23:43:02 +0800
Subject: [PATCH 11/20] chore: add license

---
 LICENSE                           | 14 ++++++++++++++
 src/iceberg/expression/decimal.cc |  5 +++++
 src/iceberg/expression/decimal.h  |  5 +++++
 3 files changed, 24 insertions(+)

diff --git a/LICENSE b/LICENSE
index f97d28dfe..e57409056 100644
--- a/LICENSE
+++ b/LICENSE
@@ -253,3 +253,17 @@ https://github.com/apache/arrow/blob/main/cpp/src/arrow/visit_type_inline.h
 Copyright: 2016-2025 The Apache Software Foundation.
 Home page: https://arrow.apache.org/
 License: https://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
+The file src/iceberg/expression/decimal.h contains code adapted from
+
+https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
+
+The file src/iceberg/expression/decimal.cc contains code adapted from
+
+https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
+
+Copyright: 2016-2025 The Apache Software Foundation.
+Home page: https://arrow.apache.org/
+License: https://www.apache.org/licenses/LICENSE-2.0
diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
index ecf72640d..0061a411a 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/expression/decimal.cc
@@ -17,6 +17,11 @@
  * under the License.
  */
 
+/// \file iceberg/expression/decimal.cc
+/// \brief 128-bit fixed-point decimal numbers.
+/// Adapted from Apache Arrow with only Decimal128 support.
+/// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
+
 #include "iceberg/expression/decimal.h"
 
 #include <algorithm>
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/expression/decimal.h
index 80d26f33f..3b1587e82 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/expression/decimal.h
@@ -19,6 +19,11 @@
 
 #pragma once
 
+/// \file iceberg/expression/decimal.h
+/// \brief 128-bit fixed-point decimal numbers.
+/// Adapted from Apache Arrow with only Decimal128 support.
+/// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
+
 #include <array>
 #include <cstdint>
 #include <iosfwd>

From 026dfc94405c46a85f2cfbc2b10148af08a9ef95 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 27 Aug 2025 21:24:51 +0800
Subject: [PATCH 12/20] fix: review comments

---
 LICENSE                                     |  4 +-
 src/iceberg/CMakeLists.txt                  |  3 +-
 src/iceberg/{expression => util}/decimal.cc | 34 +++-------
 src/iceberg/{expression => util}/decimal.h  | 72 ++++++++++++---------
 src/iceberg/util/{port.h => int128.h}       | 15 +----
 test/decimal_test.cc                        | 29 +++++----
 6 files changed, 74 insertions(+), 83 deletions(-)
 rename src/iceberg/{expression => util}/decimal.cc (98%)
 rename src/iceberg/{expression => util}/decimal.h (87%)
 rename src/iceberg/util/{port.h => int128.h} (68%)

diff --git a/LICENSE b/LICENSE
index e57409056..b7f4c880b 100644
--- a/LICENSE
+++ b/LICENSE
@@ -256,11 +256,11 @@ License: https://www.apache.org/licenses/LICENSE-2.0
 
 --------------------------------------------------------------------------------
 
-The file src/iceberg/expression/decimal.h contains code adapted from
+The file src/iceberg/util/decimal.h contains code adapted from
 
 https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
 
-The file src/iceberg/expression/decimal.cc contains code adapted from
+The file src/iceberg/util/decimal.cc contains code adapted from
 
 https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
 
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 882c5a681..18ec6c60e 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -19,7 +19,7 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
                      "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")
 set(ICEBERG_SOURCES
     catalog/in_memory_catalog.cc
-    expression/decimal.cc
+    util/decimal.cc
     expression/expression.cc
     expression/literal.cc
     file_reader.cc
@@ -50,6 +50,7 @@ set(ICEBERG_SOURCES
     manifest_reader_internal.cc
     manifest_writer.cc
     arrow_c_data_guard_internal.cc
+    util/decimal.cc
     util/murmurhash3_internal.cc
     util/timepoint.cc
     util/gzip_internal.cc)
diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/util/decimal.cc
similarity index 98%
rename from src/iceberg/expression/decimal.cc
rename to src/iceberg/util/decimal.cc
index 0061a411a..b22db9166 100644
--- a/src/iceberg/expression/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -17,12 +17,12 @@
  * under the License.
  */
 
-/// \file iceberg/expression/decimal.cc
+/// \file iceberg/util/decimal.cc
 /// \brief 128-bit fixed-point decimal numbers.
 /// Adapted from Apache Arrow with only Decimal128 support.
 /// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
 
-#include "iceberg/expression/decimal.h"
+#include "iceberg/util/decimal.h"
 
 #include <algorithm>
 #include <array>
@@ -44,6 +44,7 @@
 #include <string_view>
 
 #include "iceberg/result.h"
+#include "iceberg/util/int128.h"
 #include "iceberg/util/macros.h"
 
 namespace iceberg {
@@ -314,7 +315,7 @@ static inline Status DecimalDivide(const Decimal& dividend, const Decimal& divis
     uint32_t prev = dividend_array[i];
     dividend_array[i] -= static_cast<uint32_t>(mult);
 
-    // if guess was too big,  we add back divisor
+    // if guess was too big, we add back divisor
     if (dividend_array[i] > prev) {
       guess--;
       uint32_t carry = 0;
@@ -1232,11 +1233,12 @@ static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length)
   // and doing the conversion in 16, 32 parts, which could
   // possibly create unaligned memory access on certain platforms
   memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
-#if ICEBERG_LITTLE_ENDIAN
-  return std::byteswap(result);
-#else
-  return result;
-#endif
+
+  if constexpr (std::endian::native == std::endian::little) {
+    return std::byteswap(result);
+  } else {
+    return result;
+  }
 }
 
 static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
@@ -1420,20 +1422,4 @@ ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
   return lhs.Divide(rhs).value().second;
 }
 
-ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs) {
-  return (lhs.high() < rhs.high()) || (lhs.high() == rhs.high() && lhs.low() < rhs.low());
-}
-
-ICEBERG_EXPORT bool operator<=(const Decimal& lhs, const Decimal& rhs) {
-  return !operator>(lhs, rhs);
-}
-
-ICEBERG_EXPORT bool operator>(const Decimal& lhs, const Decimal& rhs) {
-  return operator<(rhs, lhs);
-}
-
-ICEBERG_EXPORT bool operator>=(const Decimal& lhs, const Decimal& rhs) {
-  return !operator<(lhs, rhs);
-}
-
 }  // namespace iceberg
diff --git a/src/iceberg/expression/decimal.h b/src/iceberg/util/decimal.h
similarity index 87%
rename from src/iceberg/expression/decimal.h
rename to src/iceberg/util/decimal.h
index 3b1587e82..3b42d3a13 100644
--- a/src/iceberg/expression/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -19,22 +19,24 @@
 
 #pragma once
 
-/// \file iceberg/expression/decimal.h
+/// \file iceberg/util/decimal.h
 /// \brief 128-bit fixed-point decimal numbers.
 /// Adapted from Apache Arrow with only Decimal128 support.
 /// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
 
 #include <array>
+#include <bit>
 #include <cstdint>
 #include <iosfwd>
 #include <string>
 #include <string_view>
 #include <type_traits>
 
+#include <iceberg/type.h>
+
 #include "iceberg/iceberg_export.h"
 #include "iceberg/result.h"
 #include "iceberg/util/macros.h"
-#include "iceberg/util/port.h"
 
 namespace iceberg {
 
@@ -60,24 +62,24 @@ class ICEBERG_EXPORT Decimal {
   constexpr Decimal(T value) noexcept  // NOLINT
   {
     if (value < T{}) {
-      data_[kHighIndex] = ~static_cast<uint64_t>(0);
+      data_[highIndex()] = ~static_cast<uint64_t>(0);
     } else {
-      data_[kHighIndex] = 0;
+      data_[highIndex()] = 0;
     }
-    data_[kLowIndex] = static_cast<uint64_t>(value);
+    data_[lowIndex()] = static_cast<uint64_t>(value);
   }
 
   /// \brief Parse a Decimal from a string representation.
   explicit Decimal(std::string_view str);
 
-/// \brief Create a Decimal from two 64-bit integers.
-#if ICEBERG_LITTLE_ENDIAN
-  constexpr Decimal(int64_t high, uint64_t low) noexcept
-      : data_{low, static_cast<uint64_t>(high)} {}
-#else
-  constexpr Decimal(int64_t high, uint64_t low) noexcept
-      : data_{static_cast<uint64_t>(high), low} {}
-#endif
+  /// \brief Create a Decimal from two 64-bit integers.
+  constexpr Decimal(int64_t high, uint64_t low) noexcept {
+    if constexpr (std::endian::native == std::endian::little) {
+      data_ = {low, static_cast<uint64_t>(high)};
+    } else {
+      data_ = {static_cast<uint64_t>(high), low};
+    }
+  }
 
   /// \brief Negate the current Decimal value (in place)
   Decimal& Negate();
@@ -139,10 +141,10 @@ class ICEBERG_EXPORT Decimal {
   }
 
   /// \brief Get the high bits of the two's complement representation of the number.
-  constexpr int64_t high() const { return static_cast<int64_t>(data_[kHighIndex]); }
+  constexpr int64_t high() const { return static_cast<int64_t>(data_[highIndex()]); }
 
   /// \brief Get the low bits of the two's complement representation of the number.
-  constexpr uint64_t low() const { return data_[kLowIndex]; }
+  constexpr uint64_t low() const { return data_[lowIndex()]; }
 
   /// \brief Convert the Decimal value to a base 10 decimal string with the given scale.
   /// \param scale The scale to use for the string representation.
@@ -178,6 +180,14 @@ class ICEBERG_EXPORT Decimal {
   /// Returns true if the number of significant digits is less or equal to `precision`.
   bool FitsInPrecision(int32_t precision) const;
 
+  /// \brief Spaceship operator for three-way comparison.
+  std::strong_ordering operator<=>(const Decimal& other) const {
+    if (high() != other.high()) {
+      return high() <=> other.high();
+    }
+    return low() <=> other.low();
+  }
+
   /// \brief Convert to a signed integer
   template <typename T>
     requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
@@ -226,10 +236,10 @@ class ICEBERG_EXPORT Decimal {
   std::array<uint8_t, kByteWidth> ToBytes() const;
 
   /// \brief Returns 1 if positive or zero, -1 if strictly negative.
-  int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[kHighIndex]) >> 63); }
+  int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[highIndex()]) >> 63); }
 
   /// \brief Check if the Decimal value is negative.
-  bool IsNegative() const { return static_cast<int64_t>(data_[kHighIndex]) < 0; }
+  bool IsNegative() const { return static_cast<int64_t>(data_[highIndex()]) < 0; }
 
   explicit operator bool() const { return data_ != std::array<uint64_t, 2>{0, 0}; }
 
@@ -242,13 +252,21 @@ class ICEBERG_EXPORT Decimal {
   }
 
  private:
-#if ICEBERG_LITTLE_ENDIAN
-  static constexpr int32_t kHighIndex = 1;
-  static constexpr int32_t kLowIndex = 0;
-#else
-  static constexpr int32_t kHighIndex = 0;
-  static constexpr int32_t kLowIndex = 1;
-#endif
+  static constexpr int32_t highIndex() {
+    if constexpr (std::endian::native == std::endian::little) {
+      return 1;
+    } else {
+      return 0;
+    }
+  }
+
+  static constexpr int32_t lowIndex() {
+    if constexpr (std::endian::native == std::endian::little) {
+      return 0;
+    } else {
+      return 1;
+    }
+  }
 
   std::array<uint64_t, 2> data_;
 };
@@ -259,12 +277,6 @@ ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal
 ICEBERG_EXPORT Decimal operator-(const Decimal& operand);
 ICEBERG_EXPORT Decimal operator~(const Decimal& operand);
 
-// Binary operators
-ICEBERG_EXPORT bool operator<=(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT bool operator>=(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT bool operator>(const Decimal& lhs, const Decimal& rhs);
-
 ICEBERG_EXPORT Decimal operator+(const Decimal& lhs, const Decimal& rhs);
 ICEBERG_EXPORT Decimal operator-(const Decimal& lhs, const Decimal& rhs);
 ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs);
diff --git a/src/iceberg/util/port.h b/src/iceberg/util/int128.h
similarity index 68%
rename from src/iceberg/util/port.h
rename to src/iceberg/util/int128.h
index bdd65520b..8e9d27dc7 100644
--- a/src/iceberg/util/port.h
+++ b/src/iceberg/util/int128.h
@@ -17,22 +17,11 @@
  * under the License.
  */
 
-/// \file iceberg/util/port.h
-/// \brief Portability macros and definitions for Iceberg C++ library
+/// \file iceberg/util/int128.h
+/// \brief 128-bit integer type
 
 #pragma once
 
-#if defined(_WIN32) /* Windows is always little endian */ \
-    || defined(__LITTLE_ENDIAN__) ||                      \
-    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#  define ICEBERG_LITTLE_ENDIAN 1
-#elif defined(__BIG_ENDIAN__) || \
-    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#  define ICEBERG_LITTLE_ENDIAN 0
-#else
-#  error "Unsupported or unknown endianness"
-#endif
-
 #if defined(_MSC_VER)
 #  include <__msvc_int128.hpp>
 using int128_t = std::_Signed128;
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 7e9d9ae3b..1ba633bf0 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -16,10 +16,11 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-#include "iceberg/expression/decimal.h"
+#include "iceberg/util/decimal.h"
 
 #include <algorithm>
 #include <array>
+#include <bit>
 #include <cmath>
 #include <cstdint>
 #include <vector>
@@ -28,7 +29,7 @@
 #include <gtest/gtest.h>
 #include <sys/types.h>
 
-#include "iceberg/util/port.h"
+#include "iceberg/util/int128.h"
 #include "matchers.h"
 
 namespace iceberg {
@@ -1030,9 +1031,10 @@ TEST(DecimalTest, FromBigEndian) {
     Decimal value(start);
     for (int ii = 0; ii < Decimal::kByteWidth; ++ii) {
       auto native_endian = value.ToBytes();
-#if ICEBERG_LITTLE_ENDIAN
-      std::ranges::reverse(native_endian);
-#endif
+      if constexpr (std::endian::native == std::endian::little) {
+        // convert to big endian
+        std::ranges::reverse(native_endian);
+      }
       // Limit the number of bytes we are passing to make
       // sure that it works correctly. That's why all of the
       // 'start' values don't have a 1 in the most significant
@@ -1047,10 +1049,11 @@ TEST(DecimalTest, FromBigEndian) {
       auto negated = -value;
       native_endian = negated.ToBytes();
 
-#if ICEBERG_LITTLE_ENDIAN
-      // convert to big endian
-      std::ranges::reverse(native_endian);
-#endif
+      if constexpr (std::endian::native == std::endian::little) {
+        // convert to big endian
+        std::ranges::reverse(native_endian);
+      }
+
       result = Decimal::FromBigEndian(native_endian.data() + WidthMinusOne - ii, ii + 1);
       ASSERT_THAT(result, IsOk());
       const Decimal& negated_decimal = result.value();
@@ -1060,10 +1063,10 @@ TEST(DecimalTest, FromBigEndian) {
       auto complement = ~value;
       native_endian = complement.ToBytes();
 
-#if ICEBERG_LITTLE_ENDIAN
-      // convert to big endian
-      std::ranges::reverse(native_endian);
-#endif
+      if constexpr (std::endian::native == std::endian::little) {
+        // convert to big endian
+        std::ranges::reverse(native_endian);
+      }
       result = Decimal::FromBigEndian(native_endian.data(), Decimal::kByteWidth);
       ASSERT_THAT(result, IsOk());
       const Decimal& complement_decimal = result.value();

From c272c55b8ae0903461885769dd2f2c74dac1a2e3 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 27 Aug 2025 21:45:31 +0800
Subject: [PATCH 13/20] fix: use int128_t as Decimal data

---
 src/iceberg/util/decimal.cc |  8 +++-----
 src/iceberg/util/decimal.h  | 33 +++++++++++----------------------
 2 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index b22db9166..89d26635e 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -419,14 +419,12 @@ Decimal& Decimal::operator/=(const Decimal& other) {
 }
 
 Decimal& Decimal::operator|=(const Decimal& other) {
-  data_[0] |= other.data_[0];
-  data_[1] |= other.data_[1];
+  data_ |= other.data_;
   return *this;
 }
 
 Decimal& Decimal::operator&=(const Decimal& other) {
-  data_[0] &= other.data_[0];
-  data_[1] &= other.data_[1];
+  data_ &= other.data_;
   return *this;
 }
 
@@ -1376,7 +1374,7 @@ double Decimal::ToDouble(int32_t scale) const {
 
 std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
   std::array<uint8_t, kByteWidth> out{{0}};
-  memcpy(out.data(), data_.data(), kByteWidth);
+  memcpy(out.data(), &data_, kByteWidth);
   return out;
 }
 
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index 3b42d3a13..322f706ea 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -36,6 +36,7 @@
 
 #include "iceberg/iceberg_export.h"
 #include "iceberg/result.h"
+#include "iceberg/util/int128.h"
 #include "iceberg/util/macros.h"
 
 namespace iceberg {
@@ -51,22 +52,14 @@ class ICEBERG_EXPORT Decimal {
   static constexpr int32_t kMaxScale = 38;
 
   /// \brief Default constructor initializes to zero.
-  constexpr Decimal() noexcept : data_{0, 0} {}
-
-  /// \brief Construct a Decimal from an array of 2 64-bit integers.
-  explicit Decimal(std::array<uint64_t, 2> data) noexcept : data_(std::move(data)) {}
+  constexpr Decimal() noexcept = default;
 
   /// \brief Create a Decimal from any integer not wider than 64 bits.
   template <typename T>
     requires(std::is_integral_v<T> && (sizeof(T) <= sizeof(uint64_t)))
   constexpr Decimal(T value) noexcept  // NOLINT
   {
-    if (value < T{}) {
-      data_[highIndex()] = ~static_cast<uint64_t>(0);
-    } else {
-      data_[highIndex()] = 0;
-    }
-    data_[lowIndex()] = static_cast<uint64_t>(value);
+    data_ = static_cast<int128_t>(value);
   }
 
   /// \brief Parse a Decimal from a string representation.
@@ -74,11 +67,7 @@ class ICEBERG_EXPORT Decimal {
 
   /// \brief Create a Decimal from two 64-bit integers.
   constexpr Decimal(int64_t high, uint64_t low) noexcept {
-    if constexpr (std::endian::native == std::endian::little) {
-      data_ = {low, static_cast<uint64_t>(high)};
-    } else {
-      data_ = {static_cast<uint64_t>(high), low};
-    }
+    data_ = (static_cast<int128_t>(high) << 64) | low;
   }
 
   /// \brief Negate the current Decimal value (in place)
@@ -141,10 +130,10 @@ class ICEBERG_EXPORT Decimal {
   }
 
   /// \brief Get the high bits of the two's complement representation of the number.
-  constexpr int64_t high() const { return static_cast<int64_t>(data_[highIndex()]); }
+  constexpr int64_t high() const { return static_cast<int64_t>(data_ >> 64); }
 
   /// \brief Get the low bits of the two's complement representation of the number.
-  constexpr uint64_t low() const { return data_[lowIndex()]; }
+  constexpr uint64_t low() const { return static_cast<uint64_t>(data_); }
 
   /// \brief Convert the Decimal value to a base 10 decimal string with the given scale.
   /// \param scale The scale to use for the string representation.
@@ -229,19 +218,19 @@ class ICEBERG_EXPORT Decimal {
   }
 
   const uint8_t* native_endian_bytes() const {
-    return reinterpret_cast<const uint8_t*>(data_.data());
+    return reinterpret_cast<const uint8_t*>(data_);
   }
 
   /// \brief Returns the raw bytes of the value in native-endian byte order.
   std::array<uint8_t, kByteWidth> ToBytes() const;
 
   /// \brief Returns 1 if positive or zero, -1 if strictly negative.
-  int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[highIndex()]) >> 63); }
+  int64_t Sign() const { return 1 | (high() >> 63); }
 
   /// \brief Check if the Decimal value is negative.
-  bool IsNegative() const { return static_cast<int64_t>(data_[highIndex()]) < 0; }
+  bool IsNegative() const { return (high() >> 63) < 0; }
 
-  explicit operator bool() const { return data_ != std::array<uint64_t, 2>{0, 0}; }
+  explicit operator bool() const { return data_ != 0; }
 
   friend bool operator==(const Decimal& lhs, const Decimal& rhs) {
     return lhs.data_ == rhs.data_;
@@ -268,7 +257,7 @@ class ICEBERG_EXPORT Decimal {
     }
   }
 
-  std::array<uint64_t, 2> data_;
+  int128_t data_{0};
 };
 
 ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal);

From 2d5096acce248a12f960a572943c022852375f07 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 27 Aug 2025 22:15:12 +0800
Subject: [PATCH 14/20] chore: use int128_t calculation

---
 src/iceberg/util/decimal.cc | 652 ++++++++----------------------------
 src/iceberg/util/decimal.h  |  26 +-
 2 files changed, 159 insertions(+), 519 deletions(-)

diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index 89d26635e..80fad5cc5 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -42,6 +42,7 @@
 #include <sstream>
 #include <string>
 #include <string_view>
+#include <utility>
 
 #include "iceberg/result.h"
 #include "iceberg/util/int128.h"
@@ -51,61 +52,6 @@ namespace iceberg {
 
 namespace {
 
-static constexpr uint64_t kInt64Mask = 0xFFFFFFFFFFFFFFFF;
-
-// Convenience wrapper type over 128 bit unsigned integers. This class merely provides the
-// minimum necessary set of functions to perform 128 bit multiplication operations.
-struct Uint128 {
-  Uint128() = default;
-  Uint128(uint64_t high, uint64_t low)
-      : val_((static_cast<uint128_t>(high) << 64) | low) {}
-
-  explicit Uint128(uint64_t value) : val_(value) {}
-  explicit Uint128(const Decimal& decimal)
-      : val_((static_cast<uint128_t>(decimal.high()) << 64) | decimal.low()) {}
-
-  uint64_t high() const { return static_cast<uint64_t>(val_ >> 64); }
-  uint64_t low() const { return static_cast<uint64_t>(val_ & kInt64Mask); }
-
-  Uint128& operator+=(const Uint128& other) {
-    val_ += other.val_;
-    return *this;
-  }
-
-  Uint128& operator*=(const Uint128& other) {
-    val_ *= other.val_;
-    return *this;
-  }
-
-  uint128_t val_{};
-};
-
-// Signed addition with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-  requires std::is_signed_v<SignedInt>
-constexpr SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
-  using UnsignedInt = std::make_unsigned_t<SignedInt>;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) +
-                                static_cast<UnsignedInt>(v));
-}
-
-// Signed subtraction with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-  requires std::is_signed_v<SignedInt>
-constexpr SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
-  using UnsignedInt = std::make_unsigned_t<SignedInt>;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
-                                static_cast<UnsignedInt>(v));
-}
-
-// Signed negation with well-defined behaviour on overflow (as unsigned)
-template <typename SignedInt>
-  requires std::is_signed_v<SignedInt>
-constexpr SignedInt SafeSignedNegate(SignedInt u) {
-  using UnsignedInt = std::make_unsigned_t<SignedInt>;
-  return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
-}
-
 // Signed left shift with well-defined behaviour on negative numbers or overflow
 template <typename SignedInt, typename Shift>
   requires std::is_signed_v<SignedInt> && std::is_integral_v<Shift>
@@ -114,364 +60,6 @@ constexpr SignedInt SafeLeftShift(SignedInt u, Shift shift) {
   return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
 }
 
-/// \brief Expands the given value into a big endian array of ints so that we can work on
-/// it. The array will be converted to an absolute value. The array will remove leading
-/// zeros from the value.
-/// \param array a big endian array of length 4 to set with the value
-/// \result the output length of the array
-static inline int64_t FillInArray(const Decimal& value, uint32_t* array) {
-  Decimal abs_value = Decimal::Abs(value);
-  auto high = static_cast<uint64_t>(abs_value.high());
-  auto low = abs_value.low();
-
-  if (high != 0) {
-    if (high > std::numeric_limits<uint32_t>::max()) {
-      array[0] = static_cast<uint32_t>(high >> 32);
-      array[1] = static_cast<uint32_t>(high);
-      array[2] = static_cast<uint32_t>(low >> 32);
-      array[3] = static_cast<uint32_t>(low);
-      return 4;
-    }
-
-    array[0] = static_cast<uint32_t>(high);
-    array[1] = static_cast<uint32_t>(low >> 32);
-    array[2] = static_cast<uint32_t>(low);
-    return 3;
-  }
-
-  if (low > std::numeric_limits<uint32_t>::max()) {
-    array[0] = static_cast<uint32_t>(low >> 32);
-    array[1] = static_cast<uint32_t>(low);
-    return 2;
-  }
-
-  if (low == 0) {
-    return 0;
-  }
-
-  array[0] = static_cast<uint32_t>(low);
-  return 1;
-}
-
-/// \brief Shift the number in the array left by bits positions.
-static inline void ShiftArrayLeft(uint32_t* array, int64_t length, int64_t bits) {
-  if (length > 0 && bits > 0) {
-    for (int32_t i = 0; i < length - 1; ++i) {
-      array[i] = (array[i] << bits) | (array[i + 1] >> (32 - bits));
-    }
-    array[length - 1] <<= bits;
-  }
-}
-
-/// \brief Shift the number in the array right by bits positions.
-static inline void ShiftArrayRight(uint32_t* array, int64_t length, int64_t bits) {
-  if (length > 0 && bits > 0) {
-    for (int32_t i = length - 1; i > 0; --i) {
-      array[i] = (array[i] >> bits) | (array[i - 1] << (32 - bits));
-    }
-    array[0] >>= bits;
-  }
-}
-
-/// \brief Fix the signs of the result and remainder after a division operation.
-static inline void FixDivisionSigns(Decimal* result, Decimal* remainder,
-                                    bool dividend_negative, bool divisor_negative) {
-  if (dividend_negative != divisor_negative) {
-    result->Negate();
-  }
-  if (dividend_negative) {
-    remainder->Negate();
-  }
-}
-
-/// \brief Build a Decimal from a big-endian array of uint32_t.
-static Status BuildFromArray(Decimal* result, const uint32_t* array, int64_t length) {
-  // result_array[0] is low, result_array[1] is high
-  std::array<uint64_t, 2> result_array = {0, 0};
-  for (int64_t i = length - 2 * 2 - 1; i >= 0; i--) {
-    if (array[i] != 0) {
-      return Invalid("Decimal division overflow");
-    }
-  }
-
-  // Construct the array in reverse order
-  int64_t next_index = length - 1;
-  for (size_t i = 0; i < 2 && next_index >= 0; i++) {
-    uint64_t lower_bits = array[next_index--];
-    result_array[i] =
-        (next_index < 0)
-            ? lower_bits
-            : (static_cast<uint64_t>(array[next_index--]) << 32) | lower_bits;
-  }
-
-  *result = Decimal(result_array[1], result_array[0]);
-  return {};
-}
-
-/// \brief Do a division where the divisor fits into a single 32-bit integer.
-static inline Status SingleDivide(const uint32_t* dividend, int64_t dividend_length,
-                                  uint32_t divisor, bool dividend_negative,
-                                  bool divisor_negative, Decimal* result,
-                                  Decimal* remainder) {
-  uint64_t r = 0;
-  constexpr int64_t kDecimalArrayLength = Decimal::kBitWidth / sizeof(uint32_t) + 1;
-  std::array<uint32_t, kDecimalArrayLength> quotient;
-  for (int64_t i = 0; i < dividend_length; ++i) {
-    r <<= 32;
-    r |= dividend[i];
-    if (r < divisor) {
-      quotient[i] = 0;
-    } else {
-      quotient[i] = static_cast<uint32_t>(r / divisor);
-      r -= static_cast<uint64_t>(quotient[i]) * divisor;
-    }
-  }
-
-  ICEBERG_RETURN_UNEXPECTED(BuildFromArray(result, quotient.data(), dividend_length));
-
-  *remainder = static_cast<int64_t>(r);
-  FixDivisionSigns(result, remainder, dividend_negative, divisor_negative);
-  return {};
-}
-
-/// \brief Divide two Decimal values and return the result and remainder.
-static inline Status DecimalDivide(const Decimal& dividend, const Decimal& divisor,
-                                   Decimal* result, Decimal* remainder) {
-  constexpr int64_t kDecimalArrayLength = Decimal::kBitWidth / sizeof(uint32_t);
-  // Split the dividend and divisor into 32-bit integer pieces so that we can work on
-  // them.
-  std::array<uint32_t, kDecimalArrayLength + 1> dividend_array;
-  std::array<uint32_t, kDecimalArrayLength> divisor_array;
-  bool dividend_negative = dividend.high() < 0;
-  bool divisor_negative = divisor.high() < 0;
-
-  // leave an extra zero before the dividend
-  dividend_array[0] = 0;
-  int64_t dividend_length = FillInArray(dividend, dividend_array.data() + 1) + 1;
-  int64_t divisor_length = FillInArray(divisor, divisor_array.data());
-
-  if (dividend_length <= divisor_length) {
-    *remainder = dividend;
-    *result = 0;
-    return {};
-  }
-
-  if (divisor_length == 0) {
-    return Invalid("Cannot divide by zero in DecimalDivide");
-  }
-
-  if (divisor_length == 1) {
-    ICEBERG_RETURN_UNEXPECTED(SingleDivide(dividend_array.data(), dividend_length,
-                                           divisor_array[0], dividend_negative,
-                                           divisor_negative, result, remainder));
-    return {};
-  }
-
-  int64_t result_length = dividend_length - divisor_length;
-  std::array<uint32_t, kDecimalArrayLength> result_array;
-
-  assert(result_length < kDecimalArrayLength);
-
-  // Normalize by shifting both by a multiple of 2 so that the digit guessing is easier.
-  // The requirement is that divisor_array[0] is greater than 2**31.
-  int64_t normalize_bits = std::countl_zero(divisor_array[0]);
-  ShiftArrayLeft(divisor_array.data(), divisor_length, normalize_bits);
-  ShiftArrayLeft(dividend_array.data(), dividend_length, normalize_bits);
-
-  // compute each digit in the result
-  for (int64_t i = 0; i < result_length; ++i) {
-    // Guess the next digit. At worst it is two too large
-    uint32_t guess = std::numeric_limits<uint32_t>::max();
-    const auto high_dividend =
-        static_cast<uint64_t>(dividend_array[i]) << 32 | dividend_array[i + 1];
-    if (dividend_array[i] != divisor_array[0]) {
-      guess = static_cast<uint32_t>(high_dividend / divisor_array[0]);
-    }
-
-    // catch all the cases where the guess is two too large and most of the cases where it
-    // is one too large
-    auto rhat = static_cast<uint32_t>(high_dividend -
-                                      guess * static_cast<uint64_t>(divisor_array[0]));
-    while (static_cast<uint64_t>(divisor_array[1]) * guess >
-           (static_cast<uint64_t>(rhat) << 32) + dividend_array[i + 2]) {
-      guess--;
-      rhat += divisor_array[0];
-      if (static_cast<uint64_t>(rhat) < divisor_array[1]) {
-        break;
-      }
-    }
-
-    // substract off the guess * divisor from the dividend
-    uint64_t mult = 0;
-    for (int64_t j = divisor_length - 1; j >= 0; --j) {
-      mult += static_cast<uint64_t>(guess) * divisor_array[j];
-      uint32_t prev = dividend_array[i + j + 1];
-      dividend_array[i + j + 1] -= static_cast<uint32_t>(mult);
-      mult >>= 32;
-      if (dividend_array[i + j + 1] > prev) {
-        ++mult;
-      }
-    }
-    uint32_t prev = dividend_array[i];
-    dividend_array[i] -= static_cast<uint32_t>(mult);
-
-    // if guess was too big, we add back divisor
-    if (dividend_array[i] > prev) {
-      guess--;
-      uint32_t carry = 0;
-      for (int64_t j = divisor_length - 1; j >= 0; --j) {
-        const auto sum =
-            static_cast<uint64_t>(divisor_array[j]) + dividend_array[i + j + 1] + carry;
-        dividend_array[i + j + 1] = static_cast<uint32_t>(sum);
-        carry = static_cast<uint32_t>(sum >> 32);
-      }
-      dividend_array[i] += carry;
-    }
-
-    result_array[i] = guess;
-  }
-
-  // denormalize the remainder
-  ShiftArrayRight(dividend_array.data(), dividend_length, normalize_bits);
-
-  // return result and remainder
-  ICEBERG_RETURN_UNEXPECTED(BuildFromArray(result, result_array.data(), result_length));
-  ICEBERG_RETURN_UNEXPECTED(
-      BuildFromArray(remainder, dividend_array.data(), dividend_length));
-
-  FixDivisionSigns(result, remainder, dividend_negative, divisor_negative);
-  return {};
-}
-
-}  // namespace
-
-Decimal::Decimal(std::string_view str) {
-  auto result = Decimal::FromString(str);
-  if (!result) {
-    throw std::runtime_error(
-        std::format("Failed to parse Decimal from string: {}, error: {}", str,
-                    result.error().message));
-  }
-  *this = std::move(result.value());
-}
-
-Decimal& Decimal::Negate() {
-  uint64_t result_low = ~low() + 1;
-  int64_t result_high = ~high();
-  if (result_low == 0) {
-    result_high = SafeSignedAdd<int64_t>(result_high, 1);
-  }
-  *this = Decimal(result_high, result_low);
-  return *this;
-}
-
-Decimal& Decimal::Abs() { return *this < 0 ? Negate() : *this; }
-
-Decimal Decimal::Abs(const Decimal& value) {
-  Decimal result(value);
-  return result.Abs();
-}
-
-Decimal& Decimal::operator+=(const Decimal& other) {
-  int64_t result_high = SafeSignedAdd(high(), other.high());
-  uint64_t result_low = low() + other.low();
-  result_high = SafeSignedAdd<int64_t>(result_high, result_low < low());
-  *this = Decimal(result_high, result_low);
-  return *this;
-}
-
-Decimal& Decimal::operator-=(const Decimal& other) {
-  int64_t result_high = SafeSignedSubtract(high(), other.high());
-  uint64_t result_low = low() - other.low();
-  result_high = SafeSignedSubtract<int64_t>(result_high, result_low > low());
-  *this = Decimal(result_high, result_low);
-  return *this;
-}
-
-Decimal& Decimal::operator*=(const Decimal& other) {
-  // Since the max value of Decimal is supposed to be 1e38 - 1 and the min the
-  // negation taking the abosolute values here should aways be safe.
-  const bool negate = Sign() != other.Sign();
-  Decimal x = Decimal::Abs(*this);
-  Decimal y = Decimal::Abs(other);
-
-  Uint128 r(x);
-  r *= Uint128(y);
-
-  *this = Decimal(static_cast<int64_t>(r.high()), r.low());
-  if (negate) {
-    Negate();
-  }
-  return *this;
-}
-
-Result<std::pair<Decimal, Decimal>> Decimal::Divide(const Decimal& divisor) const {
-  std::pair<Decimal, Decimal> result;
-  ICEBERG_RETURN_UNEXPECTED(DecimalDivide(*this, divisor, &result.first, &result.second));
-  return result;
-}
-
-Decimal& Decimal::operator/=(const Decimal& other) {
-  Decimal remainder;
-  auto s = DecimalDivide(*this, other, this, &remainder);
-  assert(s);
-  return *this;
-}
-
-Decimal& Decimal::operator|=(const Decimal& other) {
-  data_ |= other.data_;
-  return *this;
-}
-
-Decimal& Decimal::operator&=(const Decimal& other) {
-  data_ &= other.data_;
-  return *this;
-}
-
-Decimal& Decimal::operator<<=(uint32_t shift) {
-  if (shift != 0) {
-    uint64_t low_;
-    int64_t high_;
-    if (shift < 64) {
-      high_ = SafeLeftShift<int64_t>(high(), shift);
-      high_ |= low() >> (64 - shift);
-      low_ = low() << shift;
-    } else if (shift < 128) {
-      high_ = static_cast<int64_t>(low() << (shift - 64));
-      low_ = 0;
-    } else {
-      high_ = 0;
-      low_ = 0;
-    }
-    *this = Decimal(high_, low_);
-  }
-
-  return *this;
-}
-
-Decimal& Decimal::operator>>=(uint32_t shift) {
-  if (shift != 0) {
-    uint64_t low_;
-    int64_t high_;
-    if (shift < 64) {
-      low_ = low() >> shift;
-      low_ |= static_cast<uint64_t>(high()) << (64 - shift);
-      high_ = high() >> shift;
-    } else if (shift < 128) {
-      low_ = static_cast<uint64_t>(high() >> (shift - 64));
-      high_ = high() >> 63;
-    } else {
-      high_ = high() >> 63;
-      low_ = static_cast<uint64_t>(high_);
-    }
-    *this = Decimal(high_, low_);
-  }
-
-  return *this;
-}
-
-namespace {
-
 struct DecimalComponents {
   std::string_view while_digits;
   std::string_view fractional_digits;
@@ -616,7 +204,7 @@ constexpr std::array<Decimal, Decimal::kMaxScale + 1> kDecimal128PowersOfTen = {
     Decimal(542101086242752217LL, 68739955140067328ULL),
     Decimal(5421010862427522170LL, 687399551400673280ULL)};
 
-static inline void ShiftAndAdd(std::string_view input, std::array<uint64_t, 2>& out) {
+static inline void ShiftAndAdd(std::string_view input, uint128_t& out) {
   for (size_t pos = 0; pos < input.size();) {
     const size_t group_size = std::min(kInt64DecimalDigits, input.size() - pos);
     const uint64_t multiple = kUInt64PowersOfTen[group_size];
@@ -624,81 +212,11 @@ static inline void ShiftAndAdd(std::string_view input, std::array<uint64_t, 2>&
 
     std::from_chars(input.data() + pos, input.data() + pos + group_size, value);
 
-    for (size_t i = 0; i < 2; ++i) {
-      Uint128 tmp(out[i]);
-      tmp *= Uint128(multiple);
-      tmp += Uint128(value);
-      out[i] = tmp.low();
-      value = tmp.high();
-    }
+    out = out * multiple + value;
     pos += group_size;
   }
 }
 
-// Returns a mask for the bit_index lower order bits.
-// Only valid for bit_index in the range [0, 64).
-constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
-  return (static_cast<uint64_t>(1) << bit_index) - 1;
-}
-
-static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array,
-                                            std::string* out) {
-  const auto most_significant_non_zero = std::ranges::find_if(
-      array.rbegin(), array.rend(), [](uint64_t v) { return v != 0; });
-  if (most_significant_non_zero == array.rend()) {
-    out->push_back('0');
-    return;
-  }
-
-  size_t most_significant_elem_idx = &*most_significant_non_zero - array.data();
-  std::array<uint64_t, 2> copy = array;
-  constexpr uint32_t k1e9 = 1000000000U;
-  constexpr size_t kNumBits = 128;
-  // Segments will contain the array split into groups that map to decimal digits, in
-  // little endian order. Each segment will hold at most 9 decimal digits. For example, if
-  // the input represents 9876543210123456789, then segments will be [123456789,
-  // 876543210, 9].
-  // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
-  // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
-  std::array<uint32_t, (kNumBits + 28) / 29> segments;
-  size_t num_segments = 0;
-  uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
-
-  do {
-    // Compute remainder = copy % 1e9 and copy = copy / 1e9.
-    uint32_t remainder = 0;
-    uint64_t* elem = most_significant_elem;
-    do {
-      // Compute dividend = (remainder << 32) | *elem  (a virtual 96-bit integer);
-      // *elem = dividend / 1e9;
-      // remainder = dividend % 1e9.
-      auto hi = static_cast<uint32_t>(*elem >> 32);
-      auto lo = static_cast<uint32_t>(*elem & LeastSignificantBitMask(32));
-      uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
-      uint64_t quotient_hi = dividend_hi / k1e9;
-      remainder = static_cast<uint32_t>(dividend_hi % k1e9);
-      uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
-      uint64_t quotient_lo = dividend_lo / k1e9;
-      remainder = static_cast<uint32_t>(dividend_lo % k1e9);
-      *elem = (quotient_hi << 32) | quotient_lo;
-    } while (elem-- != copy.data());
-
-    segments[num_segments++] = remainder;
-  } while (*most_significant_elem != 0 || most_significant_elem-- != copy.data());
-
-  const uint32_t* segment = &segments[num_segments - 1];
-  std::stringstream oss;
-  // First segment is formatted as-is.
-  oss << *segment;
-  // Remaining segments are formatted with leading zeros to fill 9 digits. e.g. 123 is
-  // formatted as "000000123"
-  while (segment != segments.data()) {
-    --segment;
-    oss << std::setfill('0') << std::setw(9) << *segment;
-  }
-  out->append(oss.str());
-}
-
 static void AdjustIntegerStringWithScale(std::string* str, int32_t scale) {
   if (scale == 0) {
     return;
@@ -766,6 +284,91 @@ static void AdjustIntegerStringWithScale(std::string* str, int32_t scale) {
 
 }  // namespace
 
+Decimal::Decimal(std::string_view str) {
+  auto result = Decimal::FromString(str);
+  if (!result) {
+    throw std::runtime_error(
+        std::format("Failed to parse Decimal from string: {}, error: {}", str,
+                    result.error().message));
+  }
+  *this = std::move(result.value());
+}
+
+Decimal& Decimal::Negate() {
+  uint128_t u = static_cast<uint128_t>(~data_) + 1;
+  data_ = static_cast<int128_t>(u);
+  return *this;
+}
+
+Decimal& Decimal::Abs() { return *this < 0 ? Negate() : *this; }
+
+Decimal Decimal::Abs(const Decimal& value) {
+  Decimal result(value);
+  return result.Abs();
+}
+
+Decimal& Decimal::operator+=(const Decimal& other) {
+  data_ += other.data_;
+  return *this;
+}
+
+Decimal& Decimal::operator-=(const Decimal& other) {
+  data_ -= other.data_;
+  return *this;
+}
+
+Decimal& Decimal::operator*=(const Decimal& other) {
+  data_ *= other.data_;
+  return *this;
+}
+
+Result<std::pair<Decimal, Decimal>> Decimal::Divide(const Decimal& divisor) const {
+  std::pair<Decimal, Decimal> result;
+  if (divisor == 0) {
+    return Invalid("Cannot divide by zero in Decimal::Divide");
+  }
+  return std::make_pair(*this / divisor, *this % divisor);
+}
+
+Decimal& Decimal::operator/=(const Decimal& other) {
+  data_ /= other.data_;
+  return *this;
+}
+
+Decimal& Decimal::operator|=(const Decimal& other) {
+  data_ |= other.data_;
+  return *this;
+}
+
+Decimal& Decimal::operator&=(const Decimal& other) {
+  data_ &= other.data_;
+  return *this;
+}
+
+Decimal& Decimal::operator<<=(uint32_t shift) {
+  if (shift != 0) {
+    if (shift < 128) {
+      data_ = static_cast<int128_t>(static_cast<uint128_t>(data_) << shift);
+    } else {
+      data_ = 0;
+    }
+  }
+
+  return *this;
+}
+
+Decimal& Decimal::operator>>=(uint32_t shift) {
+  if (shift != 0) {
+    if (shift < 128) {
+      data_ >>= shift;
+    } else {
+      data_ = (data_ < 0) ? -1 : 0;
+    }
+  }
+
+  return *this;
+}
+
 Result<std::string> Decimal::ToString(int32_t scale) const {
   if (scale < -kMaxScale || scale > kMaxScale) {
     return InvalidArgument(
@@ -778,17 +381,47 @@ Result<std::string> Decimal::ToString(int32_t scale) const {
 }
 
 std::string Decimal::ToIntegerString() const {
-  std::string result;
-  if (high() < 0) {
-    result.push_back('-');
-    Decimal abs = *this;
-    abs.Negate();
-    AppendLittleEndianArrayToString({abs.low(), static_cast<uint64_t>(abs.high())},
-                                    &result);
-  } else {
-    AppendLittleEndianArrayToString({low(), static_cast<uint64_t>(high())}, &result);
+  if (data_ == 0) {
+    return "0";
   }
-  return result;
+
+  bool negative = data_ < 0;
+  uint128_t uval =
+      negative ? -static_cast<uint128_t>(data_) : static_cast<uint128_t>(data_);
+
+  constexpr uint32_t k1e9 = 1000000000U;
+  constexpr size_t kNumBits = 128;
+  // Segments will contain the array split into groups that map to decimal digits, in
+  // little endian order. Each segment will hold at most 9 decimal digits. For example, if
+  // the input represents 9876543210123456789, then segments will be [123456789,
+  // 876543210, 9].
+  // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
+  // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
+  std::array<uint32_t, (kNumBits + 28) / 29> segments;
+  size_t num_segments = 0;
+
+  while (uval > 0) {
+    // Compute remainder = uval % 1e9 and uval = uval / 1e9.
+    auto remainder = static_cast<uint32_t>(uval % k1e9);
+    uval /= k1e9;
+    segments[num_segments++] = remainder;
+  }
+
+  std::ostringstream oss;
+  if (negative) {
+    oss << '-';
+  }
+
+  // First segment is formatted as-is.
+  oss << segments[num_segments - 1];
+
+  // Remaining segments are formatted with leading zeros to fill 9 digits. e.g. 123 is
+  // formatted as "000000123"
+  for (size_t i = num_segments - 1; i-- > 0;) {
+    oss << std::setw(9) << std::setfill('0') << segments[i];
+  }
+
+  return oss.str();
 }
 
 Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
@@ -820,9 +453,10 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
 
   // Index 1 is the high part, index 0 is the low part
   std::array<uint64_t, 2> result_array = {0, 0};
-  ShiftAndAdd(dec.while_digits, result_array);
-  ShiftAndAdd(dec.fractional_digits, result_array);
-  Decimal result(static_cast<int64_t>(result_array[1]), result_array[0]);
+  uint128_t value = 0;
+  ShiftAndAdd(dec.while_digits, value);
+  ShiftAndAdd(dec.fractional_digits, value);
+  Decimal result(static_cast<int128_t>(value));
 
   if (dec.sign == '-') {
     result.Negate();
@@ -1384,40 +1018,38 @@ std::ostream& operator<<(std::ostream& os, const Decimal& decimal) {
 }
 
 // Unary operators
-ICEBERG_EXPORT Decimal operator-(const Decimal& operand) {
-  Decimal result(operand.high(), operand.low());
+Decimal operator-(const Decimal& operand) {
+  Decimal result(operand.data_);
   return result.Negate();
 }
 
-ICEBERG_EXPORT Decimal operator~(const Decimal& operand) {
-  return {~operand.high(), ~operand.low()};
-}
+Decimal operator~(const Decimal& operand) { return {~operand.data_}; }
 
 // Binary operators
-ICEBERG_EXPORT Decimal operator+(const Decimal& lhs, const Decimal& rhs) {
+Decimal operator+(const Decimal& lhs, const Decimal& rhs) {
   Decimal result(lhs);
   result += rhs;
   return result;
 }
 
-ICEBERG_EXPORT Decimal operator-(const Decimal& lhs, const Decimal& rhs) {
+Decimal operator-(const Decimal& lhs, const Decimal& rhs) {
   Decimal result(lhs);
   result -= rhs;
   return result;
 }
 
-ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs) {
+Decimal operator*(const Decimal& lhs, const Decimal& rhs) {
   Decimal result(lhs);
   result *= rhs;
   return result;
 }
 
-ICEBERG_EXPORT Decimal operator/(const Decimal& lhs, const Decimal& rhs) {
-  return lhs.Divide(rhs).value().first;
+Decimal operator/(const Decimal& lhs, const Decimal& rhs) {
+  return lhs.data_ / rhs.data_;
 }
 
-ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
-  return lhs.Divide(rhs).value().second;
+Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
+  return lhs.data_ % rhs.data_;
 }
 
 }  // namespace iceberg
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index 322f706ea..678074b91 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -54,6 +54,10 @@ class ICEBERG_EXPORT Decimal {
   /// \brief Default constructor initializes to zero.
   constexpr Decimal() noexcept = default;
 
+  /// \brief Create a Decimal from a 128-bit integer.
+  constexpr Decimal(int128_t value) noexcept  // NOLINT
+      : data_(value) {}
+
   /// \brief Create a Decimal from any integer not wider than 64 bits.
   template <typename T>
     requires(std::is_integral_v<T> && (sizeof(T) <= sizeof(uint64_t)))
@@ -129,6 +133,9 @@ class ICEBERG_EXPORT Decimal {
     return result;
   }
 
+  /// \brief Get the underlying 128-bit integer representation of the number.
+  constexpr int128_t value() const { return data_; }
+
   /// \brief Get the high bits of the two's complement representation of the number.
   constexpr int64_t high() const { return static_cast<int64_t>(data_ >> 64); }
 
@@ -218,7 +225,7 @@ class ICEBERG_EXPORT Decimal {
   }
 
   const uint8_t* native_endian_bytes() const {
-    return reinterpret_cast<const uint8_t*>(data_);
+    return reinterpret_cast<const uint8_t*>(&data_);
   }
 
   /// \brief Returns the raw bytes of the value in native-endian byte order.
@@ -240,6 +247,15 @@ class ICEBERG_EXPORT Decimal {
     return lhs.data_ != rhs.data_;
   }
 
+  friend Decimal operator-(const Decimal& operand);
+  friend Decimal operator~(const Decimal& operand);
+
+  friend Decimal operator+(const Decimal& lhs, const Decimal& rhs);
+  friend Decimal operator-(const Decimal& lhs, const Decimal& rhs);
+  friend Decimal operator*(const Decimal& lhs, const Decimal& rhs);
+  friend Decimal operator/(const Decimal& lhs, const Decimal& rhs);
+  friend Decimal operator%(const Decimal& lhs, const Decimal& rhs);
+
  private:
   static constexpr int32_t highIndex() {
     if constexpr (std::endian::native == std::endian::little) {
@@ -263,13 +279,5 @@ class ICEBERG_EXPORT Decimal {
 ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal);
 
 // Unary operators
-ICEBERG_EXPORT Decimal operator-(const Decimal& operand);
-ICEBERG_EXPORT Decimal operator~(const Decimal& operand);
-
-ICEBERG_EXPORT Decimal operator+(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT Decimal operator-(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT Decimal operator/(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs);
 
 }  // namespace iceberg

From a46b2f49b4cee20caf1dcf24bf1693ac5be2b19e Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 29 Aug 2025 23:27:56 +0800
Subject: [PATCH 15/20] fix: remove unrelated codes

---
 src/iceberg/CMakeLists.txt  |  1 -
 src/iceberg/util/decimal.cc | 25 +++++++----------------
 src/iceberg/util/decimal.h  | 18 -----------------
 test/decimal_test.cc        | 40 +++++++++++++------------------------
 4 files changed, 21 insertions(+), 63 deletions(-)

diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
index 18ec6c60e..07070d5d5 100644
--- a/src/iceberg/CMakeLists.txt
+++ b/src/iceberg/CMakeLists.txt
@@ -19,7 +19,6 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
                      "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/src>")
 set(ICEBERG_SOURCES
     catalog/in_memory_catalog.cc
-    util/decimal.cc
     expression/expression.cc
     expression/literal.cc
     file_reader.cc
diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index 80fad5cc5..faf312238 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -24,21 +24,17 @@
 
 #include "iceberg/util/decimal.h"
 
-#include <algorithm>
 #include <array>
 #include <bit>
 #include <cassert>
 #include <charconv>
 #include <climits>
 #include <cmath>
-#include <cstddef>
 #include <cstdint>
 #include <cstring>
 #include <format>
 #include <iomanip>
-#include <iostream>
 #include <limits>
-#include <ostream>
 #include <sstream>
 #include <string>
 #include <string_view>
@@ -427,11 +423,11 @@ std::string Decimal::ToIntegerString() const {
 Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
                                     int32_t* scale) {
   if (str.empty()) {
-    return InvalidArgument("Decimal::FromString: empty string is not a valid Decimal");
+    return InvalidArgument("Empty string is not a valid Decimal");
   }
   DecimalComponents dec;
   if (!ParseDecimalComponents(str, &dec)) {
-    return InvalidArgument("Decimal::FromString: invalid decimal string '{}'", str);
+    return InvalidArgument("Invalid decimal string '{}'", str);
   }
 
   // Count number of significant digits (without leading zeros)
@@ -451,8 +447,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
     parsed_scale = static_cast<int32_t>(dec.fractional_digits.size());
   }
 
-  // Index 1 is the high part, index 0 is the low part
-  std::array<uint64_t, 2> result_array = {0, 0};
   uint128_t value = 0;
   ShiftAndAdd(dec.while_digits, value);
   ShiftAndAdd(dec.fractional_digits, value);
@@ -466,9 +460,8 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
     // For the scale to 0, to avoid negative scales (due to compatibility issues with
     // external systems such as databases)
     if (parsed_scale < -kMaxScale) {
-      return InvalidArgument(
-          "Decimal::FromString: scale must be in the range [-{}, {}], was {}", kMaxScale,
-          kMaxScale, parsed_scale);
+      return InvalidArgument("scale must be in the range [-{}, {}], was {}", kMaxScale,
+                             kMaxScale, parsed_scale);
     }
 
     result *= kDecimal128PowersOfTen[-parsed_scale];
@@ -552,8 +545,6 @@ struct RealTraits<float> {
   static constexpr const float* powers_of_ten() { return kFloatPowersOfTen.data(); }
 
   static constexpr float two_to_64(float x) { return x * 1.8446744e+19f; }
-  static constexpr float two_to_128(float x) { return x == 0 ? 0 : kFloatInf; }
-  static constexpr float two_to_192(float x) { return x == 0 ? 0 : kFloatInf; }
 
   static constexpr int32_t kMantissaBits = 24;
   // ceil(log10(2 ^ kMantissaBits))
@@ -567,8 +558,6 @@ struct RealTraits<double> {
   static constexpr const double* powers_of_ten() { return kDoublePowersOfTen.data(); }
 
   static constexpr double two_to_64(double x) { return x * 1.8446744073709552e+19; }
-  static constexpr double two_to_128(double x) { return x * 3.402823669209385e+38; }
-  static constexpr double two_to_192(double x) { return x * 6.277101735386681e+57; }
 
   static constexpr int32_t kMantissaBits = 53;
   // ceil(log10(2 ^ kMantissaBits))
@@ -791,7 +780,7 @@ struct DecimalRealConversion {
     return x;
   }
 
-  /// An approximate conversion from Decimal128 to Real that guarantees:
+  /// An approximate conversion from Decimal to Real that guarantees:
   /// 1. If the decimal is an integer, the conversion is exact.
   /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
   ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
@@ -864,7 +853,7 @@ static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length)
   // Using memcpy instead of special casing for length
   // and doing the conversion in 16, 32 parts, which could
   // possibly create unaligned memory access on certain platforms
-  memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
+  std::memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
 
   if constexpr (std::endian::native == std::endian::little) {
     return std::byteswap(result);
@@ -1008,7 +997,7 @@ double Decimal::ToDouble(int32_t scale) const {
 
 std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
   std::array<uint8_t, kByteWidth> out{{0}};
-  memcpy(out.data(), &data_, kByteWidth);
+  std::memcpy(out.data(), &data_, kByteWidth);
   return out;
 }
 
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index 678074b91..f765fc468 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -257,27 +257,9 @@ class ICEBERG_EXPORT Decimal {
   friend Decimal operator%(const Decimal& lhs, const Decimal& rhs);
 
  private:
-  static constexpr int32_t highIndex() {
-    if constexpr (std::endian::native == std::endian::little) {
-      return 1;
-    } else {
-      return 0;
-    }
-  }
-
-  static constexpr int32_t lowIndex() {
-    if constexpr (std::endian::native == std::endian::little) {
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-
   int128_t data_{0};
 };
 
 ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal);
 
-// Unary operators
-
 }  // namespace iceberg
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 1ba633bf0..61c5cca74 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -23,11 +23,9 @@
 #include <bit>
 #include <cmath>
 #include <cstdint>
-#include <vector>
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-#include <sys/types.h>
 
 #include "iceberg/util/int128.h"
 #include "matchers.h"
@@ -48,11 +46,6 @@ void AssertDecimalFromString(const std::string& s, const Decimal& expected,
   EXPECT_EQ(expected_scale, scale);
 }
 
-Decimal DecimalFromInt128(int128_t value) {
-  return {static_cast<int64_t>(value >> 64),
-          static_cast<uint64_t>(value & 0xFFFFFFFFFFFFFFFFULL)};
-}
-
 }  // namespace
 
 TEST(DecimalTest, Basics) {
@@ -279,23 +272,21 @@ TEST(DecimalTest, FromStringInvalid) {
   // Empty string
   auto result = Decimal::FromString("");
   ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
-  ASSERT_THAT(result, HasErrorMessage(
-                          "Decimal::FromString: empty string is not a valid Decimal"));
+  ASSERT_THAT(result, HasErrorMessage("Empty string is not a valid Decimal"));
   for (const auto& invalid_string :
        std::vector<std::string>{"-", "0.0.0", "0-13-32", "a", "-23092.235-",
                                 "-+23092.235", "+-23092.235", "00a", "1e1a", "0.00123D/3",
                                 "1.23eA8", "1.23E+3A", "-1.23E--5", "1.2345E+++07"}) {
     auto result = Decimal::FromString(invalid_string);
     ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
-    ASSERT_THAT(result, HasErrorMessage("Decimal::FromString: invalid decimal string"));
+    ASSERT_THAT(result, HasErrorMessage("Invalid decimal string"));
   }
 
   for (const auto& invalid_string :
        std::vector<std::string>{"1e39", "-1e39", "9e39", "-9e39", "9.9e40", "-9.9e40"}) {
     auto result = Decimal::FromString(invalid_string);
     ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
-    ASSERT_THAT(result,
-                HasErrorMessage("Decimal::FromString: scale must be in the range"));
+    ASSERT_THAT(result, HasErrorMessage("scale must be in the range"));
   }
 }
 
@@ -1094,11 +1085,10 @@ TEST(DecimalTestFunctionality, Multiply) {
   for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) {
     for (auto y :
          std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 0, 1, 2, 32, INT32_MAX}) {
-      Decimal decimal_x = DecimalFromInt128(x);
-      Decimal decimal_y = DecimalFromInt128(y);
+      Decimal decimal_x(x);
+      Decimal decimal_y(y);
       Decimal result = decimal_x * decimal_y;
-      EXPECT_EQ(DecimalFromInt128(x * y), result)
-          << " x: " << decimal_x << " y: " << decimal_y;
+      EXPECT_EQ(Decimal(x * y), result) << " x: " << decimal_x << " y: " << decimal_y;
     }
   }
 }
@@ -1111,11 +1101,10 @@ TEST(DecimalTestFunctionality, Divide) {
 
   for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) {
     for (auto y : std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 1, 2, 32, INT32_MAX}) {
-      Decimal decimal_x = DecimalFromInt128(x);
-      Decimal decimal_y = DecimalFromInt128(y);
+      Decimal decimal_x(x);
+      Decimal decimal_y(y);
       Decimal result = decimal_x / decimal_y;
-      EXPECT_EQ(DecimalFromInt128(x / y), result)
-          << " x: " << decimal_x << " y: " << decimal_y;
+      EXPECT_EQ(Decimal(x / y), result) << " x: " << decimal_x << " y: " << decimal_y;
     }
   }
 }
@@ -1129,11 +1118,10 @@ TEST(DecimalTestFunctionality, Modulo) {
   // Test some edge cases
   for (auto x : std::vector<int128_t>{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) {
     for (auto y : std::vector<int128_t>{-INT32_MAX, -32, -2, -1, 1, 2, 32, INT32_MAX}) {
-      Decimal decimal_x = DecimalFromInt128(x);
-      Decimal decimal_y = DecimalFromInt128(y);
+      Decimal decimal_x(x);
+      Decimal decimal_y(y);
       Decimal result = decimal_x % decimal_y;
-      EXPECT_EQ(DecimalFromInt128(x % y), result)
-          << " x: " << decimal_x << " y: " << decimal_y;
+      EXPECT_EQ(Decimal(x % y), result) << " x: " << decimal_x << " y: " << decimal_y;
     }
   }
 }
@@ -1206,8 +1194,8 @@ TEST(DecimalTestFunctionality, FitsInPrecision) {
 
 TEST(DecimalTest, LeftShift) {
   auto check = [](int128_t x, uint32_t bits) {
-    auto expected = DecimalFromInt128(x << bits);
-    auto actual = DecimalFromInt128(x) << bits;
+    auto expected = Decimal(x << bits);
+    auto actual = Decimal(x) << bits;
     ASSERT_EQ(actual.low(), expected.low());
     ASSERT_EQ(actual.high(), expected.high());
   };

From 922d930f57d07c8fd000acb419205dd411e685e5 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 3 Sep 2025 23:05:38 +0800
Subject: [PATCH 16/20] fix: review comments

---
 LICENSE                     |  6 ---
 src/iceberg/util/decimal.cc | 97 +++++++++++++++----------------------
 src/iceberg/util/decimal.h  | 17 ++++---
 src/iceberg/util/macros.h   |  4 ++
 test/CMakeLists.txt         |  7 +--
 5 files changed, 55 insertions(+), 76 deletions(-)

diff --git a/LICENSE b/LICENSE
index b7f4c880b..d8ddd9575 100644
--- a/LICENSE
+++ b/LICENSE
@@ -250,12 +250,6 @@ The file src/iceberg/util/visit_type.h contains code adapted from
 
 https://github.com/apache/arrow/blob/main/cpp/src/arrow/visit_type_inline.h
 
-Copyright: 2016-2025 The Apache Software Foundation.
-Home page: https://arrow.apache.org/
-License: https://www.apache.org/licenses/LICENSE-2.0
-
---------------------------------------------------------------------------------
-
 The file src/iceberg/util/decimal.h contains code adapted from
 
 https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index faf312238..4e8e68928 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -51,15 +51,15 @@ namespace {
 // Signed left shift with well-defined behaviour on negative numbers or overflow
 template <typename SignedInt, typename Shift>
   requires std::is_signed_v<SignedInt> && std::is_integral_v<Shift>
-constexpr SignedInt SafeLeftShift(SignedInt u, Shift shift) {
+constexpr SignedInt SafeLeftShift(SignedInt u, Shift bits) {
   using UnsignedInt = std::make_unsigned_t<SignedInt>;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
+  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << bits);
 }
 
 struct DecimalComponents {
   std::string_view while_digits;
   std::string_view fractional_digits;
-  int32_t exponent;
+  int32_t exponent{0};
   char sign{0};
   bool has_exponent{false};
 };
@@ -100,9 +100,8 @@ bool ParseDecimalComponents(std::string_view str, DecimalComponents* out) {
 
   // Optional dot
   if (IsDot(str[pos])) {
-    ++pos;
     // Second run of digits after the dot
-    pos = ParseDigitsRun(str, pos, &out->fractional_digits);
+    pos = ParseDigitsRun(str, ++pos, &out->fractional_digits);
   }
   if (out->fractional_digits.empty() && out->while_digits.empty()) {
     // Need at least some digits (whole or fractional)
@@ -200,7 +199,7 @@ constexpr std::array<Decimal, Decimal::kMaxScale + 1> kDecimal128PowersOfTen = {
     Decimal(542101086242752217LL, 68739955140067328ULL),
     Decimal(5421010862427522170LL, 687399551400673280ULL)};
 
-static inline void ShiftAndAdd(std::string_view input, uint128_t& out) {
+inline void ShiftAndAdd(std::string_view input, uint128_t& out) {
   for (size_t pos = 0; pos < input.size();) {
     const size_t group_size = std::min(kInt64DecimalDigits, input.size() - pos);
     const uint64_t multiple = kUInt64PowersOfTen[group_size];
@@ -213,12 +212,11 @@ static inline void ShiftAndAdd(std::string_view input, uint128_t& out) {
   }
 }
 
-static void AdjustIntegerStringWithScale(std::string* str, int32_t scale) {
+void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
   if (scale == 0) {
     return;
   }
-  assert(str != nullptr);
-  assert(!str->empty());
+  ICEBERG_DCHECK(str != nullptr && !str->empty(), "str must not be null or empty");
   const bool is_negative = str->front() == '-';
   const auto is_negative_offset = static_cast<int32_t>(is_negative);
   const auto len = static_cast<int32_t>(str->size());
@@ -291,7 +289,7 @@ Decimal::Decimal(std::string_view str) {
 }
 
 Decimal& Decimal::Negate() {
-  uint128_t u = static_cast<uint128_t>(~data_) + 1;
+  uint128_t u = ~static_cast<uint128_t>(data_) + 1;
   data_ = static_cast<int128_t>(u);
   return *this;
 }
@@ -341,25 +339,17 @@ Decimal& Decimal::operator&=(const Decimal& other) {
   return *this;
 }
 
-Decimal& Decimal::operator<<=(uint32_t shift) {
-  if (shift != 0) {
-    if (shift < 128) {
-      data_ = static_cast<int128_t>(static_cast<uint128_t>(data_) << shift);
-    } else {
-      data_ = 0;
-    }
+Decimal& Decimal::operator<<=(uint32_t bits) {
+  if (bits != 0) {
+    data_ = static_cast<int128_t>(static_cast<uint128_t>(data_) << bits);
   }
 
   return *this;
 }
 
-Decimal& Decimal::operator>>=(uint32_t shift) {
-  if (shift != 0) {
-    if (shift < 128) {
-      data_ >>= shift;
-    } else {
-      data_ = (data_ < 0) ? -1 : 0;
-    }
+Decimal& Decimal::operator>>=(uint32_t bits) {
+  if (bits != 0) {
+    data_ >>= bits;
   }
 
   return *this;
@@ -372,7 +362,7 @@ Result<std::string> Decimal::ToString(int32_t scale) const {
         kMaxScale, scale);
   }
   std::string str(ToIntegerString());
-  AdjustIntegerStringWithScale(&str, scale);
+  AdjustIntegerStringWithScale(scale, &str);
   return str;
 }
 
@@ -571,7 +561,7 @@ struct DecimalRealConversion {
   template <typename Real>
   static Real PowerOfTen(int32_t exp) {
     constexpr int32_t N = kPrecomputedPowersOfTen;
-    assert(exp >= -N && exp <= N);
+    ICEBERG_DCHECK(exp >= -N && exp <= N, "");
     return RealTraits<Real>::powers_of_ten()[N + exp];
   }
 
@@ -590,7 +580,7 @@ struct DecimalRealConversion {
   static constexpr int32_t kMaxScale = Decimal::kMaxScale;
 
   static constexpr auto& DecimalPowerOfTen(int32_t exp) {
-    assert(exp >= 0 && exp <= kMaxPrecision);
+    ICEBERG_DCHECK(exp >= 0 && exp <= kMaxPrecision, "");
     return kDecimal128PowersOfTen[exp];
   }
 
@@ -635,11 +625,12 @@ struct DecimalRealConversion {
   }
 
   template <typename Real>
-  static Result<Decimal> FromPositiveApprox(Real real, int32_t precision, int32_t scale) {
+  static Result<Decimal> FromPositiveRealApprox(Real real, int32_t precision,
+                                                int32_t scale) {
     // Approximate algorithm that operates in the FP domain (thus subject
     // to precision loss).
-    const auto x = std::nearbyint(real * PowerOfTen<double>(scale));
-    const auto max_abs = PowerOfTen<double>(precision);
+    const auto x = std::nearbyint(real * PowerOfTen<Real>(scale));
+    const auto max_abs = PowerOfTen<Real>(precision);
     if (x <= -max_abs || x >= max_abs) {
       return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
                      real, precision, scale);
@@ -649,10 +640,10 @@ struct DecimalRealConversion {
     const auto high = std::floor(std::ldexp(x, -64));
     const auto low = x - std::ldexp(high, 64);
 
-    assert(high >= 0);
-    assert(high < 9.223372036854776e+18);  // 2**63
-    assert(low >= 0);
-    assert(low < 1.8446744073709552e+19);  // 2**64
+    ICEBERG_DCHECK(high >= 0, "");
+    ICEBERG_DCHECK(high < 9.223372036854776e+18, "");  // 2**63
+    ICEBERG_DCHECK(low >= 0, "");
+    ICEBERG_DCHECK(low < 1.8446744073709552e+19, "");  // 2**64
     return Decimal(static_cast<int64_t>(high), static_cast<uint64_t>(low));
   }
 
@@ -665,7 +656,7 @@ struct DecimalRealConversion {
     // closest to `real * 10^scale`.
     if (scale < 0) {
       // Negative scales are not handled below, fall back to approx algorithm
-      return FromPositiveApprox(real, precision, scale);
+      return FromPositiveRealApprox(real, precision, scale);
     }
 
     // 1. Check that `real` is within acceptable bounds.
@@ -737,7 +728,8 @@ struct DecimalRealConversion {
           total_exp += exp;
           // The supplementary right shift required so that
           // `x * 10^total_exp / 2^total_shift` fits in the decimal.
-          assert(static_cast<size_t>(total_exp) < sizeof(kCeilLog2PowersOfTen));
+          ICEBERG_DCHECK(static_cast<size_t>(total_exp) < sizeof(kCeilLog2PowersOfTen),
+                         "");
           const int32_t bits =
               std::min(right_shift_by, kCeilLog2PowersOfTen[total_exp] - total_shift);
           total_shift += bits;
@@ -799,7 +791,7 @@ struct DecimalRealConversion {
 
     // Split decimal into whole and fractional parts to avoid precision loss
     auto s = decimal.GetWholeAndFraction(scale);
-    assert(s);
+    ICEBERG_DCHECK(s, "Decimal::GetWholeAndFraction failed");
 
     Real whole = ToRealPositiveNoSplit<Real>(s->first, 0);
     Real fraction = ToRealPositiveNoSplit<Real>(s->second, scale);
@@ -809,8 +801,8 @@ struct DecimalRealConversion {
 
   template <typename Real>
   static Result<Decimal> FromReal(Real value, int32_t precision, int32_t scale) {
-    assert(precision >= 1 && precision <= kMaxPrecision);
-    assert(scale >= -kMaxScale && scale <= kMaxScale);
+    ICEBERG_DCHECK(precision >= 1 && precision <= kMaxPrecision, "");
+    ICEBERG_DCHECK(scale >= -kMaxScale && scale <= kMaxScale, "");
 
     if (!std::isfinite(value)) {
       return InvalidArgument("Cannot convert {} to Decimal", value);
@@ -830,7 +822,7 @@ struct DecimalRealConversion {
 
   template <typename Real>
   static Real ToReal(const Decimal& decimal, int32_t scale) {
-    assert(scale >= -kMaxScale && scale <= kMaxScale);
+    ICEBERG_DCHECK(scale >= -kMaxScale && scale <= kMaxScale, "");
 
     if (decimal.IsNegative()) {
       // Convert the absolute value to avoid precision loss
@@ -866,7 +858,7 @@ static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
                                       const Decimal& multiplier, Decimal* result) {
   if (delta_scale < 0) {
     auto res = value.Divide(multiplier);
-    assert(res);
+    ICEBERG_DCHECK(res, "Decimal::Divide failed");
     *result = res->first;
     return res->second != 0;
   }
@@ -886,7 +878,7 @@ Result<Decimal> Decimal::FromReal(double x, int32_t precision, int32_t scale) {
 }
 
 Result<std::pair<Decimal, Decimal>> Decimal::GetWholeAndFraction(int32_t scale) const {
-  assert(scale >= 0 && scale <= kMaxScale);
+  ICEBERG_DCHECK(scale >= 0 && scale <= kMaxScale, "");
 
   Decimal multiplier(kDecimal128PowersOfTen[scale]);
   return Divide(multiplier);
@@ -946,19 +938,6 @@ Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
 }
 
 Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
-  if (orig_scale < 0 || orig_scale > kMaxScale) {
-    return InvalidArgument(
-        "Decimal::Rescale: original scale must be in the range [0, {}], "
-        "was {}",
-        kMaxScale, orig_scale);
-  }
-  if (new_scale < 0 || new_scale > kMaxScale) {
-    return InvalidArgument(
-        "Decimal::Rescale: new scale must be in the range [0, {}], "
-        "was {}",
-        kMaxScale, new_scale);
-  }
-
   if (orig_scale == new_scale) {
     return *this;
   }
@@ -967,7 +946,7 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
   const int32_t abs_delta_scale = std::abs(delta_scale);
   Decimal out;
 
-  assert(abs_delta_scale <= kMaxScale);
+  ICEBERG_DCHECK(abs_delta_scale <= kMaxScale, "");
 
   auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale];
 
@@ -975,15 +954,15 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
       RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out);
 
   if (rescale_would_cause_data_loss) {
-    return Invalid("Rescale would cause data loss: {} -> {}", ToIntegerString(),
-                   out.ToIntegerString());
+    return Invalid("Rescale {} from {} to {} would cause data loss", ToIntegerString(),
+                   orig_scale, new_scale);
   }
 
   return out;
 }
 
 bool Decimal::FitsInPrecision(int32_t precision) const {
-  assert(precision >= 1 && precision <= kMaxPrecision);
+  ICEBERG_DCHECK(precision >= 1 && precision <= kMaxPrecision, "");
   return Decimal::Abs(*this) < kDecimal128PowersOfTen[precision];
 }
 
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index f765fc468..9da204088 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -25,17 +25,15 @@
 /// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
 
 #include <array>
-#include <bit>
 #include <cstdint>
 #include <iosfwd>
 #include <string>
 #include <string_view>
 #include <type_traits>
 
-#include <iceberg/type.h>
-
 #include "iceberg/iceberg_export.h"
 #include "iceberg/result.h"
+#include "iceberg/util/formattable.h"
 #include "iceberg/util/int128.h"
 #include "iceberg/util/macros.h"
 
@@ -44,7 +42,7 @@ namespace iceberg {
 /// \brief Represents 128-bit fixed-point decimal numbers.
 /// The max decimal precision that can be safely represented is
 /// 38 significant digits.
-class ICEBERG_EXPORT Decimal {
+class ICEBERG_EXPORT Decimal : public util::Formattable {
  public:
   static constexpr int32_t kBitWidth = 128;
   static constexpr int32_t kByteWidth = kBitWidth / 8;
@@ -55,13 +53,13 @@ class ICEBERG_EXPORT Decimal {
   constexpr Decimal() noexcept = default;
 
   /// \brief Create a Decimal from a 128-bit integer.
-  constexpr Decimal(int128_t value) noexcept  // NOLINT
+  constexpr Decimal(int128_t value) noexcept  // NOLINT(google-explicit-constructor)
       : data_(value) {}
 
   /// \brief Create a Decimal from any integer not wider than 64 bits.
   template <typename T>
     requires(std::is_integral_v<T> && (sizeof(T) <= sizeof(uint64_t)))
-  constexpr Decimal(T value) noexcept  // NOLINT
+  constexpr Decimal(T value) noexcept  // NOLINT(google-explicit-constructor)
   {
     data_ = static_cast<int128_t>(value);
   }
@@ -150,8 +148,15 @@ class ICEBERG_EXPORT Decimal {
   /// \brief Convert the Decimal value to an integer string.
   std::string ToIntegerString() const;
 
+  /// \brief Returns an integer string representation of the decimal value.
+  std::string ToString() const override { return ToIntegerString(); }
+
   /// \brief Convert the decimal string to a Decimal value, optionally including precision
   /// and scale if they are provided not null.
+  /// \param str The string representation of the Decimal value.
+  /// \param[out] precision Optional pointer to store the precision of the parsed value.
+  /// \param[out] scale Optional pointer to store the scale of the parsed value.
+  /// \return The Decimal value.
   static Result<Decimal> FromString(std::string_view str, int32_t* precision = nullptr,
                                     int32_t* scale = nullptr);
 
diff --git a/src/iceberg/util/macros.h b/src/iceberg/util/macros.h
index f11a680cc..278035d3f 100644
--- a/src/iceberg/util/macros.h
+++ b/src/iceberg/util/macros.h
@@ -19,6 +19,8 @@
 
 #pragma once
 
+#include <cassert>
+
 #define ICEBERG_RETURN_UNEXPECTED(result)                       \
   if (auto&& result_name = result; !result_name) [[unlikely]] { \
     return std::unexpected<Error>(result_name.error());         \
@@ -36,3 +38,5 @@
 #define ICEBERG_ASSIGN_OR_RAISE(lhs, rexpr)                                             \
   ICEBERG_ASSIGN_OR_RAISE_IMPL(ICEBERG_ASSIGN_OR_RAISE_NAME(result_, __COUNTER__), lhs, \
                                rexpr)
+
+#define ICEBERG_DCHECK(expr, message) assert((expr) && (message))
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index e0583468b..d32a2d8bb 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -75,11 +75,7 @@ add_iceberg_test(table_test
                  table_test.cc
                  schema_json_test.cc)
 
-add_iceberg_test(expression_test
-                 SOURCES
-                 decimal_test.cc
-                 expression_test.cc
-                 literal_test.cc)
+add_iceberg_test(expression_test SOURCES expression_test.cc literal_test.cc)
 
 add_iceberg_test(json_serde_test
                  SOURCES
@@ -91,6 +87,7 @@ add_iceberg_test(json_serde_test
 add_iceberg_test(util_test
                  SOURCES
                  config_test.cc
+                 decimal_test.cc
                  endian_test.cc
                  formatter_test.cc
                  string_util_test.cc

From e293fc9f21f9ed4544cfbf05ae0dbcd0a4223f8b Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 3 Sep 2025 23:26:27 +0800
Subject: [PATCH 17/20] chore: remove FromReal/ToReal/ToInteger conversions

---
 src/iceberg/util/decimal.cc | 339 --------------------
 src/iceberg/util/decimal.h  |  49 ---
 test/decimal_test.cc        | 621 ------------------------------------
 3 files changed, 1009 deletions(-)

diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index 4e8e68928..cb10d3f37 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -519,322 +519,6 @@ constexpr std::array<double, 2 * kPrecomputedPowersOfTen + 1> kDoublePowersOfTen
     1e56,  1e57,  1e58,  1e59,  1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,
     1e68,  1e69,  1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76};
 
-// ceil(log2(10 ^ k)) for k in [0...76]
-constexpr std::array<int32_t, 76 + 1> kCeilLog2PowersOfTen = {
-    0,   4,   7,   10,  14,  17,  20,  24,  27,  30,  34,  37,  40,  44,  47,  50,
-    54,  57,  60,  64,  67,  70,  74,  77,  80,  84,  87,  90,  94,  97,  100, 103,
-    107, 110, 113, 117, 120, 123, 127, 130, 133, 137, 140, 143, 147, 150, 153, 157,
-    160, 163, 167, 170, 173, 177, 180, 183, 187, 190, 193, 196, 200, 203, 206, 210,
-    213, 216, 220, 223, 226, 230, 233, 236, 240, 243, 246, 250, 253};
-
-template <typename Real>
-struct RealTraits {};
-
-template <>
-struct RealTraits<float> {
-  static constexpr const float* powers_of_ten() { return kFloatPowersOfTen.data(); }
-
-  static constexpr float two_to_64(float x) { return x * 1.8446744e+19f; }
-
-  static constexpr int32_t kMantissaBits = 24;
-  // ceil(log10(2 ^ kMantissaBits))
-  static constexpr int32_t kMantissaDigits = 8;
-  // Integers between zero and kMaxPreciseInteger can be precisely represented
-  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
-};
-
-template <>
-struct RealTraits<double> {
-  static constexpr const double* powers_of_ten() { return kDoublePowersOfTen.data(); }
-
-  static constexpr double two_to_64(double x) { return x * 1.8446744073709552e+19; }
-
-  static constexpr int32_t kMantissaBits = 53;
-  // ceil(log10(2 ^ kMantissaBits))
-  static constexpr int32_t kMantissaDigits = 16;
-  // Integers between zero and kMaxPreciseInteger can be precisely represented
-  static constexpr uint64_t kMaxPreciseInteger = (1ULL << kMantissaBits) - 1;
-};
-
-struct DecimalRealConversion {
-  // Return 10**exp, with a fast lookup, assuming `exp` is within bounds
-  template <typename Real>
-  static Real PowerOfTen(int32_t exp) {
-    constexpr int32_t N = kPrecomputedPowersOfTen;
-    ICEBERG_DCHECK(exp >= -N && exp <= N, "");
-    return RealTraits<Real>::powers_of_ten()[N + exp];
-  }
-
-  // Return 10**exp, with a fast lookup if possible
-  template <typename Real>
-  static Real LargePowerOfTen(int32_t exp) {
-    constexpr int32_t N = kPrecomputedPowersOfTen;
-    if (exp >= -N && exp <= N) {
-      return RealTraits<Real>::powers_of_ten()[N + exp];
-    } else {
-      return std::pow(static_cast<Real>(10.0), static_cast<Real>(exp));
-    }
-  }
-
-  static constexpr int32_t kMaxPrecision = Decimal::kMaxPrecision;
-  static constexpr int32_t kMaxScale = Decimal::kMaxScale;
-
-  static constexpr auto& DecimalPowerOfTen(int32_t exp) {
-    ICEBERG_DCHECK(exp >= 0 && exp <= kMaxPrecision, "");
-    return kDecimal128PowersOfTen[exp];
-  }
-
-  // Right shift positive `x` by positive `bits`, rounded half to even
-  static Decimal RoundedRightShift(const Decimal& x, int32_t bits) {
-    if (bits == 0) {
-      return x;
-    }
-    int64_t result_hi = x.high();
-    uint64_t result_lo = x.low();
-    uint64_t shifted = 0;
-    while (bits >= 64) {
-      // Retain the information that set bits were shifted right.
-      // This is important to detect an exact half.
-      shifted = result_lo | (shifted > 0);
-      result_lo = result_hi;
-      result_hi >>= 63;  // for sign
-      bits -= 64;
-    }
-    if (bits > 0) {
-      shifted = (result_lo << (64 - bits)) | (shifted > 0);
-      result_lo >>= bits;
-      result_lo |= static_cast<uint64_t>(result_hi) << (64 - bits);
-      result_hi >>= bits;
-    }
-    // We almost have our result, but now do the rounding.
-    constexpr uint64_t kHalf = 0x8000000000000000ULL;
-    if (shifted > kHalf) {
-      // Strictly more than half => round up
-      result_lo += 1;
-      result_hi += (result_lo == 0);
-    } else if (shifted == kHalf) {
-      // Exactly half => round to even
-      if ((result_lo & 1) != 0) {
-        result_lo += 1;
-        result_hi += (result_lo == 0);
-      }
-    } else {
-      // Strictly less than half => round down
-    }
-    return Decimal{result_hi, result_lo};
-  }
-
-  template <typename Real>
-  static Result<Decimal> FromPositiveRealApprox(Real real, int32_t precision,
-                                                int32_t scale) {
-    // Approximate algorithm that operates in the FP domain (thus subject
-    // to precision loss).
-    const auto x = std::nearbyint(real * PowerOfTen<Real>(scale));
-    const auto max_abs = PowerOfTen<Real>(precision);
-    if (x <= -max_abs || x >= max_abs) {
-      return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
-                     real, precision, scale);
-    }
-
-    // Extract high and low bits
-    const auto high = std::floor(std::ldexp(x, -64));
-    const auto low = x - std::ldexp(high, 64);
-
-    ICEBERG_DCHECK(high >= 0, "");
-    ICEBERG_DCHECK(high < 9.223372036854776e+18, "");  // 2**63
-    ICEBERG_DCHECK(low >= 0, "");
-    ICEBERG_DCHECK(low < 1.8446744073709552e+19, "");  // 2**64
-    return Decimal(static_cast<int64_t>(high), static_cast<uint64_t>(low));
-  }
-
-  template <typename Real>
-  static Result<Decimal> FromPositiveReal(Real real, int32_t precision, int32_t scale) {
-    constexpr int32_t kMantissaBits = RealTraits<Real>::kMantissaBits;
-    constexpr int32_t kMantissaDigits = RealTraits<Real>::kMantissaDigits;
-
-    // Problem statement: construct the Decimal with the value
-    // closest to `real * 10^scale`.
-    if (scale < 0) {
-      // Negative scales are not handled below, fall back to approx algorithm
-      return FromPositiveRealApprox(real, precision, scale);
-    }
-
-    // 1. Check that `real` is within acceptable bounds.
-    const Real limit = PowerOfTen<Real>(precision - scale);
-    if (real > limit) {
-      // Checking the limit early helps ensure the computations below do not
-      // overflow.
-      // NOTE: `limit` is allowed here as rounding can make it smaller than
-      // the theoretical limit (for example, 1.0e23 < 10^23).
-      return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
-                     real, precision, scale);
-    }
-
-    // 2. Losslessly convert `real` to `mant * 2**k`
-    int32_t binary_exp = 0;
-    const Real real_mant = std::frexp(real, &binary_exp);
-    // `real_mant` is within 0.5 and 1 and has M bits of precision.
-    // Multiply it by 2^M to get an exact integer.
-    const auto mant = static_cast<uint64_t>(std::ldexp(real_mant, kMantissaBits));
-    const int32_t k = binary_exp - kMantissaBits;
-    // (note that `real = mant * 2^k`)
-
-    // 3. Start with `mant`.
-    // We want to end up with `real * 10^scale` i.e. `mant * 2^k * 10^scale`.
-    Decimal x(mant);
-
-    if (k < 0) {
-      // k < 0 (i.e. binary_exp < kMantissaBits), is probably the common case
-      // when converting to decimal. It implies right-shifting by -k bits,
-      // while multiplying by 10^scale. We also must avoid overflow (losing
-      // bits on the left) and precision loss (losing bits on the right).
-      int32_t right_shift_by = -k;
-      int32_t mul_by_ten_to = scale;
-
-      // At this point, `x` has kMantissaDigits significant digits but it can
-      // fit kMaxPrecision (excluding sign). We can therefore multiply by up
-      // to 10^(kMaxPrecision - kMantissaDigits).
-      constexpr int32_t kSafeMulByTenTo = kMaxPrecision - kMantissaDigits;
-
-      if (mul_by_ten_to <= kSafeMulByTenTo) {
-        // Scale is small enough, so we can do it all at once.
-        x *= DecimalPowerOfTen(mul_by_ten_to);
-        x = RoundedRightShift(x, right_shift_by);
-      } else {
-        // Scale is too large, we cannot multiply at once without overflow.
-        // We use an iterative algorithm which alternately shifts left by
-        // multiplying by a power of ten, and shifts right by a number of bits.
-
-        // First multiply `x` by as large a power of ten as possible
-        // without overflowing.
-        x *= DecimalPowerOfTen(kSafeMulByTenTo);
-        mul_by_ten_to -= kSafeMulByTenTo;
-
-        // `x` now has full precision. However, we know we'll only
-        // keep `precision` digits at the end. Extraneous bits/digits
-        // on the right can be safely shifted away, before multiplying
-        // again.
-        // NOTE: if `precision` is the full precision then the algorithm will
-        // lose the last digit. If `precision` is almost the full precision,
-        // there can be an off-by-one error due to rounding.
-        const int32_t mul_step = std::max(1, kMaxPrecision - precision);
-
-        // The running exponent, useful to compute by how much we must
-        // shift right to make place on the left before the next multiply.
-        int32_t total_exp = 0;
-        int32_t total_shift = 0;
-        while (mul_by_ten_to > 0 && right_shift_by > 0) {
-          const int32_t exp = std::min(mul_by_ten_to, mul_step);
-          total_exp += exp;
-          // The supplementary right shift required so that
-          // `x * 10^total_exp / 2^total_shift` fits in the decimal.
-          ICEBERG_DCHECK(static_cast<size_t>(total_exp) < sizeof(kCeilLog2PowersOfTen),
-                         "");
-          const int32_t bits =
-              std::min(right_shift_by, kCeilLog2PowersOfTen[total_exp] - total_shift);
-          total_shift += bits;
-          // Right shift to make place on the left, then multiply
-          x = RoundedRightShift(x, bits);
-          right_shift_by -= bits;
-          // Should not overflow thanks to the precautions taken
-          x *= DecimalPowerOfTen(exp);
-          mul_by_ten_to -= exp;
-        }
-        if (mul_by_ten_to > 0) {
-          x *= DecimalPowerOfTen(mul_by_ten_to);
-        }
-        if (right_shift_by > 0) {
-          x = RoundedRightShift(x, right_shift_by);
-        }
-      }
-    } else {
-      // k >= 0 implies left-shifting by k bits and multiplying by 10^scale.
-      // The order of these operations therefore doesn't matter. We know
-      // we won't overflow because of the limit check above, and we also
-      // won't lose any significant bits on the right.
-      x *= DecimalPowerOfTen(scale);
-      x <<= k;
-    }
-
-    // Rounding might have pushed `x` just above the max precision, check again
-    if (!x.FitsInPrecision(precision)) {
-      return Invalid("Cannot convert {} to Decimal(precision = {}, scale = {}): overflow",
-                     real, precision, scale);
-    }
-    return x;
-  }
-
-  template <typename Real>
-  static Real ToRealPositiveNoSplit(const Decimal& decimal, int32_t scale) {
-    Real x = RealTraits<Real>::two_to_64(static_cast<Real>(decimal.high()));
-    x += static_cast<Real>(decimal.low());
-    x *= LargePowerOfTen<Real>(-scale);
-    return x;
-  }
-
-  /// An approximate conversion from Decimal to Real that guarantees:
-  /// 1. If the decimal is an integer, the conversion is exact.
-  /// 2. If the number of fractional digits is <= RealTraits<Real>::kMantissaDigits (e.g.
-  ///    8 for float and 16 for double), the conversion is within 1 ULP of the exact
-  ///    value.
-  /// 3. Otherwise, the conversion is within 2^(-RealTraits<Real>::kMantissaDigits+1)
-  ///    (e.g. 2^-23 for float and 2^-52 for double) of the exact value.
-  /// Here "exact value" means the closest representable value by Real.
-  template <typename Real>
-  static Real ToRealPositive(const Decimal& decimal, int32_t scale) {
-    if (scale <= 0 ||
-        (decimal.high() == 0 && decimal.low() <= RealTraits<Real>::kMaxPreciseInteger)) {
-      // No need to split the decimal if it is already an integer (scale <= 0) or if it
-      // can be precisely represented by Real
-      return ToRealPositiveNoSplit<Real>(decimal, scale);
-    }
-
-    // Split decimal into whole and fractional parts to avoid precision loss
-    auto s = decimal.GetWholeAndFraction(scale);
-    ICEBERG_DCHECK(s, "Decimal::GetWholeAndFraction failed");
-
-    Real whole = ToRealPositiveNoSplit<Real>(s->first, 0);
-    Real fraction = ToRealPositiveNoSplit<Real>(s->second, scale);
-
-    return whole + fraction;
-  }
-
-  template <typename Real>
-  static Result<Decimal> FromReal(Real value, int32_t precision, int32_t scale) {
-    ICEBERG_DCHECK(precision >= 1 && precision <= kMaxPrecision, "");
-    ICEBERG_DCHECK(scale >= -kMaxScale && scale <= kMaxScale, "");
-
-    if (!std::isfinite(value)) {
-      return InvalidArgument("Cannot convert {} to Decimal", value);
-    }
-
-    if (value == 0) {
-      return Decimal{};
-    }
-
-    if (value < 0) {
-      ICEBERG_ASSIGN_OR_RAISE(auto decimal, FromPositiveReal(-value, precision, scale));
-      return decimal.Negate();
-    } else {
-      return FromPositiveReal(value, precision, scale);
-    }
-  }
-
-  template <typename Real>
-  static Real ToReal(const Decimal& decimal, int32_t scale) {
-    ICEBERG_DCHECK(scale >= -kMaxScale && scale <= kMaxScale, "");
-
-    if (decimal.IsNegative()) {
-      // Convert the absolute value to avoid precision loss
-      auto abs = decimal;
-      abs.Negate();
-      return -ToRealPositive<Real>(abs, scale);
-    } else {
-      return ToRealPositive<Real>(decimal, scale);
-    }
-  }
-};
-
 // Helper function used by Decimal::FromBigEndian
 static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
   // We don't bounds check the length here because this is called by
@@ -869,21 +553,6 @@ static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
 
 }  // namespace
 
-Result<Decimal> Decimal::FromReal(float x, int32_t precision, int32_t scale) {
-  return DecimalRealConversion::FromReal(x, precision, scale);
-}
-
-Result<Decimal> Decimal::FromReal(double x, int32_t precision, int32_t scale) {
-  return DecimalRealConversion::FromReal(x, precision, scale);
-}
-
-Result<std::pair<Decimal, Decimal>> Decimal::GetWholeAndFraction(int32_t scale) const {
-  ICEBERG_DCHECK(scale >= 0 && scale <= kMaxScale, "");
-
-  Decimal multiplier(kDecimal128PowersOfTen[scale]);
-  return Divide(multiplier);
-}
-
 Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
   static constexpr int32_t kMinDecimalBytes = 1;
   static constexpr int32_t kMaxDecimalBytes = 16;
@@ -966,14 +635,6 @@ bool Decimal::FitsInPrecision(int32_t precision) const {
   return Decimal::Abs(*this) < kDecimal128PowersOfTen[precision];
 }
 
-float Decimal::ToFloat(int32_t scale) const {
-  return DecimalRealConversion::ToReal<float>(*this, scale);
-}
-
-double Decimal::ToDouble(int32_t scale) const {
-  return DecimalRealConversion::ToReal<double>(*this, scale);
-}
-
 std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
   std::array<uint8_t, kByteWidth> out{{0}};
   std::memcpy(out.data(), &data_, kByteWidth);
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index 9da204088..695bbbbf1 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -35,7 +35,6 @@
 #include "iceberg/result.h"
 #include "iceberg/util/formattable.h"
 #include "iceberg/util/int128.h"
-#include "iceberg/util/macros.h"
 
 namespace iceberg {
 
@@ -160,19 +159,11 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
   static Result<Decimal> FromString(std::string_view str, int32_t* precision = nullptr,
                                     int32_t* scale = nullptr);
 
-  /// \brief Convert the floating-point value to a Decimal value with the given
-  /// precision and scale.
-  static Result<Decimal> FromReal(double real, int32_t precision, int32_t scale);
-  static Result<Decimal> FromReal(float real, int32_t precision, int32_t scale);
-
   /// \brief Convert from a big-endian byte representation. The length must be
   ///        between 1 and 16.
   /// \return error status if the length is an invalid value
   static Result<Decimal> FromBigEndian(const uint8_t* data, int32_t length);
 
-  /// \brief separate the integer and fractional parts for the given scale.
-  Result<std::pair<Decimal, Decimal>> GetWholeAndFraction(int32_t scale) const;
-
   /// \brief Convert Decimal from one scale to another.
   Result<Decimal> Rescale(int32_t orig_scale, int32_t new_scale) const;
 
@@ -189,46 +180,6 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
     return low() <=> other.low();
   }
 
-  /// \brief Convert to a signed integer
-  template <typename T>
-    requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
-  Result<T> ToInteger() const {
-    constexpr auto min_value = std::numeric_limits<T>::min();
-    constexpr auto max_value = std::numeric_limits<T>::max();
-    const auto& self = *this;
-    if (self < min_value || self > max_value) {
-      return Invalid("Invalid cast from Decimal to {} byte integer", sizeof(T));
-    }
-    return static_cast<T>(low());
-  }
-
-  /// \brief Convert to a signed integer
-  template <typename T>
-    requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
-  Status ToInteger(T* out) const {
-    ICEBERG_ASSIGN_OR_RAISE(auto result, ToInteger<T>());
-    *out = result;
-    return {};
-  }
-
-  /// \brief Convert to a floating-point number (scaled)
-  float ToFloat(int32_t scale) const;
-  /// \brief Convert to a floating-point number (scaled)
-  double ToDouble(int32_t scale) const;
-
-  /// \brief Convert the Decimal value to a floating-point value with the given scale.
-  /// \param scale The scale to use for the conversion.
-  /// \return The floating-point value.
-  template <typename T>
-    requires std::is_floating_point_v<T>
-  T ToReal(int32_t scale) const {
-    if constexpr (std::is_same_v<T, float>) {
-      return ToFloat(scale);
-    } else {
-      return ToDouble(scale);
-    }
-  }
-
   const uint8_t* native_endian_bytes() const {
     return reinterpret_cast<const uint8_t*>(&data_);
   }
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
index 61c5cca74..6850d7aad 100644
--- a/test/decimal_test.cc
+++ b/test/decimal_test.cc
@@ -21,7 +21,6 @@
 #include <algorithm>
 #include <array>
 #include <bit>
-#include <cmath>
 #include <cstdint>
 
 #include <gmock/gmock.h>
@@ -416,590 +415,6 @@ TEST(DecimalTest, ToString) {
   }
 }
 
-template <typename Real>
-struct FromRealTestParam {
-  Real real;
-  int32_t precision;
-  int32_t scale;
-  const char* expected_string;
-};
-
-template <typename Real>
-void CheckDecimalFromReal(Real real, int32_t precision, int32_t scale,
-                          const char* expected_string) {
-  auto result = Decimal::FromReal(real, precision, scale);
-  ASSERT_THAT(result, IsOk()) << "Failed to convert real to Decimal: " << real
-                              << ", precision: " << precision << ", scale: " << scale;
-  const Decimal& decimal = result.value();
-  EXPECT_EQ(decimal.ToString(scale).value(), expected_string);
-  const std::string expected_neg =
-      (decimal) ? "-" + std::string(expected_string) : expected_string;
-  auto neg_result = Decimal::FromReal(-real, precision, scale);
-  ASSERT_THAT(neg_result, IsOk())
-      << "Failed to convert negative real to Decimal: " << -real
-      << ", precision: " << precision << ", scale: " << scale;
-  const Decimal& neg_decimal = neg_result.value();
-  EXPECT_EQ(neg_decimal.ToString(scale).value(), expected_neg);
-}
-
-template <typename Real>
-void CheckDecimalFromRealIntegerString(Real real, int32_t precision, int32_t scale,
-                                       const char* expected_string) {
-  auto result = Decimal::FromReal(real, precision, scale);
-  ASSERT_THAT(result, IsOk()) << "Failed to convert real to Decimal: " << real
-                              << ", precision: " << precision << ", scale: " << scale;
-  const Decimal& decimal = result.value();
-  EXPECT_EQ(decimal.ToIntegerString(), expected_string);
-  const std::string expected_neg =
-      (decimal) ? "-" + std::string(expected_string) : expected_string;
-  auto neg_result = Decimal::FromReal(-real, precision, scale);
-  ASSERT_THAT(neg_result, IsOk())
-      << "Failed to convert negative real to Decimal: " << -real
-      << ", precision: " << precision << ", scale: " << scale;
-  const Decimal& neg_decimal = neg_result.value();
-  EXPECT_EQ(neg_decimal.ToIntegerString(), expected_neg);
-}
-
-using FromFloatTestParam = FromRealTestParam<float>;
-using FromDoubleTestParam = FromRealTestParam<double>;
-
-template <typename Real>
-class TestDecimalFromReal : public ::testing::Test {
- public:
-  using ParamType = FromRealTestParam<Real>;
-
-  void TestSuccess() {
-    const std::vector<ParamType> params{
-        // clang-format off
-        {0.0f, 1, 0, "0"},
-        {0.0f, 19, 4, "0.0000"},
-        {123.0f, 7, 4, "123.0000"},
-        {456.78f, 7, 4, "456.7800"},
-        {456.784f, 5, 2, "456.78"},
-        {456.786f, 5, 2, "456.79"},
-        {999.99f, 5, 2, "999.99"},
-        {123.0f, 19, 0, "123"},
-        {123.4f, 19, 0, "123"},
-        {123.6f, 19, 0, "124"},
-        // 2**62
-        {4.6116860184273879e+18, 19, 0, "4611686018427387904"},
-        // 2**63
-        {9.2233720368547758e+18, 19, 0, "9223372036854775808"},
-        // 2**64
-        {1.8446744073709552e+19, 20, 0, "18446744073709551616"},
-        // clang-format on
-    };
-
-    for (const ParamType& param : params) {
-      CheckDecimalFromReal(param.real, param.precision, param.scale,
-                           param.expected_string);
-    }
-  }
-
-  void TestErrors() {
-    const std::vector<ParamType> params{
-        {std::numeric_limits<Real>::infinity(), Decimal::kMaxPrecision / 2, 4, ""},
-        {-std::numeric_limits<Real>::infinity(), Decimal::kMaxPrecision / 2, 4, ""},
-        {std::numeric_limits<Real>::quiet_NaN(), Decimal::kMaxPrecision / 2, 4, ""},
-        {-std::numeric_limits<Real>::quiet_NaN(), Decimal::kMaxPrecision / 2, 4, ""},
-    };
-
-    for (const ParamType& param : params) {
-      auto result = Decimal::FromReal(param.real, param.precision, param.scale);
-      ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
-    }
-
-    const std::vector<ParamType> overflow_params{
-        // Overflow errors
-        {1000.0, 3, 0, ""},  {-1000.0, 3, 0, ""}, {1000.0, 5, 2, ""},
-        {-1000.0, 5, 2, ""}, {999.996, 5, 2, ""}, {-999.996, 5, 2, ""},
-    };
-    for (const ParamType& param : overflow_params) {
-      auto result = Decimal::FromReal(param.real, param.precision, param.scale);
-      ASSERT_THAT(result, IsError(ErrorKind::kInvalid));
-    }
-  }
-};
-
-using RealTypes = ::testing::Types<float, double>;
-TYPED_TEST_SUITE(TestDecimalFromReal, RealTypes);
-
-TYPED_TEST(TestDecimalFromReal, TestSuccess) { this->TestSuccess(); }
-
-TYPED_TEST(TestDecimalFromReal, TestErrors) { this->TestErrors(); }
-
-TEST(TestDecimalFromReal, FromFloat) {
-  const std::vector<FromFloatTestParam> params{
-      // -- Stress the 24 bits of precision of a float
-      // 2**63 + 2**40
-      FromFloatTestParam{.real = 9.223373e+18f,
-                         .precision = 19,
-                         .scale = 0,
-                         .expected_string = "9223373136366403584"},
-      // 2**64 - 2**40
-      FromFloatTestParam{.real = 1.8446743e+19f,
-                         .precision = 20,
-                         .scale = 0,
-                         .expected_string = "18446742974197923840"},
-      // 2**64 + 2**41
-      FromFloatTestParam{.real = 1.8446746e+19f,
-                         .precision = 20,
-                         .scale = 0,
-                         .expected_string = "18446746272732807168"},
-      // 2**14 - 2**-10
-      FromFloatTestParam{
-          .real = 16383.999f, .precision = 8, .scale = 3, .expected_string = "16383.999"},
-      FromFloatTestParam{.real = 16383.999f,
-                         .precision = 19,
-                         .scale = 3,
-                         .expected_string = "16383.999"},
-      // 1 - 2**-24
-      FromFloatTestParam{.real = 0.99999994f,
-                         .precision = 10,
-                         .scale = 10,
-                         .expected_string = "0.9999999404"},
-      FromFloatTestParam{.real = 0.99999994f,
-                         .precision = 15,
-                         .scale = 15,
-                         .expected_string = "0.999999940395355"},
-      FromFloatTestParam{.real = 0.99999994f,
-                         .precision = 20,
-                         .scale = 20,
-                         .expected_string = "0.99999994039535522461"},
-      FromFloatTestParam{.real = 0.99999994f,
-                         .precision = 21,
-                         .scale = 21,
-                         .expected_string = "0.999999940395355224609"},
-      FromFloatTestParam{.real = 0.99999994f,
-                         .precision = 38,
-                         .scale = 38,
-                         .expected_string = "0.99999994039535522460937500000000000000"},
-      // -- Other cases
-      // 10**38 - 2**103
-      FromFloatTestParam{.real = 9.999999e+37f,
-                         .precision = 38,
-                         .scale = 0,
-                         .expected_string = "99999986661652122824821048795547566080"},
-  };
-
-  for (const auto& param : params) {
-    CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected_string);
-  }
-}
-
-TEST(TestDecimalFromReal, FromFloatLargeValues) {
-  for (int32_t scale = -38; scale <= 38; ++scale) {
-    float real = std::pow(10.0f, static_cast<float>(scale));
-    CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
-  }
-
-  for (int32_t scale = -37; scale <= 36; ++scale) {
-    float real = 123.f * std::pow(10.f, static_cast<float>(scale));
-    CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12");
-    CheckDecimalFromRealIntegerString(real, 3, -scale, "123");
-    CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230");
-  }
-}
-
-TEST(TestDecimalFromReal, FromDouble) {
-  const std::vector<FromDoubleTestParam> params{
-      // -- Stress the 53 bits of precision of a double
-      // 2**63 + 2**11
-      FromDoubleTestParam{.real = 9.223372036854778e+18,
-                          .precision = 19,
-                          .scale = 0,
-                          .expected_string = "9223372036854777856"},
-      // 2**64 - 2**11
-      FromDoubleTestParam{.real = 1.844674407370955e+19,
-                          .precision = 20,
-                          .scale = 0,
-                          .expected_string = "18446744073709549568"},
-      // 2**64 + 2**11
-      FromDoubleTestParam{.real = 1.8446744073709556e+19,
-                          .precision = 20,
-                          .scale = 0,
-                          .expected_string = "18446744073709555712"},
-      // 2**126
-      FromDoubleTestParam{.real = 8.507059173023462e+37,
-                          .precision = 38,
-                          .scale = 0,
-                          .expected_string = "85070591730234615865843651857942052864"},
-      // 2**126 - 2**74
-      FromDoubleTestParam{.real = 8.50705917302346e+37,
-                          .precision = 38,
-                          .scale = 0,
-                          .expected_string = "85070591730234596976377720379361198080"},
-      // 2**36 - 2**-16
-      FromDoubleTestParam{.real = 68719476735.999985,
-                          .precision = 11,
-                          .scale = 0,
-                          .expected_string = "68719476736"},
-      FromDoubleTestParam{.real = 68719476735.999985,
-                          .precision = 38,
-                          .scale = 27,
-                          .expected_string = "68719476735.999984741210937500000000000"},
-      // -- Other cases
-      // Almost 10**38 (minus 2**73)
-      FromDoubleTestParam{.real = 9.999999999999998e+37,
-                          .precision = 38,
-                          .scale = 0,
-                          .expected_string = "99999999999999978859343891977453174784"},
-      FromDoubleTestParam{.real = 9.999999999999998e+27,
-                          .precision = 38,
-                          .scale = 10,
-                          .expected_string = "9999999999999997384096481280.0000000000"},
-      // 10**N (sometimes fits in N digits)
-      FromDoubleTestParam{.real = 1e23,
-                          .precision = 23,
-                          .scale = 0,
-                          .expected_string = "99999999999999991611392"},
-      FromDoubleTestParam{.real = 1e23,
-                          .precision = 24,
-                          .scale = 1,
-                          .expected_string = "99999999999999991611392.0"},
-      FromDoubleTestParam{.real = 1e36,
-                          .precision = 37,
-                          .scale = 0,
-                          .expected_string = "1000000000000000042420637374017961984"},
-      FromDoubleTestParam{.real = 1e36,
-                          .precision = 38,
-                          .scale = 1,
-                          .expected_string = "1000000000000000042420637374017961984.0"},
-      FromDoubleTestParam{.real = 1e37,
-                          .precision = 37,
-                          .scale = 0,
-                          .expected_string = "9999999999999999538762658202121142272"},
-      FromDoubleTestParam{.real = 1e37,
-                          .precision = 38,
-                          .scale = 1,
-                          .expected_string = "9999999999999999538762658202121142272.0"},
-      FromDoubleTestParam{.real = 1e38,
-                          .precision = 38,
-                          .scale = 0,
-                          .expected_string = "99999999999999997748809823456034029568"},
-      // Hand-picked test cases that can involve precision issues.
-      // More comprehensive testing is done in the PyArrow test suite.
-      FromDoubleTestParam{.real = 9.223372036854778e+10,
-                          .precision = 19,
-                          .scale = 8,
-                          .expected_string = "92233720368.54777527"},
-      FromDoubleTestParam{.real = 1.8446744073709556e+15,
-                          .precision = 20,
-                          .scale = 4,
-                          .expected_string = "1844674407370955.5000"},
-      FromDoubleTestParam{.real = 999999999999999.0,
-                          .precision = 16,
-                          .scale = 1,
-                          .expected_string = "999999999999999.0"},
-      FromDoubleTestParam{.real = 9999999999999998.0,
-                          .precision = 17,
-                          .scale = 1,
-                          .expected_string = "9999999999999998.0"},
-      FromDoubleTestParam{.real = 999999999999999.9,
-                          .precision = 16,
-                          .scale = 1,
-                          .expected_string = "999999999999999.9"},
-      FromDoubleTestParam{.real = 9999999987.,
-                          .precision = 38,
-                          .scale = 22,
-                          .expected_string = "9999999987.0000000000000000000000"},
-      FromDoubleTestParam{.real = 9999999987.,
-                          .precision = 38,
-                          .scale = 28,
-                          .expected_string = "9999999987.0000000000000000000000000000"},
-      // 1 - 2**-52
-      // XXX the result should be 0.99999999999999977795539507496869191527
-      // but our algorithm loses the right digit.
-      FromDoubleTestParam{.real = 0.9999999999999998,
-                          .precision = 38,
-                          .scale = 38,
-                          .expected_string = "0.99999999999999977795539507496869191520"},
-      FromDoubleTestParam{.real = 0.9999999999999998,
-                          .precision = 20,
-                          .scale = 20,
-                          .expected_string = "0.99999999999999977796"},
-      FromDoubleTestParam{.real = 0.9999999999999998,
-                          .precision = 16,
-                          .scale = 16,
-                          .expected_string = "0.9999999999999998"},
-  };
-
-  for (const auto& param : params) {
-    CheckDecimalFromReal(param.real, param.precision, param.scale, param.expected_string);
-  }
-}
-
-TEST(TestDecimalFromReal, FromDoubleLargeValues) {
-  constexpr auto kMaxScale = Decimal::kMaxScale;
-  for (int32_t scale = -kMaxScale; scale <= kMaxScale; ++scale) {
-    if (std::abs(1 - scale) < kMaxScale) {
-      double real = std::pow(10.0, static_cast<double>(scale));
-      CheckDecimalFromRealIntegerString(real, 1, -scale, "1");
-    }
-  }
-
-  for (int32_t scale = -kMaxScale + 1; scale <= kMaxScale - 1; ++scale) {
-    if (std::abs(4 - scale) < kMaxScale) {
-      double real = 123. * std::pow(10.0, static_cast<double>(scale));
-      CheckDecimalFromRealIntegerString(real, 2, -scale - 1, "12");
-      CheckDecimalFromRealIntegerString(real, 3, -scale, "123");
-      CheckDecimalFromRealIntegerString(real, 4, -scale + 1, "1230");
-    }
-  }
-}
-
-template <typename Real>
-struct ToRealTestParam {
-  std::string decimal_value;
-  int32_t scale;
-  Real expected;
-};
-
-using ToFloatTestParam = ToRealTestParam<float>;
-using ToDoubleTestParam = ToRealTestParam<double>;
-
-template <typename Real>
-void CheckDecimalToReal(const std::string& decimal_value, int32_t scale, Real expected) {
-  auto result = Decimal::FromString(decimal_value);
-  ASSERT_THAT(result, IsOk()) << "Failed to convert string to Decimal: " << decimal_value;
-
-  const Decimal& decimal = result.value();
-  auto real_result = decimal.ToReal<Real>(scale);
-
-  EXPECT_EQ(real_result, expected)
-      << "Expected: " << expected << ", but got: " << real_result;
-}
-
-template <typename Real>
-void CheckDecimalToRealWithinOneULP(const std::string& decimal_value, int32_t scale,
-                                    Real expected) {
-  Decimal dec(decimal_value);
-  Real actual = dec.template ToReal<Real>(scale);
-  ASSERT_TRUE(actual == expected || actual == std::nextafter(expected, expected + 1) ||
-              actual == std::nextafter(expected, expected - 1))
-      << "Decimal value: " << decimal_value << ", scale: " << scale
-      << ", expected: " << expected << ", actual: " << actual;
-}
-
-template <typename Real>
-void CheckDecimalToRealWithinEpsilon(const std::string& decimal_value, int32_t scale,
-                                     Real epsilon, Real expected) {
-  Decimal dec(decimal_value);
-  Real actual = dec.template ToReal<Real>(scale);
-  ASSERT_LE(std::abs(actual - expected), epsilon)
-      << "Decimal value: " << decimal_value << ", scale: " << scale
-      << ", expected: " << expected << ", actual: " << actual;
-}
-
-void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
-                              float expected) {
-  Decimal dec(decimal_value);
-  auto actual = dec.template ToReal<float>(scale);
-  ASSERT_FLOAT_EQ(actual, expected)
-      << "Decimal value: " << decimal_value << ", scale: " << scale
-      << ", expected: " << expected << ", actual: " << actual;
-}
-
-void CheckDecimalToRealApprox(const std::string& decimal_value, int32_t scale,
-                              double expected) {
-  Decimal dec(decimal_value);
-  auto actual = dec.template ToReal<double>(scale);
-  ASSERT_DOUBLE_EQ(actual, expected)
-      << "Decimal value: " << decimal_value << ", scale: " << scale
-      << ", expected: " << expected << ", actual: " << actual;
-}
-
-template <typename Real>
-class TestDecimalToReal : public ::testing::Test {
- public:
-  using ParamType = ToRealTestParam<Real>;
-
-  Real Pow2(int exp) { return std::pow(static_cast<Real>(2), static_cast<Real>(exp)); }
-
-  Real Pow10(int exp) { return std::pow(static_cast<Real>(10), static_cast<Real>(exp)); }
-
-  void TestSuccess() {
-    const std::vector<ParamType> params{
-        // clang-format off
-        {"0", 0, 0.0f},
-        {"0", 10, 0.0f},
-        {"0", -10, 0.0f},
-        {"1", 0, 1.0f},
-        {"12345", 0, 12345.f},
-#ifndef __MINGW32__  // MinGW has precision issues
-        {"12345", 1, 1234.5f},
-#endif
-        {"12345", -3, 12345000.f},
-        // 2**62
-        {"4611686018427387904", 0, Pow2(62)},
-        // 2**63 + 2**62
-        {"13835058055282163712", 0, Pow2(63) + Pow2(62)},
-        // 2**64 + 2**62
-        {"23058430092136939520", 0, Pow2(64) + Pow2(62)},
-        // 10**38 - 2**103
-#ifndef __MINGW32__  // MinGW has precision issues
-        {"99999989858795198174164788026374356992", 0, Pow10(38) - Pow2(103)},
-#endif
-        // clang-format on
-    };
-
-    for (const ParamType& param : params) {
-      CheckDecimalToReal(param.decimal_value, param.scale, param.expected);
-      if (param.decimal_value != "0") {
-        // Check negative values as well
-        CheckDecimalToReal("-" + param.decimal_value, param.scale, -param.expected);
-      }
-    }
-  }
-};
-
-TYPED_TEST_SUITE(TestDecimalToReal, RealTypes);
-
-TYPED_TEST(TestDecimalToReal, TestSuccess) { this->TestSuccess(); }
-
-// Custom test for Decimal::ToReal<float>
-TEST(TestDecimalToReal, ToFloatLargeValues) {
-  constexpr auto max_scale = Decimal::kMaxScale;
-
-  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
-  // Nevertheless, power-of-ten factors are not all exactly representable
-  // in binary floating point.
-  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
-#ifdef _WIN32
-    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-    if (scale == 45) continue;
-#endif
-    CheckDecimalToRealApprox(
-        "1", scale, std::pow(static_cast<float>(10), static_cast<float>(-scale)));
-  }
-  for (int32_t scale = -max_scale; scale <= max_scale - 2; scale++) {
-#ifdef _WIN32
-    // MSVC gives pow(10.f, -45.f) == 0 even though 1e-45f is nonzero
-    if (scale == 45) continue;
-#endif
-    const auto factor = static_cast<float>(123);
-    CheckDecimalToRealApprox(
-        "123", scale,
-        factor * std::pow(static_cast<float>(10), static_cast<float>(-scale)));
-  }
-}
-
-TEST(TestDecimalToReal, ToFloatPrecision) {
-  // 2**63 + 2**40 (exactly representable in a float's 24 bits of precision)
-  CheckDecimalToReal<float>("9223373136366403584", 0, 9.223373e+18f);
-  CheckDecimalToReal<float>("-9223373136366403584", 0, -9.223373e+18f);
-
-  // 2**64 + 2**41 (exactly representable in a float)
-  CheckDecimalToReal<float>("18446746272732807168", 0, 1.8446746e+19f);
-  CheckDecimalToReal<float>("-18446746272732807168", 0, -1.8446746e+19f);
-
-  // Integers are always exact
-  auto scale = Decimal::kMaxScale - 1;
-  std::string seven = "7.";
-  seven.append(scale, '0');  // pad with trailing zeros
-  CheckDecimalToReal<float>(seven, scale, 7.0f);
-  CheckDecimalToReal<float>("-" + seven, scale, -7.0f);
-
-  CheckDecimalToReal<float>("99999999999999999999.0000000000000000", 16,
-                            99999999999999999999.0f);
-  CheckDecimalToReal<float>("-99999999999999999999.0000000000000000", 16,
-                            -99999999999999999999.0f);
-
-  // Small fractions are within one ULP
-  CheckDecimalToRealWithinOneULP<float>("9999999.9", 1, 9999999.9f);
-  CheckDecimalToRealWithinOneULP<float>("-9999999.9", 1, -9999999.9f);
-
-  CheckDecimalToRealWithinOneULP<float>("9999999.999999", 6, 9999999.999999f);
-  CheckDecimalToRealWithinOneULP<float>("-9999999.999999", 6, -9999999.999999f);
-
-  // Large fractions are within 2^-23
-  constexpr float epsilon = 1.1920928955078125e-07f;  // 2^-23
-  CheckDecimalToRealWithinEpsilon<float>("112334829348925.99070703983306884765625", 23,
-                                         epsilon,
-                                         112334829348925.99070703983306884765625f);
-  CheckDecimalToRealWithinEpsilon<float>("1.987748987892758765582589910934859345", 36,
-                                         epsilon,
-                                         1.987748987892758765582589910934859345f);
-}
-
-// ToReal<double> tests are disabled on MinGW because of precision issues in results
-#ifndef __MINGW32__
-
-TEST(TestDecimalToReal, ToDoubleLargeValues) {
-  // Note that exact comparisons would succeed on some platforms (Linux, macOS).
-  // Nevertheless, power-of-ten factors are not all exactly representable
-  // in binary floating point.
-  constexpr auto max_scale = Decimal::kMaxScale;
-
-  for (int32_t scale = -max_scale; scale <= max_scale; scale++) {
-    CheckDecimalToRealApprox(
-        "1", scale, std::pow(static_cast<double>(10), static_cast<double>(-scale)));
-  }
-  for (int32_t scale = -max_scale + 1; scale <= max_scale - 1; scale++) {
-    const auto factor = static_cast<double>(123);
-    CheckDecimalToRealApprox(
-        "123", scale,
-        factor * std::pow(static_cast<double>(10), static_cast<double>(-scale)));
-  }
-}
-
-TEST(TestDecimalToReal, ToDoublePrecision) {
-  // 2**63 + 2**11 (exactly representable in a double's 53 bits of precision)
-  CheckDecimalToReal<double>("9223372036854777856", 0, 9.223372036854778e+18);
-  CheckDecimalToReal<double>("-9223372036854777856", 0, -9.223372036854778e+18);
-  // 2**64 - 2**11 (exactly representable in a double)
-  CheckDecimalToReal<double>("18446744073709549568", 0, 1.844674407370955e+19);
-  CheckDecimalToReal<double>("-18446744073709549568", 0, -1.844674407370955e+19);
-  // 2**64 + 2**11 (exactly representable in a double)
-  CheckDecimalToReal<double>("18446744073709555712", 0, 1.8446744073709556e+19);
-  CheckDecimalToReal<double>("-18446744073709555712", 0, -1.8446744073709556e+19);
-
-  // Almost 10**38 (minus 2**73)
-  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 0,
-                             9.999999999999998e+37);
-  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 0,
-                             -9.999999999999998e+37);
-  CheckDecimalToReal<double>("99999999999999978859343891977453174784", 10,
-                             9.999999999999998e+27);
-  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", 10,
-                             -9.999999999999998e+27);
-  CheckDecimalToReal<double>("99999999999999978859343891977453174784", -10,
-                             9.999999999999998e+47);
-  CheckDecimalToReal<double>("-99999999999999978859343891977453174784", -10,
-                             -9.999999999999998e+47);
-
-  // Integers are always exact
-  auto scale = Decimal::kMaxScale - 1;
-  std::string seven = "7.";
-  seven.append(scale, '0');
-  CheckDecimalToReal<double>(seven, scale, 7.0);
-  CheckDecimalToReal<double>("-" + seven, scale, -7.0);
-
-  CheckDecimalToReal<double>("99999999999999999999.0000000000000000", 16,
-                             99999999999999999999.0);
-  CheckDecimalToReal<double>("-99999999999999999999.0000000000000000", 16,
-                             -99999999999999999999.0);
-
-  // Small fractions are within one ULP
-  CheckDecimalToRealWithinOneULP<double>("9999999.9", 1, 9999999.9);
-  CheckDecimalToRealWithinOneULP<double>("-9999999.9", 1, -9999999.9);
-
-  CheckDecimalToRealWithinOneULP<double>("9999999.999999999999999", 15,
-                                         9999999.999999999999999);
-  CheckDecimalToRealWithinOneULP<double>("-9999999.999999999999999", 15,
-                                         -9999999.999999999999999);
-  // Large fractions are within 2^-52
-  constexpr double epsilon = 2.220446049250313080847263336181640625e-16;  // 2^-52
-  CheckDecimalToRealWithinEpsilon<double>("112334829348925.99070703983306884765625", 23,
-                                          epsilon,
-                                          112334829348925.99070703983306884765625);
-  CheckDecimalToRealWithinEpsilon<double>("1.987748987892758765582589910934859345", 36,
-                                          epsilon,
-                                          1.987748987892758765582589910934859345);
-}
-
-#endif  // __MINGW32__
-
 TEST(DecimalTest, FromBigEndian) {
   // We test out a variety of scenarios:
   //
@@ -1132,42 +547,6 @@ TEST(DecimalTestFunctionality, Sign) {
   ASSERT_EQ(1, Decimal(0).Sign());
 }
 
-TEST(DecimalTestFunctionality, GetWholeAndFraction) {
-  Decimal value("123456");
-
-  auto check = [value](int32_t scale, std::pair<int32_t, int32_t> expected) {
-    int32_t out;
-    auto result = value.GetWholeAndFraction(scale);
-    ASSERT_THAT(result->first.ToInteger(&out), IsOk());
-    ASSERT_EQ(expected.first, out);
-    ASSERT_THAT(result->second.ToInteger(&out), IsOk());
-    ASSERT_EQ(expected.second, out);
-  };
-
-  check(0, {123456, 0});
-  check(1, {12345, 6});
-  check(5, {1, 23456});
-  check(7, {0, 123456});
-}
-
-TEST(DecimalTestFunctionality, GetWholeAndFractionNegative) {
-  Decimal value("-123456");
-
-  auto check = [value](int32_t scale, std::pair<int32_t, int32_t> expected) {
-    int32_t out;
-    auto result = value.GetWholeAndFraction(scale);
-    ASSERT_THAT(result->first.ToInteger(&out), IsOk());
-    ASSERT_EQ(expected.first, out);
-    ASSERT_THAT(result->second.ToInteger(&out), IsOk());
-    ASSERT_EQ(expected.second, out);
-  };
-
-  check(0, {-123456, 0});
-  check(1, {-12345, -6});
-  check(5, {-1, -23456});
-  check(7, {0, -123456});
-}
-
 TEST(DecimalTestFunctionality, FitsInPrecision) {
   ASSERT_TRUE(Decimal("0").FitsInPrecision(1));
   ASSERT_TRUE(Decimal("9").FitsInPrecision(1));

From d1fe1863722baa49279a1086d70569b2694ded92 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Wed, 3 Sep 2025 23:41:35 +0800
Subject: [PATCH 18/20] chore: use uint128 for FromBigEndian

---
 src/iceberg/util/decimal.cc | 126 ++++++------------------------------
 1 file changed, 18 insertions(+), 108 deletions(-)

diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index cb10d3f37..ba1ca6c32 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -48,14 +48,6 @@ namespace iceberg {
 
 namespace {
 
-// Signed left shift with well-defined behaviour on negative numbers or overflow
-template <typename SignedInt, typename Shift>
-  requires std::is_signed_v<SignedInt> && std::is_integral_v<Shift>
-constexpr SignedInt SafeLeftShift(SignedInt u, Shift bits) {
-  using UnsignedInt = std::make_unsigned_t<SignedInt>;
-  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << bits);
-}
-
 struct DecimalComponents {
   std::string_view while_digits;
   std::string_view fractional_digits;
@@ -205,7 +197,9 @@ inline void ShiftAndAdd(std::string_view input, uint128_t& out) {
     const uint64_t multiple = kUInt64PowersOfTen[group_size];
     uint64_t value = 0;
 
-    std::from_chars(input.data() + pos, input.data() + pos + group_size, value);
+    auto [_, ec] =
+        std::from_chars(input.data() + pos, input.data() + pos + group_size, value);
+    ICEBERG_DCHECK(ec == std::errc(), "Failed to parse digits in ShiftAndAdd");
 
     out = out * multiple + value;
     pos += group_size;
@@ -471,73 +465,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
 
 namespace {
 
-constexpr float kFloatInf = std::numeric_limits<float>::infinity();
-
-// Attention: these pre-computed constants might not exactly represent their
-// decimal counterparts:
-//   >>> int32_t(1e38)
-//   99999999999999997748809823456034029568
-
-constexpr int32_t kPrecomputedPowersOfTen = 76;
-
-constexpr std::array<float, 2 * kPrecomputedPowersOfTen + 1> kFloatPowersOfTen = {
-    0,         0,         0,         0,         0,         0,         0,
-    0,         0,         0,         0,         0,         0,         0,
-    0,         0,         0,         0,         0,         0,         0,
-    0,         0,         0,         0,         0,         0,         0,
-    0,         0,         0,         1e-45f,    1e-44f,    1e-43f,    1e-42f,
-    1e-41f,    1e-40f,    1e-39f,    1e-38f,    1e-37f,    1e-36f,    1e-35f,
-    1e-34f,    1e-33f,    1e-32f,    1e-31f,    1e-30f,    1e-29f,    1e-28f,
-    1e-27f,    1e-26f,    1e-25f,    1e-24f,    1e-23f,    1e-22f,    1e-21f,
-    1e-20f,    1e-19f,    1e-18f,    1e-17f,    1e-16f,    1e-15f,    1e-14f,
-    1e-13f,    1e-12f,    1e-11f,    1e-10f,    1e-9f,     1e-8f,     1e-7f,
-    1e-6f,     1e-5f,     1e-4f,     1e-3f,     1e-2f,     1e-1f,     1e0f,
-    1e1f,      1e2f,      1e3f,      1e4f,      1e5f,      1e6f,      1e7f,
-    1e8f,      1e9f,      1e10f,     1e11f,     1e12f,     1e13f,     1e14f,
-    1e15f,     1e16f,     1e17f,     1e18f,     1e19f,     1e20f,     1e21f,
-    1e22f,     1e23f,     1e24f,     1e25f,     1e26f,     1e27f,     1e28f,
-    1e29f,     1e30f,     1e31f,     1e32f,     1e33f,     1e34f,     1e35f,
-    1e36f,     1e37f,     1e38f,     kFloatInf, kFloatInf, kFloatInf, kFloatInf,
-    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
-    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
-    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
-    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf,
-    kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf, kFloatInf};
-
-constexpr std::array<double, 2 * kPrecomputedPowersOfTen + 1> kDoublePowersOfTen = {
-    1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65,
-    1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53,
-    1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
-    1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29,
-    1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
-    1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,  1e-5,
-    1e-4,  1e-3,  1e-2,  1e-1,  1e0,   1e1,   1e2,   1e3,   1e4,   1e5,   1e6,   1e7,
-    1e8,   1e9,   1e10,  1e11,  1e12,  1e13,  1e14,  1e15,  1e16,  1e17,  1e18,  1e19,
-    1e20,  1e21,  1e22,  1e23,  1e24,  1e25,  1e26,  1e27,  1e28,  1e29,  1e30,  1e31,
-    1e32,  1e33,  1e34,  1e35,  1e36,  1e37,  1e38,  1e39,  1e40,  1e41,  1e42,  1e43,
-    1e44,  1e45,  1e46,  1e47,  1e48,  1e49,  1e50,  1e51,  1e52,  1e53,  1e54,  1e55,
-    1e56,  1e57,  1e58,  1e59,  1e60,  1e61,  1e62,  1e63,  1e64,  1e65,  1e66,  1e67,
-    1e68,  1e69,  1e70,  1e71,  1e72,  1e73,  1e74,  1e75,  1e76};
-
-// Helper function used by Decimal::FromBigEndian
-static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length) {
-  // We don't bounds check the length here because this is called by
-  // FromBigEndian that has a Decimal128 as its out parameters and
-  // that function is already checking the length of the bytes and only
-  // passes lengths between zero and eight.
-  uint64_t result = 0;
-  // Using memcpy instead of special casing for length
-  // and doing the conversion in 16, 32 parts, which could
-  // possibly create unaligned memory access on certain platforms
-  std::memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
-
-  if constexpr (std::endian::native == std::endian::little) {
-    return std::byteswap(result);
-  } else {
-    return result;
-  }
-}
-
 static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
                                       const Decimal& multiplier, Decimal* result) {
   if (delta_scale < 0) {
@@ -569,41 +496,24 @@ Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
   // sign bit.
   const bool is_negative = static_cast<int8_t>(bytes[0]) < 0;
 
-  // 1. Extract the high bytes
-  // Stop byte of the high bytes
-  const int32_t high_bits_offset = std::max(0, length - 8);
-  const auto high_bits = UInt64FromBigEndian(bytes, high_bits_offset);
+  uint128_t result = 0;
+  std::memcpy(reinterpret_cast<uint8_t*>(&result) + 16 - length, bytes, length);
 
-  if (high_bits_offset == 8) {
-    // Avoid undefined shift by 64 below
-    high = high_bits;
-  } else {
-    high = -1 * (is_negative && length < kMaxDecimalBytes);
-    // Shift left enough bits to make room for the incoming int64_t
-    high = SafeLeftShift(high, high_bits_offset * CHAR_BIT);
-    // Preserve the upper bits by inplace OR-ing the int64_t
-    high |= high_bits;
-  }
-
-  // 2. Extract the low bytes
-  // Stop byte of the low bytes
-  const int32_t low_bits_offset = std::min(length, 8);
-  const auto low_bits =
-      UInt64FromBigEndian(bytes + high_bits_offset, length - high_bits_offset);
-
-  if (low_bits_offset == 8) {
-    // Avoid undefined shift by 64 below
-    low = low_bits;
-  } else {
-    // Sign extend the low bits if necessary
-    low = -1 * (is_negative && length < 8);
-    // Shift left enough bits to make room for the incoming int64_t
-    low = SafeLeftShift(low, low_bits_offset * CHAR_BIT);
-    // Preserve the upper bits by inplace OR-ing the int64_t
-    low |= low_bits;
+  if constexpr (std::endian::native == std::endian::little) {
+    auto high = static_cast<uint64_t>(result >> 64);
+    auto low = static_cast<uint64_t>(result);
+    high = std::byteswap(high);
+    low = std::byteswap(low);
+    // also need to swap the two halves
+    result = (static_cast<uint128_t>(low) << 64) | high;
+  }
+
+  if (is_negative && length < kMaxDecimalBytes) {
+    // Sign extend the high bits
+    result |= (static_cast<uint128_t>(-1) << (length * CHAR_BIT));
   }
 
-  return Decimal(high, static_cast<uint64_t>(low));
+  return Decimal(static_cast<int128_t>(result));
 }
 
 Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {

From cb22a3c965ec501997bcdd794850a67a2f777a85 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Thu, 4 Sep 2025 07:31:16 +0800
Subject: [PATCH 19/20] chore: fix minor issue

---
 src/iceberg/inheritable_metadata.cc |  1 -
 src/iceberg/util/decimal.cc         | 31 ++++++++++++-----------------
 2 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc
index ae0920873..58eb28345 100644
--- a/src/iceberg/inheritable_metadata.cc
+++ b/src/iceberg/inheritable_metadata.cc
@@ -19,7 +19,6 @@
 
 #include "iceberg/inheritable_metadata.h"
 
-#include <cassert>
 #include <utility>
 
 #include <iceberg/result.h>
diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index ba1ca6c32..33e7ecdff 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -26,7 +26,6 @@
 
 #include <array>
 #include <bit>
-#include <cassert>
 #include <charconv>
 #include <climits>
 #include <cmath>
@@ -270,6 +269,19 @@ void AdjustIntegerStringWithScale(int32_t scale, std::string* str) {
   str->at(is_negative_offset + 1) = '.';
 }
 
+bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
+                               const Decimal& multiplier, Decimal* result) {
+  if (delta_scale < 0) {
+    auto res = value.Divide(multiplier);
+    ICEBERG_DCHECK(res, "Decimal::Divide failed");
+    *result = res->first;
+    return res->second != 0;
+  }
+
+  *result = value * multiplier;
+  return (value < 0) ? *result > value : *result < value;
+}
+
 }  // namespace
 
 Decimal::Decimal(std::string_view str) {
@@ -463,23 +475,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
   return result;
 }
 
-namespace {
-
-static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
-                                      const Decimal& multiplier, Decimal* result) {
-  if (delta_scale < 0) {
-    auto res = value.Divide(multiplier);
-    ICEBERG_DCHECK(res, "Decimal::Divide failed");
-    *result = res->first;
-    return res->second != 0;
-  }
-
-  *result = value * multiplier;
-  return (value < 0) ? *result > value : *result < value;
-}
-
-}  // namespace
-
 Result<Decimal> Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) {
   static constexpr int32_t kMinDecimalBytes = 1;
   static constexpr int32_t kMaxDecimalBytes = 16;

From 081ef1f059f551a9a08c68bf282a64b190823b67 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Sun, 7 Sep 2025 19:32:32 +0800
Subject: [PATCH 20/20] fix: review comments

---
 src/iceberg/util/decimal.cc | 10 +++-------
 src/iceberg/util/decimal.h  | 10 +++++-----
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
index 33e7ecdff..606bf2fdb 100644
--- a/src/iceberg/util/decimal.cc
+++ b/src/iceberg/util/decimal.cc
@@ -24,21 +24,18 @@
 
 #include "iceberg/util/decimal.h"
 
-#include <array>
 #include <bit>
 #include <charconv>
 #include <climits>
 #include <cmath>
-#include <cstdint>
 #include <cstring>
 #include <format>
 #include <iomanip>
 #include <limits>
 #include <sstream>
-#include <string>
-#include <string_view>
 #include <utility>
 
+#include "iceberg/exception.h"
 #include "iceberg/result.h"
 #include "iceberg/util/int128.h"
 #include "iceberg/util/macros.h"
@@ -287,9 +284,8 @@ bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
 Decimal::Decimal(std::string_view str) {
   auto result = Decimal::FromString(str);
   if (!result) {
-    throw std::runtime_error(
-        std::format("Failed to parse Decimal from string: {}, error: {}", str,
-                    result.error().message));
+    throw IcebergError(std::format("Failed to parse Decimal from string: {}, error: {}",
+                                   str, result.error().message));
   }
   *this = std::move(result.value());
 }
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
index 695bbbbf1..f118bde62 100644
--- a/src/iceberg/util/decimal.h
+++ b/src/iceberg/util/decimal.h
@@ -52,18 +52,18 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
   constexpr Decimal() noexcept = default;
 
   /// \brief Create a Decimal from a 128-bit integer.
-  constexpr Decimal(int128_t value) noexcept  // NOLINT(google-explicit-constructor)
+  constexpr Decimal(int128_t value) noexcept  // NOLINT implicit conversion
       : data_(value) {}
 
   /// \brief Create a Decimal from any integer not wider than 64 bits.
   template <typename T>
     requires(std::is_integral_v<T> && (sizeof(T) <= sizeof(uint64_t)))
-  constexpr Decimal(T value) noexcept  // NOLINT(google-explicit-constructor)
-  {
-    data_ = static_cast<int128_t>(value);
-  }
+  constexpr Decimal(T value) noexcept  // NOLINT implicit conversion
+      : data_(static_cast<int128_t>(value)) {}
 
   /// \brief Parse a Decimal from a string representation.
+  /// \throw This constructor throws an exception if parsing fails. Use
+  /// Decimal::FromString() if you want to handle errors more gracefully.
   explicit Decimal(std::string_view str);
 
   /// \brief Create a Decimal from two 64-bit integers.