fix: address review comments (move decimal to util/, use std::endian, add operator<=>)

zhjwpku · zhjwpku · commit a77eb55e20e8 · 2025-08-27T21:24:51.000+08:00
diff --git a/LICENSE b/LICENSE
@@ -256,11 +256,11 @@ License: https://www.apache.org/licenses/LICENSE-2.0
 
 --------------------------------------------------------------------------------
 
-The file src/iceberg/expression/decimal.h contains code adapted from
+The file src/iceberg/util/decimal.h contains code adapted from
 
 https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
 
-The file src/iceberg/expression/decimal.cc contains code adapted from
+The file src/iceberg/util/decimal.cc contains code adapted from
 
 https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
 
diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt
@@ -20,7 +20,6 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
 set(ICEBERG_SOURCES
     arrow_c_data_internal.cc
     catalog/in_memory_catalog.cc
-    expression/decimal.cc
     expression/expression.cc
     expression/literal.cc
     file_reader.cc
@@ -48,6 +47,7 @@ set(ICEBERG_SOURCES
     manifest_reader.cc
     manifest_reader_internal.cc
     arrow_c_data_guard_internal.cc
+    util/decimal.cc
     util/murmurhash3_internal.cc
     util/timepoint.cc
     util/gzip_internal.cc)
diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc
@@ -17,12 +17,12 @@
  * under the License.
  */
 
-/// \file iceberg/expression/decimal.cc
+/// \file iceberg/util/decimal.cc
 /// \brief 128-bit fixed-point decimal numbers.
 /// Adapted from Apache Arrow with only Decimal128 support.
 /// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc
 
-#include "iceberg/expression/decimal.h"
+#include "iceberg/util/decimal.h"
 
 #include <algorithm>
 #include <array>
@@ -44,6 +44,7 @@
 #include <string_view>
 
 #include "iceberg/result.h"
+#include "iceberg/util/int128.h"
 #include "iceberg/util/macros.h"
 
 namespace iceberg {
@@ -314,7 +315,7 @@ static inline Status DecimalDivide(const Decimal& dividend, const Decimal& divis
     uint32_t prev = dividend_array[i];
     dividend_array[i] -= static_cast<uint32_t>(mult);
 
-    // if guess was too big,  we add back divisor
+    // if guess was too big, we add back divisor
     if (dividend_array[i] > prev) {
       guess--;
       uint32_t carry = 0;
@@ -1232,11 +1233,12 @@ static inline uint64_t UInt64FromBigEndian(const uint8_t* bytes, int32_t length)
   // and doing the conversion in 16, 32 parts, which could
   // possibly create unaligned memory access on certain platforms
   memcpy(reinterpret_cast<uint8_t*>(&result) + 8 - length, bytes, length);
-#if ICEBERG_LITTLE_ENDIAN
-  return std::byteswap(result);
-#else
-  return result;
-#endif
+
+  if constexpr (std::endian::native == std::endian::little) {
+    return std::byteswap(result);
+  } else {
+    return result;
+  }
 }
 
 static bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale,
@@ -1420,20 +1422,4 @@ ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
   return lhs.Divide(rhs).value().second;
 }
 
-ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs) {
-  return (lhs.high() < rhs.high()) || (lhs.high() == rhs.high() && lhs.low() < rhs.low());
-}
-
-ICEBERG_EXPORT bool operator<=(const Decimal& lhs, const Decimal& rhs) {
-  return !operator>(lhs, rhs);
-}
-
-ICEBERG_EXPORT bool operator>(const Decimal& lhs, const Decimal& rhs) {
-  return operator<(rhs, lhs);
-}
-
-ICEBERG_EXPORT bool operator>=(const Decimal& lhs, const Decimal& rhs) {
-  return !operator<(lhs, rhs);
-}
-
 }  // namespace iceberg
diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h
@@ -19,22 +19,24 @@
 
 #pragma once
 
-/// \file iceberg/expression/decimal.h
+/// \file iceberg/util/decimal.h
 /// \brief 128-bit fixed-point decimal numbers.
 /// Adapted from Apache Arrow with only Decimal128 support.
 /// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
 
 #include <array>
+#include <bit>
 #include <cstdint>
 #include <iosfwd>
 #include <string>
 #include <string_view>
 #include <type_traits>
 
+#include <iceberg/type.h>
+
 #include "iceberg/iceberg_export.h"
 #include "iceberg/result.h"
 #include "iceberg/util/macros.h"
-#include "iceberg/util/port.h"
 
 namespace iceberg {
 
@@ -60,24 +62,24 @@ class ICEBERG_EXPORT Decimal {
   constexpr Decimal(T value) noexcept  // NOLINT
   {
     if (value < T{}) {
-      data_[kHighIndex] = ~static_cast<uint64_t>(0);
+      data_[highIndex()] = ~static_cast<uint64_t>(0);
     } else {
-      data_[kHighIndex] = 0;
+      data_[highIndex()] = 0;
     }
-    data_[kLowIndex] = static_cast<uint64_t>(value);
+    data_[lowIndex()] = static_cast<uint64_t>(value);
   }
 
   /// \brief Parse a Decimal from a string representation.
   explicit Decimal(std::string_view str);
 
-/// \brief Create a Decimal from two 64-bit integers.
-#if ICEBERG_LITTLE_ENDIAN
-  constexpr Decimal(int64_t high, uint64_t low) noexcept
-      : data_{low, static_cast<uint64_t>(high)} {}
-#else
-  constexpr Decimal(int64_t high, uint64_t low) noexcept
-      : data_{static_cast<uint64_t>(high), low} {}
-#endif
+  /// \brief Create a Decimal from its signed high and unsigned low 64-bit words.
+  constexpr Decimal(int64_t high, uint64_t low) noexcept {
+    if constexpr (std::endian::native != std::endian::little) {
+      data_ = {static_cast<uint64_t>(high), low};  // high word stored first
+    } else {
+      data_ = {low, static_cast<uint64_t>(high)};  // low word stored first
+    }
+  }
 
   /// \brief Negate the current Decimal value (in place)
   Decimal& Negate();
@@ -139,10 +141,10 @@ class ICEBERG_EXPORT Decimal {
   }
 
   /// \brief Get the high bits of the two's complement representation of the number.
-  constexpr int64_t high() const { return static_cast<int64_t>(data_[kHighIndex]); }
+  constexpr int64_t high() const { return static_cast<int64_t>(data_[highIndex()]); }
 
   /// \brief Get the low bits of the two's complement representation of the number.
-  constexpr uint64_t low() const { return data_[kLowIndex]; }
+  constexpr uint64_t low() const { return data_[lowIndex()]; }
 
   /// \brief Convert the Decimal value to a base 10 decimal string with the given scale.
   /// \param scale The scale to use for the string representation.
@@ -178,6 +180,14 @@ class ICEBERG_EXPORT Decimal {
   /// Returns true if the number of significant digits is less or equal to `precision`.
   bool FitsInPrecision(int32_t precision) const;
 
+  /// \brief Three-way comparison of two Decimal values.
+  ///
+  /// The signed high word decides; the unsigned low word breaks ties.
+  constexpr std::strong_ordering operator<=>(const Decimal& other) const noexcept {
+    const auto high_order = high() <=> other.high();
+    return high_order != 0 ? high_order : low() <=> other.low();
+  }
+
   /// \brief Convert to a signed integer
   template <typename T>
     requires std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>
@@ -226,10 +236,10 @@ class ICEBERG_EXPORT Decimal {
   std::array<uint8_t, kByteWidth> ToBytes() const;
 
   /// \brief Returns 1 if positive or zero, -1 if strictly negative.
-  int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[kHighIndex]) >> 63); }
+  int64_t Sign() const { return 1 | (static_cast<int64_t>(data_[highIndex()]) >> 63); }
 
   /// \brief Check if the Decimal value is negative.
-  bool IsNegative() const { return static_cast<int64_t>(data_[kHighIndex]) < 0; }
+  bool IsNegative() const { return static_cast<int64_t>(data_[highIndex()]) < 0; }
 
   explicit operator bool() const { return data_ != std::array<uint64_t, 2>{0, 0}; }
 
@@ -242,13 +252,21 @@ class ICEBERG_EXPORT Decimal {
   }
 
  private:
-#if ICEBERG_LITTLE_ENDIAN
-  static constexpr int32_t kHighIndex = 1;
-  static constexpr int32_t kLowIndex = 0;
-#else
-  static constexpr int32_t kHighIndex = 0;
-  static constexpr int32_t kLowIndex = 1;
-#endif
+  /// \brief Position of the most significant 64-bit word inside data_.
+  ///
+  /// The word order follows the host byte order: the high word is data_[1] on
+  /// little-endian hosts and data_[0] otherwise.
+  static constexpr int32_t highIndex() {
+    return std::endian::native == std::endian::little ? 1 : 0;
+  }
+
+  /// \brief Position of the least significant 64-bit word inside data_.
+  ///
+  /// The word order follows the host byte order: the low word is data_[0] on
+  /// little-endian hosts and data_[1] otherwise.
+  static constexpr int32_t lowIndex() {
+    return std::endian::native == std::endian::little ? 0 : 1;
+  }
 
   std::array<uint64_t, 2> data_;
 };
@@ -259,12 +277,6 @@ ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal
 ICEBERG_EXPORT Decimal operator-(const Decimal& operand);
 ICEBERG_EXPORT Decimal operator~(const Decimal& operand);
 
-// Binary operators
-ICEBERG_EXPORT bool operator<=(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT bool operator>=(const Decimal& lhs, const Decimal& rhs);
-ICEBERG_EXPORT bool operator>(const Decimal& lhs, const Decimal& rhs);
-
 ICEBERG_EXPORT Decimal operator+(const Decimal& lhs, const Decimal& rhs);
 ICEBERG_EXPORT Decimal operator-(const Decimal& lhs, const Decimal& rhs);
 ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs);
diff --git a/src/iceberg/util/int128.h b/src/iceberg/util/int128.h
@@ -17,22 +17,11 @@
  * under the License.
  */
 
-/// \file iceberg/util/port.h
-/// \brief Portability macros and definitions for Iceberg C++ library
+/// \file iceberg/util/int128.h
+/// \brief 128-bit integer type
 
 #pragma once
 
-#if defined(_WIN32) /* Windows is always little endian */ \
-    || defined(__LITTLE_ENDIAN__) ||                      \
-    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#  define ICEBERG_LITTLE_ENDIAN 1
-#elif defined(__BIG_ENDIAN__) || \
-    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#  define ICEBERG_LITTLE_ENDIAN 0
-#else
-#  error "Unsupported or unknown endianness"
-#endif
-
 #if defined(_MSC_VER)
 #  include <__msvc_int128.hpp>
 using int128_t = std::_Signed128;
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
@@ -16,10 +16,11 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-#include "iceberg/expression/decimal.h"
+#include "iceberg/util/decimal.h"
 
 #include <algorithm>
 #include <array>
+#include <bit>
 #include <cmath>
 #include <cstdint>
 #include <vector>
@@ -28,7 +29,7 @@
 #include <gtest/gtest.h>
 #include <sys/types.h>
 
-#include "iceberg/util/port.h"
+#include "iceberg/util/int128.h"
 #include "matchers.h"
 
 namespace iceberg {
@@ -1030,9 +1031,10 @@ TEST(DecimalTest, FromBigEndian) {
     Decimal value(start);
     for (int ii = 0; ii < Decimal::kByteWidth; ++ii) {
       auto native_endian = value.ToBytes();
-#if ICEBERG_LITTLE_ENDIAN
-      std::ranges::reverse(native_endian);
-#endif
+      if constexpr (std::endian::native == std::endian::little) {
+        // convert to big endian
+        std::ranges::reverse(native_endian);
+      }
       // Limit the number of bytes we are passing to make
       // sure that it works correctly. That's why all of the
       // 'start' values don't have a 1 in the most significant
@@ -1047,10 +1049,11 @@ TEST(DecimalTest, FromBigEndian) {
       auto negated = -value;
       native_endian = negated.ToBytes();
 
-#if ICEBERG_LITTLE_ENDIAN
-      // convert to big endian
-      std::ranges::reverse(native_endian);
-#endif
+      if constexpr (std::endian::native == std::endian::little) {
+        // convert to big endian
+        std::ranges::reverse(native_endian);
+      }
+
       result = Decimal::FromBigEndian(native_endian.data() + WidthMinusOne - ii, ii + 1);
       ASSERT_THAT(result, IsOk());
       const Decimal& negated_decimal = result.value();
@@ -1060,10 +1063,10 @@ TEST(DecimalTest, FromBigEndian) {
       auto complement = ~value;
       native_endian = complement.ToBytes();
 
-#if ICEBERG_LITTLE_ENDIAN
-      // convert to big endian
-      std::ranges::reverse(native_endian);
-#endif
+      if constexpr (std::endian::native == std::endian::little) {
+        // convert to big endian
+        std::ranges::reverse(native_endian);
+      }
       result = Decimal::FromBigEndian(native_endian.data(), Decimal::kByteWidth);
       ASSERT_THAT(result, IsOk());
       const Decimal& complement_decimal = result.value();