diff --git a/LICENSE b/LICENSE index f97d28dfe..d8ddd9575 100644 --- a/LICENSE +++ b/LICENSE @@ -250,6 +250,14 @@ The file src/iceberg/util/visit_type.h contains code adapted from https://github.com/apache/arrow/blob/main/cpp/src/arrow/visit_type_inline.h +The file src/iceberg/util/decimal.h contains code adapted from + +https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h + +The file src/iceberg/util/decimal.cc contains code adapted from + +https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc + Copyright: 2016-2025 The Apache Software Foundation. Home page: https://arrow.apache.org/ License: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index ae282a3af..07070d5d5 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -49,6 +49,7 @@ set(ICEBERG_SOURCES manifest_reader_internal.cc manifest_writer.cc arrow_c_data_guard_internal.cc + util/decimal.cc util/murmurhash3_internal.cc util/timepoint.cc util/gzip_internal.cc) diff --git a/src/iceberg/inheritable_metadata.cc b/src/iceberg/inheritable_metadata.cc index ae0920873..58eb28345 100644 --- a/src/iceberg/inheritable_metadata.cc +++ b/src/iceberg/inheritable_metadata.cc @@ -19,7 +19,6 @@ #include "iceberg/inheritable_metadata.h" -#include #include #include diff --git a/src/iceberg/result.h b/src/iceberg/result.h index d1aa4cedd..79dd52b93 100644 --- a/src/iceberg/result.h +++ b/src/iceberg/result.h @@ -32,6 +32,7 @@ enum class ErrorKind { kAlreadyExists, kCommitStateUnknown, kDecompressError, + kInvalid, // For general invalid errors kInvalidArgument, kInvalidArrowData, kInvalidExpression, @@ -79,6 +80,7 @@ using Status = Result; DEFINE_ERROR_FUNCTION(AlreadyExists) DEFINE_ERROR_FUNCTION(CommitStateUnknown) DEFINE_ERROR_FUNCTION(DecompressError) +DEFINE_ERROR_FUNCTION(Invalid) DEFINE_ERROR_FUNCTION(InvalidArgument) DEFINE_ERROR_FUNCTION(InvalidArrowData) DEFINE_ERROR_FUNCTION(InvalidExpression) diff --git a/src/iceberg/util/decimal.cc b/src/iceberg/util/decimal.cc new file mode 100644 index 000000000..606bf2fdb --- /dev/null +++ b/src/iceberg/util/decimal.cc @@ -0,0 +1,585 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/// \file iceberg/util/decimal.cc +/// \brief 128-bit fixed-point decimal numbers. +/// Adapted from Apache Arrow with only Decimal128 support. +/// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.cc + +#include "iceberg/util/decimal.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iceberg/exception.h" +#include "iceberg/result.h" +#include "iceberg/util/int128.h" +#include "iceberg/util/macros.h" + +namespace iceberg { + +namespace { + +struct DecimalComponents { + std::string_view while_digits; + std::string_view fractional_digits; + int32_t exponent{0}; + char sign{0}; + bool has_exponent{false}; +}; + +inline bool IsSign(char c) { return c == '+' || c == '-'; } + +inline bool IsDigit(char c) { return c >= '0' && c <= '9'; } + +inline bool IsDot(char c) { return c == '.'; } + +inline bool StartsExponent(char c) { return c == 'e' || c == 'E'; } + +inline size_t ParseDigitsRun(std::string_view str, size_t pos, std::string_view* out) { + size_t start = pos; + while (pos < str.size() && IsDigit(str[pos])) { + ++pos; + } + *out = str.substr(start, pos - start); + return pos; +} + +bool ParseDecimalComponents(std::string_view str, DecimalComponents* out) { + size_t pos = 0; + + if (str.empty()) { + return false; + } + + // Sign of the number + if (IsSign(str[pos])) { + out->sign = str[pos++]; + } + // First run of digits + pos = ParseDigitsRun(str, pos, &out->while_digits); + if (pos == str.size()) { + return !out->while_digits.empty(); + } + + // Optional dot + if (IsDot(str[pos])) { + // Second run of digits after the dot + pos = ParseDigitsRun(str, ++pos, &out->fractional_digits); + } + if (out->fractional_digits.empty() && out->while_digits.empty()) { + // Need at least some digits (whole or fractional) + return false; + } + if (pos == str.size()) { + return true; + } + + // Optional exponent part + if (StartsExponent(str[pos])) { + ++pos; + // Skip '+' sign, '-' sign will be handled by from_chars + if (pos < str.size() && str[pos] == '+') { + ++pos; + } + out->has_exponent = true; + auto [ptr, ec] = + std::from_chars(str.data() + pos, str.data() + str.size(), out->exponent); + if (ec != std::errc()) { + return false; // Failed to parse exponent + } + pos = ptr - str.data(); + } + + return pos == str.size(); +} + +constexpr auto kInt64DecimalDigits = + static_cast(std::numeric_limits::digits10); + +constexpr std::array kUInt64PowersOfTen = { + // clang-format off + 1ULL, + 10ULL, + 100ULL, + 1000ULL, + 10000ULL, + 100000ULL, + 1000000ULL, + 10000000ULL, + 100000000ULL, + 1000000000ULL, + 10000000000ULL, + 100000000000ULL, + 1000000000000ULL, + 10000000000000ULL, + 100000000000000ULL, + 1000000000000000ULL, + 10000000000000000ULL, + 100000000000000000ULL, + 1000000000000000000ULL + // clang-format on +}; + +/// \brief Powers of ten for Decimal with scale from 0 to 38. +constexpr std::array kDecimal128PowersOfTen = { + Decimal(1LL), + Decimal(10LL), + Decimal(100LL), + Decimal(1000LL), + Decimal(10000LL), + Decimal(100000LL), + Decimal(1000000LL), + Decimal(10000000LL), + Decimal(100000000LL), + Decimal(1000000000LL), + Decimal(10000000000LL), + Decimal(100000000000LL), + Decimal(1000000000000LL), + Decimal(10000000000000LL), + Decimal(100000000000000LL), + Decimal(1000000000000000LL), + Decimal(10000000000000000LL), + Decimal(100000000000000000LL), + Decimal(1000000000000000000LL), + Decimal(0LL, 10000000000000000000ULL), + Decimal(5LL, 7766279631452241920ULL), + Decimal(54LL, 3875820019684212736ULL), + Decimal(542LL, 1864712049423024128ULL), + Decimal(5421LL, 200376420520689664ULL), + Decimal(54210LL, 2003764205206896640ULL), + Decimal(542101LL, 1590897978359414784ULL), + Decimal(5421010LL, 15908979783594147840ULL), + Decimal(54210108LL, 11515845246265065472ULL), + Decimal(542101086LL, 4477988020393345024ULL), + Decimal(5421010862LL, 7886392056514347008ULL), + Decimal(54210108624LL, 5076944270305263616ULL), + Decimal(542101086242LL, 13875954555633532928ULL), + Decimal(5421010862427LL, 9632337040368467968ULL), + Decimal(54210108624275LL, 4089650035136921600ULL), + Decimal(542101086242752LL, 4003012203950112768ULL), + Decimal(5421010862427522LL, 3136633892082024448ULL), + Decimal(54210108624275221LL, 12919594847110692864ULL), + Decimal(542101086242752217LL, 68739955140067328ULL), + Decimal(5421010862427522170LL, 687399551400673280ULL)}; + +inline void ShiftAndAdd(std::string_view input, uint128_t& out) { + for (size_t pos = 0; pos < input.size();) { + const size_t group_size = std::min(kInt64DecimalDigits, input.size() - pos); + const uint64_t multiple = kUInt64PowersOfTen[group_size]; + uint64_t value = 0; + + auto [_, ec] = + std::from_chars(input.data() + pos, input.data() + pos + group_size, value); + ICEBERG_DCHECK(ec == std::errc(), "Failed to parse digits in ShiftAndAdd"); + + out = out * multiple + value; + pos += group_size; + } +} + +void AdjustIntegerStringWithScale(int32_t scale, std::string* str) { + if (scale == 0) { + return; + } + ICEBERG_DCHECK(str != nullptr && !str->empty(), "str must not be null or empty"); + const bool is_negative = str->front() == '-'; + const auto is_negative_offset = static_cast(is_negative); + const auto len = static_cast(str->size()); + const int32_t num_digits = len - is_negative_offset; + const int32_t adjusted_exponent = num_digits - 1 - scale; + + // Note that the -6 is taken from the Java BigDecimal documentation. + if (scale < 0 || adjusted_exponent < -6) { + // Example 1: + // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2, + // adjusted_exponent = 4 + // After inserting decimal point: *str = "1.23" + // After appending exponent: *str = "1.23E+4" + // Example 2: + // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9, + // adjusted_exponent = -7 + // After inserting decimal point: *str = "-1.23" + // After appending exponent: *str = "-1.23E-7" + // Example 3: + // Precondition: *str = "0", is_negative_offset = 0, num_digits = 1, scale = -1, + // adjusted_exponent = 1 + // After inserting decimal point: *str = "0" // Not inserted + // After appending exponent: *str = "0E+1" + if (num_digits > 1) { + str->insert(str->begin() + 1 + is_negative_offset, '.'); + } + str->push_back('E'); + if (adjusted_exponent >= 0) { + str->push_back('+'); + } + // Append the adjusted exponent as a string. + str->append(std::to_string(adjusted_exponent)); + return; + } + + if (num_digits > scale) { + const auto n = static_cast(len - scale); + // Example 1: + // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2 + // After inserting decimal point: *str = "12.3" + // Example 2: + // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3 + // After inserting decimal point: *str = "-12.3" + str->insert(str->begin() + n, '.'); + return; + } + + // Example 1: + // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4 + // After insert: *str = "000123" + // After setting decimal point: *str = "0.0123" + // Example 2: + // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4 + // After insert: *str = "-000123" + // After setting decimal point: *str = "-0.0123" + str->insert(is_negative_offset, scale - num_digits + 2, '0'); + str->at(is_negative_offset + 1) = '.'; +} + +bool RescaleWouldCauseDataLoss(const Decimal& value, int32_t delta_scale, + const Decimal& multiplier, Decimal* result) { + if (delta_scale < 0) { + auto res = value.Divide(multiplier); + ICEBERG_DCHECK(res, "Decimal::Divide failed"); + *result = res->first; + return res->second != 0; + } + + *result = value * multiplier; + return (value < 0) ? *result > value : *result < value; +} + +} // namespace + +Decimal::Decimal(std::string_view str) { + auto result = Decimal::FromString(str); + if (!result) { + throw IcebergError(std::format("Failed to parse Decimal from string: {}, error: {}", + str, result.error().message)); + } + *this = std::move(result.value()); +} + +Decimal& Decimal::Negate() { + uint128_t u = ~static_cast(data_) + 1; + data_ = static_cast(u); + return *this; +} + +Decimal& Decimal::Abs() { return *this < 0 ? Negate() : *this; } + +Decimal Decimal::Abs(const Decimal& value) { + Decimal result(value); + return result.Abs(); +} + +Decimal& Decimal::operator+=(const Decimal& other) { + data_ += other.data_; + return *this; +} + +Decimal& Decimal::operator-=(const Decimal& other) { + data_ -= other.data_; + return *this; +} + +Decimal& Decimal::operator*=(const Decimal& other) { + data_ *= other.data_; + return *this; +} + +Result> Decimal::Divide(const Decimal& divisor) const { + std::pair result; + if (divisor == 0) { + return Invalid("Cannot divide by zero in Decimal::Divide"); + } + return std::make_pair(*this / divisor, *this % divisor); +} + +Decimal& Decimal::operator/=(const Decimal& other) { + data_ /= other.data_; + return *this; +} + +Decimal& Decimal::operator|=(const Decimal& other) { + data_ |= other.data_; + return *this; +} + +Decimal& Decimal::operator&=(const Decimal& other) { + data_ &= other.data_; + return *this; +} + +Decimal& Decimal::operator<<=(uint32_t bits) { + if (bits != 0) { + data_ = static_cast(static_cast(data_) << bits); + } + + return *this; +} + +Decimal& Decimal::operator>>=(uint32_t bits) { + if (bits != 0) { + data_ >>= bits; + } + + return *this; +} + +Result Decimal::ToString(int32_t scale) const { + if (scale < -kMaxScale || scale > kMaxScale) { + return InvalidArgument( + "Decimal::ToString: scale must be in the range [-{}, {}], was {}", kMaxScale, + kMaxScale, scale); + } + std::string str(ToIntegerString()); + AdjustIntegerStringWithScale(scale, &str); + return str; +} + +std::string Decimal::ToIntegerString() const { + if (data_ == 0) { + return "0"; + } + + bool negative = data_ < 0; + uint128_t uval = + negative ? -static_cast(data_) : static_cast(data_); + + constexpr uint32_t k1e9 = 1000000000U; + constexpr size_t kNumBits = 128; + // Segments will contain the array split into groups that map to decimal digits, in + // little endian order. Each segment will hold at most 9 decimal digits. For example, if + // the input represents 9876543210123456789, then segments will be [123456789, + // 876543210, 9]. + // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9)) + // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29). + std::array segments; + size_t num_segments = 0; + + while (uval > 0) { + // Compute remainder = uval % 1e9 and uval = uval / 1e9. + auto remainder = static_cast(uval % k1e9); + uval /= k1e9; + segments[num_segments++] = remainder; + } + + std::ostringstream oss; + if (negative) { + oss << '-'; + } + + // First segment is formatted as-is. + oss << segments[num_segments - 1]; + + // Remaining segments are formatted with leading zeros to fill 9 digits. e.g. 123 is + // formatted as "000000123" + for (size_t i = num_segments - 1; i-- > 0;) { + oss << std::setw(9) << std::setfill('0') << segments[i]; + } + + return oss.str(); +} + +Result Decimal::FromString(std::string_view str, int32_t* precision, + int32_t* scale) { + if (str.empty()) { + return InvalidArgument("Empty string is not a valid Decimal"); + } + DecimalComponents dec; + if (!ParseDecimalComponents(str, &dec)) { + return InvalidArgument("Invalid decimal string '{}'", str); + } + + // Count number of significant digits (without leading zeros) + size_t first_non_zero = dec.while_digits.find_first_not_of('0'); + size_t significant_digits = dec.fractional_digits.size(); + if (first_non_zero != std::string_view::npos) { + significant_digits += dec.while_digits.size() - first_non_zero; + } + + auto parsed_precision = static_cast(significant_digits); + + int32_t parsed_scale = 0; + if (dec.has_exponent) { + auto adjusted_exponent = dec.exponent; + parsed_scale = static_cast(dec.fractional_digits.size()) - adjusted_exponent; + } else { + parsed_scale = static_cast(dec.fractional_digits.size()); + } + + uint128_t value = 0; + ShiftAndAdd(dec.while_digits, value); + ShiftAndAdd(dec.fractional_digits, value); + Decimal result(static_cast(value)); + + if (dec.sign == '-') { + result.Negate(); + } + + if (parsed_scale < 0) { + // For the scale to 0, to avoid negative scales (due to compatibility issues with + // external systems such as databases) + if (parsed_scale < -kMaxScale) { + return InvalidArgument("scale must be in the range [-{}, {}], was {}", kMaxScale, + kMaxScale, parsed_scale); + } + + result *= kDecimal128PowersOfTen[-parsed_scale]; + parsed_precision -= parsed_scale; + parsed_scale = 0; + } + + if (precision != nullptr) { + *precision = parsed_precision; + } + if (scale != nullptr) { + *scale = parsed_scale; + } + + return result; +} + +Result Decimal::FromBigEndian(const uint8_t* bytes, int32_t length) { + static constexpr int32_t kMinDecimalBytes = 1; + static constexpr int32_t kMaxDecimalBytes = 16; + + int64_t high, low; + + if (length < kMinDecimalBytes || length > kMaxDecimalBytes) { + return InvalidArgument( + "Decimal::FromBigEndian: length must be in the range [{}, {}], was {}", + kMinDecimalBytes, kMaxDecimalBytes, length); + } + + // Bytes are coming in big-endian, so the first byte is the MSB and therefore holds the + // sign bit. + const bool is_negative = static_cast(bytes[0]) < 0; + + uint128_t result = 0; + std::memcpy(reinterpret_cast(&result) + 16 - length, bytes, length); + + if constexpr (std::endian::native == std::endian::little) { + auto high = static_cast(result >> 64); + auto low = static_cast(result); + high = std::byteswap(high); + low = std::byteswap(low); + // also need to swap the two halves + result = (static_cast(low) << 64) | high; + } + + if (is_negative && length < kMaxDecimalBytes) { + // Sign extend the high bits + result |= (static_cast(-1) << (length * CHAR_BIT)); + } + + return Decimal(static_cast(result)); +} + +Result Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const { + if (orig_scale == new_scale) { + return *this; + } + + const int32_t delta_scale = new_scale - orig_scale; + const int32_t abs_delta_scale = std::abs(delta_scale); + Decimal out; + + ICEBERG_DCHECK(abs_delta_scale <= kMaxScale, ""); + + auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale]; + + const bool rescale_would_cause_data_loss = + RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out); + + if (rescale_would_cause_data_loss) { + return Invalid("Rescale {} from {} to {} would cause data loss", ToIntegerString(), + orig_scale, new_scale); + } + + return out; +} + +bool Decimal::FitsInPrecision(int32_t precision) const { + ICEBERG_DCHECK(precision >= 1 && precision <= kMaxPrecision, ""); + return Decimal::Abs(*this) < kDecimal128PowersOfTen[precision]; +} + +std::array Decimal::ToBytes() const { + std::array out{{0}}; + std::memcpy(out.data(), &data_, kByteWidth); + return out; +} + +std::ostream& operator<<(std::ostream& os, const Decimal& decimal) { + os << decimal.ToIntegerString(); + return os; +} + +// Unary operators +Decimal operator-(const Decimal& operand) { + Decimal result(operand.data_); + return result.Negate(); +} + +Decimal operator~(const Decimal& operand) { return {~operand.data_}; } + +// Binary operators +Decimal operator+(const Decimal& lhs, const Decimal& rhs) { + Decimal result(lhs); + result += rhs; + return result; +} + +Decimal operator-(const Decimal& lhs, const Decimal& rhs) { + Decimal result(lhs); + result -= rhs; + return result; +} + +Decimal operator*(const Decimal& lhs, const Decimal& rhs) { + Decimal result(lhs); + result *= rhs; + return result; +} + +Decimal operator/(const Decimal& lhs, const Decimal& rhs) { + return lhs.data_ / rhs.data_; +} + +Decimal operator%(const Decimal& lhs, const Decimal& rhs) { + return lhs.data_ % rhs.data_; +} + +} // namespace iceberg diff --git a/src/iceberg/util/decimal.h b/src/iceberg/util/decimal.h new file mode 100644 index 000000000..f118bde62 --- /dev/null +++ b/src/iceberg/util/decimal.h @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/util/decimal.h +/// \brief 128-bit fixed-point decimal numbers. +/// Adapted from Apache Arrow with only Decimal128 support. +/// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h + +#include +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/util/formattable.h" +#include "iceberg/util/int128.h" + +namespace iceberg { + +/// \brief Represents 128-bit fixed-point decimal numbers. +/// The max decimal precision that can be safely represented is +/// 38 significant digits. +class ICEBERG_EXPORT Decimal : public util::Formattable { + public: + static constexpr int32_t kBitWidth = 128; + static constexpr int32_t kByteWidth = kBitWidth / 8; + static constexpr int32_t kMaxPrecision = 38; + static constexpr int32_t kMaxScale = 38; + + /// \brief Default constructor initializes to zero. + constexpr Decimal() noexcept = default; + + /// \brief Create a Decimal from a 128-bit integer. + constexpr Decimal(int128_t value) noexcept // NOLINT implicit conversion + : data_(value) {} + + /// \brief Create a Decimal from any integer not wider than 64 bits. + template + requires(std::is_integral_v && (sizeof(T) <= sizeof(uint64_t))) + constexpr Decimal(T value) noexcept // NOLINT implicit conversion + : data_(static_cast(value)) {} + + /// \brief Parse a Decimal from a string representation. + /// \throw This constructor throws an exception if parsing fails. Use + /// Decimal::FromString() if you want to handle errors more gracefully. + explicit Decimal(std::string_view str); + + /// \brief Create a Decimal from two 64-bit integers. + constexpr Decimal(int64_t high, uint64_t low) noexcept { + data_ = (static_cast(high) << 64) | low; + } + + /// \brief Negate the current Decimal value (in place) + Decimal& Negate(); + + /// \brief Absolute value of the current Decimal value (in place) + Decimal& Abs(); + + /// \brief Absolute value of the current Decimal value + static Decimal Abs(const Decimal& value); + + /// \brief Add a number to this one. The result is truncated to 128 bits. + Decimal& operator+=(const Decimal& other); + + /// \brief Subtract a number from this one. The result is truncated to 128 bits. + Decimal& operator-=(const Decimal& other); + + /// \brief Multiply this number by another. The result is truncated to 128 bits. + Decimal& operator*=(const Decimal& other); + + /// \brief Divide this number by another. + /// + /// The operation does not modify the current Decimal value. + /// The answer rounds towards zero. Signs work like: + /// 21 / 5 -> 4, 1 + /// -21 / 5 -> -4, -1 + /// 21 / -5 -> -4, 1 + /// -21 / -5 -> 4, -1 + /// \param[in] divisor the number to divide by + /// \return the pair of the quotient and the remainder + Result> Divide(const Decimal& divisor) const; + + /// \brief In place division. + Decimal& operator/=(const Decimal& other); + + /// \brief Bitwise OR operation. + Decimal& operator|=(const Decimal& other); + + /// \brief Bitwise AND operation. + Decimal& operator&=(const Decimal& other); + + /// \brief Shift left by the given number of bits (in place). + Decimal& operator<<=(uint32_t shift); + + /// \brief Shift left by the given number of bits. + Decimal operator<<(uint32_t shift) const { + Decimal result(*this); + result <<= shift; + return result; + } + + /// \brief Shift right by the given number of bits (in place). + Decimal& operator>>=(uint32_t shift); + + /// \brief Shift right by the given number of bits. + Decimal operator>>(uint32_t shift) const { + Decimal result(*this); + result >>= shift; + return result; + } + + /// \brief Get the underlying 128-bit integer representation of the number. + constexpr int128_t value() const { return data_; } + + /// \brief Get the high bits of the two's complement representation of the number. + constexpr int64_t high() const { return static_cast(data_ >> 64); } + + /// \brief Get the low bits of the two's complement representation of the number. + constexpr uint64_t low() const { return static_cast(data_); } + + /// \brief Convert the Decimal value to a base 10 decimal string with the given scale. + /// \param scale The scale to use for the string representation. + /// \return The string representation of the Decimal value. + Result ToString(int32_t scale = 0) const; + + /// \brief Convert the Decimal value to an integer string. + std::string ToIntegerString() const; + + /// \brief Returns an integer string representation of the decimal value. + std::string ToString() const override { return ToIntegerString(); } + + /// \brief Convert the decimal string to a Decimal value, optionally including precision + /// and scale if they are provided not null. + /// \param str The string representation of the Decimal value. + /// \param[out] precision Optional pointer to store the precision of the parsed value. + /// \param[out] scale Optional pointer to store the scale of the parsed value. + /// \return The Decimal value. + static Result FromString(std::string_view str, int32_t* precision = nullptr, + int32_t* scale = nullptr); + + /// \brief Convert from a big-endian byte representation. The length must be + /// between 1 and 16. + /// \return error status if the length is an invalid value + static Result FromBigEndian(const uint8_t* data, int32_t length); + + /// \brief Convert Decimal from one scale to another. + Result Rescale(int32_t orig_scale, int32_t new_scale) const; + + /// \brief Whether this number fits in the given precision + /// + /// Returns true if the number of significant digits is less or equal to `precision`. + bool FitsInPrecision(int32_t precision) const; + + /// \brief Spaceship operator for three-way comparison. + std::strong_ordering operator<=>(const Decimal& other) const { + if (high() != other.high()) { + return high() <=> other.high(); + } + return low() <=> other.low(); + } + + const uint8_t* native_endian_bytes() const { + return reinterpret_cast(&data_); + } + + /// \brief Returns the raw bytes of the value in native-endian byte order. + std::array ToBytes() const; + + /// \brief Returns 1 if positive or zero, -1 if strictly negative. + int64_t Sign() const { return 1 | (high() >> 63); } + + /// \brief Check if the Decimal value is negative. + bool IsNegative() const { return (high() >> 63) < 0; } + + explicit operator bool() const { return data_ != 0; } + + friend bool operator==(const Decimal& lhs, const Decimal& rhs) { + return lhs.data_ == rhs.data_; + } + + friend bool operator!=(const Decimal& lhs, const Decimal& rhs) { + return lhs.data_ != rhs.data_; + } + + friend Decimal operator-(const Decimal& operand); + friend Decimal operator~(const Decimal& operand); + + friend Decimal operator+(const Decimal& lhs, const Decimal& rhs); + friend Decimal operator-(const Decimal& lhs, const Decimal& rhs); + friend Decimal operator*(const Decimal& lhs, const Decimal& rhs); + friend Decimal operator/(const Decimal& lhs, const Decimal& rhs); + friend Decimal operator%(const Decimal& lhs, const Decimal& rhs); + + private: + int128_t data_{0}; +}; + +ICEBERG_EXPORT std::ostream& operator<<(std::ostream& os, const Decimal& decimal); + +} // namespace iceberg diff --git a/src/iceberg/util/int128.h b/src/iceberg/util/int128.h new file mode 100644 index 000000000..8e9d27dc7 --- /dev/null +++ b/src/iceberg/util/int128.h @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/// \file iceberg/util/int128.h +/// \brief 128-bit integer type + +#pragma once + +#if defined(_MSC_VER) +# include <__msvc_int128.hpp> +using int128_t = std::_Signed128; +using uint128_t = std::_Unsigned128; +#elif defined(__GNUC__) || defined(__clang__) +using int128_t = __int128; +using uint128_t = unsigned __int128; +#else +# error "128-bit integer type is not supported on this platform" +#endif diff --git a/src/iceberg/util/macros.h b/src/iceberg/util/macros.h index f11a680cc..278035d3f 100644 --- a/src/iceberg/util/macros.h +++ b/src/iceberg/util/macros.h @@ -19,6 +19,8 @@ #pragma once +#include + #define ICEBERG_RETURN_UNEXPECTED(result) \ if (auto&& result_name = result; !result_name) [[unlikely]] { \ return std::unexpected(result_name.error()); \ @@ -36,3 +38,5 @@ #define ICEBERG_ASSIGN_OR_RAISE(lhs, rexpr) \ ICEBERG_ASSIGN_OR_RAISE_IMPL(ICEBERG_ASSIGN_OR_RAISE_NAME(result_, __COUNTER__), lhs, \ rexpr) + +#define ICEBERG_DCHECK(expr, message) assert((expr) && (message)) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 42ad13209..d32a2d8bb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -87,6 +87,7 @@ add_iceberg_test(json_serde_test add_iceberg_test(util_test SOURCES config_test.cc + decimal_test.cc endian_test.cc formatter_test.cc string_util_test.cc diff --git a/test/decimal_test.cc b/test/decimal_test.cc new file mode 100644 index 000000000..6850d7aad --- /dev/null +++ b/test/decimal_test.cc @@ -0,0 +1,674 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +#include "iceberg/util/decimal.h" + +#include +#include +#include +#include + +#include +#include + +#include "iceberg/util/int128.h" +#include "matchers.h" + +namespace iceberg { + +namespace { + +void AssertDecimalFromString(const std::string& s, const Decimal& expected, + int32_t expected_precision, int32_t expected_scale) { + int32_t precision = 0; + int32_t scale = 0; + auto result = Decimal::FromString(s, &precision, &scale); + EXPECT_THAT(result, IsOk()); + const Decimal& actual = result.value(); + EXPECT_EQ(expected, actual); + EXPECT_EQ(expected_precision, precision); + EXPECT_EQ(expected_scale, scale); +} + +} // namespace + +TEST(DecimalTest, Basics) { + AssertDecimalFromString("234.23445", Decimal(23423445), 8, 5); + + std::string string_value("-23049223942343532412"); + Decimal result(string_value); + Decimal expected(static_cast(-230492239423435324)); + ASSERT_EQ(result, expected * 100 - 12); + ASSERT_NE(result.high(), 0); + + result = Decimal("-23049223942343.532412"); + ASSERT_EQ(result, expected * 100 - 12); + ASSERT_NE(result.high(), 0); +} + +TEST(DecimalTest, StringStartingWithSign) { + AssertDecimalFromString("+234.567", Decimal(234567), 6, 3); + AssertDecimalFromString("+2342394230592.232349023094", + Decimal("2342394230592232349023094"), 25, 12); + AssertDecimalFromString("-234.567", Decimal("-234567"), 6, 3); + AssertDecimalFromString("-2342394230592.232349023094", + Decimal("-2342394230592232349023094"), 25, 12); +} + +TEST(DecimalTest, StringWithLeadingZeros) { + AssertDecimalFromString("0000000000000000000000000000000.234", Decimal(234), 3, 3); + AssertDecimalFromString("0000000000000000000000000000000.23400", Decimal(23400), 5, 5); + AssertDecimalFromString("234.00", Decimal(23400), 5, 2); + AssertDecimalFromString("234.0", Decimal(2340), 4, 1); + AssertDecimalFromString("0000000", Decimal(0), 0, 0); + AssertDecimalFromString("000.0000", Decimal(0), 4, 4); + AssertDecimalFromString(".00000", Decimal(0), 5, 5); +} + +TEST(DecimalTest, DecimalWithExponent) { + AssertDecimalFromString("1E1", Decimal(10), 2, 0); + AssertDecimalFromString("234.23445e2", Decimal(23423445), 8, 3); + AssertDecimalFromString("234.23445e-2", Decimal(23423445), 8, 7); + AssertDecimalFromString("234.23445E2", Decimal(23423445), 8, 3); + AssertDecimalFromString("234.23445E-2", Decimal(23423445), 8, 7); + AssertDecimalFromString("1.23E-8", Decimal(123), 3, 10); +} + +TEST(DecimalTest, SmallValues) { + struct TestValue { + std::string s; + int64_t expected; + int32_t expected_precision; + int32_t expected_scale; + }; + + for (const auto& tv : std::vector{ + {.s = "12.3", .expected = 123LL, .expected_precision = 3, .expected_scale = 1}, + {.s = "0.00123", + .expected = 123LL, + .expected_precision = 5, + .expected_scale = 5}, + {.s = "1.23E-8", + .expected = 123LL, + .expected_precision = 3, + .expected_scale = 10}, + {.s = "-1.23E-8", + .expected = -123LL, + .expected_precision = 3, + .expected_scale = 10}, + {.s = "1.23E+3", + .expected = 1230LL, + .expected_precision = 4, + .expected_scale = 0}, + {.s = "-1.23E+3", + .expected = -1230LL, + .expected_precision = 4, + .expected_scale = 0}, + {.s = "1.23E+5", + .expected = 123000LL, + .expected_precision = 6, + .expected_scale = 0}, + {.s = "1.2345E+7", + .expected = 12345000LL, + .expected_precision = 8, + .expected_scale = 0}, + {.s = "1.23e-8", + .expected = 123LL, + .expected_precision = 3, + .expected_scale = 10}, + {.s = "-1.23e-8", + .expected = -123LL, + .expected_precision = 3, + .expected_scale = 10}, + {.s = "1.23e+3", + .expected = 1230LL, + .expected_precision = 4, + .expected_scale = 0}, + {.s = "-1.23e+3", + .expected = -1230LL, + .expected_precision = 4, + .expected_scale = 0}, + {.s = "1.23e+5", + .expected = 123000LL, + .expected_precision = 6, + .expected_scale = 0}, + {.s = "1.2345e+7", + .expected = 12345000LL, + .expected_precision = 8, + .expected_scale = 0}}) { + AssertDecimalFromString(tv.s, Decimal(tv.expected), tv.expected_precision, + tv.expected_scale); + } +} + +TEST(DecimalTest, LargeValues) { + const std::array string_values = { + "99999999999999999999999999999999999999", "-99999999999999999999999999999999999999", + "170141183460469231731687303715884105727", // maximum positive value + "-170141183460469231731687303715884105728" // minimum negative value + }; + + for (const auto& s : string_values) { + const Decimal value(s); + const std::string printed_value = value.ToIntegerString(); + EXPECT_EQ(printed_value, s) << "Expected: " << s << ", but got: " << printed_value; + } +} + +TEST(DecimalTest, TestStringRoundTrip) { + static constexpr std::array kTestBits = { + 0, + 1, + 999, + 1000, + std::numeric_limits::max(), + (1ull << 31), + std::numeric_limits::max(), + (1ull << 32), + std::numeric_limits::max(), + (1ull << 63), + std::numeric_limits::max(), + }; + static constexpr std::array kScales = {0, 1, 10}; + for (uint64_t high : kTestBits) { + for (uint64_t low : kTestBits) { + Decimal value(high, low); + for (int32_t scale : kScales) { + auto result = value.ToString(scale); + + ASSERT_THAT(result, IsOk()) + << "Failed to convert Decimal to string: " << value.ToIntegerString() + << ", scale: " << scale; + + auto round_trip = Decimal::FromString(result.value()); + ASSERT_THAT(round_trip, IsOk()) + << "Failed to convert string back to Decimal: " << result.value(); + + EXPECT_EQ(value, round_trip.value()) + << "Round trip failed for value: " << value.ToIntegerString() + << ", scale: " << scale; + } + } + } +} + +TEST(DecimalTest, FromStringLimits) { + AssertDecimalFromString("1e37", Decimal(542101086242752217ULL, 68739955140067328ULL), + 38, 0); + + AssertDecimalFromString( + "-1e37", Decimal(17904642987466799398ULL, 18378004118569484288ULL), 38, 0); + AssertDecimalFromString( + "9.87e37", Decimal(5350537721215964381ULL, 15251391175463010304ULL), 38, 0); + AssertDecimalFromString( + "-9.87e37", Decimal(13096206352493587234ULL, 3195352898246541312ULL), 38, 0); + AssertDecimalFromString("12345678901234567890123456789012345678", + Decimal(669260594276348691ULL, 14143994781733811022ULL), 38, 0); + AssertDecimalFromString("-12345678901234567890123456789012345678", + Decimal(17777483479433202924ULL, 4302749291975740594ULL), 38, + 0); + + // "9..9" (38 times) + const auto dec38times9pos = Decimal(5421010862427522170ULL, 687399551400673279ULL); + // "-9..9" (38 times) + const auto dec38times9neg = Decimal(13025733211282029445ULL, 17759344522308878337ULL); + + AssertDecimalFromString("99999999999999999999999999999999999999", dec38times9pos, 38, + 0); + AssertDecimalFromString("-99999999999999999999999999999999999999", dec38times9neg, 38, + 0); + AssertDecimalFromString("9.9999999999999999999999999999999999999e37", dec38times9pos, + 38, 0); + AssertDecimalFromString("-9.9999999999999999999999999999999999999e37", dec38times9neg, + 38, 0); + + // No exponent, many fractional digits + AssertDecimalFromString("9.9999999999999999999999999999999999999", dec38times9pos, 38, + 37); + AssertDecimalFromString("-9.9999999999999999999999999999999999999", dec38times9neg, 38, + 37); + AssertDecimalFromString("0.99999999999999999999999999999999999999", dec38times9pos, 38, + 38); + AssertDecimalFromString("-0.99999999999999999999999999999999999999", dec38times9neg, 38, + 38); + + // Negative exponent + AssertDecimalFromString("1e-38", Decimal(0, 1), 1, 38); + AssertDecimalFromString( + "-1e-38", Decimal(18446744073709551615ULL, 18446744073709551615ULL), 1, 38); + AssertDecimalFromString("9.99e-36", Decimal(0, 999), 3, 38); + AssertDecimalFromString( + "-9.99e-36", Decimal(18446744073709551615ULL, 18446744073709550617ULL), 3, 38); + AssertDecimalFromString("987e-38", Decimal(0, 987), 3, 38); + AssertDecimalFromString( + "-987e-38", Decimal(18446744073709551615ULL, 18446744073709550629ULL), 3, 38); + AssertDecimalFromString("99999999999999999999999999999999999999e-37", dec38times9pos, + 38, 37); + AssertDecimalFromString("-99999999999999999999999999999999999999e-37", dec38times9neg, + 38, 37); + AssertDecimalFromString("99999999999999999999999999999999999999e-38", dec38times9pos, + 38, 38); + AssertDecimalFromString("-99999999999999999999999999999999999999e-38", dec38times9neg, + 38, 38); +} + +TEST(DecimalTest, FromStringInvalid) { + // Empty string + auto result = Decimal::FromString(""); + ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, HasErrorMessage("Empty string is not a valid Decimal")); + for (const auto& invalid_string : + std::vector{"-", "0.0.0", "0-13-32", "a", "-23092.235-", + "-+23092.235", "+-23092.235", "00a", "1e1a", "0.00123D/3", + "1.23eA8", "1.23E+3A", "-1.23E--5", "1.2345E+++07"}) { + auto result = Decimal::FromString(invalid_string); + ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, HasErrorMessage("Invalid decimal string")); + } + + for (const auto& invalid_string : + std::vector{"1e39", "-1e39", "9e39", "-9e39", "9.9e40", "-9.9e40"}) { + auto result = Decimal::FromString(invalid_string); + ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(result, HasErrorMessage("scale must be in the range")); + } +} + +TEST(DecimalTest, Division) { + const std::string expected_string_value("-23923094039234029"); + const Decimal value(expected_string_value); + const Decimal result(value / 3); + const Decimal expected_value("-7974364679744676"); + ASSERT_EQ(expected_value, result); +} + +TEST(DecimalTest, ToString) { + struct ToStringCase { + int64_t test_value; + int32_t scale; + const char* expected_string; + }; + + for (const auto& t : std::vector{ + {.test_value = 0, .scale = -1, .expected_string = "0E+1"}, + {.test_value = 0, .scale = 0, .expected_string = "0"}, + {.test_value = 0, .scale = 1, .expected_string = "0.0"}, + {.test_value = 0, .scale = 6, .expected_string = "0.000000"}, + {.test_value = 2, .scale = 7, .expected_string = "2E-7"}, + {.test_value = 2, .scale = -1, .expected_string = "2E+1"}, + {.test_value = 2, .scale = 0, .expected_string = "2"}, + {.test_value = 2, .scale = 1, .expected_string = "0.2"}, + {.test_value = 2, .scale = 6, .expected_string = "0.000002"}, + {.test_value = -2, .scale = 7, .expected_string = "-2E-7"}, + {.test_value = -2, .scale = 7, .expected_string = "-2E-7"}, + {.test_value = -2, .scale = -1, .expected_string = "-2E+1"}, + {.test_value = -2, .scale = 0, .expected_string = "-2"}, + {.test_value = -2, .scale = 1, .expected_string = "-0.2"}, + {.test_value = -2, .scale = 6, .expected_string = "-0.000002"}, + {.test_value = -2, .scale = 7, .expected_string = "-2E-7"}, + {.test_value = 123, .scale = -3, .expected_string = "1.23E+5"}, + {.test_value = 123, .scale = -1, .expected_string = "1.23E+3"}, + {.test_value = 123, .scale = 1, .expected_string = "12.3"}, + {.test_value = 123, .scale = 0, .expected_string = "123"}, + {.test_value = 123, .scale = 5, .expected_string = "0.00123"}, + {.test_value = 123, .scale = 8, .expected_string = "0.00000123"}, + {.test_value = 123, .scale = 9, .expected_string = "1.23E-7"}, + {.test_value = 123, .scale = 10, .expected_string = "1.23E-8"}, + {.test_value = -123, .scale = -3, .expected_string = "-1.23E+5"}, + {.test_value = -123, .scale = -1, .expected_string = "-1.23E+3"}, + {.test_value = -123, .scale = 1, .expected_string = "-12.3"}, + {.test_value = -123, .scale = 0, .expected_string = "-123"}, + {.test_value = -123, .scale = 5, .expected_string = "-0.00123"}, + {.test_value = -123, .scale = 8, .expected_string = "-0.00000123"}, + {.test_value = -123, .scale = 9, .expected_string = "-1.23E-7"}, + {.test_value = -123, .scale = 10, .expected_string = "-1.23E-8"}, + {.test_value = 1000000000, .scale = -3, .expected_string = "1.000000000E+12"}, + {.test_value = 1000000000, .scale = -1, .expected_string = "1.000000000E+10"}, + {.test_value = 1000000000, .scale = 0, .expected_string = "1000000000"}, + {.test_value = 1000000000, .scale = 1, .expected_string = "100000000.0"}, + {.test_value = 1000000000, .scale = 5, .expected_string = "10000.00000"}, + {.test_value = 1000000000, + .scale = 15, + .expected_string = "0.000001000000000"}, + {.test_value = 1000000000, .scale = 16, .expected_string = "1.000000000E-7"}, + {.test_value = 1000000000, .scale = 17, .expected_string = "1.000000000E-8"}, + {.test_value = -1000000000, + .scale = -3, + .expected_string = "-1.000000000E+12"}, + {.test_value = -1000000000, + .scale = -1, + .expected_string = "-1.000000000E+10"}, + {.test_value = -1000000000, .scale = 0, .expected_string = "-1000000000"}, + {.test_value = -1000000000, .scale = 1, .expected_string = "-100000000.0"}, + {.test_value = -1000000000, .scale = 5, .expected_string = "-10000.00000"}, + {.test_value = -1000000000, + .scale = 15, + .expected_string = "-0.000001000000000"}, + {.test_value = -1000000000, .scale = 16, .expected_string = "-1.000000000E-7"}, + {.test_value = -1000000000, .scale = 17, .expected_string = "-1.000000000E-8"}, + {.test_value = 1234567890123456789LL, + .scale = -3, + .expected_string = "1.234567890123456789E+21"}, + {.test_value = 1234567890123456789LL, + .scale = -1, + .expected_string = "1.234567890123456789E+19"}, + {.test_value = 1234567890123456789LL, + .scale = 0, + .expected_string = "1234567890123456789"}, + {.test_value = 1234567890123456789LL, + .scale = 1, + .expected_string = "123456789012345678.9"}, + {.test_value = 1234567890123456789LL, + .scale = 5, + .expected_string = "12345678901234.56789"}, + {.test_value = 1234567890123456789LL, + .scale = 24, + .expected_string = "0.000001234567890123456789"}, + {.test_value = 1234567890123456789LL, + .scale = 25, + .expected_string = "1.234567890123456789E-7"}, + {.test_value = -1234567890123456789LL, + .scale = -3, + .expected_string = "-1.234567890123456789E+21"}, + {.test_value = -1234567890123456789LL, + .scale = -1, + .expected_string = "-1.234567890123456789E+19"}, + {.test_value = -1234567890123456789LL, + .scale = 0, + .expected_string = "-1234567890123456789"}, + {.test_value = -1234567890123456789LL, + .scale = 1, + .expected_string = "-123456789012345678.9"}, + {.test_value = -1234567890123456789LL, + .scale = 5, + .expected_string = "-12345678901234.56789"}, + {.test_value = -1234567890123456789LL, + .scale = 24, + .expected_string = "-0.000001234567890123456789"}, + {.test_value = -1234567890123456789LL, + .scale = 25, + .expected_string = "-1.234567890123456789E-7"}, + }) { + const Decimal value(t.test_value); + auto result = value.ToString(t.scale); + ASSERT_THAT(result, IsOk()) + << "Failed to convert Decimal to string: " << value.ToIntegerString() + << ", scale: " << t.scale; + + EXPECT_EQ(result.value(), t.expected_string) + << "Expected: " << t.expected_string << ", but got: " << result.value(); + } +} + +TEST(DecimalTest, FromBigEndian) { + // We test out a variety of scenarios: + // + // * Positive values that are left shifted + // and filled in with the same bit pattern + // * Negated of the positive values + // * Complement of the positive values + // + // For the positive values, we can call FromBigEndian + // with a length that is less than 16, whereas we must + // pass all 16 bytes for the negative and complement. + // + // We use a number of bit patterns to increase the coverage + // of scenarios + constexpr int WidthMinusOne = Decimal::kByteWidth - 1; + + for (int32_t start : {1, 15, /* 00001111 */ + 85, /* 01010101 */ + 127 /* 01111111 */}) { + Decimal value(start); + for (int ii = 0; ii < Decimal::kByteWidth; ++ii) { + auto native_endian = value.ToBytes(); + if constexpr (std::endian::native == std::endian::little) { + // convert to big endian + std::ranges::reverse(native_endian); + } + // Limit the number of bytes we are passing to make + // sure that it works correctly. That's why all of the + // 'start' values don't have a 1 in the most significant + // bit place + auto result = + Decimal::FromBigEndian(native_endian.data() + WidthMinusOne - ii, ii + 1); + ASSERT_THAT(result, IsOk()); + const Decimal& decimal = result.value(); + EXPECT_EQ(decimal, value); + + // Negate it + auto negated = -value; + native_endian = negated.ToBytes(); + + if constexpr (std::endian::native == std::endian::little) { + // convert to big endian + std::ranges::reverse(native_endian); + } + + result = Decimal::FromBigEndian(native_endian.data() + WidthMinusOne - ii, ii + 1); + ASSERT_THAT(result, IsOk()); + const Decimal& negated_decimal = result.value(); + EXPECT_EQ(negated_decimal, negated); + + // Take the complement + auto complement = ~value; + native_endian = complement.ToBytes(); + + if constexpr (std::endian::native == std::endian::little) { + // convert to big endian + std::ranges::reverse(native_endian); + } + result = Decimal::FromBigEndian(native_endian.data(), Decimal::kByteWidth); + ASSERT_THAT(result, IsOk()); + const Decimal& complement_decimal = result.value(); + EXPECT_EQ(complement_decimal, complement); + + value <<= 2; + value += Decimal(start); + } + } +} + +TEST(DecimalTest, FromBigEndianInvalid) { + ASSERT_THAT(Decimal::FromBigEndian(nullptr, -1), IsError(ErrorKind::kInvalidArgument)); + ASSERT_THAT(Decimal::FromBigEndian(nullptr, Decimal::kByteWidth + 1), + IsError(ErrorKind::kInvalidArgument)); +} + +TEST(DecimalTestFunctionality, Multiply) { + ASSERT_EQ(Decimal(60501), Decimal(301) * Decimal(201)); + ASSERT_EQ(Decimal(-60501), Decimal(-301) * Decimal(201)); + ASSERT_EQ(Decimal(-60501), Decimal(301) * Decimal(-201)); + ASSERT_EQ(Decimal(60501), Decimal(-301) * Decimal(-201)); + + // Edge cases + for (auto x : std::vector{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) { + for (auto y : + std::vector{-INT32_MAX, -32, -2, -1, 0, 1, 2, 32, INT32_MAX}) { + Decimal decimal_x(x); + Decimal decimal_y(y); + Decimal result = decimal_x * decimal_y; + EXPECT_EQ(Decimal(x * y), result) << " x: " << decimal_x << " y: " << decimal_y; + } + } +} + +TEST(DecimalTestFunctionality, Divide) { + ASSERT_EQ(Decimal(66), Decimal(20100) / Decimal(301)); + ASSERT_EQ(Decimal(-66), Decimal(-20100) / Decimal(301)); + ASSERT_EQ(Decimal(-66), Decimal(20100) / Decimal(-301)); + ASSERT_EQ(Decimal(66), Decimal(-20100) / Decimal(-301)); + + for (auto x : std::vector{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) { + for (auto y : std::vector{-INT32_MAX, -32, -2, -1, 1, 2, 32, INT32_MAX}) { + Decimal decimal_x(x); + Decimal decimal_y(y); + Decimal result = decimal_x / decimal_y; + EXPECT_EQ(Decimal(x / y), result) << " x: " << decimal_x << " y: " << decimal_y; + } + } +} + +TEST(DecimalTestFunctionality, Modulo) { + ASSERT_EQ(Decimal(234), Decimal(20100) % Decimal(301)); + ASSERT_EQ(Decimal(-234), Decimal(-20100) % Decimal(301)); + ASSERT_EQ(Decimal(234), Decimal(20100) % Decimal(-301)); + ASSERT_EQ(Decimal(-234), Decimal(-20100) % Decimal(-301)); + + // Test some edge cases + for (auto x : std::vector{-INT64_MAX, -INT32_MAX, 0, INT32_MAX, INT64_MAX}) { + for (auto y : std::vector{-INT32_MAX, -32, -2, -1, 1, 2, 32, INT32_MAX}) { + Decimal decimal_x(x); + Decimal decimal_y(y); + Decimal result = decimal_x % decimal_y; + EXPECT_EQ(Decimal(x % y), result) << " x: " << decimal_x << " y: " << decimal_y; + } + } +} + +TEST(DecimalTestFunctionality, Sign) { + ASSERT_EQ(1, Decimal(999999).Sign()); + ASSERT_EQ(-1, Decimal(-999999).Sign()); + ASSERT_EQ(1, Decimal(0).Sign()); +} + +TEST(DecimalTestFunctionality, FitsInPrecision) { + ASSERT_TRUE(Decimal("0").FitsInPrecision(1)); + ASSERT_TRUE(Decimal("9").FitsInPrecision(1)); + ASSERT_TRUE(Decimal("-9").FitsInPrecision(1)); + ASSERT_FALSE(Decimal("10").FitsInPrecision(1)); + ASSERT_FALSE(Decimal("-10").FitsInPrecision(1)); + + ASSERT_TRUE(Decimal("0").FitsInPrecision(2)); + ASSERT_TRUE(Decimal("10").FitsInPrecision(2)); + ASSERT_TRUE(Decimal("-10").FitsInPrecision(2)); + ASSERT_TRUE(Decimal("99").FitsInPrecision(2)); + ASSERT_TRUE(Decimal("-99").FitsInPrecision(2)); + ASSERT_FALSE(Decimal("100").FitsInPrecision(2)); + ASSERT_FALSE(Decimal("-100").FitsInPrecision(2)); + + std::string max_nines(Decimal::kMaxPrecision, '9'); + ASSERT_TRUE(Decimal(max_nines).FitsInPrecision(Decimal::kMaxPrecision)); + ASSERT_TRUE(Decimal("-" + max_nines).FitsInPrecision(Decimal::kMaxPrecision)); + + std::string max_zeros(Decimal::kMaxPrecision, '0'); + ASSERT_FALSE(Decimal("1" + max_zeros).FitsInPrecision(Decimal::kMaxPrecision)); + ASSERT_FALSE(Decimal("-1" + max_zeros).FitsInPrecision(Decimal::kMaxPrecision)); +} + +TEST(DecimalTest, LeftShift) { + auto check = [](int128_t x, uint32_t bits) { + auto expected = Decimal(x << bits); + auto actual = Decimal(x) << bits; + ASSERT_EQ(actual.low(), expected.low()); + ASSERT_EQ(actual.high(), expected.high()); + }; + + ASSERT_EQ(Decimal("0"), Decimal("0") << 0); + ASSERT_EQ(Decimal("0"), Decimal("0") << 1); + ASSERT_EQ(Decimal("0"), Decimal("0") << 63); + ASSERT_EQ(Decimal("0"), Decimal("0") << 127); + + check(123, 0); + check(123, 1); + check(123, 63); + check(123, 64); + check(123, 120); + + ASSERT_EQ(Decimal("199999999999998"), Decimal("99999999999999") << 1); + ASSERT_EQ(Decimal("3435973836799965640261632"), Decimal("99999999999999") << 35); + ASSERT_EQ(Decimal("120892581961461708544797985370825293824"), Decimal("99999999999999") + << 80); + + ASSERT_EQ(Decimal("1234567890123456789012"), Decimal("1234567890123456789012") << 0); + ASSERT_EQ(Decimal("2469135780246913578024"), Decimal("1234567890123456789012") << 1); + ASSERT_EQ(Decimal("88959991838777271103427858320412639232"), + Decimal("1234567890123456789012") << 56); + + check(-123, 0); + check(-123, 1); + check(-123, 63); + check(-123, 64); + check(-123, 120); + + ASSERT_EQ(Decimal("-199999999999998"), Decimal("-99999999999999") << 1); + ASSERT_EQ(Decimal("-3435973836799965640261632"), Decimal("-99999999999999") << 35); + ASSERT_EQ(Decimal("-120892581961461708544797985370825293824"), + Decimal("-99999999999999") << 80); + + ASSERT_EQ(Decimal("-1234567890123456789012"), Decimal("-1234567890123456789012") << 0); + ASSERT_EQ(Decimal("-2469135780246913578024"), Decimal("-1234567890123456789012") << 1); + ASSERT_EQ(Decimal("-88959991838777271103427858320412639232"), + Decimal("-1234567890123456789012") << 56); +} + +TEST(DecimalTest, RightShift) { + ASSERT_EQ(Decimal("0"), Decimal("0") >> 0); + ASSERT_EQ(Decimal("0"), Decimal("0") >> 1); + ASSERT_EQ(Decimal("0"), Decimal("0") >> 63); + ASSERT_EQ(Decimal("0"), Decimal("0") >> 127); + + ASSERT_EQ(Decimal("1"), Decimal("1") >> 0); + ASSERT_EQ(Decimal("0"), Decimal("1") >> 1); + ASSERT_EQ(Decimal("0"), Decimal("1") >> 63); + ASSERT_EQ(Decimal("0"), Decimal("1") >> 127); + + ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 0); + ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 1); + ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 63); + ASSERT_EQ(Decimal("-1"), Decimal("-1") >> 127); + + ASSERT_EQ(Decimal("1096516"), Decimal("1234567890123456789012") >> 50); + ASSERT_EQ(Decimal("66"), Decimal("1234567890123456789012") >> 64); + ASSERT_EQ(Decimal("2"), Decimal("1234567890123456789012") >> 69); + ASSERT_EQ(Decimal("0"), Decimal("1234567890123456789012") >> 71); + ASSERT_EQ(Decimal("0"), Decimal("1234567890123456789012") >> 127); + + ASSERT_EQ(Decimal("-1096517"), Decimal("-1234567890123456789012") >> 50); + ASSERT_EQ(Decimal("-67"), Decimal("-1234567890123456789012") >> 64); + ASSERT_EQ(Decimal("-3"), Decimal("-1234567890123456789012") >> 69); + ASSERT_EQ(Decimal("-1"), Decimal("-1234567890123456789012") >> 71); + ASSERT_EQ(Decimal("-1"), Decimal("-1234567890123456789012") >> 127); +} + +TEST(DecimalTest, Negate) { + auto check = [](Decimal pos, Decimal neg) { + EXPECT_EQ(-pos, neg); + EXPECT_EQ(-neg, pos); + }; + + check(Decimal(0, 0), Decimal(0, 0)); + check(Decimal(0, 1), Decimal(-1, 0xFFFFFFFFFFFFFFFFULL)); + check(Decimal(0, 2), Decimal(-1, 0xFFFFFFFFFFFFFFFEULL)); + check(Decimal(0, 0x8000000000000000ULL), Decimal(-1, 0x8000000000000000ULL)); + check(Decimal(0, 0xFFFFFFFFFFFFFFFFULL), Decimal(-1, 1)); + check(Decimal(12, 0), Decimal(-12, 0)); + check(Decimal(12, 1), Decimal(-13, 0xFFFFFFFFFFFFFFFFULL)); + check(Decimal(12, 0xFFFFFFFFFFFFFFFFULL), Decimal(-13, 1)); +} + +TEST(DecimalTest, Rescale) { + ASSERT_EQ(Decimal(11100), Decimal(111).Rescale(0, 2).value()); + ASSERT_EQ(Decimal(111), Decimal(11100).Rescale(2, 0).value()); + ASSERT_EQ(Decimal(5), Decimal(500000).Rescale(6, 1).value()); + ASSERT_EQ(Decimal(500000), Decimal(5).Rescale(1, 6).value()); + + ASSERT_THAT(Decimal(5555555).Rescale(6, 1), IsError(ErrorKind::kInvalid)); +} + +} // namespace iceberg