Skip to content

Commit bff6234

Browse files
committed
fix: more review comments
1 parent 660a58b commit bff6234

File tree

7 files changed

+114
-58
lines changed

7 files changed

+114
-58
lines changed

src/iceberg/expression/literal.cc

Lines changed: 11 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include <cstdint>
2525
#include <string>
2626

27-
#include "iceberg/exception.h"
2827
#include "iceberg/util/checked_cast.h"
2928
#include "iceberg/util/conversions.h"
3029
#include "iceberg/util/macros.h"
@@ -339,50 +338,12 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
339338
return lhs_is_negative <=> rhs_is_negative;
340339
}
341340

342-
std::partial_ordering CompareDecimal(Literal const& lhs, Literal const& rhs) {
343-
ICEBERG_DCHECK(std::holds_alternative<Decimal>(lhs.value()),
344-
"LHS of decimal comparison must hold Decimal");
345-
ICEBERG_DCHECK(std::holds_alternative<Decimal>(rhs.value()),
346-
"RHS of decimal comparison must hold decimal");
347-
auto lhs_type = internal::checked_pointer_cast<DecimalType>(lhs.type());
348-
auto rhs_type = internal::checked_pointer_cast<DecimalType>(rhs.type());
349-
auto lhs_decimal = std::get<Decimal>(lhs.value());
350-
auto rhs_decimal = std::get<Decimal>(rhs.value());
351-
if (lhs_type->scale() == rhs_type->scale()) {
352-
return lhs_decimal <=> rhs_decimal;
353-
} else if (lhs_type->scale() > rhs_type->scale()) {
354-
// Rescale to larger scale
355-
auto rhs_res = rhs_decimal.Rescale(rhs_type->scale(), lhs_type->scale());
356-
if (!rhs_res.has_value()) {
357-
if (rhs_res.error().kind == ErrorKind::kRescaleDataLoss) {
358-
// Rescale would cause data loss, so lhs is definitely less than rhs
359-
return std::partial_ordering::less;
360-
}
361-
// Other errors should return unordered
362-
return std::partial_ordering::unordered;
363-
}
364-
return lhs_decimal <=> rhs_res.value();
365-
} else {
366-
// Rescale to larger scale
367-
auto lhs_res = lhs_decimal.Rescale(lhs_type->scale(), rhs_type->scale());
368-
if (!lhs_res.has_value()) {
369-
if (lhs_res.error().kind == ErrorKind::kRescaleDataLoss) {
370-
// Rescale would cause data loss, so lhs is definitely greater than rhs
371-
return std::partial_ordering::greater;
372-
}
373-
// Other errors should return unordered
374-
return std::partial_ordering::unordered;
375-
}
376-
return lhs_res.value() <=> rhs_decimal;
377-
}
378-
}
379-
380341
bool Literal::operator==(const Literal& other) const { return (*this <=> other) == 0; }
381342

382343
// Three-way comparison operator
383344
std::partial_ordering Literal::operator<=>(const Literal& other) const {
384345
// If types are different, comparison is unordered
385-
if (type_->type_id() != other.type_->type_id()) {
346+
if (*type_ != *other.type_) {
386347
return std::partial_ordering::unordered;
387348
}
388349

@@ -432,7 +393,12 @@ std::partial_ordering Literal::operator<=>(const Literal& other) const {
432393
}
433394

434395
case TypeId::kDecimal: {
435-
return CompareDecimal(*this, other);
396+
auto& this_val = std::get<::iceberg::Decimal>(value_);
397+
auto& other_val = std::get<::iceberg::Decimal>(other.value_);
398+
const auto& this_decimal_type = internal::checked_cast<DecimalType&>(*type_);
399+
const auto& other_decimal_type = internal::checked_cast<DecimalType&>(*other.type_);
400+
return ::iceberg::Decimal::Compare(this_val, other_val, this_decimal_type.scale(),
401+
other_decimal_type.scale());
436402
}
437403

438404
case TypeId::kString: {
@@ -491,11 +457,10 @@ std::string Literal::ToString() const {
491457
return std::to_string(std::get<double>(value_));
492458
}
493459
case TypeId::kDecimal: {
494-
auto decimal_type = internal::checked_pointer_cast<DecimalType>(type_);
495-
auto decimal = std::get<::iceberg::Decimal>(value_);
496-
auto result = decimal.ToString(decimal_type->scale());
497-
ICEBERG_CHECK(result, "Decimal ToString failed");
498-
return *result;
460+
const auto& decimal_type = internal::checked_cast<DecimalType&>(*type_);
461+
const auto& decimal = std::get<::iceberg::Decimal>(value_);
462+
return decimal.ToString(decimal_type.scale())
463+
.value_or("invalid literal of type decimal");
499464
}
500465
case TypeId::kString: {
501466
return "\"" + std::get<std::string>(value_) + "\"";

src/iceberg/expression/literal.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,10 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
5858
int64_t, // for long, timestamp, timestamp_tz, time
5959
float, // for float
6060
double, // for double
61+
std::string, // for string
62+
std::vector<uint8_t>, // for binary, fixed
6163
::iceberg::Decimal, // for decimal
62-
std::string, // for string
6364
Uuid, // for uuid
64-
std::vector<uint8_t>, // for binary, fixed
6565
BelowMin, AboveMax>;
6666

6767
/// \brief Factory methods for primitive types

src/iceberg/result.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ enum class ErrorKind {
4848
kNotFound,
4949
kNotImplemented,
5050
kNotSupported,
51-
kRescaleDataLoss,
5251
kUnknownError,
5352
};
5453

@@ -98,7 +97,6 @@ DEFINE_ERROR_FUNCTION(NotAllowed)
9897
DEFINE_ERROR_FUNCTION(NotFound)
9998
DEFINE_ERROR_FUNCTION(NotImplemented)
10099
DEFINE_ERROR_FUNCTION(NotSupported)
101-
DEFINE_ERROR_FUNCTION(RescaleDataLoss)
102100
DEFINE_ERROR_FUNCTION(UnknownError)
103101

104102
#undef DEFINE_ERROR_FUNCTION

src/iceberg/test/decimal_test.cc

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,51 @@ TEST(DecimalTest, Rescale) {
712712
ASSERT_EQ(Decimal(5), Decimal(500000).Rescale(6, 1).value());
713713
ASSERT_EQ(Decimal(500000), Decimal(5).Rescale(1, 6).value());
714714

715-
ASSERT_THAT(Decimal(5555555).Rescale(6, 1), IsError(ErrorKind::kRescaleDataLoss));
715+
ASSERT_THAT(Decimal(5555555).Rescale(6, 1), IsError(ErrorKind::kInvalid));
716+
}
717+
718+
TEST(DecimalTest, Compare) {
719+
// max positive unscaled value
720+
ASSERT_EQ(Decimal::Compare(Decimal("170141183460469231731687303715884105727"),
721+
Decimal("170141183460469231731687303715884105727"), 2, 3),
722+
std::partial_ordering::greater);
723+
724+
// min negative unscaled value
725+
ASSERT_EQ(Decimal::Compare(Decimal("-170141183460469231731687303715884105728"),
726+
Decimal("-170141183460469231731687303715884105728"), 2, 3),
727+
std::partial_ordering::less);
728+
729+
// equal values with different scales
730+
ASSERT_EQ(Decimal::Compare(Decimal("123456789"), Decimal("1234567890"), 2, 3),
731+
std::partial_ordering::equivalent);
732+
ASSERT_EQ(Decimal::Compare(Decimal("-1234567890"), Decimal("-123456789"), 3, 2),
733+
std::partial_ordering::equivalent);
734+
735+
// different values with different scales
736+
ASSERT_EQ(Decimal::Compare(Decimal("123456788"), Decimal("1234567890"), 2, 3),
737+
std::partial_ordering::less);
738+
ASSERT_EQ(Decimal::Compare(Decimal("-1234567890"), Decimal("-123456788"), 2, 3),
739+
std::partial_ordering::less);
740+
741+
// different values with same scales
742+
ASSERT_EQ(Decimal::Compare(Decimal("123456790"), Decimal("123456789"), 2, 2),
743+
std::partial_ordering::greater);
744+
ASSERT_EQ(Decimal::Compare(Decimal("-123456790"), Decimal("-123456789"), 2, 2),
745+
std::partial_ordering::less);
746+
747+
// different signs
748+
ASSERT_EQ(Decimal::Compare(Decimal("123456789"), Decimal("-123456789"), 2, 3),
749+
std::partial_ordering::greater);
750+
ASSERT_EQ(Decimal::Compare(Decimal("-123456789"), Decimal("123456789"), 2, 3),
751+
std::partial_ordering::less);
752+
753+
// zero comparisons
754+
ASSERT_EQ(Decimal::Compare(Decimal("0"), Decimal("0"), 2, 3),
755+
std::partial_ordering::equivalent);
756+
ASSERT_EQ(Decimal::Compare(Decimal("0"), Decimal("123456789"), 2, 3),
757+
std::partial_ordering::less);
758+
ASSERT_EQ(Decimal::Compare(Decimal("-123456789"), Decimal("0"), 2, 3),
759+
std::partial_ordering::less);
716760
}
717761

718762
} // namespace iceberg

src/iceberg/type_fwd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ enum class SnapshotRefType;
116116
enum class TransformType;
117117

118118
class Decimal;
119+
class Uuid;
120+
119121
class Expression;
120122
class Literal;
121123

src/iceberg/util/decimal.cc

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include <climits>
3131
#include <cmath>
3232
#include <cstring>
33-
#include <format>
3433
#include <iomanip>
3534
#include <limits>
3635
#include <sstream>
@@ -548,12 +547,9 @@ Result<Decimal> Decimal::Rescale(int32_t orig_scale, int32_t new_scale) const {
548547

549548
auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale];
550549

551-
const bool rescale_would_cause_data_loss =
552-
RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out);
553-
554-
if (rescale_would_cause_data_loss) {
555-
return RescaleDataLoss("Rescale {} from {} to {} would cause data loss",
556-
ToIntegerString(), orig_scale, new_scale);
550+
if (RescaleWouldCauseDataLoss(*this, delta_scale, multiplier, &out)) [[unlikely]] {
551+
return Invalid("Rescale {} from {} to {} would cause data loss", ToIntegerString(),
552+
orig_scale, new_scale);
557553
}
558554

559555
return out;
@@ -564,6 +560,52 @@ bool Decimal::FitsInPrecision(int32_t precision) const {
564560
return Decimal::Abs(*this) < kDecimal128PowersOfTen[precision];
565561
}
566562

563+
std::partial_ordering Decimal::Compare(const Decimal& lhs, const Decimal& rhs,
564+
int32_t lhs_scale, int32_t rhs_scale) {
565+
if (lhs_scale == rhs_scale || lhs.data_ == 0 || rhs.data_ == 0) {
566+
return lhs <=> rhs;
567+
}
568+
569+
// If one is negative and the other is positive, the positive is greater.
570+
if (lhs.data_ < 0 && rhs.data_ > 0) {
571+
return std::partial_ordering::less;
572+
}
573+
if (lhs.data_ > 0 && rhs.data_ < 0) {
574+
return std::partial_ordering::greater;
575+
}
576+
577+
// Both are negative
578+
bool negative = lhs.data_ < 0 && rhs.data_ < 0;
579+
580+
const int32_t delta_scale = lhs_scale - rhs_scale;
581+
const int32_t abs_delta_scale = std::abs(delta_scale);
582+
583+
ICEBERG_DCHECK(abs_delta_scale <= kMaxScale, "");
584+
585+
const auto& multiplier = kDecimal128PowersOfTen[abs_delta_scale];
586+
587+
Decimal adjusted_lhs;
588+
Decimal adjusted_rhs;
589+
590+
if (delta_scale < 0) {
591+
// lhs_scale < rhs_scale
592+
if (RescaleWouldCauseDataLoss(lhs, -delta_scale, multiplier, &adjusted_lhs))
593+
[[unlikely]] {
594+
return negative ? std::partial_ordering::less : std::partial_ordering::greater;
595+
}
596+
adjusted_rhs = rhs;
597+
} else {
598+
// lhs_scale > rhs_scale
599+
if (RescaleWouldCauseDataLoss(rhs, delta_scale, multiplier, &adjusted_rhs))
600+
[[unlikely]] {
601+
return negative ? std::partial_ordering::greater : std::partial_ordering::less;
602+
}
603+
adjusted_lhs = lhs;
604+
}
605+
606+
return adjusted_lhs <=> adjusted_rhs;
607+
}
608+
567609
std::array<uint8_t, Decimal::kByteWidth> Decimal::ToBytes() const {
568610
std::array<uint8_t, kByteWidth> out{{0}};
569611
std::memcpy(out.data(), &data_, kByteWidth);

src/iceberg/util/decimal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
/// https://github.com/apache/arrow/blob/main/cpp/src/arrow/util/decimal.h
2626

2727
#include <array>
28+
#include <compare>
2829
#include <cstdint>
2930
#include <iosfwd>
3031
#include <string>
@@ -186,6 +187,10 @@ class ICEBERG_EXPORT Decimal : public util::Formattable {
186187
return low() <=> other.low();
187188
}
188189

190+
/// \brief Compare two Decimals with different scales.
191+
static std::partial_ordering Compare(const Decimal& lhs, const Decimal& rhs,
192+
int32_t lhs_scale, int32_t rhs_scale);
193+
189194
const uint8_t* native_endian_bytes() const {
190195
return reinterpret_cast<const uint8_t*>(&data_);
191196
}

0 commit comments

Comments
 (0)