diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc index 790b59a2d..4f4a3c31b 100644 --- a/src/iceberg/expression/literal.cc +++ b/src/iceberg/expression/literal.cc @@ -554,4 +554,51 @@ Result LiteralCaster::CastTo(const Literal& literal, target_type->ToString()); } +// LiteralValueHash implementation +std::size_t LiteralValueHash::operator()(const Literal::Value& value) const noexcept { + return std::visit( + [](const auto& v) -> std::size_t { + using T = std::decay_t; + + constexpr size_t kHashPrime = 0x9e3779b9; + + if constexpr (std::is_same_v) { + return 0; + } else if constexpr (std::is_same_v) { + return std::numeric_limits::min(); + } else if constexpr (std::is_same_v) { + return std::numeric_limits::max(); + } else if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v || + std::is_same_v || + std::is_same_v) { + return std::hash{}(v); + } else if constexpr (std::is_same_v>) { + std::size_t hash = 0; + for (size_t i = 0; i < v.size(); ++i) { + hash ^= std::hash{}(v[i]) + kHashPrime + (hash << 6) + (hash >> 2); + } + return hash; + } else if constexpr (std::is_same_v) { + const int128_t& val = v.value(); + std::size_t hash = std::hash{}(static_cast(val >> 64)); + hash ^= std::hash{}(static_cast(val)) + kHashPrime + + (hash << 6) + (hash >> 2); + return hash; + } else if constexpr (std::is_same_v) { + std::size_t hash = 0; + const auto& bytes = v.bytes(); + for (size_t i = 0; i < bytes.size(); ++i) { + hash ^= + std::hash{}(bytes[i]) + kHashPrime + (hash << 6) + (hash >> 2); + } + return hash; + } else { + static_assert(sizeof(T) == 0, "Unhandled variant type in LiteralValueHash"); + return 0; + } + }, + value); +} + } // namespace iceberg diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index 664b5a966..3ea94d00d 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -166,79 +166,43 @@ class ICEBERG_EXPORT Literal : public 
util::Formattable { std::shared_ptr type_; }; -template -struct LiteralTraits { - using ValueType = void; -}; - -template <> -struct LiteralTraits { - using ValueType = bool; -}; - -template <> -struct LiteralTraits { - using ValueType = int32_t; -}; - -template <> -struct LiteralTraits { - using ValueType = int32_t; -}; - -template <> -struct LiteralTraits { - using ValueType = int64_t; -}; - -template <> -struct LiteralTraits { - using ValueType = int64_t; -}; - -template <> -struct LiteralTraits { - using ValueType = int64_t; +/// \brief Hash function for Literal to facilitate use in unordered containers +struct ICEBERG_EXPORT LiteralValueHash { + std::size_t operator()(const Literal::Value& value) const noexcept; }; -template <> -struct LiteralTraits { - using ValueType = int64_t; -}; - -template <> -struct LiteralTraits { - using ValueType = float; -}; - -template <> -struct LiteralTraits { - using ValueType = double; -}; - -template <> -struct LiteralTraits { - using ValueType = Decimal; -}; - -template <> -struct LiteralTraits { - using ValueType = std::string; +struct ICEBERG_EXPORT LiteralHash { + std::size_t operator()(const Literal& value) const noexcept { + return LiteralValueHash{}(value.value()); + } }; -template <> -struct LiteralTraits { - using ValueType = Uuid; +template +struct LiteralTraits { + using ValueType = void; }; -template <> -struct LiteralTraits { - using ValueType = std::vector; -}; +#define DEFINE_LITERAL_TRAIT(TYPE_ID, VALUE_TYPE) \ + template <> \ + struct LiteralTraits { \ + using ValueType = VALUE_TYPE; \ + }; -template <> -struct LiteralTraits { - using ValueType = std::vector; -}; +DEFINE_LITERAL_TRAIT(kBoolean, bool) +DEFINE_LITERAL_TRAIT(kInt, int32_t) +DEFINE_LITERAL_TRAIT(kDate, int32_t) +DEFINE_LITERAL_TRAIT(kLong, int64_t) +DEFINE_LITERAL_TRAIT(kTime, int64_t) +DEFINE_LITERAL_TRAIT(kTimestamp, int64_t) +DEFINE_LITERAL_TRAIT(kTimestampTz, int64_t) +DEFINE_LITERAL_TRAIT(kFloat, float) +DEFINE_LITERAL_TRAIT(kDouble, double) 
+DEFINE_LITERAL_TRAIT(kDecimal, Decimal) +DEFINE_LITERAL_TRAIT(kString, std::string) +DEFINE_LITERAL_TRAIT(kUuid, Uuid) +DEFINE_LITERAL_TRAIT(kBinary, std::vector) +DEFINE_LITERAL_TRAIT(kFixed, std::vector) + +#undef DEFINE_LITERAL_TRAIT } // namespace iceberg diff --git a/src/iceberg/expression/predicate.cc b/src/iceberg/expression/predicate.cc index 2ce04a161..959443c04 100644 --- a/src/iceberg/expression/predicate.cc +++ b/src/iceberg/expression/predicate.cc @@ -20,9 +20,9 @@ #include "iceberg/expression/predicate.h" #include +#include #include -#include "iceberg/exception.h" #include "iceberg/expression/expressions.h" #include "iceberg/expression/literal.h" #include "iceberg/result.h" @@ -143,6 +143,26 @@ bool IsFloatingType(TypeId type) { return type == TypeId::kFloat || type == TypeId::kDouble; } +bool IsNan(const Literal& literal) { + const auto& value = literal.value(); + if (std::holds_alternative(value)) { + return std::isnan(std::get(value)); + } else if (std::holds_alternative(value)) { + return std::isnan(std::get(value)); + } + return false; +} + +bool StartsWith(const Literal& lhs, const Literal& rhs) { + const auto& lhs_value = lhs.value(); + const auto& rhs_value = rhs.value(); + if (std::holds_alternative(lhs_value) && + std::holds_alternative(rhs_value)) { + return std::get(lhs_value).starts_with(std::get(rhs_value)); + } + return false; +} + } // namespace template @@ -287,10 +307,10 @@ BoundPredicate::BoundPredicate(Expression::Operation op, std::shared_ptr BoundPredicate::Evaluate(const StructLike& data) const { +Result BoundPredicate::Evaluate(const StructLike& data) const { ICEBERG_ASSIGN_OR_RAISE(auto eval_result, term_->Evaluate(data)); ICEBERG_ASSIGN_OR_RAISE(auto test_result, Test(eval_result)); - return Literal::Value{test_result}; + return Literal::Boolean(test_result); } // BoundUnaryPredicate implementation @@ -300,12 +320,37 @@ BoundUnaryPredicate::BoundUnaryPredicate(Expression::Operation op, 
BoundUnaryPredicate::~BoundUnaryPredicate() = default; -Result BoundUnaryPredicate::Test(const Literal::Value& value) const { - return NotImplemented("BoundUnaryPredicate::Test not implemented"); +Result BoundUnaryPredicate::Test(const Literal& literal) const { + switch (op()) { + case Expression::Operation::kIsNull: + return literal.IsNull(); + case Expression::Operation::kNotNull: + return !literal.IsNull(); + case Expression::Operation::kIsNan: + return IsNan(literal); + case Expression::Operation::kNotNan: + return !IsNan(literal); + default: + return InvalidExpression("Invalid operation for BoundUnaryPredicate: {}", op()); + } +} + +Result> BoundUnaryPredicate::Negate() const { + ICEBERG_ASSIGN_OR_RAISE(auto negated_op, ::iceberg::Negate(op())); + return std::make_shared(negated_op, term_); } bool BoundUnaryPredicate::Equals(const Expression& other) const { - throw IcebergError("BoundUnaryPredicate::Equals not implemented"); + if (op() != other.op()) { + return false; + } + + if (const auto* other_pred = dynamic_cast(&other); + other_pred) { + return term_->Equals(*other_pred->term()); + } + + return false; } std::string BoundUnaryPredicate::ToString() const { @@ -331,12 +376,91 @@ BoundLiteralPredicate::BoundLiteralPredicate(Expression::Operation op, BoundLiteralPredicate::~BoundLiteralPredicate() = default; -Result BoundLiteralPredicate::Test(const Literal::Value& value) const { - return NotImplemented("BoundLiteralPredicate::Test not implemented"); +Result BoundLiteralPredicate::Test(const Literal& value) const { + switch (op()) { + case Expression::Operation::kLt: + return value < literal_; + case Expression::Operation::kLtEq: + return value <= literal_; + case Expression::Operation::kGt: + return value > literal_; + case Expression::Operation::kGtEq: + return value >= literal_; + case Expression::Operation::kEq: + return value == literal_; + case Expression::Operation::kNotEq: + return value != literal_; + case Expression::Operation::kStartsWith: + return 
StartsWith(value, literal_); + case Expression::Operation::kNotStartsWith: + return !StartsWith(value, literal_); + default: + return InvalidExpression("Invalid operation for BoundLiteralPredicate: {}", op()); + } +} + +Result> BoundLiteralPredicate::Negate() const { + ICEBERG_ASSIGN_OR_RAISE(auto negated_op, ::iceberg::Negate(op())); + return std::make_shared(negated_op, term_, literal_); } bool BoundLiteralPredicate::Equals(const Expression& other) const { - throw IcebergError("BoundLiteralPredicate::Equals not implemented"); + const auto* other_pred = dynamic_cast(&other); + if (!other_pred) { + return false; + } + + if (op() == other.op()) { + if (term_->Equals(*other_pred->term())) { + // because the term is equivalent, the literal must have the same type + return literal_ == other_pred->literal(); + } + } + + // TODO(gangwu): add TypeId::kTimestampNano + static const std::unordered_set kIntegralTypes = { + TypeId::kInt, TypeId::kLong, TypeId::kDate, + TypeId::kTime, TypeId::kTimestamp, TypeId::kTimestampTz}; + + if (kIntegralTypes.contains(term_->type()->type_id()) && + term_->Equals(*other_pred->term())) { + auto get_long = [](const Literal& lit) -> std::optional { + const auto& val = lit.value(); + if (std::holds_alternative(val)) { + return std::get(val); + } else if (std::holds_alternative(val)) { + return std::get(val); + } + return std::nullopt; + }; + + auto this_val = get_long(literal_); + auto other_val = get_long(other_pred->literal()); + if (this_val && other_val) { + switch (op()) { + case Expression::Operation::kLt: + // < 6 is equivalent to <= 5 + return other_pred->op() == Expression::Operation::kLtEq && + *this_val == *other_val + 1; + case Expression::Operation::kLtEq: + // <= 5 is equivalent to < 6 + return other_pred->op() == Expression::Operation::kLt && + *this_val == *other_val - 1; + case Expression::Operation::kGt: + // > 5 is equivalent to >= 6 + return other_pred->op() == Expression::Operation::kGtEq && + *this_val == *other_val - 1; 
+ case Expression::Operation::kGtEq: + // >= 6 is equivalent to > 5 + return other_pred->op() == Expression::Operation::kGt && + *this_val == *other_val + 1; + default: + return false; + } + } + } + + return false; } std::string BoundLiteralPredicate::ToString() const { @@ -370,27 +494,54 @@ std::string BoundLiteralPredicate::ToString() const { BoundSetPredicate::BoundSetPredicate(Expression::Operation op, std::shared_ptr term, std::span literals) - : BoundPredicate(op, std::move(term)) { - for (const auto& literal : literals) { - ICEBERG_DCHECK((*literal.type() == *term_->type()), - "Literal type does not match term type"); - value_set_.push_back(literal.value()); - } -} + : BoundPredicate(op, std::move(term)), value_set_(literals.begin(), literals.end()) {} + +BoundSetPredicate::BoundSetPredicate(Expression::Operation op, + std::shared_ptr term, + LiteralSet value_set) + : BoundPredicate(op, std::move(term)), value_set_(std::move(value_set)) {} BoundSetPredicate::~BoundSetPredicate() = default; -Result BoundSetPredicate::Test(const Literal::Value& value) const { - return NotImplemented("BoundSetPredicate::Test not implemented"); +Result BoundSetPredicate::Test(const Literal& value) const { + switch (op()) { + case Expression::Operation::kIn: + return value_set_.contains(value); + case Expression::Operation::kNotIn: + return !value_set_.contains(value); + default: + return InvalidExpression("Invalid operation for BoundSetPredicate: {}", op()); + } +} + +Result> BoundSetPredicate::Negate() const { + ICEBERG_ASSIGN_OR_RAISE(auto negated_op, ::iceberg::Negate(op())); + return std::make_shared(negated_op, term_, value_set_); } bool BoundSetPredicate::Equals(const Expression& other) const { - throw IcebergError("BoundSetPredicate::Equals not implemented"); + if (op() != other.op()) { + return false; + } + + if (const auto* other_pred = dynamic_cast(&other); + other_pred) { + return value_set_ == other_pred->value_set_; + } + + return false; } std::string 
BoundSetPredicate::ToString() const { - // TODO(gangwu): Literal::Value does not have std::format support. - throw IcebergError("BoundSetPredicate::ToString not implemented"); + switch (op()) { + case Expression::Operation::kIn: + return std::format("{} in {}", *term(), FormatRange(value_set_, ", ", "(", ")")); + case Expression::Operation::kNotIn: + return std::format("{} not in {}", *term(), + FormatRange(value_set_, ", ", "(", ")")); + default: + return std::format("Invalid set predicate: operation = {}", op()); + } } // Explicit template instantiations diff --git a/src/iceberg/expression/predicate.h b/src/iceberg/expression/predicate.h index 3c40af694..979db1212 100644 --- a/src/iceberg/expression/predicate.h +++ b/src/iceberg/expression/predicate.h @@ -23,8 +23,10 @@ /// Predicate interface for boolean expressions that test terms. #include +#include #include "iceberg/expression/expression.h" +#include "iceberg/expression/literal.h" #include "iceberg/expression/term.h" namespace iceberg { @@ -111,13 +113,13 @@ class ICEBERG_EXPORT BoundPredicate : public Predicate, public Bound std::shared_ptr reference() override { return term_->reference(); } - Result Evaluate(const StructLike& data) const override; + Result Evaluate(const StructLike& data) const override; /// \brief Test a value against this predicate. /// - /// \param value The value to test + /// \param value The literal value to test /// \return true if the predicate passes, false otherwise - virtual Result Test(const Literal::Value& value) const = 0; + virtual Result Test(const Literal& value) const = 0; enum class Kind : int8_t { // A unary predicate (tests for null, not-null, etc.). 
@@ -143,12 +145,14 @@ class ICEBERG_EXPORT BoundUnaryPredicate : public BoundPredicate { ~BoundUnaryPredicate() override; - Result Test(const Literal::Value& value) const override; + Result Test(const Literal& value) const override; Kind kind() const override { return Kind::kUnary; } std::string ToString() const override; + Result> Negate() const override; + bool Equals(const Expression& other) const override; }; @@ -168,12 +172,14 @@ class ICEBERG_EXPORT BoundLiteralPredicate : public BoundPredicate { /// \brief Returns the literal being compared against. const Literal& literal() const { return literal_; } - Result Test(const Literal::Value& value) const override; + Result Test(const Literal& value) const override; Kind kind() const override { return Kind::kLiteral; } std::string ToString() const override; + Result> Negate() const override; + bool Equals(const Expression& other) const override; private: @@ -183,6 +189,8 @@ class ICEBERG_EXPORT BoundLiteralPredicate : public BoundPredicate { /// \brief Bound set predicate (membership testing against a set of values). class ICEBERG_EXPORT BoundSetPredicate : public BoundPredicate { public: + using LiteralSet = std::unordered_set; + /// \brief Create a bound set predicate. /// /// \param op The set operation (kIn, kNotIn) @@ -191,23 +199,27 @@ class ICEBERG_EXPORT BoundSetPredicate : public BoundPredicate { BoundSetPredicate(Expression::Operation op, std::shared_ptr term, std::span literals); + /// \brief Create a bound set predicate using a set of literals. + BoundSetPredicate(Expression::Operation op, std::shared_ptr term, + LiteralSet value_set); + ~BoundSetPredicate() override; /// \brief Returns the set of literals to test against. 
- const std::vector& literal_set() const { return value_set_; } + const LiteralSet& literal_set() const { return value_set_; } - Result Test(const Literal::Value& value) const override; + Result Test(const Literal& value) const override; Kind kind() const override { return Kind::kSet; } std::string ToString() const override; + Result> Negate() const override; + bool Equals(const Expression& other) const override; private: - /// FIXME: Literal::Value does not have hash support. We need to add this - /// and replace the vector with a unordered_set. - std::vector value_set_; + LiteralSet value_set_; }; } // namespace iceberg diff --git a/src/iceberg/expression/term.cc b/src/iceberg/expression/term.cc index 5bb9b71d8..30bdf8ed6 100644 --- a/src/iceberg/expression/term.cc +++ b/src/iceberg/expression/term.cc @@ -71,7 +71,7 @@ std::string BoundReference::ToString() const { return std::format("ref(id={}, type={})", field_.field_id(), field_.type()->ToString()); } -Result BoundReference::Evaluate(const StructLike& data) const { +Result BoundReference::Evaluate(const StructLike& data) const { return NotImplemented("BoundReference::Evaluate(StructLike) not implemented"); } @@ -119,7 +119,7 @@ std::string BoundTransform::ToString() const { return std::format("{}({})", transform_->ToString(), ref_->ToString()); } -Result BoundTransform::Evaluate(const StructLike& data) const { +Result BoundTransform::Evaluate(const StructLike& data) const { throw IcebergError("BoundTransform::Evaluate(StructLike) not implemented"); } diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h index 2911dfaa3..e0a883c13 100644 --- a/src/iceberg/expression/term.h +++ b/src/iceberg/expression/term.h @@ -79,7 +79,7 @@ class ICEBERG_EXPORT Bound { virtual ~Bound(); /// \brief Evaluate this expression against a row-based data. 
- virtual Result Evaluate(const StructLike& data) const = 0; + virtual Result Evaluate(const StructLike& data) const = 0; /// \brief Returns the underlying bound reference for this term. virtual std::shared_ptr reference() = 0; @@ -176,7 +176,7 @@ class ICEBERG_EXPORT BoundReference std::string ToString() const override; - Result Evaluate(const StructLike& data) const override; + Result Evaluate(const StructLike& data) const override; std::shared_ptr reference() override { return shared_from_this(); } @@ -236,7 +236,7 @@ class ICEBERG_EXPORT BoundTransform : public BoundTerm { std::string ToString() const override; - Result Evaluate(const StructLike& data) const override; + Result Evaluate(const StructLike& data) const override; std::shared_ptr reference() override { return ref_; } diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc index 23703c20d..01a7a7ce6 100644 --- a/src/iceberg/test/literal_test.cc +++ b/src/iceberg/test/literal_test.cc @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -795,4 +796,34 @@ INSTANTIATE_TEST_SUITE_P( return info.param.test_name; }); +TEST(LiteralTest, LiteralHash) { + LiteralHash hasher; + + EXPECT_EQ(hasher(Literal::Int(42)), hasher(Literal::Int(42))); + EXPECT_NE(hasher(Literal::Int(42)), hasher(Literal::Int(43))); + + EXPECT_EQ(hasher(Literal::String("hello")), hasher(Literal::String("hello"))); + EXPECT_NE(hasher(Literal::String("hello")), hasher(Literal::String("world"))); +} + +TEST(LiteralTest, LiteralHashUnorderedSet) { + std::unordered_set literal_set; + + literal_set.insert(Literal::Int(1)); + literal_set.insert(Literal::Int(2)); + literal_set.insert(Literal::Int(1)); // Duplicate + + EXPECT_EQ(literal_set.size(), 2); + EXPECT_TRUE(literal_set.contains(Literal::Int(1))); + EXPECT_TRUE(literal_set.contains(Literal::Int(2))); + EXPECT_FALSE(literal_set.contains(Literal::Int(3))); + + std::unordered_set string_set; + string_set.insert(Literal::String("a")); + 
string_set.insert(Literal::String("b")); + string_set.insert(Literal::String("a")); // Duplicate + + EXPECT_EQ(string_set.size(), 2); +} + } // namespace iceberg diff --git a/src/iceberg/test/predicate_test.cc b/src/iceberg/test/predicate_test.cc index ca38769b3..5ab790820 100644 --- a/src/iceberg/test/predicate_test.cc +++ b/src/iceberg/test/predicate_test.cc @@ -17,10 +17,16 @@ * under the License. */ +#include "iceberg/expression/predicate.h" + +#include +#include + #include "iceberg/expression/expressions.h" #include "iceberg/schema.h" #include "iceberg/test/matchers.h" #include "iceberg/type.h" +#include "iceberg/util/macros.h" namespace iceberg { @@ -433,4 +439,435 @@ TEST_F(PredicateTest, ComplexExpressionCombinations) { EXPECT_EQ(nested->op(), Expression::Operation::kAnd); } +TEST_F(PredicateTest, BoundUnaryPredicateNegate) { + auto is_null_pred = Expressions::IsNull("name"); + auto bound_null = is_null_pred->Bind(*schema_, /*case_sensitive=*/true).value(); + + auto negated_result = bound_null->Negate(); + ASSERT_THAT(negated_result, IsOk()); + auto negated = negated_result.value(); + EXPECT_EQ(negated->op(), Expression::Operation::kNotNull); + + // Double negation should return the original predicate + auto double_neg_result = negated->Negate(); + ASSERT_THAT(double_neg_result, IsOk()); + auto double_neg = double_neg_result.value(); + EXPECT_EQ(double_neg->op(), Expression::Operation::kIsNull); +} + +TEST_F(PredicateTest, BoundUnaryPredicateEquals) { + auto is_null_name1 = Expressions::IsNull("name"); + auto is_null_name2 = Expressions::IsNull("name"); + auto is_null_age = Expressions::IsNull("age"); + auto not_null_name = Expressions::NotNull("name"); + + auto bound_null1 = is_null_name1->Bind(*schema_, true).value(); + auto bound_null2 = is_null_name2->Bind(*schema_, true).value(); + auto bound_null_age = is_null_age->Bind(*schema_, true).value(); + auto bound_not_null = not_null_name->Bind(*schema_, true).value(); + + // Same predicate should be equal + 
EXPECT_TRUE(bound_null1->Equals(*bound_null2)); + EXPECT_TRUE(bound_null2->Equals(*bound_null1)); + + // Different fields should not be equal + EXPECT_FALSE(bound_null1->Equals(*bound_null_age)); + + // Different operations should not be equal + EXPECT_FALSE(bound_null1->Equals(*bound_not_null)); +} + +TEST_F(PredicateTest, BoundLiteralPredicateNegate) { + auto eq_pred = Expressions::Equal("age", Literal::Int(25)); + auto bound_eq = eq_pred->Bind(*schema_, true).value(); + + auto negated_result = bound_eq->Negate(); + ASSERT_THAT(negated_result, IsOk()); + + auto negated = negated_result.value(); + EXPECT_EQ(negated->op(), Expression::Operation::kNotEq); + + // Test less than negation + auto lt_pred = Expressions::LessThan("age", Literal::Int(30)); + auto bound_lt = lt_pred->Bind(*schema_, true).value(); + auto neg_lt_result = bound_lt->Negate(); + ASSERT_THAT(neg_lt_result, IsOk()); + EXPECT_EQ(neg_lt_result.value()->op(), Expression::Operation::kGtEq); +} + +TEST_F(PredicateTest, BoundLiteralPredicateEquals) { + auto eq1 = Expressions::Equal("age", Literal::Int(25)); + auto eq2 = Expressions::Equal("age", Literal::Int(25)); + auto eq3 = Expressions::Equal("age", Literal::Int(30)); + auto neq = Expressions::NotEqual("age", Literal::Int(25)); + + auto bound_eq1 = eq1->Bind(*schema_, true).value(); + auto bound_eq2 = eq2->Bind(*schema_, true).value(); + auto bound_eq3 = eq3->Bind(*schema_, true).value(); + auto bound_neq = neq->Bind(*schema_, true).value(); + + // Same predicate should be equal + EXPECT_TRUE(bound_eq1->Equals(*bound_eq2)); + + // Different literal values should not be equal + EXPECT_FALSE(bound_eq1->Equals(*bound_eq3)); + + // Different operations should not be equal + EXPECT_FALSE(bound_eq1->Equals(*bound_neq)); +} + +TEST_F(PredicateTest, BoundLiteralPredicateIntegerEquivalence) { + // Test that < 6 is equivalent to <= 5 + auto lt_6 = Expressions::LessThan("age", Literal::Int(6)); + auto lte_5 = Expressions::LessThanOrEqual("age", 
Literal::Int(5)); + auto bound_lt = lt_6->Bind(*schema_, true).value(); + auto bound_lte = lte_5->Bind(*schema_, true).value(); + EXPECT_TRUE(bound_lt->Equals(*bound_lte)); + EXPECT_TRUE(bound_lte->Equals(*bound_lt)); + + // Test that > 5 is equivalent to >= 6 + auto gt_5 = Expressions::GreaterThan("age", Literal::Int(5)); + auto gte_6 = Expressions::GreaterThanOrEqual("age", Literal::Int(6)); + auto bound_gt = gt_5->Bind(*schema_, true).value(); + auto bound_gte = gte_6->Bind(*schema_, true).value(); + EXPECT_TRUE(bound_gt->Equals(*bound_gte)); + EXPECT_TRUE(bound_gte->Equals(*bound_gt)); + + // Test that < 6 is not equivalent to <= 6 + auto lte_6 = Expressions::LessThanOrEqual("age", Literal::Int(6)); + auto bound_lte_6 = lte_6->Bind(*schema_, true).value(); + EXPECT_FALSE(bound_lt->Equals(*bound_lte_6)); +} + +TEST_F(PredicateTest, BoundSetPredicateToString) { + auto in_pred = + Expressions::In("age", {Literal::Int(10), Literal::Int(20), Literal::Int(30)}); + auto bound_in = in_pred->Bind(*schema_, true).value(); + + auto str = bound_in->ToString(); + // The set order might vary, but should contain the key elements + // BoundReference uses field_id in ToString, so check for id=3 (age field) + EXPECT_TRUE(str.find("id=3") != std::string::npos); + EXPECT_TRUE(str.find("in") != std::string::npos); + + auto not_in_pred = + Expressions::NotIn("name", {Literal::String("a"), Literal::String("b")}); + auto bound_not_in = not_in_pred->Bind(*schema_, true).value(); + + auto not_in_str = bound_not_in->ToString(); + // Check for id=2 (name field) + EXPECT_TRUE(not_in_str.find("id=2") != std::string::npos); + EXPECT_TRUE(not_in_str.find("not in") != std::string::npos); +} + +TEST_F(PredicateTest, BoundSetPredicateNegate) { + auto in_pred = Expressions::In("age", {Literal::Int(10), Literal::Int(20)}); + auto bound_in = in_pred->Bind(*schema_, true).value(); + + auto negated_result = bound_in->Negate(); + ASSERT_THAT(negated_result, IsOk()); + + auto negated = 
negated_result.value(); + EXPECT_EQ(negated->op(), Expression::Operation::kNotIn); + + // Test double negation + auto double_neg_result = negated->Negate(); + ASSERT_THAT(double_neg_result, IsOk()); + EXPECT_EQ(double_neg_result.value()->op(), Expression::Operation::kIn); +} + +TEST_F(PredicateTest, BoundSetPredicateEquals) { + auto in1 = Expressions::In("age", {Literal::Int(10), Literal::Int(20)}); + auto in2 = + Expressions::In("age", {Literal::Int(20), Literal::Int(10)}); // Different order + auto in3 = + Expressions::In("age", {Literal::Int(10), Literal::Int(30)}); // Different values + + auto bound_in1 = in1->Bind(*schema_, /*case_sensitive=*/true).value(); + auto bound_in2 = in2->Bind(*schema_, /*case_sensitive=*/true).value(); + auto bound_in3 = in3->Bind(*schema_, /*case_sensitive=*/true).value(); + + // Same values in different order should be equal (unordered_set) + EXPECT_TRUE(bound_in1->Equals(*bound_in2)); + EXPECT_TRUE(bound_in2->Equals(*bound_in1)); + + // Different values should not be equal + EXPECT_FALSE(bound_in1->Equals(*bound_in3)); +} + +namespace { + +std::shared_ptr AssertAndCastToBoundPredicate( + std::shared_ptr expr) { + auto bound_pred = std::dynamic_pointer_cast(expr); + EXPECT_NE(bound_pred, nullptr) << "Expected a BoundPredicate, got " << expr->ToString(); + return bound_pred; +} + +} // namespace + +TEST_F(PredicateTest, BoundUnaryPredicateTestIsNull) { + ICEBERG_ASSIGN_OR_THROW(auto is_null_pred, Expressions::IsNull("name")->Bind( + *schema_, /*case_sensitive=*/true)); + auto bound_pred = AssertAndCastToBoundPredicate(is_null_pred); + EXPECT_THAT(bound_pred->Test(Literal::Null(string())), HasValue(testing::Eq(true))); + EXPECT_THAT(bound_pred->Test(Literal::String("test")), HasValue(testing::Eq(false))); +} + +TEST_F(PredicateTest, BoundUnaryPredicateTestNotNull) { + ICEBERG_ASSIGN_OR_THROW(auto not_null_pred, Expressions::NotNull("name")->Bind( + *schema_, /*case_sensitive=*/true)); + auto bound_pred = 
AssertAndCastToBoundPredicate(not_null_pred); + EXPECT_THAT(bound_pred->Test(Literal::String("test")), HasValue(testing::Eq(true))); + EXPECT_THAT(bound_pred->Test(Literal::Null(string())), HasValue(testing::Eq(false))); +} + +TEST_F(PredicateTest, BoundUnaryPredicateTestIsNaN) { + ICEBERG_ASSIGN_OR_THROW(auto is_nan_pred, Expressions::IsNaN("salary")->Bind( + *schema_, /*case_sensitive=*/true)); + auto bound_pred = AssertAndCastToBoundPredicate(is_nan_pred); + + // Test with NaN values + EXPECT_THAT(bound_pred->Test(Literal::Float(std::numeric_limits::quiet_NaN())), + HasValue(testing::Eq(true))); + EXPECT_THAT(bound_pred->Test(Literal::Double(std::numeric_limits::quiet_NaN())), + HasValue(testing::Eq(true))); + + // Test with regular values + EXPECT_THAT(bound_pred->Test(Literal::Float(3.14f)), HasValue(testing::Eq(false))); + EXPECT_THAT(bound_pred->Test(Literal::Double(2.718)), HasValue(testing::Eq(false))); + + // Test with infinity + EXPECT_THAT(bound_pred->Test(Literal::Float(std::numeric_limits::infinity())), + HasValue(testing::Eq(false))); +} + +TEST_F(PredicateTest, BoundUnaryPredicateTestNotNaN) { + ICEBERG_ASSIGN_OR_THROW(auto not_nan_pred, Expressions::NotNaN("salary")->Bind( + *schema_, /*case_sensitive=*/true)); + auto bound_pred = AssertAndCastToBoundPredicate(not_nan_pred); + + // Test with regular values + EXPECT_THAT(bound_pred->Test(Literal::Double(100.5)), HasValue(testing::Eq(true))); + + // Test with NaN + EXPECT_THAT(bound_pred->Test(Literal::Double(std::numeric_limits::quiet_NaN())), + HasValue(testing::Eq(false))); + + // Test with infinity (should be true as infinity is not NaN) + EXPECT_THAT(bound_pred->Test(Literal::Double(std::numeric_limits::infinity())), + HasValue(testing::Eq(true))); +} + +TEST_F(PredicateTest, BoundLiteralPredicateTestComparison) { + // Test less than + ICEBERG_ASSIGN_OR_THROW(auto lt_pred, Expressions::LessThan("age", Literal::Int(30)) + ->Bind(*schema_, /*case_sensitive=*/true)); + auto bound_lt = 
AssertAndCastToBoundPredicate(lt_pred);
+  EXPECT_THAT(bound_lt->Test(Literal::Int(20)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_lt->Test(Literal::Int(30)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_lt->Test(Literal::Int(40)), HasValue(testing::Eq(false)));
+
+  // Test less than or equal
+  ICEBERG_ASSIGN_OR_THROW(auto lte_pred,
+                          Expressions::LessThanOrEqual("age", Literal::Int(30))
+                              ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_lte = AssertAndCastToBoundPredicate(lte_pred);
+  EXPECT_THAT(bound_lte->Test(Literal::Int(20)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_lte->Test(Literal::Int(30)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_lte->Test(Literal::Int(40)), HasValue(testing::Eq(false)));
+
+  // Test greater than
+  ICEBERG_ASSIGN_OR_THROW(auto gt_pred, Expressions::GreaterThan("age", Literal::Int(30))
+                                            ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_gt = AssertAndCastToBoundPredicate(gt_pred);
+  EXPECT_THAT(bound_gt->Test(Literal::Int(20)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_gt->Test(Literal::Int(30)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_gt->Test(Literal::Int(40)), HasValue(testing::Eq(true)));
+
+  // Test greater than or equal
+  ICEBERG_ASSIGN_OR_THROW(auto gte_pred,
+                          Expressions::GreaterThanOrEqual("age", Literal::Int(30))
+                              ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_gte = AssertAndCastToBoundPredicate(gte_pred);
+  EXPECT_THAT(bound_gte->Test(Literal::Int(20)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_gte->Test(Literal::Int(30)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_gte->Test(Literal::Int(40)), HasValue(testing::Eq(true)));
+}
+
+TEST_F(PredicateTest, BoundLiteralPredicateTestEquality) {
+  // Equal: 25 matches, its neighbours 24 and 26 do not
+  ICEBERG_ASSIGN_OR_THROW(auto eq_pred, Expressions::Equal("age", Literal::Int(25))
+                                            ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_eq = AssertAndCastToBoundPredicate(eq_pred);
+  EXPECT_THAT(bound_eq->Test(Literal::Int(25)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_eq->Test(Literal::Int(26)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_eq->Test(Literal::Int(24)), HasValue(testing::Eq(false)));
+
+  // Not equal: the same three probes with the opposite outcome
+  ICEBERG_ASSIGN_OR_THROW(auto neq_pred, Expressions::NotEqual("age", Literal::Int(25))
+                                             ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_neq = AssertAndCastToBoundPredicate(neq_pred);
+  EXPECT_THAT(bound_neq->Test(Literal::Int(25)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_neq->Test(Literal::Int(26)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_neq->Test(Literal::Int(24)), HasValue(testing::Eq(true)));
+}
+
+TEST_F(PredicateTest, BoundLiteralPredicateTestWithDifferentTypes) {
+  // Test with double
+  ICEBERG_ASSIGN_OR_THROW(auto gt_pred,
+                          Expressions::GreaterThan("salary", Literal::Double(50000.0))
+                              ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_double = AssertAndCastToBoundPredicate(gt_pred);
+  EXPECT_THAT(bound_double->Test(Literal::Double(60000.0)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_double->Test(Literal::Double(40000.0)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_double->Test(Literal::Double(50000.0)), HasValue(testing::Eq(false)));
+
+  // Test with string
+  ICEBERG_ASSIGN_OR_THROW(auto str_eq_pred,
+                          Expressions::Equal("name", Literal::String("Alice"))
+                              ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_string = AssertAndCastToBoundPredicate(str_eq_pred);
+  EXPECT_THAT(bound_string->Test(Literal::String("Alice")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_string->Test(Literal::String("Bob")), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_string->Test(Literal::String("alice")),
+              HasValue(testing::Eq(false)));  // Case sensitive
+
+  // Test with boolean
+  ICEBERG_ASSIGN_OR_THROW(auto bool_eq_pred,
+                          Expressions::Equal("active", Literal::Boolean(true))
+                              ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_bool = AssertAndCastToBoundPredicate(bool_eq_pred);
+  EXPECT_THAT(bound_bool->Test(Literal::Boolean(true)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_bool->Test(Literal::Boolean(false)), HasValue(testing::Eq(false)));
+}
+
+TEST_F(PredicateTest, BoundLiteralPredicateTestStartsWith) {
+  ICEBERG_ASSIGN_OR_THROW(
+      auto starts_with_pred,
+      Expressions::StartsWith("name", "Jo")->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_pred = AssertAndCastToBoundPredicate(starts_with_pred);
+
+  // Test strings that start with "Jo"
+  EXPECT_THAT(bound_pred->Test(Literal::String("John")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Joe")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Jo")), HasValue(testing::Eq(true)));
+
+  // Test strings that don't start with "Jo"
+  EXPECT_THAT(bound_pred->Test(Literal::String("Alice")), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Bob")), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("")), HasValue(testing::Eq(false)));
+
+  // Test empty prefix
+  ICEBERG_ASSIGN_OR_THROW(
+      auto empty_prefix_pred,
+      Expressions::StartsWith("name", "")->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_empty = AssertAndCastToBoundPredicate(empty_prefix_pred);
+
+  // Every string, including the empty one, starts with the empty prefix
+  EXPECT_THAT(bound_empty->Test(Literal::String("test")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_empty->Test(Literal::String("")), HasValue(testing::Eq(true)));
+}
+
+TEST_F(PredicateTest, BoundLiteralPredicateTestNotStartsWith) {
+  ICEBERG_ASSIGN_OR_THROW(
+      auto not_starts_with_pred,
+      Expressions::NotStartsWith("name", "Jo")->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_pred = AssertAndCastToBoundPredicate(not_starts_with_pred);
+
+  // Test strings that don't start with "Jo"
+  EXPECT_THAT(bound_pred->Test(Literal::String("Alice")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Bob")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("")), HasValue(testing::Eq(true)));
+
+  // Test strings that start with "Jo"
+  EXPECT_THAT(bound_pred->Test(Literal::String("John")), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Joe")), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Jo")), HasValue(testing::Eq(false)));
+}
+
+TEST_F(PredicateTest, BoundSetPredicateTestIn) {
+  ICEBERG_ASSIGN_OR_THROW(
+      auto in_pred,
+      Expressions::In("age", {Literal::Int(10), Literal::Int(20), Literal::Int(30)})
+          ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_pred = AssertAndCastToBoundPredicate(in_pred);
+
+  // Test values in the set
+  EXPECT_THAT(bound_pred->Test(Literal::Int(10)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(20)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(30)), HasValue(testing::Eq(true)));
+
+  // Test values not in the set
+  EXPECT_THAT(bound_pred->Test(Literal::Int(15)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(40)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(0)), HasValue(testing::Eq(false)));
+}
+
+TEST_F(PredicateTest, BoundSetPredicateTestNotIn) {
+  ICEBERG_ASSIGN_OR_THROW(
+      auto not_in_pred,
+      Expressions::NotIn("age", {Literal::Int(10), Literal::Int(20), Literal::Int(30)})
+          ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_pred = AssertAndCastToBoundPredicate(not_in_pred);
+
+  // Test values not in the set
+  EXPECT_THAT(bound_pred->Test(Literal::Int(15)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(40)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(0)), HasValue(testing::Eq(true)));
+
+  // Test values in the set
+  EXPECT_THAT(bound_pred->Test(Literal::Int(10)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(20)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::Int(30)), HasValue(testing::Eq(false)));
+}
+
+TEST_F(PredicateTest, BoundSetPredicateTestWithStrings) {
+  ICEBERG_ASSIGN_OR_THROW(
+      auto in_pred,
+      Expressions::In("name", {Literal::String("Alice"), Literal::String("Bob"),
+                               Literal::String("Charlie")})
+          ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_pred = AssertAndCastToBoundPredicate(in_pred);
+
+  // Test strings in the set
+  EXPECT_THAT(bound_pred->Test(Literal::String("Alice")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Bob")), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("Charlie")), HasValue(testing::Eq(true)));
+
+  // Test strings not in the set
+  EXPECT_THAT(bound_pred->Test(Literal::String("David")), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::String("alice")),
+              HasValue(testing::Eq(false)));  // Case sensitive
+  EXPECT_THAT(bound_pred->Test(Literal::String("")), HasValue(testing::Eq(false)));
+}
+
+TEST_F(PredicateTest, BoundSetPredicateTestWithLongs) {
+  ICEBERG_ASSIGN_OR_THROW(auto in_pred,
+                          Expressions::In("id", {Literal::Long(100L), Literal::Long(200L),
+                                                 Literal::Long(300L)})
+                              ->Bind(*schema_, /*case_sensitive=*/true));
+  auto bound_pred = AssertAndCastToBoundPredicate(in_pred);
+
+  // Test longs in the set
+  EXPECT_THAT(bound_pred->Test(Literal::Long(100L)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::Long(200L)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_pred->Test(Literal::Long(300L)), HasValue(testing::Eq(true)));
+
+  // Test longs not in the set
+  EXPECT_THAT(bound_pred->Test(Literal::Long(150L)), HasValue(testing::Eq(false)));
+  EXPECT_THAT(bound_pred->Test(Literal::Long(400L)), HasValue(testing::Eq(false)));
+}
+
+TEST_F(PredicateTest, BoundSetPredicateTestSingleLiteral) {
+  ICEBERG_ASSIGN_OR_THROW(auto in_pred, Expressions::In("age", {Literal::Int(42)})
+                                            ->Bind(*schema_, /*case_sensitive=*/true));
+
+  // Binding turns a single-element IN into an Equal predicate
+  EXPECT_EQ(in_pred->op(), Expression::Operation::kEq);
+  auto bound_literal = AssertAndCastToBoundPredicate(in_pred);
+  EXPECT_THAT(bound_literal->Test(Literal::Int(42)), HasValue(testing::Eq(true)));
+  EXPECT_THAT(bound_literal->Test(Literal::Int(41)), HasValue(testing::Eq(false)));
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/util/macros.h b/src/iceberg/util/macros.h
index 278035d3f..733b07f1a 100644
--- a/src/iceberg/util/macros.h
+++ b/src/iceberg/util/macros.h
@@ -21,6 +21,9 @@
 
 #include <cassert>
 
+#include "iceberg/exception.h"
+#include "iceberg/result.h"
+
 #define ICEBERG_RETURN_UNEXPECTED(result)                       \
   if (auto&& result_name = result; !result_name) [[unlikely]] { \
     return std::unexpected(result_name.error());                \
@@ -40,3 +43,32 @@
                                rexpr)
 
 #define ICEBERG_DCHECK(expr, message) assert((expr) && (message))
+
+/// \brief Throw iceberg::IcebergError when `result` does not hold a value.
+///
+/// The exception is constructed from `result.error().message`. Like
+/// ICEBERG_RETURN_UNEXPECTED, this expands to a bare `if` statement, so it must
+/// not be followed by an `else`.
+#define ICEBERG_THROW_NOT_OK(result)                            \
+  if (auto&& result_name = result; !result_name) [[unlikely]] { \
+    throw iceberg::IcebergError(result_name.error().message);   \
+  }
+
+/// \brief Implementation detail of ICEBERG_ASSIGN_OR_THROW.
+///
+/// Expands to several statements (it declares `result_name` in the enclosing
+/// scope), so it cannot serve as the unbraced body of an `if` or a loop.
+#define ICEBERG_ASSIGN_OR_THROW_IMPL(result_name, lhs, rexpr) \
+  auto&& result_name = (rexpr);                               \
+  ICEBERG_THROW_NOT_OK(result_name);                          \
+  lhs = std::move(result_name.value());
+
+/// \brief Evaluate `rexpr` and move its value into `lhs`, throwing
+/// iceberg::IcebergError if it holds an error instead.
+///
+/// `lhs` may be a declaration, e.g. `ICEBERG_ASSIGN_OR_THROW(auto x, Foo());`.
+/// The definition carries no semicolon of its own because the expansion
+/// already ends in one.
+#define ICEBERG_ASSIGN_OR_THROW(lhs, rexpr) \
+  ICEBERG_ASSIGN_OR_THROW_IMPL(             \
+      ICEBERG_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr)