diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index a3a6cf566..141df4b27 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -23,6 +23,7 @@ set(ICEBERG_SOURCES demo.cc expression/expression.cc expression/literal.cc + expression/term.cc file_reader.cc json_internal.cc manifest_entry.cc diff --git a/src/iceberg/expression/common.h b/src/iceberg/expression/common.h new file mode 100644 index 000000000..8f3f95859 --- /dev/null +++ b/src/iceberg/expression/common.h @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include + +#include "iceberg/result.h" +#include "iceberg/schema.h" + +namespace iceberg { + +/// Operation types for expressions +enum class Operation { + kTrue, + kFalse, + kIsNull, + kNotNull, + kIsNan, + kNotNan, + kLt, + kLtEq, + kGt, + kGtEq, + kEq, + kNotEq, + kIn, + kNotIn, + kNot, + kAnd, + kOr, + kStartsWith, + kNotStartsWith, + kCount, + kCountStar, + kMax, + kMin +}; + +/// \brief Returns whether the operation is a predicate operation. 
+constexpr bool IsPredicate(Operation op) { + switch (op) { + case Operation::kTrue: + case Operation::kFalse: + case Operation::kIsNull: + case Operation::kNotNull: + case Operation::kIsNan: + case Operation::kNotNan: + case Operation::kLt: + case Operation::kLtEq: + case Operation::kGt: + case Operation::kGtEq: + case Operation::kEq: + case Operation::kNotEq: + case Operation::kIn: + case Operation::kNotIn: + case Operation::kNot: + case Operation::kAnd: + case Operation::kOr: + case Operation::kStartsWith: + case Operation::kNotStartsWith: + return true; + case Operation::kCount: + case Operation::kCountStar: + case Operation::kMax: + case Operation::kMin: + return false; + } + return false; +} + +constexpr bool IsUnaryPredicate(Operation op) { + switch (op) { + case Operation::kIsNull: + case Operation::kNotNull: + case Operation::kIsNan: + case Operation::kNotNan: + return true; + default: + return false; + } +} +constexpr bool IsBinaryPredicate(Operation op) { + switch (op) { + case Operation::kLt: + case Operation::kLtEq: + case Operation::kGt: + case Operation::kGtEq: + case Operation::kEq: + case Operation::kNotEq: + case Operation::kIn: + case Operation::kNotIn: + case Operation::kAnd: + case Operation::kOr: + case Operation::kStartsWith: + case Operation::kNotStartsWith: + return true; + default: + return false; + } +} + +template +concept Bindable = requires(const T& expr, const Schema& schema, bool case_sensitive) { + // Must have a BoundType alias that defines what type it binds to + typename T::BoundType; + // Must have a Bind method with the correct signature + { expr.Bind(schema, case_sensitive) } -> std::same_as>; +}; + +/// \brief Concept for types that behave like predicates (bound or unbound) +template +concept PredicateLike = requires(const T& pred) { + // Must have an operation type + { pred.op() } -> std::same_as; + // Must be convertible to string + { pred.ToString() } -> std::same_as; + // // Must have a Negate method that returns a 
shared_ptr to the same concept + // { pred.Negate() } -> std::convertible_to>; + // Must support equality comparison + { pred.Equals(pred) } -> std::same_as; +}; + +} // namespace iceberg diff --git a/src/iceberg/expression/expression.cc b/src/iceberg/expression/expression.cc index c6fa9406b..0f1de5a55 100644 --- a/src/iceberg/expression/expression.cc +++ b/src/iceberg/expression/expression.cc @@ -21,70 +21,500 @@ #include +#include "iceberg/exception.h" +#include "iceberg/expression/term.h" +#include "iceberg/util/checked_cast.h" +#include "iceberg/util/macros.h" +#include "literal.h" + namespace iceberg { -// True implementation -const std::shared_ptr& True::Instance() { - static const std::shared_ptr instance{new True()}; - return instance; -} +// Internal implementation classes -std::shared_ptr True::Negate() const { return False::Instance(); } +/// \brief An Expression that is always true. +template +class TrueImpl final : public PredicateType { + public: + /// \brief Returns the singleton instance + static const std::shared_ptr& Instance() { + static const std::shared_ptr instance = + std::shared_ptr(new TrueImpl()); + return instance; + } -// False implementation -const std::shared_ptr& False::Instance() { - static const std::shared_ptr instance = std::shared_ptr(new False()); - return instance; -} + Operation op() const override { return Operation::kTrue; } + + std::string ToString() const override { return "true"; } + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& other) const override { + return other.op() == Operation::kTrue; + } + + protected: + constexpr TrueImpl() = default; +}; + +/// \brief An expression that is always false. 
+template +class FalseImpl final : public PredicateType { + public: + /// \brief Returns the singleton instance + static const std::shared_ptr& Instance() { + static const std::shared_ptr instance = + std::shared_ptr(new FalseImpl()); + return instance; + } + + Operation op() const override { return Operation::kFalse; } + + std::string ToString() const override { return "false"; } -std::shared_ptr False::Negate() const { return True::Instance(); } + std::shared_ptr Negate() const override { + return TrueImpl::Instance(); + } + + bool Equals(const Expression& other) const override { + return other.op() == Operation::kFalse; + } -// And implementation -And::And(std::shared_ptr left, std::shared_ptr right) - : left_(std::move(left)), right_(std::move(right)) {} + private: + constexpr FalseImpl() = default; +}; -std::string And::ToString() const { - return std::format("({} and {})", left_->ToString(), right_->ToString()); +template +std::shared_ptr TrueImpl::Negate() const { + return FalseImpl::Instance(); } -std::shared_ptr And::Negate() const { +/// \brief An Expression that represents a logical AND operation between two expressions. +template +class AndImpl final : public PredicateType { + public: + /// \brief Constructs an And expression from two sub-expressions. + AndImpl(std::shared_ptr left, std::shared_ptr right) + : left_(std::move(left)), right_(std::move(right)) {} + + /// \brief Returns the left operand of the AND expression. + const std::shared_ptr& left() const { return left_; } + + /// \brief Returns the right operand of the AND expression. 
+ const std::shared_ptr& right() const { return right_; } + + Operation op() const override { return Operation::kAnd; } + + std::string ToString() const override { + return std::format("({} and {})", left_->ToString(), right_->ToString()); + } + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& expr) const override { + if (expr.op() == Operation::kAnd) { + const auto& other = + iceberg::internal::checked_cast&>(expr); + return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || + (left_->Equals(*other.right()) && right_->Equals(*other.left())); + } + return false; + } + + private: + std::shared_ptr left_; + std::shared_ptr right_; +}; + +/// \brief An Expression that represents a logical OR operation between two expressions. +template +class OrImpl final : public PredicateType { + public: + /// \brief Constructs an Or expression from two sub-expressions. + OrImpl(std::shared_ptr left, std::shared_ptr right) + : left_(std::move(left)), right_(std::move(right)) {} + + /// \brief Returns the left operand of the OR expression. + const std::shared_ptr& left() const { return left_; } + + /// \brief Returns the right operand of the OR expression. 
+ const std::shared_ptr& right() const { return right_; } + + Operation op() const override { return Operation::kOr; } + + std::string ToString() const override { + return std::format("({} or {})", left_->ToString(), right_->ToString()); + } + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& expr) const override { + if (expr.op() == Operation::kOr) { + const auto& other = + iceberg::internal::checked_cast&>(expr); + return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || + (left_->Equals(*other.right()) && right_->Equals(*other.left())); + } + return false; + } + + private: + std::shared_ptr left_; + std::shared_ptr right_; +}; + +template +std::shared_ptr AndImpl::Negate() const { // De Morgan's law: not(A and B) = (not A) or (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); - return std::make_shared(left_negated, right_negated); + return std::make_shared>(std::move(left_negated), + std::move(right_negated)); } -bool And::Equals(const Expression& expr) const { - if (expr.op() == Operation::kAnd) { - const auto& other = static_cast(expr); - return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || - (left_->Equals(*other.right()) && right_->Equals(*other.left())); - } - return false; +template +std::shared_ptr OrImpl::Negate() const { + // De Morgan's law: not(A or B) = (not A) and (not B) + auto left_negated = left_->Negate(); + auto right_negated = right_->Negate(); + return std::make_shared>(left_negated, right_negated); } -// Or implementation -Or::Or(std::shared_ptr left, std::shared_ptr right) - : left_(std::move(left)), right_(std::move(right)) {} +using True = TrueImpl; +using BoundTrue = TrueImpl; +using False = FalseImpl; +using BoundFalse = FalseImpl; +using AndPredicate = AndImpl; +using BoundAndPredicate = AndImpl; +using OrPredicate = OrImpl; +using BoundOrPredicate = OrImpl; -std::string Or::ToString() const { - return std::format("({} or {})", 
left_->ToString(), right_->ToString()); +// Implementation of Predicate static factory methods +const std::shared_ptr& Predicate::AlwaysTrue() { + static const std::shared_ptr instance = True::Instance(); + return instance; } -std::shared_ptr Or::Negate() const { - // De Morgan's law: not(A or B) = (not A) and (not B) - auto left_negated = left_->Negate(); - auto right_negated = right_->Negate(); - return std::make_shared(left_negated, right_negated); +const std::shared_ptr& Predicate::AlwaysFalse() { + static const std::shared_ptr instance = False::Instance(); + return instance; } -bool Or::Equals(const Expression& expr) const { - if (expr.op() == Operation::kOr) { - const auto& other = static_cast(expr); - return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || - (left_->Equals(*other.right()) && right_->Equals(*other.left())); +std::shared_ptr Predicate::And(std::shared_ptr left, + std::shared_ptr right) { + /* + auto left_op = left->op(); + auto right_op = right->op(); + if (left_op == Operation::kFalse || right_op == Operation::kFalse) { + return False::Instance(); + } + if (left_op == Operation::kTrue && right_op == Operation::kTrue) { + return left; } - return false; + */ + return std::make_shared(std::move(left), std::move(right)); +} + +std::shared_ptr Predicate::Or(std::shared_ptr left, + std::shared_ptr right) { + /* + auto left_op = left->op(); + auto right_op = right->op(); + if (left_op == Operation::kTrue || right_op == Operation::kTrue) { + return True::Instance(); + } + if (left_op == Operation::kFalse && right_op == Operation::kFalse) { + return left; + } + */ + return std::make_shared(std::move(left), std::move(right)); +} + +/// Unary predicate, for example, `a IS NULL`, where `a` is a Term. +/// +/// Note that this would not include UnaryPredicates like +/// `COALESCE(a, b) is not null`. 
+template +struct UnaryPredicateBase { + UnaryPredicateBase(Operation in_op, ReferenceType in_reference) + : unary_op(in_op), reference(std::move(in_reference)) { + if (!IsUnaryPredicate(unary_op)) { + throw IcebergError( + std::format("UnaryPredicateBase: operation {} is not a unary predicate", + static_cast(unary_op))); + } + } + + Operation unary_op; + ReferenceType reference; +}; + +class BoundUnaryPredicate; + +class UnaryPredicate final : public UnaryPredicateBase, public Predicate { + public: + using BoundType = BoundUnaryPredicate; + + UnaryPredicate(Operation op, Reference reference) + : UnaryPredicateBase(op, std::move(reference)) {} + + std::string ToString() const override { + switch (this->unary_op) { + case Operation::kIsNull: + return std::format("{} IS NULL", reference.ToString()); + case Operation::kNotNull: + return std::format("{} IS NOT NULL", reference.ToString()); + case Operation::kIsNan: + return std::format("{} IS NAN", reference.ToString()); + case Operation::kNotNan: + return std::format("{} IS NOT NAN", reference.ToString()); + default: + return std::format("UnaryPredicate({})", static_cast(unary_op)); + } + } + + std::shared_ptr Negate() const override { + Operation negated_op; + switch (op()) { + case Operation::kIsNull: + negated_op = Operation::kNotNull; + break; + case Operation::kNotNull: + negated_op = Operation::kIsNull; + break; + case Operation::kIsNan: + negated_op = Operation::kNotNan; + break; + case Operation::kNotNan: + negated_op = Operation::kIsNan; + break; + default: + throw IcebergError(std::format("Cannot negate unary predicate with operation {}", + static_cast(op()))); + } + return std::make_shared(negated_op, reference); + } + + bool Equals(const Expression& other) const override { + if (other.op() != op()) { + return false; + } + const auto& other_unary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_unary.reference); + } + + Result> Bind(const Schema& schema, + bool case_sensitive) 
const override; + Operation op() const override { return unary_op; } +}; + +class BoundUnaryPredicate final : public UnaryPredicateBase, + public BoundPredicate { + public: + BoundUnaryPredicate(Operation op, BoundReference reference) + : UnaryPredicateBase(op, std::move(reference)) {} + + std::string ToString() const override { + switch (op()) { + case Operation::kIsNull: + return std::format("{} IS NULL", reference.ToString()); + case Operation::kNotNull: + return std::format("{} IS NOT NULL", reference.ToString()); + case Operation::kIsNan: + return std::format("{} IS NAN", reference.ToString()); + case Operation::kNotNan: + return std::format("{} IS NOT NAN", reference.ToString()); + default: + return std::format("BoundUnaryPredicate({})", static_cast(op())); + } + } + + bool Equals(const BoundExpression& other) const override { + if (other.op() != op()) { + return false; + } + const auto& other_unary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_unary.reference); + } + + Operation op() const override { return unary_op; } +}; + +Result> UnaryPredicate::Bind(const Schema& schema, + bool case_sensitive) const { + return NotImplemented("Binding of UnaryPredicate is not implemented"); +} + +/// Binary predicate, for example, `a = 10`, `b > 5`, etc. +/// +/// Represents comparisons between a term (Reference) and a literal value. 
+template +struct BinaryPredicateBase { + BinaryPredicateBase(Operation in_op, ReferenceType in_reference, Literal in_literal) + : binary_op(in_op), + reference(std::move(in_reference)), + literal(std::move(in_literal)) { + if (!IsBinaryPredicate(binary_op)) { + throw IcebergError( + std::format("BinaryPredicateBase: operation {} is not a binary predicate", + static_cast(binary_op))); + } + } + + Operation binary_op; + ReferenceType reference; + Literal literal; +}; + +class BoundBinaryPredicate; + +class BinaryPredicate final : public BinaryPredicateBase, public Predicate { + public: + using BoundType = BoundBinaryPredicate; + + BinaryPredicate(Operation op, Reference reference, Literal literal) + : BinaryPredicateBase(op, std::move(reference), std::move(literal)) {} + + std::string ToString() const override { + std::string op_str; + switch (binary_op) { + case Operation::kEq: + op_str = " = "; + break; + case Operation::kNotEq: + op_str = " != "; + break; + case Operation::kLt: + op_str = " < "; + break; + case Operation::kLtEq: + op_str = " <= "; + break; + case Operation::kGt: + op_str = " > "; + break; + case Operation::kGtEq: + op_str = " >= "; + break; + case Operation::kStartsWith: + return std::format("{} STARTS WITH {}", reference.ToString(), literal.ToString()); + case Operation::kNotStartsWith: + return std::format("{} NOT STARTS WITH {}", reference.ToString(), + literal.ToString()); + default: + return std::format("BinaryPredicate({}, {}, {})", static_cast(binary_op), + reference.ToString(), literal.ToString()); + } + return std::format("{}{}{}", reference.ToString(), op_str, literal.ToString()); + } + + std::shared_ptr Negate() const override { + Operation negated_op; + switch (binary_op) { + case Operation::kEq: + negated_op = Operation::kNotEq; + break; + case Operation::kNotEq: + negated_op = Operation::kEq; + break; + case Operation::kLt: + negated_op = Operation::kGtEq; + break; + case Operation::kLtEq: + negated_op = Operation::kGt; + break; + 
case Operation::kGt: + negated_op = Operation::kLtEq; + break; + case Operation::kGtEq: + negated_op = Operation::kLt; + break; + case Operation::kStartsWith: + negated_op = Operation::kNotStartsWith; + break; + case Operation::kNotStartsWith: + negated_op = Operation::kStartsWith; + break; + default: + throw IcebergError(std::format("Cannot negate binary predicate with operation {}", + static_cast(binary_op))); + } + return std::make_shared(negated_op, reference, literal); + } + + bool Equals(const Expression& other) const override { + if (other.op() != binary_op) { + return false; + } + const auto& other_binary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_binary.reference) && literal == other_binary.literal; + } + + Result> Bind(const Schema& schema, + bool case_sensitive) const override; + + Operation op() const override { return binary_op; } +}; + +class BoundBinaryPredicate final : public BinaryPredicateBase, + public BoundPredicate { + public: + BoundBinaryPredicate(Operation op, BoundReference reference, Literal literal) + : BinaryPredicateBase(op, std::move(reference), + std::move(literal)) {} + + std::string ToString() const override { + std::string op_str; + switch (binary_op) { + case Operation::kEq: + op_str = " = "; + break; + case Operation::kNotEq: + op_str = " != "; + break; + case Operation::kLt: + op_str = " < "; + break; + case Operation::kLtEq: + op_str = " <= "; + break; + case Operation::kGt: + op_str = " > "; + break; + case Operation::kGtEq: + op_str = " >= "; + break; + case Operation::kStartsWith: + return std::format("{} STARTS WITH {}", reference.ToString(), literal.ToString()); + case Operation::kNotStartsWith: + return std::format("{} NOT STARTS WITH {}", reference.ToString(), + literal.ToString()); + default: + return std::format("BoundBinaryPredicate({}, {}, {})", + static_cast(binary_op), reference.ToString(), + literal.ToString()); + } + return std::format("{}{}{}", reference.ToString(), op_str, 
literal.ToString()); + } + + bool Equals(const BoundExpression& other) const override { + if (other.op() != binary_op) { + return false; + } + const auto& other_binary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_binary.reference) && literal == other_binary.literal; + } + + Operation op() const override { return binary_op; } +}; + +// Implementation of BinaryPredicate::Bind +Result> BinaryPredicate::Bind( + const Schema& schema, bool case_sensitive) const { + return NotImplemented("Binding of BinaryPredicate is not implemented"); } } // namespace iceberg diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index 9ceae1c69..b7620139a 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -25,51 +25,27 @@ #include #include -#include "iceberg/exception.h" +#include "iceberg/expression/common.h" +#include "iceberg/expression/literal.h" +#include "iceberg/expression/term.h" #include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/schema.h" namespace iceberg { +class BoundExpression; + /// \brief Represents a boolean expression tree. class ICEBERG_EXPORT Expression { public: - /// Operation types for expressions - enum class Operation { - kTrue, - kFalse, - kIsNull, - kNotNull, - kIsNan, - kNotNan, - kLt, - kLtEq, - kGt, - kGtEq, - kEq, - kNotEq, - kIn, - kNotIn, - kNot, - kAnd, - kOr, - kStartsWith, - kNotStartsWith, - kCount, - kCountStar, - kMax, - kMin - }; + using BoundType = BoundExpression; virtual ~Expression() = default; /// \brief Returns the operation for an expression node. virtual Operation op() const = 0; - /// \brief Returns the negation of this expression, equivalent to not(this). - virtual std::shared_ptr Negate() const { - throw IcebergError("Expression cannot be negated"); - } - /// \brief Returns whether this expression will accept the same values as another. 
/// \param other another expression /// \return true if the expressions are equivalent @@ -78,119 +54,95 @@ class ICEBERG_EXPORT Expression { return false; } - virtual std::string ToString() const { return "Expression"; } -}; - -/// \brief An Expression that is always true. -/// -/// Represents a boolean predicate that always evaluates to true. -class ICEBERG_EXPORT True : public Expression { - public: - /// \brief Returns the singleton instance - static const std::shared_ptr& Instance(); - - Operation op() const override { return Operation::kTrue; } - - std::string ToString() const override { return "true"; } + virtual std::string ToString() const = 0; - std::shared_ptr Negate() const override; - - bool Equals(const Expression& other) const override { - return other.op() == Operation::kTrue; + virtual Result> Bind( + const Schema& schema, bool case_sensitive) const { + return NotImplemented("Binding of Expression is not implemented"); } - - private: - constexpr True() = default; }; -/// \brief An expression that is always false. -class ICEBERG_EXPORT False : public Expression { +class ICEBERG_EXPORT Predicate : public Expression { public: - /// \brief Returns the singleton instance - static const std::shared_ptr& Instance(); + /// \brief Returns a negated version of this predicate. + virtual std::shared_ptr Negate() const = 0; - Operation op() const override { return Operation::kFalse; } + // Factory functions for creating predicates - std::string ToString() const override { return "false"; } + /// \brief Creates a True predicate that always evaluates to true. + /// \return A shared pointer to a True predicate + static const std::shared_ptr& AlwaysTrue(); - std::shared_ptr Negate() const override; + /// \brief Creates a False predicate that always evaluates to false. 
+ /// \return A shared pointer to a False predicate + static const std::shared_ptr& AlwaysFalse(); - bool Equals(const Expression& other) const override { - return other.op() == Operation::kFalse; - } + /// \brief Creates an And predicate that represents logical AND of two predicates. + /// \param left The left operand of the AND predicate + /// \param right The right operand of the AND predicate + /// \return A shared pointer to an And predicate + static std::shared_ptr And(std::shared_ptr left, + std::shared_ptr right); - private: - constexpr False() = default; -}; + /// \brief Creates an Or predicate that represents logical OR of two predicates. + /// \param left The left operand of the OR predicate + /// \param right The right operand of the OR predicate + /// \return A shared pointer to an Or predicate + static std::shared_ptr Or(std::shared_ptr left, + std::shared_ptr right); -/// \brief An Expression that represents a logical AND operation between two expressions. -/// -/// This expression evaluates to true if and only if both of its child expressions -/// evaluate to true. -class ICEBERG_EXPORT And : public Expression { - public: - /// \brief Constructs an And expression from two sub-expressions. - /// - /// \param left The left operand of the AND expression - /// \param right The right operand of the AND expression - And(std::shared_ptr left, std::shared_ptr right); + /// \brief Creates an IsNull predicate + static std::shared_ptr IsNull(Reference reference); - /// \brief Returns the left operand of the AND expression. - /// - /// \return The left operand of the AND expression - const std::shared_ptr& left() const { return left_; } + /// \brief Creates an IsNotNull predicate + static std::shared_ptr IsNotNull(Reference reference); - /// \brief Returns the right operand of the AND expression. 
- /// - /// \return The right operand of the AND expression - const std::shared_ptr& right() const { return right_; } + /// \brief Creates an IsNan predicate + static std::shared_ptr IsNan(Reference reference); - Operation op() const override { return Operation::kAnd; } + /// \brief Creates an IsNotNan predicate + static std::shared_ptr IsNotNan(Reference reference); - std::string ToString() const override; + /// \brief Creates an equal-to predicate: reference = literal + static std::shared_ptr Equal(Reference reference, Literal literal); - std::shared_ptr Negate() const override; + /// \brief Creates a not-equal-to predicate: reference != literal + static std::shared_ptr NotEqual(Reference reference, Literal literal); - bool Equals(const Expression& other) const override; + /// \brief Creates a less-than predicate: reference < literal + static std::shared_ptr LessThan(Reference reference, Literal literal); - private: - std::shared_ptr left_; - std::shared_ptr right_; -}; + /// \brief Creates a less-than-or-equal predicate: reference <= literal + static std::shared_ptr LessThanOrEqual(Reference reference, Literal literal); -/// \brief An Expression that represents a logical OR operation between two expressions. -/// -/// This expression evaluates to true if at least one of its child expressions -/// evaluates to true. -class ICEBERG_EXPORT Or : public Expression { - public: - /// \brief Constructs an Or expression from two sub-expressions. - /// - /// \param left The left operand of the OR expression - /// \param right The right operand of the OR expression - Or(std::shared_ptr left, std::shared_ptr right); + /// \brief Creates a greater-than predicate: reference > literal + static std::shared_ptr GreaterThan(Reference reference, Literal literal); - /// \brief Returns the left operand of the OR expression. 
- /// - /// \return The left operand of the OR expression - const std::shared_ptr& left() const { return left_; } + /// \brief Creates a greater-than-or-equal predicate: reference >= literal + static std::shared_ptr GreaterThanOrEqual(Reference reference, + Literal literal); - /// \brief Returns the right operand of the OR expression. - /// - /// \return The right operand of the OR expression - const std::shared_ptr& right() const { return right_; } + /// \brief Creates a starts-with predicate: reference STARTS WITH literal + static std::shared_ptr StartsWith(Reference reference, Literal literal); - Operation op() const override { return Operation::kOr; } + /// \brief Creates a not-starts-with predicate: reference NOT STARTS WITH literal + static std::shared_ptr NotStartsWith(Reference reference, Literal literal); +}; - std::string ToString() const override; +class ICEBERG_EXPORT BoundExpression { + public: + virtual ~BoundExpression() = default; - std::shared_ptr Negate() const override; + /// \brief Returns the operation for a bound expression node. + virtual Operation op() const = 0; - bool Equals(const Expression& other) const override; + /// \brief Returns whether this expression will accept the same values as another. + virtual bool Equals(const BoundExpression& other) const = 0; - private: - std::shared_ptr left_; - std::shared_ptr right_; + /// \brief Returns a string representation of this bound expression. + virtual std::string ToString() const = 0; }; +class ICEBERG_EXPORT BoundPredicate : public BoundExpression {}; + } // namespace iceberg diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index 17752c488..3e7240c4f 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -19,6 +19,9 @@ #pragma once +/// \file iceberg/expression/literal.h +/// Literal class for Iceberg table operations. 
+ #include #include #include @@ -109,6 +112,8 @@ class ICEBERG_EXPORT Literal { /// and should not be AboveMax or BelowMin. std::partial_ordering operator<=>(const Literal& other) const; + bool operator==(const Literal& other) const = default; + /// Check if this literal represents a value above the maximum allowed value /// for its type. This occurs when casting from a wider type to a narrower type /// and the value exceeds the target type's maximum. diff --git a/src/iceberg/expression/term.cc b/src/iceberg/expression/term.cc new file mode 100644 index 000000000..02ea333d7 --- /dev/null +++ b/src/iceberg/expression/term.cc @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "term.h" + +#include "iceberg/exception.h" + +namespace iceberg { +bool Reference::Equals(const Reference& other) const { return name == other.name; } + +bool BoundReference::Equals(const BoundReference& other) const { + return column_name == other.column_name; +} + +} // namespace iceberg \ No newline at end of file diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h new file mode 100644 index 000000000..be25b2465 --- /dev/null +++ b/src/iceberg/expression/term.h @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/expression/term.h + +#include + +#include "iceberg/result.h" +#include "iceberg/schema.h" + +namespace iceberg { + +struct BoundReference; + +/// Unbound reference type for expressions. 
+struct Reference { + using BoundType = BoundReference; ///< Type this reference binds to. + /// Referenced name; the only state Equals compares. + std::string name; + /// Renders as "Reference(name: <name>)". + std::string ToString() const { return "Reference(name: " + name + ")"; } + /// Binds against `schema`. NOTE(review): not defined in term.cc; confirm one exists. + Result<BoundReference> Bind(const Schema& schema, bool case_sensitive) const; + /// Compares `name` only; defined in term.cc. + bool Equals(const Reference& other) const; +}; +/// Bound counterpart of Reference (see Reference::BoundType). +struct BoundReference { + std::string column_name; ///< Column name; the only state this struct holds. + /// Renders as "BoundReference(name: <column_name>)". + std::string ToString() const { return "BoundReference(name: " + column_name + ")"; } + /// Defined in term.cc. + bool Equals(const BoundReference& other) const; +}; + +} // namespace iceberg \ No newline at end of file diff --git a/test/expression_test.cc b/test/expression_test.cc index c14c7d9a3..8dcd01818 100644 --- a/test/expression_test.cc +++ b/test/expression_test.cc @@ -27,131 +27,142 @@ namespace iceberg { TEST(TrueFalseTest, Basic) { // Test negation of False returns True - auto false_instance = False::Instance(); + auto false_instance = Predicate::AlwaysFalse(); auto negated = false_instance->Negate(); // Check that negated expression is True - EXPECT_EQ(negated->op(), Expression::Operation::kTrue); + EXPECT_EQ(negated->op(), Operation::kTrue); EXPECT_EQ(negated->ToString(), "true"); // Test negation of True returns false - auto true_instance = True::Instance(); + auto true_instance = Predicate::AlwaysTrue(); negated = true_instance->Negate(); // Check that negated expression is False - EXPECT_EQ(negated->op(), Expression::Operation::kFalse); + EXPECT_EQ(negated->op(), Operation::kFalse); EXPECT_EQ(negated->ToString(), "false"); } TEST(ANDTest, Basic) { // Create two True expressions - auto true_expr1 = True::Instance(); - auto true_expr2 = True::Instance(); + auto true_expr1 = Predicate::AlwaysTrue(); + auto true_expr2 = Predicate::AlwaysTrue(); // Create an AND expression - auto and_expr = std::make_shared<And>(true_expr1, true_expr2); + auto and_expr = Predicate::And(true_expr1, true_expr2); - EXPECT_EQ(and_expr->op(), Expression::Operation::kAnd); + EXPECT_EQ(and_expr->op(), Operation::kAnd); EXPECT_EQ(and_expr->ToString(), "(true and true)"); - 
EXPECT_EQ(and_expr->left()->op(), Expression::Operation::kTrue); - EXPECT_EQ(and_expr->right()->op(), Expression::Operation::kTrue); } TEST(ORTest, Basic) { // Create True and False expressions - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); // Create an OR expression - auto or_expr = std::make_shared<Or>(true_expr, false_expr); + auto or_expr = Predicate::Or(true_expr, false_expr); - EXPECT_EQ(or_expr->op(), Expression::Operation::kOr); + EXPECT_EQ(or_expr->op(), Operation::kOr); EXPECT_EQ(or_expr->ToString(), "(true or false)"); - EXPECT_EQ(or_expr->left()->op(), Expression::Operation::kTrue); - EXPECT_EQ(or_expr->right()->op(), Expression::Operation::kFalse); } TEST(ORTest, Negation) { // Test De Morgan's law: not(A or B) = (not A) and (not B) - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); - auto or_expr = std::make_shared<Or>(true_expr, false_expr); + auto or_expr = Predicate::Or(true_expr, false_expr); auto negated_or = or_expr->Negate(); // Should become AND expression - EXPECT_EQ(negated_or->op(), Expression::Operation::kAnd); + EXPECT_EQ(negated_or->op(), Operation::kAnd); EXPECT_EQ(negated_or->ToString(), "(false and true)"); } TEST(ORTest, Equals) { - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); // Test basic equality - auto or_expr1 = std::make_shared<Or>(true_expr, false_expr); - auto or_expr2 = std::make_shared<Or>(true_expr, false_expr); + auto or_expr1 = Predicate::Or(true_expr, false_expr); + auto or_expr2 = Predicate::Or(true_expr, false_expr); EXPECT_TRUE(or_expr1->Equals(*or_expr2)); // Test commutativity: (A or B) equals (B or A) - auto or_expr3 = std::make_shared<Or>(false_expr, true_expr); + auto or_expr3 = 
Predicate::Or(false_expr, true_expr); EXPECT_TRUE(or_expr1->Equals(*or_expr3)); // Test inequality with different expressions - auto or_expr4 = std::make_shared<Or>(true_expr, true_expr); + auto or_expr4 = Predicate::Or(true_expr, true_expr); EXPECT_FALSE(or_expr1->Equals(*or_expr4)); // Test inequality with different operation types - auto and_expr = std::make_shared<And>(true_expr, false_expr); + auto and_expr = Predicate::And(true_expr, false_expr); EXPECT_FALSE(or_expr1->Equals(*and_expr)); } TEST(ANDTest, Negation) { // Test De Morgan's law: not(A and B) = (not A) or (not B) - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); - auto and_expr = std::make_shared<And>(true_expr, false_expr); + auto and_expr = Predicate::And(true_expr, false_expr); auto negated_and = and_expr->Negate(); // Should become OR expression - EXPECT_EQ(negated_and->op(), Expression::Operation::kOr); + EXPECT_EQ(negated_and->op(), Operation::kOr); EXPECT_EQ(negated_and->ToString(), "(false or true)"); } TEST(ANDTest, Equals) { - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); // Test basic equality - auto and_expr1 = std::make_shared<And>(true_expr, false_expr); - auto and_expr2 = std::make_shared<And>(true_expr, false_expr); + auto and_expr1 = Predicate::And(true_expr, false_expr); + auto and_expr2 = Predicate::And(true_expr, false_expr); EXPECT_TRUE(and_expr1->Equals(*and_expr2)); // Test commutativity: (A and B) equals (B and A) - auto and_expr3 = std::make_shared<And>(false_expr, true_expr); + auto and_expr3 = Predicate::And(false_expr, true_expr); EXPECT_TRUE(and_expr1->Equals(*and_expr3)); // Test inequality with different expressions - auto and_expr4 = std::make_shared<And>(true_expr, true_expr); + auto and_expr4 = Predicate::And(true_expr, true_expr); 
EXPECT_FALSE(and_expr1->Equals(*and_expr4)); // Test inequality with different operation types - auto or_expr = std::make_shared<Or>(true_expr, false_expr); + auto or_expr = Predicate::Or(true_expr, false_expr); EXPECT_FALSE(and_expr1->Equals(*or_expr)); } -TEST(ExpressionTest, BaseClassNegateThrowsException) { - // Create a mock expression that doesn't override Negate() - class MockExpression : public Expression { - public: - Operation op() const override { return Operation::kTrue; } - // Deliberately not overriding Negate() to test base class behavior - }; +TEST(PredicateFactoryTest, FactoryMethods) { + // Test that factory methods work correctly + auto true_pred = Predicate::AlwaysTrue(); + auto false_pred = Predicate::AlwaysFalse(); - auto mock_expr = std::make_shared<MockExpression>(); + EXPECT_EQ(true_pred->op(), Operation::kTrue); + EXPECT_EQ(false_pred->op(), Operation::kFalse); - // Should throw IcebergError when calling Negate() on base class - EXPECT_THROW(mock_expr->Negate(), IcebergError); + // Test that multiple calls return equivalent instances + auto true_pred2 = Predicate::AlwaysTrue(); + auto false_pred2 = Predicate::AlwaysFalse(); + + EXPECT_TRUE(true_pred->Equals(*true_pred2)); + EXPECT_TRUE(false_pred->Equals(*false_pred2)); + + // Test compound expressions + auto and_pred = Predicate::And(true_pred, false_pred); + auto or_pred = Predicate::Or(true_pred, false_pred); + + EXPECT_EQ(and_pred->op(), Operation::kAnd); + EXPECT_EQ(or_pred->op(), Operation::kOr); + + // Test nested expressions + auto nested_and = Predicate::And(and_pred, or_pred); + EXPECT_EQ(nested_and->op(), Operation::kAnd); + EXPECT_EQ(nested_and->ToString(), "((true and false) and (true or false))"); } + } // namespace iceberg