From b33d86c23afaa0c15417d563dd2400e28f6da881 Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 5 Jul 2025 01:23:05 +0800 Subject: [PATCH 1/6] Part of expr works (unbound) --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/expression/common.h | 29 ++++++ src/iceberg/expression/expression.cc | 18 ++-- src/iceberg/expression/expression.h | 135 +++++++++++++++++---------- src/iceberg/expression/literal.h | 3 + src/iceberg/expression/term.cc | 20 ++++ src/iceberg/expression/term.h | 46 +++++++++ test/expression_test.cc | 36 +++---- 8 files changed, 204 insertions(+), 84 deletions(-) create mode 100644 src/iceberg/expression/common.h create mode 100644 src/iceberg/expression/term.cc create mode 100644 src/iceberg/expression/term.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index a3a6cf566..141df4b27 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -23,6 +23,7 @@ set(ICEBERG_SOURCES demo.cc expression/expression.cc expression/literal.cc + expression/term.cc file_reader.cc json_internal.cc manifest_entry.cc diff --git a/src/iceberg/expression/common.h b/src/iceberg/expression/common.h new file mode 100644 index 000000000..0e4d63ef1 --- /dev/null +++ b/src/iceberg/expression/common.h @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +namespace iceberg { + +template +concept Bindable = requires(T expr) { + { expr.Bind() }; +}; + +} // namespace iceberg \ No newline at end of file diff --git a/src/iceberg/expression/expression.cc b/src/iceberg/expression/expression.cc index c6fa9406b..05860edaa 100644 --- a/src/iceberg/expression/expression.cc +++ b/src/iceberg/expression/expression.cc @@ -21,6 +21,8 @@ #include +#include "iceberg/util/checked_cast.h" + namespace iceberg { // True implementation @@ -29,7 +31,7 @@ const std::shared_ptr& True::Instance() { return instance; } -std::shared_ptr True::Negate() const { return False::Instance(); } +std::shared_ptr True::Negate() const { return False::Instance(); } // False implementation const std::shared_ptr& False::Instance() { @@ -37,17 +39,17 @@ const std::shared_ptr& False::Instance() { return instance; } -std::shared_ptr False::Negate() const { return True::Instance(); } +std::shared_ptr False::Negate() const { return True::Instance(); } // And implementation -And::And(std::shared_ptr left, std::shared_ptr right) +And::And(std::shared_ptr left, std::shared_ptr right) : left_(std::move(left)), right_(std::move(right)) {} std::string And::ToString() const { return std::format("({} and {})", left_->ToString(), right_->ToString()); } -std::shared_ptr And::Negate() const { +std::shared_ptr And::Negate() const { // De Morgan's law: not(A and B) = (not A) or (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); @@ -56,7 +58,7 @@ std::shared_ptr And::Negate() const { bool And::Equals(const Expression& expr) const { if (expr.op() == Operation::kAnd) { - const auto& other = static_cast(expr); + const auto& other = iceberg::internal::checked_cast(expr); return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || (left_->Equals(*other.right()) && right_->Equals(*other.left())); } @@ -64,14 +66,14 @@ bool And::Equals(const Expression& expr) const { } // Or implementation -Or::Or(std::shared_ptr left, std::shared_ptr right) +Or::Or(std::shared_ptr left, std::shared_ptr right) : left_(std::move(left)), right_(std::move(right)) {} std::string Or::ToString() const { return std::format("({} or {})", left_->ToString(), right_->ToString()); } -std::shared_ptr Or::Negate() const { +std::shared_ptr Or::Negate() const { // De Morgan's law: not(A or B) = (not A) and (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); @@ -80,7 +82,7 @@ std::shared_ptr Or::Negate() const { bool Or::Equals(const Expression& expr) const { if (expr.op() == Operation::kOr) { - const auto& other = static_cast(expr); + const auto& other = iceberg::internal::checked_cast(expr); return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || (left_->Equals(*other.right()) && right_->Equals(*other.left())); } diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index 9ceae1c69..c6e51c745 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -30,46 +30,73 @@ namespace iceberg { +/// Operation types for expressions +enum class Operation { + kTrue, + kFalse, + kIsNull, + kNotNull, + kIsNan, + kNotNan, + kLt, + kLtEq, + kGt, + kGtEq, + kEq, + kNotEq, + kIn, + kNotIn, + kNot, + kAnd, + kOr, + kStartsWith, + kNotStartsWith, + kCount, + kCountStar, + kMax, + kMin +}; + +/// \brief Returns whether the operation is a predicate operation. +constexpr bool IsPredicate(Operation op) { + switch (op) { + case Operation::kTrue: + case Operation::kFalse: + case Operation::kIsNull: + case Operation::kNotNull: + case Operation::kIsNan: + case Operation::kNotNan: + case Operation::kLt: + case Operation::kLtEq: + case Operation::kGt: + case Operation::kGtEq: + case Operation::kEq: + case Operation::kNotEq: + case Operation::kIn: + case Operation::kNotIn: + case Operation::kNot: + case Operation::kAnd: + case Operation::kOr: + case Operation::kStartsWith: + case Operation::kNotStartsWith: + return true; + case Operation::kCount: + case Operation::kCountStar: + case Operation::kMax: + case Operation::kMin: + return false; + } + return false; +} + /// \brief Represents a boolean expression tree. class ICEBERG_EXPORT Expression { public: - /// Operation types for expressions - enum class Operation { - kTrue, - kFalse, - kIsNull, - kNotNull, - kIsNan, - kNotNan, - kLt, - kLtEq, - kGt, - kGtEq, - kEq, - kNotEq, - kIn, - kNotIn, - kNot, - kAnd, - kOr, - kStartsWith, - kNotStartsWith, - kCount, - kCountStar, - kMax, - kMin - }; - virtual ~Expression() = default; /// \brief Returns the operation for an expression node. virtual Operation op() const = 0; - /// \brief Returns the negation of this expression, equivalent to not(this). - virtual std::shared_ptr Negate() const { - throw IcebergError("Expression cannot be negated"); - } - /// \brief Returns whether this expression will accept the same values as another. /// \param other another expression /// \return true if the expressions are equivalent @@ -78,13 +105,19 @@ class ICEBERG_EXPORT Expression { return false; } - virtual std::string ToString() const { return "Expression"; } + virtual std::string ToString() const = 0; +}; + +class ICEBERG_EXPORT Predicate : public Expression { + public: + /// \brief Returns a negated version of this predicate. + virtual std::shared_ptr Negate() const = 0; }; /// \brief An Expression that is always true. /// /// Represents a boolean predicate that always evaluates to true. -class ICEBERG_EXPORT True : public Expression { +class ICEBERG_EXPORT True : public Predicate { public: /// \brief Returns the singleton instance static const std::shared_ptr& Instance(); @@ -93,7 +126,7 @@ class ICEBERG_EXPORT True : public Expression { std::string ToString() const override { return "true"; } - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override; bool Equals(const Expression& other) const override { return other.op() == Operation::kTrue; @@ -104,7 +137,7 @@ class ICEBERG_EXPORT True : public Expression { }; /// \brief An expression that is always false. -class ICEBERG_EXPORT False : public Expression { +class ICEBERG_EXPORT False : public Predicate { public: /// \brief Returns the singleton instance static const std::shared_ptr& Instance(); @@ -113,7 +146,7 @@ class ICEBERG_EXPORT False : public Expression { std::string ToString() const override { return "false"; } - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override; bool Equals(const Expression& other) const override { return other.op() == Operation::kFalse; @@ -127,70 +160,70 @@ class ICEBERG_EXPORT False : public Expression { /// /// This expression evaluates to true if and only if both of its child expressions /// evaluate to true. -class ICEBERG_EXPORT And : public Expression { +class ICEBERG_EXPORT And : public Predicate { public: /// \brief Constructs an And expression from two sub-expressions. /// /// \param left The left operand of the AND expression /// \param right The right operand of the AND expression - And(std::shared_ptr left, std::shared_ptr right); + And(std::shared_ptr left, std::shared_ptr right); /// \brief Returns the left operand of the AND expression. /// /// \return The left operand of the AND expression - const std::shared_ptr& left() const { return left_; } + const std::shared_ptr& left() const { return left_; } /// \brief Returns the right operand of the AND expression. /// /// \return The right operand of the AND expression - const std::shared_ptr& right() const { return right_; } + const std::shared_ptr& right() const { return right_; } Operation op() const override { return Operation::kAnd; } std::string ToString() const override; - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override; bool Equals(const Expression& other) const override; private: - std::shared_ptr left_; - std::shared_ptr right_; + std::shared_ptr left_; + std::shared_ptr right_; }; /// \brief An Expression that represents a logical OR operation between two expressions. /// /// This expression evaluates to true if at least one of its child expressions /// evaluates to true. -class ICEBERG_EXPORT Or : public Expression { +class ICEBERG_EXPORT Or : public Predicate { public: /// \brief Constructs an Or expression from two sub-expressions. /// /// \param left The left operand of the OR expression /// \param right The right operand of the OR expression - Or(std::shared_ptr left, std::shared_ptr right); + Or(std::shared_ptr left, std::shared_ptr right); /// \brief Returns the left operand of the OR expression. /// /// \return The left operand of the OR expression - const std::shared_ptr& left() const { return left_; } + const std::shared_ptr& left() const { return left_; } /// \brief Returns the right operand of the OR expression. /// /// \return The right operand of the OR expression - const std::shared_ptr& right() const { return right_; } + const std::shared_ptr& right() const { return right_; } Operation op() const override { return Operation::kOr; } std::string ToString() const override; - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override; bool Equals(const Expression& other) const override; private: - std::shared_ptr left_; - std::shared_ptr right_; + std::shared_ptr left_; + std::shared_ptr right_; }; } // namespace iceberg diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index 17752c488..3d4e8c81b 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -19,6 +19,9 @@ #pragma once +/// \file iceberg/expression/literal.h +/// Literal class for Iceberg table operations. + #include #include #include diff --git a/src/iceberg/expression/term.cc b/src/iceberg/expression/term.cc new file mode 100644 index 000000000..0553dd451 --- /dev/null +++ b/src/iceberg/expression/term.cc @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "term.h" diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h new file mode 100644 index 000000000..4164546ad --- /dev/null +++ b/src/iceberg/expression/term.h @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/expression/term.h + +#include + +#include "iceberg/result.h" +#include "iceberg/schema.h" + +namespace iceberg { + +struct BoundedReference {}; + +/// Unbounded reference type for expressions. +struct Reference { + using BoundedType = BoundedReference; + + std::string name; + + std::string ToString() const { return "Reference(name: " + name + ")"; } + + Result Bind(const Schema& schema, bool case_sensitive) const; +}; + +using Term = Reference; + +} // namespace iceberg \ No newline at end of file diff --git a/test/expression_test.cc b/test/expression_test.cc index c14c7d9a3..8925a4159 100644 --- a/test/expression_test.cc +++ b/test/expression_test.cc @@ -31,7 +31,7 @@ TEST(TrueFalseTest, Basic) { auto negated = false_instance->Negate(); // Check that negated expression is True - EXPECT_EQ(negated->op(), Expression::Operation::kTrue); + EXPECT_EQ(negated->op(), Operation::kTrue); EXPECT_EQ(negated->ToString(), "true"); // Test negation of True returns false @@ -39,7 +39,7 @@ TEST(TrueFalseTest, Basic) { negated = true_instance->Negate(); // Check that negated expression is False - EXPECT_EQ(negated->op(), Expression::Operation::kFalse); + EXPECT_EQ(negated->op(), Operation::kFalse); EXPECT_EQ(negated->ToString(), "false"); } @@ -51,10 +51,10 @@ TEST(ANDTest, Basic) { // Create an AND expression auto and_expr = std::make_shared(true_expr1, true_expr2); - EXPECT_EQ(and_expr->op(), Expression::Operation::kAnd); - EXPECT_EQ(and_expr->ToString(), "(true and true)"); - EXPECT_EQ(and_expr->left()->op(), Expression::Operation::kTrue); - EXPECT_EQ(and_expr->right()->op(), Expression::Operation::kTrue); + // EXPECT_EQ(and_expr->op(), Operation::kAnd); + // EXPECT_EQ(and_expr->ToString(), "(true and true)"); + // EXPECT_EQ(and_expr->left()->op(), Operation::kTrue); + // EXPECT_EQ(and_expr->right()->op(), Operation::kTrue); } TEST(ORTest, Basic) { @@ -65,10 +65,10 @@ TEST(ORTest, Basic) { // Create an OR expression auto or_expr = std::make_shared(true_expr, false_expr); - EXPECT_EQ(or_expr->op(), Expression::Operation::kOr); + EXPECT_EQ(or_expr->op(), Operation::kOr); EXPECT_EQ(or_expr->ToString(), "(true or false)"); - EXPECT_EQ(or_expr->left()->op(), Expression::Operation::kTrue); - EXPECT_EQ(or_expr->right()->op(), Expression::Operation::kFalse); + EXPECT_EQ(or_expr->left()->op(), Operation::kTrue); + EXPECT_EQ(or_expr->right()->op(), Operation::kFalse); } TEST(ORTest, Negation) { @@ -80,7 +80,7 @@ TEST(ORTest, Negation) { auto negated_or = or_expr->Negate(); // Should become AND expression - EXPECT_EQ(negated_or->op(), Expression::Operation::kAnd); + EXPECT_EQ(negated_or->op(), Operation::kAnd); EXPECT_EQ(negated_or->ToString(), "(false and true)"); } @@ -115,7 +115,7 @@ TEST(ANDTest, Negation) { auto negated_and = and_expr->Negate(); // Should become OR expression - EXPECT_EQ(negated_and->op(), Expression::Operation::kOr); + EXPECT_EQ(negated_and->op(), Operation::kOr); EXPECT_EQ(negated_and->ToString(), "(false or true)"); } @@ -140,18 +140,4 @@ TEST(ANDTest, Equals) { auto or_expr = std::make_shared(true_expr, false_expr); EXPECT_FALSE(and_expr1->Equals(*or_expr)); } - -TEST(ExpressionTest, BaseClassNegateThrowsException) { - // Create a mock expression that doesn't override Negate() - class MockExpression : public Expression { - public: - Operation op() const override { return Operation::kTrue; } - // Deliberately not overriding Negate() to test base class behavior - }; - - auto mock_expr = std::make_shared(); - - // Should throw IcebergError when calling Negate() on base class - EXPECT_THROW(mock_expr->Negate(), IcebergError); -} } // namespace iceberg From 530a75afbaa7028865a1d548f0413a7509891736 Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 5 Jul 2025 01:29:49 +0800 Subject: [PATCH 2/6] bound type declare --- src/iceberg/expression/expression.h | 15 +++++++++++++++ src/iceberg/expression/term.h | 6 +++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index c6e51c745..d47064d02 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -25,8 +25,11 @@ #include #include +#include "expression.h" #include "iceberg/exception.h" #include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/schema.h" namespace iceberg { @@ -89,6 +92,9 @@ constexpr bool IsPredicate(Operation op) { return false; } + +class BoundExpression; + /// \brief Represents a boolean expression tree. class ICEBERG_EXPORT Expression { public: @@ -106,6 +112,11 @@ class ICEBERG_EXPORT Expression { } virtual std::string ToString() const = 0; + + virtual Result> Bind( + const Schema& schema, bool case_sensitive) const { + return NotImplemented("Binding of Expression is not implemented"); + } }; class ICEBERG_EXPORT Predicate : public Expression { @@ -226,4 +237,8 @@ class ICEBERG_EXPORT Or : public Predicate { std::shared_ptr right_; }; +class ICEBERG_EXPORT BoundExpression {}; + +class ICEBERG_EXPORT BoundPredicate : public BoundExpression {}; + } // namespace iceberg diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h index 4164546ad..12b57d239 100644 --- a/src/iceberg/expression/term.h +++ b/src/iceberg/expression/term.h @@ -28,17 +28,17 @@ namespace iceberg { -struct BoundedReference {}; +struct BoundReference {}; /// Unbounded reference type for expressions. struct Reference { - using BoundedType = BoundedReference; + using BoundType = BoundReference; std::string name; std::string ToString() const { return "Reference(name: " + name + ")"; } - Result Bind(const Schema& schema, bool case_sensitive) const; + Result Bind(const Schema& schema, bool case_sensitive) const; }; using Term = Reference; From 667bd9f155dcffc6ad9983830ecd263ef2831da6 Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 5 Jul 2025 13:12:56 +0800 Subject: [PATCH 3/6] Refactor expr --- src/iceberg/expression/common.h | 13 +- src/iceberg/expression/expression.cc | 184 ++++++++++++++++++++++++--- src/iceberg/expression/expression.h | 145 ++++++--------------- test/expression_test.cc | 93 +++++++++----- 4 files changed, 270 insertions(+), 165 deletions(-) diff --git a/src/iceberg/expression/common.h b/src/iceberg/expression/common.h index 0e4d63ef1..361e326d1 100644 --- a/src/iceberg/expression/common.h +++ b/src/iceberg/expression/common.h @@ -19,11 +19,12 @@ #pragma once -namespace iceberg { +#include +#include + +#include "iceberg/result.h" -template -concept Bindable = requires(T expr) { - { expr.Bind() }; -}; +namespace iceberg { +class Schema; -} // namespace iceberg \ No newline at end of file +} // namespace iceberg diff --git a/src/iceberg/expression/expression.cc b/src/iceberg/expression/expression.cc index 05860edaa..a239dfee4 100644 --- a/src/iceberg/expression/expression.cc +++ b/src/iceberg/expression/expression.cc @@ -25,7 +25,144 @@ namespace iceberg { -// True implementation +template +concept Bindable = requires(const T& expr, const Schema& schema, bool case_sensitive) { + // Must have a BoundType alias that defines what type it binds to + typename T::BoundType; + + // Must have a Bind method with the correct signature + { expr.Bind(schema, case_sensitive) } -> std::same_as>; +}; + +/// \brief Concept for types that behave like predicates (bound or unbound) +template +concept PredicateLike = requires(const T& pred) { + // Must have an operation type + { pred.op() } -> std::same_as; + + // Must be convertible to string + { pred.ToString() } -> std::same_as; + + // Must have a Negate method that returns a shared_ptr to the same concept + { pred.Negate() } -> std::convertible_to>; + + // Must support equality comparison + { pred.Equals(pred) } -> std::same_as; +}; + +/// \brief Concept specifically for unbound predicates that can be bound +template +concept UnboundPredicate = PredicateLike && requires(const T& pred) { + // Must have a BoundType alias + typename T::BoundType; + + // Must be bindable to a schema + requires Bindable; +}; + +/// \brief Concept specifically for bound predicates +template +concept BoundPredicateLike = PredicateLike && requires(const T& pred) { + // Must have type information + { pred.type() } -> std::convertible_to>; + + // Must report that it's bound + { pred.IsBound() } -> std::convertible_to; +}; + +// Internal implementation classes + +/// \brief An Expression that is always true. +class True final : public Predicate { + public: + /// \brief Returns the singleton instance + static const std::shared_ptr& Instance(); + + Operation op() const override { return Operation::kTrue; } + + std::string ToString() const override { return "true"; } + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& other) const override { + return other.op() == Operation::kTrue; + } + + private: + constexpr True() = default; +}; + +/// \brief An expression that is always false. +class False final : public Predicate { + public: + /// \brief Returns the singleton instance + static const std::shared_ptr& Instance(); + + Operation op() const override { return Operation::kFalse; } + + std::string ToString() const override { return "false"; } + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& other) const override { + return other.op() == Operation::kFalse; + } + + private: + constexpr False() = default; +}; + +/// \brief An Expression that represents a logical AND operation between two expressions. +class AndImpl final : public Predicate { + public: + /// \brief Constructs an And expression from two sub-expressions. + AndImpl(std::shared_ptr left, std::shared_ptr right); + + /// \brief Returns the left operand of the AND expression. + const std::shared_ptr& left() const { return left_; } + + /// \brief Returns the right operand of the AND expression. + const std::shared_ptr& right() const { return right_; } + + Operation op() const override { return Operation::kAnd; } + + std::string ToString() const override; + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& other) const override; + + private: + std::shared_ptr left_; + std::shared_ptr right_; +}; + +/// \brief An Expression that represents a logical OR operation between two expressions. +class OrImpl final : public Predicate { + public: + /// \brief Constructs an Or expression from two sub-expressions. + OrImpl(std::shared_ptr left, std::shared_ptr right); + + /// \brief Returns the left operand of the OR expression. + const std::shared_ptr& left() const { return left_; } + + /// \brief Returns the right operand of the OR expression. + const std::shared_ptr& right() const { return right_; } + + Operation op() const override { return Operation::kOr; } + + std::string ToString() const override; + + std::shared_ptr Negate() const override; + + bool Equals(const Expression& other) const override; + + private: + std::shared_ptr left_; + std::shared_ptr right_; +}; + +// Implementation of True const std::shared_ptr& True::Instance() { static const std::shared_ptr instance{new True()}; return instance; @@ -33,7 +170,7 @@ const std::shared_ptr& True::Instance() { std::shared_ptr True::Negate() const { return False::Instance(); } -// False implementation +// Implementation of False const std::shared_ptr& False::Instance() { static const std::shared_ptr instance = std::shared_ptr(new False()); return instance; @@ -41,52 +178,67 @@ const std::shared_ptr& False::Instance() { std::shared_ptr False::Negate() const { return True::Instance(); } -// And implementation -And::And(std::shared_ptr left, std::shared_ptr right) +// Implementation of AndImpl +AndImpl::AndImpl(std::shared_ptr left, std::shared_ptr right) : left_(std::move(left)), right_(std::move(right)) {} -std::string And::ToString() const { +std::string AndImpl::ToString() const { return std::format("({} and {})", left_->ToString(), right_->ToString()); } -std::shared_ptr And::Negate() const { +std::shared_ptr AndImpl::Negate() const { // De Morgan's law: not(A and B) = (not A) or (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); - return std::make_shared(left_negated, right_negated); + return std::make_shared(left_negated, right_negated); } -bool And::Equals(const Expression& expr) const { +bool AndImpl::Equals(const Expression& expr) const { if (expr.op() == Operation::kAnd) { - const auto& other = iceberg::internal::checked_cast(expr); + const auto& other = iceberg::internal::checked_cast(expr); return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || (left_->Equals(*other.right()) && right_->Equals(*other.left())); } return false; } -// Or implementation -Or::Or(std::shared_ptr left, std::shared_ptr right) +// Implementation of OrImpl +OrImpl::OrImpl(std::shared_ptr left, std::shared_ptr right) : left_(std::move(left)), right_(std::move(right)) {} -std::string Or::ToString() const { +std::string OrImpl::ToString() const { return std::format("({} or {})", left_->ToString(), right_->ToString()); } -std::shared_ptr Or::Negate() const { +std::shared_ptr OrImpl::Negate() const { // De Morgan's law: not(A or B) = (not A) and (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); - return std::make_shared(left_negated, right_negated); + return std::make_shared(left_negated, right_negated); } -bool Or::Equals(const Expression& expr) const { +bool OrImpl::Equals(const Expression& expr) const { if (expr.op() == Operation::kOr) { - const auto& other = iceberg::internal::checked_cast(expr); + const auto& other = iceberg::internal::checked_cast(expr); return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || (left_->Equals(*other.right()) && right_->Equals(*other.left())); } return false; } +// Implementation of Predicate static factory methods +std::shared_ptr Predicate::AlwaysTrue() { return True::Instance(); } + +std::shared_ptr Predicate::AlwaysFalse() { return False::Instance(); } + +std::shared_ptr Predicate::And(std::shared_ptr left, + std::shared_ptr right) { + return std::make_shared(std::move(left), std::move(right)); +} + +std::shared_ptr Predicate::Or(std::shared_ptr left, + std::shared_ptr right) { + return std::make_shared(std::move(left), std::move(right)); +} + } // namespace iceberg diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index d47064d02..869203e73 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -25,8 +25,6 @@ #include #include -#include "expression.h" -#include "iceberg/exception.h" #include "iceberg/iceberg_export.h" #include "iceberg/result.h" #include "iceberg/schema.h" @@ -92,12 +90,13 @@ constexpr bool IsPredicate(Operation op) { return false; } - class BoundExpression; /// \brief Represents a boolean expression tree. class ICEBERG_EXPORT Expression { public: + using BoundType = BoundExpression; + virtual ~Expression() = default; /// \brief Returns the operation for an expression node. @@ -123,122 +122,50 @@ class ICEBERG_EXPORT Predicate : public Expression { public: /// \brief Returns a negated version of this predicate. virtual std::shared_ptr Negate() const = 0; -}; - -/// \brief An Expression that is always true. -/// -/// Represents a boolean predicate that always evaluates to true. -class ICEBERG_EXPORT True : public Predicate { - public: - /// \brief Returns the singleton instance - static const std::shared_ptr& Instance(); - - Operation op() const override { return Operation::kTrue; } - - std::string ToString() const override { return "true"; } - - std::shared_ptr Negate() const override; - - bool Equals(const Expression& other) const override { - return other.op() == Operation::kTrue; - } - private: - constexpr True() = default; + // Factory functions for creating predicates + + /// \brief Creates a True predicate that always evaluates to true. + /// \return A shared pointer to a True predicate + static std::shared_ptr AlwaysTrue(); + + /// \brief Creates a False predicate that always evaluates to false. + /// \return A shared pointer to a False predicate + static std::shared_ptr AlwaysFalse(); + + /// \brief Creates an And predicate that represents logical AND of two predicates. + /// \param left The left operand of the AND predicate + /// \param right The right operand of the AND predicate + /// \return A shared pointer to an And predicate + static std::shared_ptr And(std::shared_ptr left, + std::shared_ptr right); + + /// \brief Creates an Or predicate that represents logical OR of two predicates. + /// \param left The left operand of the OR predicate + /// \param right The right operand of the OR predicate + /// \return A shared pointer to an Or predicate + static std::shared_ptr Or(std::shared_ptr left, + std::shared_ptr right); }; -/// \brief An expression that is always false. -class ICEBERG_EXPORT False : public Predicate { +class ICEBERG_EXPORT BoundExpression { public: - /// \brief Returns the singleton instance - static const std::shared_ptr& Instance(); - - Operation op() const override { return Operation::kFalse; } - - std::string ToString() const override { return "false"; } + virtual ~BoundExpression() = default; - std::shared_ptr Negate() const override; - - bool Equals(const Expression& other) const override { - return other.op() == Operation::kFalse; - } - - private: - constexpr False() = default; -}; - -/// \brief An Expression that represents a logical AND operation between two expressions. -/// -/// This expression evaluates to true if and only if both of its child expressions -/// evaluate to true. -class ICEBERG_EXPORT And : public Predicate { - public: - /// \brief Constructs an And expression from two sub-expressions. - /// - /// \param left The left operand of the AND expression - /// \param right The right operand of the AND expression - And(std::shared_ptr left, std::shared_ptr right); - - /// \brief Returns the left operand of the AND expression. - /// - /// \return The left operand of the AND expression - const std::shared_ptr& left() const { return left_; } - - /// \brief Returns the right operand of the AND expression. - /// - /// \return The right operand of the AND expression - const std::shared_ptr& right() const { return right_; } - - Operation op() const override { return Operation::kAnd; } - - std::string ToString() const override; - - std::shared_ptr Negate() const override; + /// \brief Returns the operation for a bound expression node. + virtual Operation op() const = 0; - bool Equals(const Expression& other) const override; + /// \brief Returns whether this expression will accept the same values as another. + virtual bool Equals(const BoundExpression& other) const = 0; - private: - std::shared_ptr left_; - std::shared_ptr right_; + /// \brief Returns a string representation of this bound expression. + virtual std::string ToString() const = 0; }; -/// \brief An Expression that represents a logical OR operation between two expressions. -/// -/// This expression evaluates to true if at least one of its child expressions -/// evaluates to true. -class ICEBERG_EXPORT Or : public Predicate { +class ICEBERG_EXPORT BoundPredicate : public BoundExpression { public: - /// \brief Constructs an Or expression from two sub-expressions. - /// - /// \param left The left operand of the OR expression - /// \param right The right operand of the OR expression - Or(std::shared_ptr left, std::shared_ptr right); - - /// \brief Returns the left operand of the OR expression. - /// - /// \return The left operand of the OR expression - const std::shared_ptr& left() const { return left_; } - - /// \brief Returns the right operand of the OR expression. - /// - /// \return The right operand of the OR expression - const std::shared_ptr& right() const { return right_; } - - Operation op() const override { return Operation::kOr; } - - std::string ToString() const override; - - std::shared_ptr Negate() const override; - - bool Equals(const Expression& other) const override; - - private: - std::shared_ptr left_; - std::shared_ptr right_; + /// \brief Returns a negated version of this bound predicate. + virtual std::shared_ptr Negate() const = 0; }; -class ICEBERG_EXPORT BoundExpression {}; - -class ICEBERG_EXPORT BoundPredicate : public BoundExpression {}; - } // namespace iceberg diff --git a/test/expression_test.cc b/test/expression_test.cc index 8925a4159..8dcd01818 100644 --- a/test/expression_test.cc +++ b/test/expression_test.cc @@ -27,7 +27,7 @@ namespace iceberg { TEST(TrueFalseTest, Basic) { // Test negation of False returns True - auto false_instance = False::Instance(); + auto false_instance = Predicate::AlwaysFalse(); auto negated = false_instance->Negate(); // Check that negated expression is True @@ -35,7 +35,7 @@ TEST(TrueFalseTest, Basic) { EXPECT_EQ(negated->ToString(), "true"); // Test negation of True returns false - auto true_instance = True::Instance(); + auto true_instance = Predicate::AlwaysTrue(); negated = true_instance->Negate(); // Check that negated expression is False @@ -45,38 +45,34 @@ TEST(TrueFalseTest, Basic) { TEST(ANDTest, Basic) { // Create two True expressions - auto true_expr1 = True::Instance(); - auto true_expr2 = True::Instance(); + auto true_expr1 = Predicate::AlwaysTrue(); + auto true_expr2 = Predicate::AlwaysTrue(); // Create an AND expression - auto and_expr = std::make_shared(true_expr1, true_expr2); + auto and_expr = Predicate::And(true_expr1, true_expr2); - // EXPECT_EQ(and_expr->op(), Operation::kAnd); - // EXPECT_EQ(and_expr->ToString(), "(true and true)"); - // EXPECT_EQ(and_expr->left()->op(), Operation::kTrue); - // EXPECT_EQ(and_expr->right()->op(), Operation::kTrue); + EXPECT_EQ(and_expr->op(), Operation::kAnd); + EXPECT_EQ(and_expr->ToString(), "(true and true)"); } TEST(ORTest, Basic) { // Create True and False expressions - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); // Create an OR expression - auto or_expr = std::make_shared(true_expr, false_expr); + auto or_expr = Predicate::Or(true_expr, false_expr); EXPECT_EQ(or_expr->op(), Operation::kOr); EXPECT_EQ(or_expr->ToString(), "(true or false)"); - EXPECT_EQ(or_expr->left()->op(), Operation::kTrue); - EXPECT_EQ(or_expr->right()->op(), Operation::kFalse); } TEST(ORTest, Negation) { // Test De Morgan's law: not(A or B) = (not A) and (not B) - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); - auto or_expr = std::make_shared(true_expr, false_expr); + auto or_expr = Predicate::Or(true_expr, false_expr); auto negated_or = or_expr->Negate(); // Should become AND expression @@ -85,33 +81,33 @@ TEST(ORTest, Negation) { } TEST(ORTest, Equals) { - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); // Test basic equality - auto or_expr1 = std::make_shared(true_expr, false_expr); - auto or_expr2 = std::make_shared(true_expr, false_expr); + auto or_expr1 = Predicate::Or(true_expr, false_expr); + auto or_expr2 = Predicate::Or(true_expr, false_expr); EXPECT_TRUE(or_expr1->Equals(*or_expr2)); // Test commutativity: (A or B) equals (B or A) - auto or_expr3 = std::make_shared(false_expr, true_expr); + auto or_expr3 = Predicate::Or(false_expr, true_expr); EXPECT_TRUE(or_expr1->Equals(*or_expr3)); // Test inequality with different expressions - auto or_expr4 = std::make_shared(true_expr, true_expr); + auto or_expr4 = Predicate::Or(true_expr, true_expr); EXPECT_FALSE(or_expr1->Equals(*or_expr4)); // Test inequality with different operation types - auto and_expr = std::make_shared(true_expr, false_expr); + auto and_expr = Predicate::And(true_expr, false_expr); EXPECT_FALSE(or_expr1->Equals(*and_expr)); } TEST(ANDTest, Negation) { // Test De Morgan's law: not(A and B) = (not A) or (not B) - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); - auto and_expr = std::make_shared(true_expr, false_expr); + auto and_expr = Predicate::And(true_expr, false_expr); auto negated_and = and_expr->Negate(); // Should become OR expression @@ -120,24 +116,53 @@ TEST(ANDTest, Negation) { } TEST(ANDTest, Equals) { - auto true_expr = True::Instance(); - auto false_expr = False::Instance(); + auto true_expr = Predicate::AlwaysTrue(); + auto false_expr = Predicate::AlwaysFalse(); // Test basic equality - auto and_expr1 = std::make_shared(true_expr, false_expr); - auto and_expr2 = std::make_shared(true_expr, false_expr); + auto and_expr1 = Predicate::And(true_expr, false_expr); + auto and_expr2 = Predicate::And(true_expr, false_expr); EXPECT_TRUE(and_expr1->Equals(*and_expr2)); // Test commutativity: (A and B) equals (B and A) - auto and_expr3 = std::make_shared(false_expr, true_expr); + auto and_expr3 = Predicate::And(false_expr, true_expr); EXPECT_TRUE(and_expr1->Equals(*and_expr3)); // Test inequality with different expressions - auto and_expr4 = std::make_shared(true_expr, true_expr); + auto and_expr4 = Predicate::And(true_expr, true_expr); EXPECT_FALSE(and_expr1->Equals(*and_expr4)); // Test inequality with different operation types - auto or_expr = std::make_shared(true_expr, false_expr); + auto or_expr = Predicate::Or(true_expr, false_expr); EXPECT_FALSE(and_expr1->Equals(*or_expr)); } + +TEST(PredicateFactoryTest, FactoryMethods) { + // Test that factory methods work correctly + auto true_pred = Predicate::AlwaysTrue(); + auto false_pred = Predicate::AlwaysFalse(); + + EXPECT_EQ(true_pred->op(), Operation::kTrue); + EXPECT_EQ(false_pred->op(), Operation::kFalse); + + // Test that multiple calls return equivalent instances + auto true_pred2 = Predicate::AlwaysTrue(); + auto false_pred2 = Predicate::AlwaysFalse(); + + EXPECT_TRUE(true_pred->Equals(*true_pred2)); + EXPECT_TRUE(false_pred->Equals(*false_pred2)); + + // Test compound expressions + auto and_pred = Predicate::And(true_pred, false_pred); + auto or_pred = Predicate::Or(true_pred, false_pred); + + EXPECT_EQ(and_pred->op(), Operation::kAnd); + EXPECT_EQ(or_pred->op(), Operation::kOr); + + // Test nested expressions + auto nested_and = Predicate::And(and_pred, or_pred); + EXPECT_EQ(nested_and->op(), Operation::kAnd); + EXPECT_EQ(nested_and->ToString(), "((true and false) and (true or false))"); +} + } // namespace iceberg From 934e16b993b552c4d25fc793dfa8be5401ed73bb Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 5 Jul 2025 13:29:37 +0800 Subject: [PATCH 4/6] Update: minor enhancement --- src/iceberg/expression/expression.cc | 30 ++++++++++++++++++++++++++-- src/iceberg/expression/expression.h | 10 +++------- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/iceberg/expression/expression.cc b/src/iceberg/expression/expression.cc index a239dfee4..f74d36820 100644 --- a/src/iceberg/expression/expression.cc +++ b/src/iceberg/expression/expression.cc @@ -227,17 +227,43 @@ bool OrImpl::Equals(const Expression& expr) const { } // Implementation of Predicate static factory methods -std::shared_ptr Predicate::AlwaysTrue() { return True::Instance(); } +const std::shared_ptr& Predicate::AlwaysTrue() { + static const std::shared_ptr instance = True::Instance(); + return instance; +} -std::shared_ptr Predicate::AlwaysFalse() { return False::Instance(); } +const std::shared_ptr& Predicate::AlwaysFalse() { + static const std::shared_ptr instance = False::Instance(); + return instance; +} std::shared_ptr Predicate::And(std::shared_ptr left, std::shared_ptr right) { + /* + auto left_op = left->op(); + auto right_op = right->op(); + if (left_op == Operation::kFalse || right_op == Operation::kFalse) { + return False::Instance(); + } + if (left_op == Operation::kTrue && right_op == Operation::kTrue) { + return left; + } + */ return std::make_shared(std::move(left), std::move(right)); } std::shared_ptr Predicate::Or(std::shared_ptr left, std::shared_ptr right) { + /* + auto left_op = left->op(); + auto right_op = right->op(); + if (left_op == Operation::kTrue || right_op == Operation::kTrue) { + return False::Instance(); + } + if (left_op == Operation::kFalse && right_op == Operation::kFalse) { + return left; + } + */ return std::make_shared(std::move(left), std::move(right)); } diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index 869203e73..ed6fed8ea 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -127,11 +127,11 @@ class ICEBERG_EXPORT Predicate : public Expression { /// \brief Creates a True predicate that always evaluates to true. /// \return A shared pointer to a True predicate - static std::shared_ptr AlwaysTrue(); + static const std::shared_ptr& AlwaysTrue(); /// \brief Creates a False predicate that always evaluates to false. /// \return A shared pointer to a False predicate - static std::shared_ptr AlwaysFalse(); + static const std::shared_ptr& AlwaysFalse(); /// \brief Creates an And predicate that represents logical AND of two predicates. /// \param left The left operand of the AND predicate @@ -162,10 +162,6 @@ class ICEBERG_EXPORT BoundExpression { virtual std::string ToString() const = 0; }; -class ICEBERG_EXPORT BoundPredicate : public BoundExpression { - public: - /// \brief Returns a negated version of this bound predicate. - virtual std::shared_ptr Negate() const = 0; -}; +class ICEBERG_EXPORT BoundPredicate : public BoundExpression {}; } // namespace iceberg From d4d8b6bb69d6664496bd963ef81f1db01f66cc30 Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 5 Jul 2025 14:11:36 +0800 Subject: [PATCH 5/6] Some code skeleton for Unary/Binary Ref --- src/iceberg/expression/common.h | 93 ++++++++- src/iceberg/expression/expression.cc | 292 +++++++++++++++++++++++++++ src/iceberg/expression/expression.h | 99 ++++----- src/iceberg/expression/literal.h | 2 + src/iceberg/expression/term.cc | 11 + src/iceberg/expression/term.h | 12 +- 6 files changed, 445 insertions(+), 64 deletions(-) diff --git a/src/iceberg/expression/common.h b/src/iceberg/expression/common.h index 361e326d1..6a7784b77 100644 --- a/src/iceberg/expression/common.h +++ b/src/iceberg/expression/common.h @@ -22,9 +22,96 @@ #include #include -#include "iceberg/result.h" - namespace iceberg { -class Schema; + +/// Operation types for expressions +enum class Operation { + kTrue, + kFalse, + kIsNull, + kNotNull, + kIsNan, + kNotNan, + kLt, + kLtEq, + kGt, + kGtEq, + kEq, + kNotEq, + kIn, + kNotIn, + kNot, + kAnd, + kOr, + kStartsWith, + kNotStartsWith, + kCount, + kCountStar, + kMax, + kMin +}; + +/// \brief Returns whether the operation is a predicate operation. +constexpr bool IsPredicate(Operation op) { + switch (op) { + case Operation::kTrue: + case Operation::kFalse: + case Operation::kIsNull: + case Operation::kNotNull: + case Operation::kIsNan: + case Operation::kNotNan: + case Operation::kLt: + case Operation::kLtEq: + case Operation::kGt: + case Operation::kGtEq: + case Operation::kEq: + case Operation::kNotEq: + case Operation::kIn: + case Operation::kNotIn: + case Operation::kNot: + case Operation::kAnd: + case Operation::kOr: + case Operation::kStartsWith: + case Operation::kNotStartsWith: + return true; + case Operation::kCount: + case Operation::kCountStar: + case Operation::kMax: + case Operation::kMin: + return false; + } + return false; +} + +constexpr bool IsUnaryPredicate(Operation op) { + switch (op) { + case Operation::kIsNull: + case Operation::kNotNull: + case Operation::kIsNan: + case Operation::kNotNan: + return true; + default: + return false; + } +} +constexpr bool IsBinaryPredicate(Operation op) { + switch (op) { + case Operation::kLt: + case Operation::kLtEq: + case Operation::kGt: + case Operation::kGtEq: + case Operation::kEq: + case Operation::kNotEq: + case Operation::kIn: + case Operation::kNotIn: + case Operation::kAnd: + case Operation::kOr: + case Operation::kStartsWith: + case Operation::kNotStartsWith: + return true; + default: + return false; + } +} } // namespace iceberg diff --git a/src/iceberg/expression/expression.cc b/src/iceberg/expression/expression.cc index f74d36820..1d4b46f05 100644 --- a/src/iceberg/expression/expression.cc +++ b/src/iceberg/expression/expression.cc @@ -21,7 +21,11 @@ #include +#include "iceberg/exception.h" +#include "iceberg/expression/term.h" #include "iceberg/util/checked_cast.h" +#include "iceberg/util/macros.h" +#include "literal.h" namespace iceberg { @@ -267,4 +271,292 @@ std::shared_ptr Predicate::Or(std::shared_ptr left, return std::make_shared(std::move(left), std::move(right)); } +/// Unary predicate, for example, `a IS NULL`, which `a` is a Term. +/// +/// Note that this would not include UnaryPredicates like +/// `COALESCE(a, b) is not null`. +template +struct UnaryPredicateBase { + UnaryPredicateBase(Operation in_op, ReferenceType in_reference) + : unary_op(in_op), reference(std::move(in_reference)) { + if (!IsUnaryPredicate(unary_op)) { + throw IcebergError( + std::format("UnaryPredicateBase: operation {} is not a unary predicate", + static_cast(unary_op))); + } + } + + Operation unary_op; + ReferenceType reference; +}; + +class BoundUnaryPredicate; + +class UnaryPredicate final : public UnaryPredicateBase, public Predicate { + public: + using BoundType = BoundUnaryPredicate; + + UnaryPredicate(Operation op, Reference reference) + : UnaryPredicateBase(op, std::move(reference)) {} + + std::string ToString() const override { + switch (this->unary_op) { + case Operation::kIsNull: + return std::format("{} IS NULL", reference.ToString()); + case Operation::kNotNull: + return std::format("{} IS NOT NULL", reference.ToString()); + case Operation::kIsNan: + return std::format("{} IS NAN", reference.ToString()); + case Operation::kNotNan: + return std::format("{} IS NOT NAN", reference.ToString()); + default: + return std::format("UnaryPredicate({})", static_cast(unary_op)); + } + } + + std::shared_ptr Negate() const override { + Operation negated_op; + switch (op()) { + case Operation::kIsNull: + negated_op = Operation::kNotNull; + break; + case Operation::kNotNull: + negated_op = Operation::kIsNull; + break; + case Operation::kIsNan: + negated_op = Operation::kNotNan; + break; + case Operation::kNotNan: + negated_op = Operation::kIsNan; + break; + default: + throw IcebergError(std::format("Cannot negate unary predicate with operation {}", + static_cast(op()))); + } + return std::make_shared(negated_op, reference); + } + + bool Equals(const Expression& other) const override { + if (other.op() != op()) { + return false; + } + const auto& other_unary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_unary.reference); + } + + Result> Bind(const Schema& schema, + bool case_sensitive) const override; + Operation op() const override { return unary_op; } +}; + +class BoundUnaryPredicate final : public UnaryPredicateBase, + public BoundPredicate { + public: + BoundUnaryPredicate(Operation op, BoundReference reference) + : UnaryPredicateBase(op, std::move(reference)) {} + + std::string ToString() const override { + switch (op()) { + case Operation::kIsNull: + return std::format("{} IS NULL", reference.ToString()); + case Operation::kNotNull: + return std::format("{} IS NOT NULL", reference.ToString()); + case Operation::kIsNan: + return std::format("{} IS NAN", reference.ToString()); + case Operation::kNotNan: + return std::format("{} IS NOT NAN", reference.ToString()); + default: + return std::format("BoundUnaryPredicate({})", static_cast(op())); + } + } + + bool Equals(const BoundExpression& other) const override { + if (other.op() != op()) { + return false; + } + const auto& other_unary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_unary.reference); + } + + Operation op() const override { return unary_op; } +}; + +Result> UnaryPredicate::Bind(const Schema& schema, + bool case_sensitive) const { + return nullptr; +} + +/// Binary predicate, for example, `a = 10`, `b > 5`, etc. +/// +/// Represents comparisons between a term (Reference) and a literal value. +template +struct BinaryPredicateBase { + BinaryPredicateBase(Operation in_op, ReferenceType in_reference, Literal in_literal) + : binary_op(in_op), + reference(std::move(in_reference)), + literal(std::move(in_literal)) { + if (!IsBinaryPredicate(binary_op)) { + throw IcebergError( + std::format("BinaryPredicateBase: operation {} is not a binary predicate", + static_cast(binary_op))); + } + } + + Operation binary_op; + ReferenceType reference; + Literal literal; +}; + +class BoundBinaryPredicate; + +class BinaryPredicate final : public BinaryPredicateBase, public Predicate { + public: + using BoundType = BoundBinaryPredicate; + + BinaryPredicate(Operation op, Reference reference, Literal literal) + : BinaryPredicateBase(op, std::move(reference), std::move(literal)) {} + + std::string ToString() const override { + std::string op_str; + switch (binary_op) { + case Operation::kEq: + op_str = " = "; + break; + case Operation::kNotEq: + op_str = " != "; + break; + case Operation::kLt: + op_str = " < "; + break; + case Operation::kLtEq: + op_str = " <= "; + break; + case Operation::kGt: + op_str = " > "; + break; + case Operation::kGtEq: + op_str = " >= "; + break; + case Operation::kStartsWith: + return std::format("{} STARTS WITH {}", reference.ToString(), literal.ToString()); + case Operation::kNotStartsWith: + return std::format("{} NOT STARTS WITH {}", reference.ToString(), + literal.ToString()); + default: + return std::format("BinaryPredicate({}, {}, {})", static_cast(binary_op), + reference.ToString(), literal.ToString()); + } + return std::format("{}{}{}", reference.ToString(), op_str, literal.ToString()); + } + + std::shared_ptr Negate() const override { + Operation negated_op; + switch (binary_op) { + case Operation::kEq: + negated_op = Operation::kNotEq; + break; + case Operation::kNotEq: + negated_op = Operation::kEq; + break; + case Operation::kLt: + negated_op = Operation::kGtEq; + break; + case Operation::kLtEq: + negated_op = Operation::kGt; + break; + case Operation::kGt: + negated_op = Operation::kLtEq; + break; + case Operation::kGtEq: + negated_op = Operation::kLt; + break; + case Operation::kStartsWith: + negated_op = Operation::kNotStartsWith; + break; + case Operation::kNotStartsWith: + negated_op = Operation::kStartsWith; + break; + default: + throw IcebergError(std::format("Cannot negate binary predicate with operation {}", + static_cast(binary_op))); + } + return std::make_shared(negated_op, reference, literal); + } + + bool Equals(const Expression& other) const override { + if (other.op() != binary_op) { + return false; + } + const auto& other_binary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_binary.reference) && literal == other_binary.literal; + } + + Result> Bind(const Schema& schema, + bool case_sensitive) const override; + + Operation op() const override { return binary_op; } +}; + +class BoundBinaryPredicate final : public BinaryPredicateBase, + public BoundPredicate { + public: + BoundBinaryPredicate(Operation op, BoundReference reference, Literal literal) + : BinaryPredicateBase(op, std::move(reference), + std::move(literal)) {} + + std::string ToString() const override { + std::string op_str; + switch (binary_op) { + case Operation::kEq: + op_str = " = "; + break; + case Operation::kNotEq: + op_str = " != "; + break; + case Operation::kLt: + op_str = " < "; + break; + case Operation::kLtEq: + op_str = " <= "; + break; + case Operation::kGt: + op_str = " > "; + break; + case Operation::kGtEq: + op_str = " >= "; + break; + case Operation::kStartsWith: + return std::format("{} STARTS WITH {}", reference.ToString(), literal.ToString()); + case Operation::kNotStartsWith: + return std::format("{} NOT STARTS WITH {}", reference.ToString(), + literal.ToString()); + default: + return std::format("BoundBinaryPredicate({}, {}, {})", + static_cast(binary_op), reference.ToString(), + literal.ToString()); + } + return std::format("{}{}{}", reference.ToString(), op_str, literal.ToString()); + } + + bool Equals(const BoundExpression& other) const override { + if (other.op() != binary_op) { + return false; + } + const auto& other_binary = + iceberg::internal::checked_cast(other); + return reference.Equals(other_binary.reference) && literal == other_binary.literal; + } + + Operation op() const override { return binary_op; } +}; + +// Implementation of BinaryPredicate::Bind +Result> BinaryPredicate::Bind( + const Schema& schema, bool case_sensitive) const { + return nullptr; +} + } // namespace iceberg diff --git a/src/iceberg/expression/expression.h b/src/iceberg/expression/expression.h index ed6fed8ea..b7620139a 100644 --- a/src/iceberg/expression/expression.h +++ b/src/iceberg/expression/expression.h @@ -25,71 +25,15 @@ #include #include +#include "iceberg/expression/common.h" +#include "iceberg/expression/literal.h" +#include "iceberg/expression/term.h" #include "iceberg/iceberg_export.h" #include "iceberg/result.h" #include "iceberg/schema.h" namespace iceberg { -/// Operation types for expressions -enum class Operation { - kTrue, - kFalse, - kIsNull, - kNotNull, - kIsNan, - kNotNan, - kLt, - kLtEq, - kGt, - kGtEq, - kEq, - kNotEq, - kIn, - kNotIn, - kNot, - kAnd, - kOr, - kStartsWith, - kNotStartsWith, - kCount, - kCountStar, - kMax, - kMin -}; - -/// \brief Returns whether the operation is a predicate operation. -constexpr bool IsPredicate(Operation op) { - switch (op) { - case Operation::kTrue: - case Operation::kFalse: - case Operation::kIsNull: - case Operation::kNotNull: - case Operation::kIsNan: - case Operation::kNotNan: - case Operation::kLt: - case Operation::kLtEq: - case Operation::kGt: - case Operation::kGtEq: - case Operation::kEq: - case Operation::kNotEq: - case Operation::kIn: - case Operation::kNotIn: - case Operation::kNot: - case Operation::kAnd: - case Operation::kOr: - case Operation::kStartsWith: - case Operation::kNotStartsWith: - return true; - case Operation::kCount: - case Operation::kCountStar: - case Operation::kMax: - case Operation::kMin: - return false; - } - return false; -} - class BoundExpression; /// \brief Represents a boolean expression tree. @@ -146,6 +90,43 @@ class ICEBERG_EXPORT Predicate : public Expression { /// \return A shared pointer to an Or predicate static std::shared_ptr Or(std::shared_ptr left, std::shared_ptr right); + + /// \brief Creates an IsNull predicate + static std::shared_ptr IsNull(Reference reference); + + /// \brief Creates an IsNotNull predicate + static std::shared_ptr IsNotNull(Reference reference); + + /// \brief Creates an IsNan predicate + static std::shared_ptr IsNan(Reference reference); + + /// \brief Creates an IsNotNan predicate + static std::shared_ptr IsNotNan(Reference reference); + + /// \brief Creates an equal-to predicate: reference = literal + static std::shared_ptr Equal(Reference reference, Literal literal); + + /// \brief Creates a not-equal-to predicate: reference != literal + static std::shared_ptr NotEqual(Reference reference, Literal literal); + + /// \brief Creates a less-than predicate: reference < literal + static std::shared_ptr LessThan(Reference reference, Literal literal); + + /// \brief Creates a less-than-or-equal predicate: reference <= literal + static std::shared_ptr LessThanOrEqual(Reference reference, Literal literal); + + /// \brief Creates a greater-than predicate: reference > literal + static std::shared_ptr GreaterThan(Reference reference, Literal literal); + + /// \brief Creates a greater-than-or-equal predicate: reference >= literal + static std::shared_ptr GreaterThanOrEqual(Reference reference, + Literal literal); + + /// \brief Creates a starts-with predicate: reference STARTS WITH literal + static std::shared_ptr StartsWith(Reference reference, Literal literal); + + /// \brief Creates a not-starts-with predicate: reference NOT STARTS WITH literal + static std::shared_ptr NotStartsWith(Reference reference, Literal literal); }; class ICEBERG_EXPORT BoundExpression { diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h index 3d4e8c81b..3e7240c4f 100644 --- a/src/iceberg/expression/literal.h +++ b/src/iceberg/expression/literal.h @@ -112,6 +112,8 @@ class ICEBERG_EXPORT Literal { /// and should not be AboveMax or BelowMin. std::partial_ordering operator<=>(const Literal& other) const; + bool operator==(const Literal& other) const = default; + /// Check if this literal represents a value above the maximum allowed value /// for its type. This occurs when casting from a wider type to a narrower type /// and the value exceeds the target type's maximum. diff --git a/src/iceberg/expression/term.cc b/src/iceberg/expression/term.cc index 0553dd451..02ea333d7 100644 --- a/src/iceberg/expression/term.cc +++ b/src/iceberg/expression/term.cc @@ -18,3 +18,14 @@ */ #include "term.h" + +#include "iceberg/exception.h" + +namespace iceberg { +bool Reference::Equals(const Reference& other) const { return name == other.name; } + +bool BoundReference::Equals(const BoundReference& other) const { + throw IcebergError("BoundReference::Equals not implemented"); +} + +} // namespace iceberg \ No newline at end of file diff --git a/src/iceberg/expression/term.h b/src/iceberg/expression/term.h index 12b57d239..be25b2465 100644 --- a/src/iceberg/expression/term.h +++ b/src/iceberg/expression/term.h @@ -28,7 +28,7 @@ namespace iceberg { -struct BoundReference {}; +struct BoundReference; /// Unbounded reference type for expressions. struct Reference { @@ -39,8 +39,16 @@ struct Reference { std::string ToString() const { return "Reference(name: " + name + ")"; } Result Bind(const Schema& schema, bool case_sensitive) const; + + bool Equals(const Reference& other) const; }; -using Term = Reference; +struct BoundReference { + std::string column_name; + + std::string ToString() const { return "BoundReference(name: " + column_name + ")"; } + + bool Equals(const BoundReference& other) const; +}; } // namespace iceberg \ No newline at end of file From 02808863804867549a6b73157599c86dbd0f35b9 Mon Sep 17 00:00:00 2001 From: mwish Date: Sat, 5 Jul 2025 15:04:31 +0800 Subject: [PATCH 6/6] bound/unbound and/or/true/false --- src/iceberg/expression/common.h | 24 ++++ src/iceberg/expression/expression.cc | 202 +++++++++++---------------- 2 files changed, 104 insertions(+), 122 deletions(-) diff --git a/src/iceberg/expression/common.h b/src/iceberg/expression/common.h index 6a7784b77..8f3f95859 100644 --- a/src/iceberg/expression/common.h +++ b/src/iceberg/expression/common.h @@ -22,6 +22,9 @@ #include #include +#include "iceberg/result.h" +#include "iceberg/schema.h" + namespace iceberg { /// Operation types for expressions @@ -114,4 +117,25 @@ constexpr bool IsBinaryPredicate(Operation op) { } } +template +concept Bindable = requires(const T& expr, const Schema& schema, bool case_sensitive) { + // Must have a BoundType alias that defines what type it binds to + typename T::BoundType; + // Must have a Bind method with the correct signature + { expr.Bind(schema, case_sensitive) } -> std::same_as>; +}; + +/// \brief Concept for types that behave like predicates (bound or unbound) +template +concept PredicateLike = requires(const T& pred) { + // Must have an operation type + { pred.op() } -> std::same_as; + // Must be convertible to string + { pred.ToString() } -> std::same_as; + // // Must have a Negate method that returns a shared_ptr to the same concept + // { pred.Negate() } -> std::convertible_to>; + // Must support equality comparison + { pred.Equals(pred) } -> std::same_as; +}; + } // namespace iceberg diff --git a/src/iceberg/expression/expression.cc b/src/iceberg/expression/expression.cc index 1d4b46f05..0f1de5a55 100644 --- a/src/iceberg/expression/expression.cc +++ b/src/iceberg/expression/expression.cc @@ -29,58 +29,18 @@ namespace iceberg { -template -concept Bindable = requires(const T& expr, const Schema& schema, bool case_sensitive) { - // Must have a BoundType alias that defines what type it binds to - typename T::BoundType; - - // Must have a Bind method with the correct signature - { expr.Bind(schema, case_sensitive) } -> std::same_as>; -}; - -/// \brief Concept for types that behave like predicates (bound or unbound) -template -concept PredicateLike = requires(const T& pred) { - // Must have an operation type - { pred.op() } -> std::same_as; - - // Must be convertible to string - { pred.ToString() } -> std::same_as; - - // Must have a Negate method that returns a shared_ptr to the same concept - { pred.Negate() } -> std::convertible_to>; - - // Must support equality comparison - { pred.Equals(pred) } -> std::same_as; -}; - -/// \brief Concept specifically for unbound predicates that can be bound -template -concept UnboundPredicate = PredicateLike && requires(const T& pred) { - // Must have a BoundType alias - typename T::BoundType; - - // Must be bindable to a schema - requires Bindable; -}; - -/// \brief Concept specifically for bound predicates -template -concept BoundPredicateLike = PredicateLike && requires(const T& pred) { - // Must have type information - { pred.type() } -> std::convertible_to>; - - // Must report that it's bound - { pred.IsBound() } -> std::convertible_to; -}; - // Internal implementation classes /// \brief An Expression that is always true. -class True final : public Predicate { +template +class TrueImpl final : public PredicateType { public: /// \brief Returns the singleton instance - static const std::shared_ptr& Instance(); + static const std::shared_ptr& Instance() { + static const std::shared_ptr instance = + std::shared_ptr(new TrueImpl()); + return instance; + } Operation op() const override { return Operation::kTrue; } @@ -92,35 +52,49 @@ class True final : public Predicate { return other.op() == Operation::kTrue; } - private: - constexpr True() = default; + protected: + constexpr TrueImpl() = default; }; /// \brief An expression that is always false. -class False final : public Predicate { +template +class FalseImpl final : public PredicateType { public: /// \brief Returns the singleton instance - static const std::shared_ptr& Instance(); + static const std::shared_ptr& Instance() { + static const std::shared_ptr instance = + std::shared_ptr(new FalseImpl()); + return instance; + } Operation op() const override { return Operation::kFalse; } std::string ToString() const override { return "false"; } - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override { + return TrueImpl::Instance(); + } bool Equals(const Expression& other) const override { return other.op() == Operation::kFalse; } private: - constexpr False() = default; + constexpr FalseImpl() = default; }; +template +std::shared_ptr TrueImpl::Negate() const { + return FalseImpl::Instance(); +} + /// \brief An Expression that represents a logical AND operation between two expressions. -class AndImpl final : public Predicate { +template +class AndImpl final : public PredicateType { public: /// \brief Constructs an And expression from two sub-expressions. - AndImpl(std::shared_ptr left, std::shared_ptr right); + AndImpl(std::shared_ptr left, std::shared_ptr right) + : left_(std::move(left)), right_(std::move(right)) {} /// \brief Returns the left operand of the AND expression. const std::shared_ptr& left() const { return left_; } @@ -130,11 +104,21 @@ class AndImpl final : public Predicate { Operation op() const override { return Operation::kAnd; } - std::string ToString() const override; + std::string ToString() const override { + return std::format("({} and {})", left_->ToString(), right_->ToString()); + } - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override; - bool Equals(const Expression& other) const override; + bool Equals(const Expression& expr) const override { + if (expr.op() == Operation::kAnd) { + const auto& other = + iceberg::internal::checked_cast&>(expr); + return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || + (left_->Equals(*other.right()) && right_->Equals(*other.left())); + } + return false; + } private: std::shared_ptr left_; @@ -142,93 +126,67 @@ class AndImpl final : public Predicate { }; /// \brief An Expression that represents a logical OR operation between two expressions. -class OrImpl final : public Predicate { +template +class OrImpl final : public PredicateType { public: /// \brief Constructs an Or expression from two sub-expressions. - OrImpl(std::shared_ptr left, std::shared_ptr right); + OrImpl(std::shared_ptr left, std::shared_ptr right) + : left_(std::move(left)), right_(std::move(right)) {} /// \brief Returns the left operand of the OR expression. - const std::shared_ptr& left() const { return left_; } + const std::shared_ptr& left() const { return left_; } /// \brief Returns the right operand of the OR expression. - const std::shared_ptr& right() const { return right_; } + const std::shared_ptr& right() const { return right_; } Operation op() const override { return Operation::kOr; } - std::string ToString() const override; + std::string ToString() const override { + return std::format("({} or {})", left_->ToString(), right_->ToString()); + } - std::shared_ptr Negate() const override; + std::shared_ptr Negate() const override; - bool Equals(const Expression& other) const override; + bool Equals(const Expression& expr) const override { + if (expr.op() == Operation::kOr) { + const auto& other = + iceberg::internal::checked_cast&>(expr); + return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || + (left_->Equals(*other.right()) && right_->Equals(*other.left())); + } + return false; + } private: - std::shared_ptr left_; - std::shared_ptr right_; + std::shared_ptr left_; + std::shared_ptr right_; }; -// Implementation of True -const std::shared_ptr& True::Instance() { - static const std::shared_ptr instance{new True()}; - return instance; -} - -std::shared_ptr True::Negate() const { return False::Instance(); } - -// Implementation of False -const std::shared_ptr& False::Instance() { - static const std::shared_ptr instance = std::shared_ptr(new False()); - return instance; -} - -std::shared_ptr False::Negate() const { return True::Instance(); } - -// Implementation of AndImpl -AndImpl::AndImpl(std::shared_ptr left, std::shared_ptr right) - : left_(std::move(left)), right_(std::move(right)) {} - -std::string AndImpl::ToString() const { - return std::format("({} and {})", left_->ToString(), right_->ToString()); -} - -std::shared_ptr AndImpl::Negate() const { +template +std::shared_ptr AndImpl::Negate() const { // De Morgan's law: not(A and B) = (not A) or (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); - return std::make_shared(left_negated, right_negated); + return std::make_shared>(std::move(left_negated), + std::move(right_negated)); } -bool AndImpl::Equals(const Expression& expr) const { - if (expr.op() == Operation::kAnd) { - const auto& other = iceberg::internal::checked_cast(expr); - return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || - (left_->Equals(*other.right()) && right_->Equals(*other.left())); - } - return false; -} - -// Implementation of OrImpl -OrImpl::OrImpl(std::shared_ptr left, std::shared_ptr right) - : left_(std::move(left)), right_(std::move(right)) {} - -std::string OrImpl::ToString() const { - return std::format("({} or {})", left_->ToString(), right_->ToString()); -} - -std::shared_ptr OrImpl::Negate() const { +template +std::shared_ptr OrImpl::Negate() const { // De Morgan's law: not(A or B) = (not A) and (not B) auto left_negated = left_->Negate(); auto right_negated = right_->Negate(); - return std::make_shared(left_negated, right_negated); + return std::make_shared>(left_negated, right_negated); } -bool OrImpl::Equals(const Expression& expr) const { - if (expr.op() == Operation::kOr) { - const auto& other = iceberg::internal::checked_cast(expr); - return (left_->Equals(*other.left()) && right_->Equals(*other.right())) || - (left_->Equals(*other.right()) && right_->Equals(*other.left())); - } - return false; -} +using True = TrueImpl; +using BoundTrue = TrueImpl; +using False = FalseImpl; +using BoundFalse = FalseImpl; +using AndPredicate = AndImpl; +using BoundAndPredicate = AndImpl; +using OrPredicate = OrImpl; +using BoundOrPredicate = OrImpl; // Implementation of Predicate static factory methods const std::shared_ptr& Predicate::AlwaysTrue() { @@ -253,7 +211,7 @@ std::shared_ptr Predicate::And(std::shared_ptr left, return left; } */ - return std::make_shared(std::move(left), std::move(right)); + return std::make_shared(std::move(left), std::move(right)); } std::shared_ptr Predicate::Or(std::shared_ptr left, @@ -268,7 +226,7 @@ std::shared_ptr Predicate::Or(std::shared_ptr left, return left; } */ - return std::make_shared(std::move(left), std::move(right)); + return std::make_shared(std::move(left), std::move(right)); } /// Unary predicate, for example, `a IS NULL`, which `a` is a Term.