-
Notifications
You must be signed in to change notification settings - Fork 67
feat: implement expression evaluator #327
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| #include "iceberg/expression/evaluator.h" | ||
|
|
||
| #include "iceberg/expression/binder.h" | ||
| #include "iceberg/expression/expression_visitor.h" | ||
| #include "iceberg/schema.h" | ||
| #include "iceberg/util/macros.h" | ||
|
|
||
| namespace iceberg { | ||
|
|
||
| class Evaluator::EvalVisitor : public BoundVisitor<bool> { | ||
| public: | ||
| void UpdateRow(const StructLike* row) { row_ = row; } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: `SetRow`? This method doesn't update anything. |
||
|
|
||
| Result<bool> AlwaysTrue() override { return true; } | ||
|
|
||
| Result<bool> AlwaysFalse() override { return false; } | ||
|
|
||
| Result<bool> Not(bool child_result) override { return !child_result; } | ||
|
|
||
| Result<bool> And(bool left_result, bool right_result) override { | ||
| return left_result && right_result; | ||
| } | ||
|
|
||
| Result<bool> Or(bool left_result, bool right_result) override { | ||
| return left_result || right_result; | ||
| } | ||
|
|
||
| Result<bool> IsNull(const std::shared_ptr<BoundTerm>& term) override { | ||
| // Evaluate the term on the current row and report whether the result is null. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_)); | ||
| const bool is_null = evaluated.IsNull(); | ||
| return is_null; | ||
| } | ||
|
|
||
| Result<bool> NotNull(const std::shared_ptr<BoundTerm>& term) override { | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not a strong opinion, but we could adopt the same pattern used for Eq/NotEq, where NotXXX calls XXX and returns !result. Ditto for Lt/GtEq and Gt/LtEq. |
||
| ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(*row_)); | ||
| return !value.IsNull(); | ||
| } | ||
|
|
||
| Result<bool> IsNaN(const std::shared_ptr<BoundTerm>& term) override { | ||
| // Evaluate the term on the current row and report whether the result is NaN. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_)); | ||
| const bool is_nan = evaluated.IsNaN(); | ||
| return is_nan; | ||
| } | ||
|
|
||
| Result<bool> NotNaN(const std::shared_ptr<BoundTerm>& term) override { | ||
| // Negation of IsNaN. Delegating keeps the row check and the term evaluation in | ||
| // one place, the same way NotEq, NotIn and NotStartsWith negate their counterparts. | ||
| ICEBERG_ASSIGN_OR_RAISE(auto is_nan, IsNaN(term)); | ||
| return !is_nan; | ||
| } | ||
|
|
||
| Result<bool> Lt(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override { | ||
| // True when the term's value on the current row compares less than `lit`. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_)); | ||
| const bool is_less = lhs < lit; | ||
| return is_less; | ||
| } | ||
|
|
||
| Result<bool> LtEq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override { | ||
| // True when the term's value on the current row compares less than or equal to `lit`. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_)); | ||
| const bool is_less_or_equal = lhs <= lit; | ||
| return is_less_or_equal; | ||
| } | ||
|
|
||
| Result<bool> Gt(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override { | ||
| // True when the term's value on the current row compares greater than `lit`. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_)); | ||
| const bool is_greater = lhs > lit; | ||
| return is_greater; | ||
| } | ||
|
|
||
| Result<bool> GtEq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override { | ||
| // True when the term's value on the current row compares greater than or equal to `lit`. | ||
| // NOTE(review): kept as a direct comparison rather than !Lt; the two may differ if the | ||
| // comparison can be unordered (e.g. null or NaN operands) - confirm before refactoring. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_)); | ||
| const bool is_greater_or_equal = lhs >= lit; | ||
| return is_greater_or_equal; | ||
| } | ||
|
|
||
| Result<bool> Eq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override { | ||
| // True when the term's value on the current row compares equal to `lit`. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_)); | ||
| const bool is_equal = lhs == lit; | ||
| return is_equal; | ||
| } | ||
|
|
||
| Result<bool> NotEq(const std::shared_ptr<BoundTerm>& term, | ||
| const Literal& lit) override { | ||
| // Negation of Eq for the same term and literal. | ||
| ICEBERG_ASSIGN_OR_RAISE(auto is_equal, Eq(term, lit)); | ||
| const bool differs = !is_equal; | ||
| return differs; | ||
| } | ||
|
|
||
| Result<bool> In(const std::shared_ptr<BoundTerm>& term, | ||
| const BoundSetPredicate::LiteralSet& literal_set) override { | ||
| // True when the term's value on the current row is a member of `literal_set`. | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto candidate, term->Evaluate(*row_)); | ||
| const bool is_member = literal_set.contains(candidate); | ||
| return is_member; | ||
| } | ||
|
|
||
| Result<bool> NotIn(const std::shared_ptr<BoundTerm>& term, | ||
| const BoundSetPredicate::LiteralSet& literal_set) override { | ||
| // Negation of In for the same term and literal set. | ||
| ICEBERG_ASSIGN_OR_RAISE(auto is_member, In(term, literal_set)); | ||
| const bool is_absent = !is_member; | ||
| return is_absent; | ||
| } | ||
|
|
||
| Result<bool> StartsWith(const std::shared_ptr<BoundTerm>& term, | ||
| const Literal& lit) override { | ||
| ICEBERG_DCHECK(row_, "Row is not set"); | ||
| ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_)); | ||
|
|
||
| // A prefix match is only attempted when both operands hold a std::string. | ||
| const bool both_strings = std::holds_alternative<std::string>(evaluated.value()) && | ||
| std::holds_alternative<std::string>(lit.value()); | ||
| if (both_strings) { | ||
| const auto& text = std::get<std::string>(evaluated.value()); | ||
| const auto& prefix = std::get<std::string>(lit.value()); | ||
| return text.starts_with(prefix); | ||
| } | ||
|
|
||
| // Any other combination of value types is reported as "does not start with". | ||
| return false; | ||
| } | ||
|
|
||
| Result<bool> NotStartsWith(const std::shared_ptr<BoundTerm>& term, | ||
| const Literal& lit) override { | ||
| // Negation of StartsWith for the same term and literal. | ||
| ICEBERG_ASSIGN_OR_RAISE(auto has_prefix, StartsWith(term, lit)); | ||
| const bool lacks_prefix = !has_prefix; | ||
| return lacks_prefix; | ||
| } | ||
|
|
||
| private: | ||
| const StructLike* row_{nullptr}; | ||
| }; | ||
|
|
||
| Evaluator::Evaluator(std::shared_ptr<Expression> bound_expr) | ||
| : bound_expr_(std::move(bound_expr)), visitor_(std::make_unique<EvalVisitor>()) {} | ||
|
|
||
| Evaluator::~Evaluator() = default; | ||
|
|
||
| Result<std::unique_ptr<Evaluator>> Evaluator::Make(const Schema& schema, | ||
| std::shared_ptr<Expression> unbound, | ||
| bool case_sensitive) { | ||
| // Bind the expression against `schema` first; the evaluator wraps the bound result. | ||
| ICEBERG_ASSIGN_OR_RAISE(auto bound, Binder::Bind(schema, unbound, case_sensitive)); | ||
| // Constructed with `new` rather than std::make_unique. | ||
| return std::unique_ptr<Evaluator>(new Evaluator(std::move(bound))); | ||
| } | ||
|
|
||
| Result<bool> Evaluator::Eval(const StructLike& row) const { | ||
| // Evaluate the bound expression against `row` with a call-local visitor instead of the | ||
| // shared visitor_: this const method then mutates no member state, and no pointer to | ||
| // `row` is retained after the call returns. | ||
| EvalVisitor visitor; | ||
| visitor.UpdateRow(&row); | ||
| return Visit<bool, EvalVisitor>(bound_expr_, visitor); | ||
| } | ||
|
|
||
| } // namespace iceberg | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| #pragma once | ||
|
|
||
| /// \file iceberg/expression/evaluator.h | ||
| /// Evaluator for checking if a data row matches a bound expression. | ||
|
|
||
| #include <memory> | ||
|
|
||
| #include "iceberg/iceberg_export.h" | ||
| #include "iceberg/result.h" | ||
| #include "iceberg/type_fwd.h" | ||
|
|
||
| namespace iceberg { | ||
|
|
||
| /// \brief Evaluates an Expression against data rows. | ||
| /// | ||
| /// This class evaluates a bound expression against StructLike data rows to determine | ||
| /// whether each row matches the expression. Make() binds the unbound expression to a | ||
| /// schema once; the resulting evaluator can then be used to evaluate multiple data rows. | ||
| /// | ||
| /// \note The evaluator is not thread-safe. | ||
| class ICEBERG_EXPORT Evaluator { | ||
| public: | ||
| /// \brief Bind an unbound expression to a schema and make an evaluator for it. | ||
| /// | ||
| /// \param schema The schema to bind against | ||
| /// \param unbound The unbound expression to evaluate | ||
| /// \param case_sensitive Whether field name matching is case-sensitive | ||
| static Result<std::unique_ptr<Evaluator>> Make(const Schema& schema, | ||
| std::shared_ptr<Expression> unbound, | ||
| bool case_sensitive = true); | ||
|
|
||
| ~Evaluator(); | ||
|
|
||
| /// \brief Evaluate the bound expression against a single data row. | ||
| /// | ||
| /// \param row The data row to evaluate | ||
| /// \return true if the row matches the expression, false if it does not, or an error | ||
| Result<bool> Eval(const StructLike& row) const; | ||
|
|
||
| private: | ||
| explicit Evaluator(std::shared_ptr<Expression> bound_expr); | ||
|
|
||
| class EvalVisitor; | ||
| std::shared_ptr<Expression> bound_expr_; | ||
| std::unique_ptr<EvalVisitor> visitor_; | ||
| }; | ||
|
|
||
| } // namespace iceberg |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can create a new object in
`Evaluator::Eval`, like Java's implementation, and change `UpdateRow` to a constructor, so we don't need `ICEBERG_DCHECK(row_, "Row is not set")` in every method.