Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ set(ICEBERG_SOURCES
arrow_c_data_guard_internal.cc
catalog/memory/in_memory_catalog.cc
expression/binder.cc
expression/evaluator.cc
expression/expression.cc
expression/expressions.cc
expression/literal.cc
Expand Down
163 changes: 163 additions & 0 deletions src/iceberg/expression/evaluator.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/expression/evaluator.h"

#include "iceberg/expression/binder.h"
#include "iceberg/expression/expression_visitor.h"
#include "iceberg/schema.h"
#include "iceberg/util/macros.h"

namespace iceberg {

class Evaluator::EvalVisitor : public BoundVisitor<bool> {
public:
void UpdateRow(const StructLike* row) { row_ = row; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can create a new visitor object in Evaluator::Eval, like the Java implementation does, and turn UpdateRow into a constructor. Then we don't need ICEBERG_DCHECK(row_, "Row is not set") in every method.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: rename to SetRow? This method doesn't update anything.


// Constant-true expression: matches regardless of the row.
Result<bool> AlwaysTrue() override {
  return true;
}

// Constant-false expression: never matches, regardless of the row.
Result<bool> AlwaysFalse() override {
  return false;
}

// Logical negation of an already-evaluated child result.
Result<bool> Not(bool child_result) override {
  const bool negated = !child_result;
  return negated;
}

// Logical conjunction of two already-evaluated child results.
Result<bool> And(bool left_result, bool right_result) override {
  if (!left_result) {
    return false;
  }
  return right_result;
}

// Logical disjunction of two already-evaluated child results.
Result<bool> Or(bool left_result, bool right_result) override {
  if (left_result) {
    return true;
  }
  return right_result;
}

// Evaluates `term` against the current row and reports whether the resulting
// literal is null. A failure from term->Evaluate() is returned to the caller.
Result<bool> IsNull(const std::shared_ptr<BoundTerm>& term) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_null = evaluated.IsNull();
  return is_null;
}

// Evaluates `term` against the current row and reports whether the resulting
// literal is non-null. A failure from term->Evaluate() is returned to the caller.
Result<bool> NotNull(const std::shared_ptr<BoundTerm>& term) override {
ICEBERG_DCHECK(row_, "Row is not set");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a strong opinion, we could adopt the same pattern used for Eq/NotEq, where NotXXX calls XXX and returns !result. Ditto for Lt/GtEq and Gt/LtEq.

ICEBERG_ASSIGN_OR_RAISE(auto value, term->Evaluate(*row_));
return !value.IsNull();
}

// Evaluates `term` against the current row and reports whether the resulting
// literal is NaN. A failure from term->Evaluate() is returned to the caller.
Result<bool> IsNaN(const std::shared_ptr<BoundTerm>& term) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_nan = evaluated.IsNaN();
  return is_nan;
}

// Evaluates `term` against the current row and reports whether the resulting
// literal is not NaN. A failure from term->Evaluate() is returned to the caller.
Result<bool> NotNaN(const std::shared_ptr<BoundTerm>& term) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_nan = evaluated.IsNaN();
  return !is_nan;
}

// True when the value of `term` for the current row compares less than `lit`.
// A failure from term->Evaluate() is returned to the caller.
Result<bool> Lt(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_less = (evaluated < lit);
  return is_less;
}

// True when the value of `term` for the current row compares less than or
// equal to `lit`. A failure from term->Evaluate() is returned to the caller.
Result<bool> LtEq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_less_or_equal = (evaluated <= lit);
  return is_less_or_equal;
}

// True when the value of `term` for the current row compares greater than
// `lit`. A failure from term->Evaluate() is returned to the caller.
Result<bool> Gt(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_greater = (evaluated > lit);
  return is_greater;
}

// True when the value of `term` for the current row compares greater than or
// equal to `lit`. A failure from term->Evaluate() is returned to the caller.
Result<bool> GtEq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_greater_or_equal = (evaluated >= lit);
  return is_greater_or_equal;
}

// True when the value of `term` for the current row compares equal to `lit`.
// A failure from term->Evaluate() is returned to the caller.
Result<bool> Eq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
  const bool is_equal = (evaluated == lit);
  return is_equal;
}

// Negation of Eq(): delegates to Eq() (which performs the row check) and
// inverts its result. An error from Eq() is returned unchanged.
Result<bool> NotEq(const std::shared_ptr<BoundTerm>& term,
                   const Literal& lit) override {
  ICEBERG_ASSIGN_OR_RAISE(auto is_equal, Eq(term, lit));
  const bool differs = !is_equal;
  return differs;
}

// True when the value of `term` for the current row is contained in
// `literal_set`. A failure from term->Evaluate() is returned to the caller.
Result<bool> In(const std::shared_ptr<BoundTerm>& term,
                const BoundSetPredicate::LiteralSet& literal_set) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto candidate, term->Evaluate(*row_));
  const bool is_member = literal_set.contains(candidate);
  return is_member;
}

// Negation of In(): delegates to In() (which performs the row check) and
// inverts its result. An error from In() is returned unchanged.
Result<bool> NotIn(const std::shared_ptr<BoundTerm>& term,
                   const BoundSetPredicate::LiteralSet& literal_set) override {
  ICEBERG_ASSIGN_OR_RAISE(auto is_member, In(term, literal_set));
  const bool is_absent = !is_member;
  return is_absent;
}

// True when both the value of `term` for the current row and `lit` hold a
// std::string and the former begins with the latter. If either side does not
// hold a std::string the result is false. A failure from term->Evaluate() is
// returned to the caller.
Result<bool> StartsWith(const std::shared_ptr<BoundTerm>& term,
                        const Literal& lit) override {
  ICEBERG_DCHECK(row_, "Row is not set");
  ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));

  // get_if yields nullptr when the variant holds a different alternative, which
  // folds the type check and the access into one step.
  const auto& row_variant = evaluated.value();
  const auto& prefix_variant = lit.value();
  const auto* text = std::get_if<std::string>(&row_variant);
  const auto* prefix = std::get_if<std::string>(&prefix_variant);
  if (text == nullptr || prefix == nullptr) {
    return false;
  }

  return text->starts_with(*prefix);
}

// Negation of StartsWith(): delegates to StartsWith() (which performs the row
// check) and inverts its result. An error from StartsWith() is returned unchanged.
Result<bool> NotStartsWith(const std::shared_ptr<BoundTerm>& term,
                           const Literal& lit) override {
  ICEBERG_ASSIGN_OR_RAISE(auto has_prefix, StartsWith(term, lit));
  const bool lacks_prefix = !has_prefix;
  return lacks_prefix;
}

private:
const StructLike* row_{nullptr};
};

Evaluator::Evaluator(std::shared_ptr<Expression> bound_expr)
: bound_expr_(std::move(bound_expr)), visitor_(std::make_unique<EvalVisitor>()) {}

// Defined out of line in this translation unit, where EvalVisitor is a complete
// type, so that the std::unique_ptr<EvalVisitor> member can be destroyed.
Evaluator::~Evaluator() = default;

// Binds `unbound` against `schema` and wraps the bound expression in a new
// Evaluator. A binding failure from Binder::Bind is returned to the caller.
Result<std::unique_ptr<Evaluator>> Evaluator::Make(const Schema& schema,
                                                   std::shared_ptr<Expression> unbound,
                                                   bool case_sensitive) {
  ICEBERG_ASSIGN_OR_RAISE(auto bound, Binder::Bind(schema, unbound, case_sensitive));
  // The constructor is private, so std::make_unique cannot be used here.
  return std::unique_ptr<Evaluator>(new Evaluator(std::move(bound)));
}

// Points the reusable visitor at `row`, then walks the bound expression with
// it. The visitor keeps a pointer to `row`, so this const method still mutates
// state reachable through visitor_.
Result<bool> Evaluator::Eval(const StructLike& row) const {
  EvalVisitor& visitor = *visitor_;
  visitor.UpdateRow(&row);
  return Visit<bool, EvalVisitor>(bound_expr_, visitor);
}

} // namespace iceberg
67 changes: 67 additions & 0 deletions src/iceberg/expression/evaluator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/expression/evaluator.h
/// Evaluator for checking if a data row matches a bound expression.

#include <memory>

#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/type_fwd.h"

namespace iceberg {

/// \brief Evaluates an Expression against data rows.
///
/// This class evaluates bound expressions against StructLike data rows to determine
/// if the row matches the expression criteria. The evaluator binds the unbound
/// expression to a schema when it is created via Make() and can then be used to
/// evaluate multiple data rows.
///
/// \note The evaluator is not thread-safe: Eval() is const, but each call stores the
/// row being evaluated in the internal visitor.
class ICEBERG_EXPORT Evaluator {
public:
/// \brief Make an evaluator for an unbound expression.
///
/// \param schema The schema to bind against
/// \param unbound The unbound expression to evaluate
/// \param case_sensitive Whether field name matching is case-sensitive
/// \return A new evaluator holding the bound expression, or the binding error
static Result<std::unique_ptr<Evaluator>> Make(const Schema& schema,
std::shared_ptr<Expression> unbound,
bool case_sensitive = true);

/// Declared here and defined in the source file, where EvalVisitor is complete.
~Evaluator();

/// \brief Evaluate the expression against a data row.
///
/// \param row The data row to evaluate
/// \return true if the row matches the expression, false otherwise, or error
Result<bool> Eval(const StructLike& row) const;

private:
/// Private: instances are created through Make(), which performs the binding.
explicit Evaluator(std::shared_ptr<Expression> bound_expr);

/// Visitor implementing the per-predicate evaluation; defined in the source file.
class EvalVisitor;
/// The expression after binding to the schema.
std::shared_ptr<Expression> bound_expr_;
/// Visitor instance reused by every Eval() call.
std::unique_ptr<EvalVisitor> visitor_;
};

} // namespace iceberg
5 changes: 5 additions & 0 deletions src/iceberg/expression/literal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,11 @@ bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value

bool Literal::IsNull() const { return std::holds_alternative<std::monostate>(value_); }

// Reports whether this literal holds a float or double whose value is NaN.
// Any other held alternative yields false.
bool Literal::IsNaN() const {
  if (std::holds_alternative<float>(value_)) {
    return std::isnan(std::get<float>(value_));
  }
  if (std::holds_alternative<double>(value_)) {
    return std::isnan(std::get<double>(value_));
  }
  return false;
}

// LiteralCaster implementation

Result<Literal> LiteralCaster::CastTo(const Literal& literal,
Expand Down
4 changes: 4 additions & 0 deletions src/iceberg/expression/literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
/// \return true if this literal is null, false otherwise
bool IsNull() const;

/// Check if this literal is NaN.
/// \return true if this literal holds a float or double value that is NaN, false
/// otherwise (including when it holds any other kind of value)
bool IsNaN() const;

std::string ToString() const override;

private:
Expand Down
23 changes: 11 additions & 12 deletions src/iceberg/expression/predicate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ Result<std::unique_ptr<UnboundPredicateImpl<B>>> UnboundPredicateImpl<B>::Make(
if (!term) [[unlikely]] {
return InvalidExpression("UnboundPredicate cannot have null term");
}
if (op == Expression::Operation::kIn || op == Expression::Operation::kNotIn) {
return InvalidExpression("Cannot create {} predicate without a value",
::iceberg::ToString(op));
}
return std::unique_ptr<UnboundPredicateImpl<B>>(
new UnboundPredicateImpl<B>(op, std::move(term)));
}
Expand All @@ -71,6 +75,11 @@ Result<std::unique_ptr<UnboundPredicateImpl<B>>> UnboundPredicateImpl<B>::Make(
if (!term) [[unlikely]] {
return InvalidExpression("UnboundPredicate cannot have null term");
}
if (values.empty() &&
(op == Expression::Operation::kIn || op == Expression::Operation::kNotIn)) {
return InvalidExpression("Cannot create {} predicate without a value",
::iceberg::ToString(op));
}
return std::unique_ptr<UnboundPredicateImpl<B>>(
new UnboundPredicateImpl<B>(op, std::move(term), std::move(values)));
}
Expand Down Expand Up @@ -183,16 +192,6 @@ bool IsFloatingType(TypeId type) {
return type == TypeId::kFloat || type == TypeId::kDouble;
}

// Reports whether `literal` holds a float or double whose value is NaN.
// Any other held alternative yields false.
bool IsNan(const Literal& literal) {
  const auto& held = literal.value();
  if (const auto* as_float = std::get_if<float>(&held)) {
    return std::isnan(*as_float);
  }
  if (const auto* as_double = std::get_if<double>(&held)) {
    return std::isnan(*as_double);
  }
  return false;
}

bool StartsWith(const Literal& lhs, const Literal& rhs) {
const auto& lhs_value = lhs.value();
const auto& rhs_value = rhs.value();
Expand Down Expand Up @@ -383,9 +382,9 @@ Result<bool> BoundUnaryPredicate::Test(const Literal& literal) const {
case Expression::Operation::kNotNull:
return !literal.IsNull();
case Expression::Operation::kIsNan:
return IsNan(literal);
return literal.IsNaN();
case Expression::Operation::kNotNan:
return !IsNan(literal);
return !literal.IsNaN();
default:
return InvalidExpression("Invalid operation for BoundUnaryPredicate: {}", op());
}
Expand Down
2 changes: 2 additions & 0 deletions src/iceberg/expression/predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ class ICEBERG_EXPORT UnboundPredicateImpl : public UnboundPredicate,

Result<std::shared_ptr<Expression>> Negate() const override;

/// Read-only view over the literal values stored in this predicate.
std::span<const Literal> literals() const {
  return std::span<const Literal>(values_);
}

private:
UnboundPredicateImpl(Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term);
UnboundPredicateImpl(Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
Expand Down
2 changes: 1 addition & 1 deletion src/iceberg/expression/term.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ NamedReference::~NamedReference() = default;
Result<std::shared_ptr<BoundReference>> NamedReference::Bind(const Schema& schema,
bool case_sensitive) const {
ICEBERG_ASSIGN_OR_RAISE(auto field_opt,
schema.GetFieldByName(field_name_, case_sensitive));
schema.FindFieldByName(field_name_, case_sensitive));
if (!field_opt.has_value()) [[unlikely]] {
return InvalidExpression("Cannot find field '{}' in struct: {}", field_name_,
schema.ToString());
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ iceberg_sources = files(
'arrow_c_data_guard_internal.cc',
'catalog/memory/in_memory_catalog.cc',
'expression/binder.cc',
'expression/evaluator.cc',
'expression/expression.cc',
'expression/expressions.cc',
'expression/literal.cc',
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ if(ICEBERG_BUILD_BUNDLE)
USE_BUNDLE
SOURCES
eval_expr_test.cc
evaluator_test.cc
test_common.cc)

add_iceberg_test(parquet_test
Expand Down
Loading
Loading