Skip to content

Commit b54478a

Browse files
committed
feat: implement expression evaluator
1 parent 320a985 commit b54478a

File tree

11 files changed

+1174
-13
lines changed

11 files changed

+1174
-13
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ set(ICEBERG_SOURCES
2121
arrow_c_data_guard_internal.cc
2222
catalog/memory/in_memory_catalog.cc
2323
expression/binder.cc
24+
expression/evaluator.cc
2425
expression/expression.cc
2526
expression/expressions.cc
2627
expression/literal.cc
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/expression/evaluator.h"
21+
22+
#include "iceberg/expression/binder.h"
23+
#include "iceberg/expression/expression_visitor.h"
24+
#include "iceberg/schema.h"
25+
#include "iceberg/util/macros.h"
26+
27+
namespace iceberg {
28+
29+
/// \brief Visitor that reduces a bound expression to a boolean for a single row.
///
/// The row is supplied through UpdateRow(); only a non-owning pointer to it is kept.
/// Every callback that reads the row first asserts (ICEBERG_DCHECK) that it has been
/// set. Failures reported by BoundTerm::Evaluate are handed back to the caller through
/// ICEBERG_ASSIGN_OR_RAISE.
class Evaluator::EvalVisitor : public BoundVisitor<bool> {
 public:
  /// \brief Select the row that subsequent callbacks will evaluate terms against.
  void UpdateRow(const StructLike* row) { row_ = row; }

  // ---- Constants -------------------------------------------------------------

  Result<bool> AlwaysTrue() override { return true; }

  Result<bool> AlwaysFalse() override { return false; }

  // ---- Logical connectives (operands are already-computed child results) ------

  Result<bool> Not(bool child_result) override { return !child_result; }

  Result<bool> And(bool left_result, bool right_result) override {
    return left_result && right_result;
  }

  Result<bool> Or(bool left_result, bool right_result) override {
    return left_result || right_result;
  }

  // ---- Unary predicates -------------------------------------------------------

  /// \brief True when the term evaluates to a null literal for the current row.
  Result<bool> IsNull(const std::shared_ptr<BoundTerm>& term) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
    const bool is_null = evaluated.IsNull();
    return is_null;
  }

  /// \brief True when the term evaluates to a non-null literal for the current row.
  Result<bool> NotNull(const std::shared_ptr<BoundTerm>& term) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
    const bool is_null = evaluated.IsNull();
    return !is_null;
  }

  /// \brief True when the term evaluates to a literal for which IsNaN() holds.
  Result<bool> IsNaN(const std::shared_ptr<BoundTerm>& term) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
    const bool is_nan = evaluated.IsNaN();
    return is_nan;
  }

  /// \brief True when the term evaluates to a literal for which IsNaN() does not hold.
  Result<bool> NotNaN(const std::shared_ptr<BoundTerm>& term) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));
    const bool is_nan = evaluated.IsNaN();
    return !is_nan;
  }

  // ---- Comparisons: the evaluated term is always the left-hand operand --------

  Result<bool> Lt(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_));
    const bool matches = lhs < lit;
    return matches;
  }

  Result<bool> LtEq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_));
    const bool matches = lhs <= lit;
    return matches;
  }

  Result<bool> Gt(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_));
    const bool matches = lhs > lit;
    return matches;
  }

  Result<bool> GtEq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_));
    const bool matches = lhs >= lit;
    return matches;
  }

  Result<bool> Eq(const std::shared_ptr<BoundTerm>& term, const Literal& lit) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto lhs, term->Evaluate(*row_));
    const bool matches = lhs == lit;
    return matches;
  }

  /// \brief Negation of Eq(); the row check happens inside Eq().
  Result<bool> NotEq(const std::shared_ptr<BoundTerm>& term,
                     const Literal& lit) override {
    ICEBERG_ASSIGN_OR_RAISE(auto is_equal, Eq(term, lit));
    return !is_equal;
  }

  // ---- Set membership ---------------------------------------------------------

  /// \brief True when the evaluated term is contained in `literal_set`.
  Result<bool> In(const std::shared_ptr<BoundTerm>& term,
                  const BoundSetPredicate::LiteralSet& literal_set) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto candidate, term->Evaluate(*row_));
    const bool is_member = literal_set.contains(candidate);
    return is_member;
  }

  /// \brief Negation of In(); the row check happens inside In().
  Result<bool> NotIn(const std::shared_ptr<BoundTerm>& term,
                     const BoundSetPredicate::LiteralSet& literal_set) override {
    ICEBERG_ASSIGN_OR_RAISE(auto is_member, In(term, literal_set));
    return !is_member;
  }

  // ---- String prefix matching -------------------------------------------------

  /// \brief True when both the evaluated term and `lit` hold a std::string and the
  /// term's string begins with the literal's string; false in every other case.
  Result<bool> StartsWith(const std::shared_ptr<BoundTerm>& term,
                          const Literal& lit) override {
    ICEBERG_DCHECK(row_, "Row is not set");
    ICEBERG_ASSIGN_OR_RAISE(auto evaluated, term->Evaluate(*row_));

    const auto& subject = evaluated.value();
    const auto& prefix = lit.value();

    // Prefix matching is only attempted when both sides are strings.
    if (std::holds_alternative<std::string>(subject) &&
        std::holds_alternative<std::string>(prefix)) {
      return std::get<std::string>(subject).starts_with(std::get<std::string>(prefix));
    }
    return false;
  }

  /// \brief Negation of StartsWith(); consequently true when either side is not a
  /// string.
  Result<bool> NotStartsWith(const std::shared_ptr<BoundTerm>& term,
                             const Literal& lit) override {
    ICEBERG_ASSIGN_OR_RAISE(auto has_prefix, StartsWith(term, lit));
    return !has_prefix;
  }

 private:
  /// Non-owning pointer to the row being evaluated; null until UpdateRow() is called.
  const StructLike* row_{nullptr};
};
145+
146+
Evaluator::Evaluator(std::shared_ptr<Expression> bound_expr)
147+
: bound_expr_(std::move(bound_expr)), visitor_(std::make_unique<EvalVisitor>()) {}
148+
149+
// Defined in this translation unit, where EvalVisitor is a complete type, because the
// header only forward-declares EvalVisitor and holds it in a std::unique_ptr.
Evaluator::~Evaluator() = default;
150+
151+
/// \brief Bind `unbound` against `schema` and wrap the result in an Evaluator.
///
/// \param schema The schema to bind against
/// \param unbound The unbound expression to evaluate
/// \param case_sensitive Forwarded to Binder::Bind
/// \return The new evaluator, or the error produced by Binder::Bind
Result<std::unique_ptr<Evaluator>> Evaluator::Make(const Schema& schema,
                                                   std::shared_ptr<Expression> unbound,
                                                   bool case_sensitive) {
  ICEBERG_ASSIGN_OR_RAISE(auto bound, Binder::Bind(schema, unbound, case_sensitive));
  // The constructor is private, so the object is created with `new` here rather than
  // through std::make_unique.
  return std::unique_ptr<Evaluator>{new Evaluator(std::move(bound))};
}
157+
158+
/// \brief Evaluate the bound expression against a single data row.
///
/// A visitor local to this call is used instead of re-pointing the shared `visitor_`
/// member. Writing the row pointer into shared state from a `const` method would make
/// concurrent Eval() calls race on that pointer, and would leave a dangling pointer to
/// `row` behind once the call returns. With a per-call visitor this method only reads
/// `bound_expr_`.
///
/// \param row The data row to evaluate; only needs to outlive this call
/// \return true if the row matches the expression, false otherwise, or the error
/// reported while visiting the expression
Result<bool> Evaluator::Eval(const StructLike& row) const {
  EvalVisitor visitor;
  visitor.UpdateRow(&row);
  return Visit<bool, EvalVisitor>(bound_expr_, visitor);
}
162+
163+
} // namespace iceberg

src/iceberg/expression/evaluator.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/expression/evaluator.h
23+
/// Evaluator for checking if a data row matches a bound expression.
24+
25+
#include <memory>
26+
27+
#include "iceberg/iceberg_export.h"
28+
#include "iceberg/result.h"
29+
#include "iceberg/type_fwd.h"
30+
31+
namespace iceberg {
32+
33+
/// \brief Evaluates an Expression against data rows.
34+
///
35+
/// This class evaluates bound expressions against StructLike data rows to determine
36+
/// if the row matches the expression criteria. The evaluator binds unbound expressions
37+
/// to a schema on construction and then can be used to evaluate multiple data rows.
38+
///
39+
/// Thread-safe: Yes, the evaluator is immutable after construction.
40+
class ICEBERG_EXPORT Evaluator {
41+
public:
42+
/// \brief Make an evaluator for an unbound expression.
43+
///
44+
/// \param schema The schema to bind against
45+
/// \param unbound The unbound expression to evaluate
46+
/// \param case_sensitive Whether field name matching is case-sensitive
/// \return The evaluator holding the bound expression, or the error raised while
/// binding `unbound` to `schema`
47+
static Result<std::unique_ptr<Evaluator>> Make(const Schema& schema,
48+
std::shared_ptr<Expression> unbound,
49+
bool case_sensitive = true);
50+
51+
// Declared here and defined out of line because EvalVisitor is only forward-declared
// in this header.
~Evaluator();
52+
53+
/// \brief Evaluate the expression against a data row.
54+
///
55+
/// \param row The data row to evaluate
56+
/// \return true if the row matches the expression, false otherwise, or error
57+
Result<bool> Eval(const StructLike& row) const;
58+
59+
private:
60+
/// Construct from an expression that has already been bound; use Make() instead.
explicit Evaluator(std::shared_ptr<Expression> bound_expr);
61+
62+
/// Visitor implementation; declared here, defined in the implementation file.
class EvalVisitor;
63+
/// The bound expression produced by Make().
std::shared_ptr<Expression> bound_expr_;
64+
/// Visitor instance created by the constructor.
std::unique_ptr<EvalVisitor> visitor_;
65+
};
66+
67+
} // namespace iceberg

src/iceberg/expression/literal.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ bool Literal::IsAboveMax() const { return std::holds_alternative<AboveMax>(value
504504

505505
bool Literal::IsNull() const { return std::holds_alternative<std::monostate>(value_); }
506506

507+
bool Literal::IsNaN() const {
508+
return std::holds_alternative<float>(value_) && std::isnan(std::get<float>(value_)) ||
509+
std::holds_alternative<double>(value_) && std::isnan(std::get<double>(value_));
510+
}
511+
507512
// LiteralCaster implementation
508513

509514
Result<Literal> LiteralCaster::CastTo(const Literal& literal,

src/iceberg/expression/literal.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
154154
/// \return true if this literal is null, false otherwise
155155
bool IsNull() const;
156156

157+
/// Check if this literal holds a floating-point NaN.
158+
/// \return true if this literal holds a float or double value that is NaN, false
/// otherwise (including when it holds a value of any other type)
159+
bool IsNaN() const;
160+
157161
std::string ToString() const override;
158162

159163
private:

src/iceberg/expression/predicate.cc

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ Result<std::unique_ptr<UnboundPredicateImpl<B>>> UnboundPredicateImpl<B>::Make(
5050
if (!term) [[unlikely]] {
5151
return InvalidExpression("UnboundPredicate cannot have null term");
5252
}
53+
if (op == Expression::Operation::kIn || op == Expression::Operation::kNotIn) {
54+
return InvalidExpression("Cannot create {} predicate without a value",
55+
::iceberg::ToString(op));
56+
}
5357
return std::unique_ptr<UnboundPredicateImpl<B>>(
5458
new UnboundPredicateImpl<B>(op, std::move(term)));
5559
}
@@ -71,6 +75,11 @@ Result<std::unique_ptr<UnboundPredicateImpl<B>>> UnboundPredicateImpl<B>::Make(
7175
if (!term) [[unlikely]] {
7276
return InvalidExpression("UnboundPredicate cannot have null term");
7377
}
78+
if (values.empty() &&
79+
(op == Expression::Operation::kIn || op == Expression::Operation::kNotIn)) {
80+
return InvalidExpression("Cannot create {} predicate without a value",
81+
::iceberg::ToString(op));
82+
}
7483
return std::unique_ptr<UnboundPredicateImpl<B>>(
7584
new UnboundPredicateImpl<B>(op, std::move(term), std::move(values)));
7685
}
@@ -183,16 +192,6 @@ bool IsFloatingType(TypeId type) {
183192
return type == TypeId::kFloat || type == TypeId::kDouble;
184193
}
185194

186-
bool IsNan(const Literal& literal) {
187-
const auto& value = literal.value();
188-
if (std::holds_alternative<float>(value)) {
189-
return std::isnan(std::get<float>(value));
190-
} else if (std::holds_alternative<double>(value)) {
191-
return std::isnan(std::get<double>(value));
192-
}
193-
return false;
194-
}
195-
196195
bool StartsWith(const Literal& lhs, const Literal& rhs) {
197196
const auto& lhs_value = lhs.value();
198197
const auto& rhs_value = rhs.value();
@@ -383,9 +382,9 @@ Result<bool> BoundUnaryPredicate::Test(const Literal& literal) const {
383382
case Expression::Operation::kNotNull:
384383
return !literal.IsNull();
385384
case Expression::Operation::kIsNan:
386-
return IsNan(literal);
385+
return literal.IsNaN();
387386
case Expression::Operation::kNotNan:
388-
return !IsNan(literal);
387+
return !literal.IsNaN();
389388
default:
390389
return InvalidExpression("Invalid operation for BoundUnaryPredicate: {}", op());
391390
}

src/iceberg/expression/predicate.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ class ICEBERG_EXPORT UnboundPredicateImpl : public UnboundPredicate,
133133

134134
Result<std::shared_ptr<Expression>> Negate() const override;
135135

136+
/// \brief Returns a non-owning, read-only view over this predicate's `values_`.
std::span<const Literal> literals() const { return values_; }
137+
136138
private:
137139
UnboundPredicateImpl(Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term);
138140
UnboundPredicateImpl(Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,

src/iceberg/expression/term.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ NamedReference::~NamedReference() = default;
5959
Result<std::shared_ptr<BoundReference>> NamedReference::Bind(const Schema& schema,
6060
bool case_sensitive) const {
6161
ICEBERG_ASSIGN_OR_RAISE(auto field_opt,
62-
schema.GetFieldByName(field_name_, case_sensitive));
62+
schema.FindFieldByName(field_name_, case_sensitive));
6363
if (!field_opt.has_value()) [[unlikely]] {
6464
return InvalidExpression("Cannot find field '{}' in struct: {}", field_name_,
6565
schema.ToString());

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ iceberg_sources = files(
4343
'arrow_c_data_guard_internal.cc',
4444
'catalog/memory/in_memory_catalog.cc',
4545
'expression/binder.cc',
46+
'expression/evaluator.cc',
4647
'expression/expression.cc',
4748
'expression/expressions.cc',
4849
'expression/literal.cc',

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ if(ICEBERG_BUILD_BUNDLE)
145145
USE_BUNDLE
146146
SOURCES
147147
eval_expr_test.cc
148+
evaluator_test.cc
148149
test_common.cc)
149150

150151
add_iceberg_test(parquet_test

0 commit comments

Comments
 (0)