Skip to content

Commit b541297

Browse files
committed
feat: add expression visitors
- Add template ExpressionVisitor and BoundVisitor - Implement Binder, IsBoundVisitor and RewriteNot
1 parent abd6b3f commit b541297

17 files changed

+1434
-217
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ set(ICEBERG_INCLUDES "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/src>"
2020
set(ICEBERG_SOURCES
2121
arrow_c_data_guard_internal.cc
2222
catalog/memory/in_memory_catalog.cc
23+
expression/binder.cc
2324
expression/expression.cc
2425
expression/expressions.cc
2526
expression/literal.cc
2627
expression/predicate.cc
28+
expression/rewrite_not.cc
2729
expression/term.cc
2830
file_reader.cc
2931
file_writer.cc

src/iceberg/expression/binder.cc

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/expression/binder.h"
21+
22+
namespace iceberg {
23+
24+
Binder::Binder(const Schema& schema, bool case_sensitive)
25+
: schema_(schema), case_sensitive_(case_sensitive) {}
26+
27+
Result<std::shared_ptr<Expression>> Binder::Bind(const Schema& schema,
28+
const std::shared_ptr<Expression>& expr,
29+
bool case_sensitive) {
30+
Binder binder(schema, case_sensitive);
31+
return Visit<std::shared_ptr<Expression>, Binder>(expr, binder);
32+
}
33+
34+
Result<std::shared_ptr<Expression>> Binder::AlwaysTrue() { return True::Instance(); }
35+
36+
Result<std::shared_ptr<Expression>> Binder::AlwaysFalse() { return False::Instance(); }
37+
38+
Result<std::shared_ptr<Expression>> Binder::Not(
39+
const std::shared_ptr<Expression>& child_result) {
40+
return iceberg::Not::MakeFolded(child_result);
41+
}
42+
43+
Result<std::shared_ptr<Expression>> Binder::And(
44+
const std::shared_ptr<Expression>& left_result,
45+
const std::shared_ptr<Expression>& right_result) {
46+
return iceberg::And::MakeFolded(left_result, right_result);
47+
}
48+
49+
Result<std::shared_ptr<Expression>> Binder::Or(
50+
const std::shared_ptr<Expression>& left_result,
51+
const std::shared_ptr<Expression>& right_result) {
52+
return iceberg::Or::MakeFolded(left_result, right_result);
53+
}
54+
55+
Result<std::shared_ptr<Expression>> Binder::Predicate(
56+
const std::shared_ptr<UnboundPredicate>& pred) {
57+
ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
58+
return pred->Bind(schema_, case_sensitive_);
59+
}
60+
61+
Result<std::shared_ptr<Expression>> Binder::Predicate(
62+
const std::shared_ptr<BoundPredicate>& pred) {
63+
ICEBERG_DCHECK(pred != nullptr, "Predicate cannot be null");
64+
return InvalidExpression("Found already bound predicate: {}", pred->ToString());
65+
}
66+
67+
Result<bool> IsBoundVisitor::IsBound(const std::shared_ptr<Expression>& expr) {
68+
ICEBERG_DCHECK(expr != nullptr, "Expression cannot be null");
69+
IsBoundVisitor visitor;
70+
return Visit<bool, IsBoundVisitor>(expr, visitor);
71+
}
72+
73+
Result<bool> IsBoundVisitor::AlwaysTrue() { return true; }
74+
75+
Result<bool> IsBoundVisitor::AlwaysFalse() { return true; }
76+
77+
Result<bool> IsBoundVisitor::Not(bool child_result) { return child_result; }
78+
79+
Result<bool> IsBoundVisitor::And(bool left_result, bool right_result) {
80+
return left_result && right_result;
81+
}
82+
83+
Result<bool> IsBoundVisitor::Or(bool left_result, bool right_result) {
84+
return left_result && right_result;
85+
}
86+
87+
Result<bool> IsBoundVisitor::Predicate(const std::shared_ptr<BoundPredicate>& pred) {
88+
return true;
89+
}
90+
91+
Result<bool> IsBoundVisitor::Predicate(const std::shared_ptr<UnboundPredicate>& pred) {
92+
return false;
93+
}
94+
95+
} // namespace iceberg

src/iceberg/expression/binder.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/expression/binder.h
23+
/// Bind an expression to a schema.
24+
25+
#include "iceberg/expression/expression_visitor.h"
26+
27+
namespace iceberg {
28+
29+
/// \brief Expression visitor that produces a copy of an expression with its unbound
/// predicates bound to a schema.
///
/// The binder keeps only a reference to the schema, so the schema passed to the
/// constructor must outlive the binder.
class ICEBERG_EXPORT Binder : public ExpressionVisitor<std::shared_ptr<Expression>> {
30+
public:
31+
/// \brief Construct a binder for the given schema.
///
/// \param schema Schema to bind against; held by reference
/// \param case_sensitive Passed through to UnboundPredicate::Bind
Binder(const Schema& schema, bool case_sensitive);
32+
33+
/// \brief Bind an expression to a schema in one call.
///
/// \param schema Schema to bind against
/// \param expr Expression to bind
/// \param case_sensitive Passed through to UnboundPredicate::Bind
/// \return The bound expression, or the error produced while visiting
static Result<std::shared_ptr<Expression>> Bind(const Schema& schema,
34+
const std::shared_ptr<Expression>& expr,
35+
bool case_sensitive);
36+
37+
/// \brief Returns True::Instance().
Result<std::shared_ptr<Expression>> AlwaysTrue() override;
38+
/// \brief Returns False::Instance().
Result<std::shared_ptr<Expression>> AlwaysFalse() override;
39+
/// \brief Rebuilds the negation of the visited child via Not::MakeFolded.
Result<std::shared_ptr<Expression>> Not(
40+
const std::shared_ptr<Expression>& child_result) override;
41+
/// \brief Rebuilds the conjunction of the visited operands via And::MakeFolded.
Result<std::shared_ptr<Expression>> And(
42+
const std::shared_ptr<Expression>& left_result,
43+
const std::shared_ptr<Expression>& right_result) override;
44+
/// \brief Rebuilds the disjunction of the visited operands via Or::MakeFolded.
Result<std::shared_ptr<Expression>> Or(
45+
const std::shared_ptr<Expression>& left_result,
46+
const std::shared_ptr<Expression>& right_result) override;
47+
/// \brief Rejects an already bound predicate with an InvalidExpression error.
Result<std::shared_ptr<Expression>> Predicate(
48+
const std::shared_ptr<BoundPredicate>& pred) override;
49+
/// \brief Binds the predicate using the stored schema and case sensitivity.
Result<std::shared_ptr<Expression>> Predicate(
50+
const std::shared_ptr<UnboundPredicate>& pred) override;
51+
52+
private:
53+
// Non-owning reference: the caller's schema must stay alive while the binder is used.
const Schema& schema_;
54+
const bool case_sensitive_;
55+
};
56+
57+
/// \brief Expression visitor that reports whether an expression is fully bound.
///
/// Constants and bound predicates count as bound, unbound predicates do not, and
/// Not/And/Or are bound only when all of their children are.
class ICEBERG_EXPORT IsBoundVisitor : public ExpressionVisitor<bool> {
58+
public:
59+
/// \brief Check whether every predicate in `expr` is bound.
///
/// \param expr Expression to inspect; must not be null
/// \return true if no unbound predicate was found, false otherwise
static Result<bool> IsBound(const std::shared_ptr<Expression>& expr);
60+
61+
/// \brief Always true: a constant contains no predicate.
Result<bool> AlwaysTrue() override;
62+
/// \brief Always true: a constant contains no predicate.
Result<bool> AlwaysFalse() override;
63+
/// \brief Returns the child's result unchanged.
Result<bool> Not(bool child_result) override;
64+
/// \brief True only when both operands are bound.
Result<bool> And(bool left_result, bool right_result) override;
65+
/// \brief True only when both operands are bound (same rule as And).
Result<bool> Or(bool left_result, bool right_result) override;
66+
/// \brief Always true.
Result<bool> Predicate(const std::shared_ptr<BoundPredicate>& pred) override;
67+
/// \brief Always false.
Result<bool> Predicate(const std::shared_ptr<UnboundPredicate>& pred) override;
68+
};
69+
70+
// TODO(gangwu): add the Java parity `ReferenceVisitor`
71+
72+
} // namespace iceberg

src/iceberg/expression/expression.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,24 @@ Result<std::unique_ptr<Not>> Not::Make(std::shared_ptr<Expression> child) {
120120
return std::unique_ptr<Not>(new Not(std::move(child)));
121121
}
122122

123+
/// \brief Build `not(child)`, simplifying constants and double negation.
///
/// Folding rules, checked in this order:
///   - not(true)   -> false
///   - not(false)  -> true
///   - not(not(x)) -> x
/// Any other child is wrapped through Not::Make.
///
/// \param child The expression to negate
/// \return The folded expression, or the result of Not::Make when `child` is null
Result<std::shared_ptr<Expression>> Not::MakeFolded(std::shared_ptr<Expression> child) {
  // Check for null before dereferencing; delegating keeps the null-child handling in
  // Make, whose contract is to report it as an error.
  if (child == nullptr) {
    return ::iceberg::Not::Make(std::move(child));
  }

  const auto op = child->op();

  if (op == Expression::Operation::kTrue) {
    return False::Instance();
  }

  if (op == Expression::Operation::kFalse) {
    return True::Instance();
  }

  // not(not(x)) = x
  if (op == Expression::Operation::kNot) {
    const auto& inner = internal::checked_cast<const ::iceberg::Not&>(*child);
    return inner.child();
  }

  return ::iceberg::Not::Make(std::move(child));
}
140+
123141
Not::Not(std::shared_ptr<Expression> child) : child_(std::move(child)) {
124142
ICEBERG_DCHECK(child_ != nullptr, "Not expression cannot have null child");
125143
}

src/iceberg/expression/expression.h

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "iceberg/iceberg_export.h"
2929
#include "iceberg/result.h"
3030
#include "iceberg/util/formattable.h"
31+
#include "iceberg/util/macros.h"
3132

3233
namespace iceberg {
3334

@@ -80,6 +81,11 @@ class ICEBERG_EXPORT Expression : public util::Formattable {
8081
}
8182

8283
std::string ToString() const override { return "Expression"; }
84+
85+
/// \brief Kind query; this base implementation returns false.
/// NOTE(review): presumably overridden to return true by unbound predicate types -
/// confirm against the subclasses.
virtual bool is_unbound_predicate() const { return false; }
86+
/// \brief Kind query; this base implementation returns false.
/// NOTE(review): presumably overridden by bound predicate types - confirm.
virtual bool is_bound_predicate() const { return false; }
87+
/// \brief Kind query; this base implementation returns false.
/// NOTE(review): presumably overridden by unbound aggregate types - confirm.
virtual bool is_unbound_aggregate() const { return false; }
88+
/// \brief Kind query; this base implementation returns false.
/// NOTE(review): presumably overridden by bound aggregate types - confirm.
virtual bool is_bound_aggregate() const { return false; }
8389
};
8490

8591
/// \brief An Expression that is always true.
@@ -137,6 +143,45 @@ class ICEBERG_EXPORT And : public Expression {
137143
static Result<std::unique_ptr<And>> Make(std::shared_ptr<Expression> left,
138144
std::shared_ptr<Expression> right);
139145

146+
/// \brief Creates a folded And expression from two sub-expressions.
147+
///
148+
/// \param left The left operand of the AND expression
149+
/// \param right The right operand of the AND expression
150+
/// \param args Additional operands of the AND expression
151+
/// \return A Result containing a shared pointer to the folded And expression, or an
152+
/// error if left or right is nullptr
153+
/// \note A folded And expression is an expression that is equivalent to the original
154+
/// that is equivalent to the original expression, but with the And operation removed.
155+
/// For example, (true and x) = x.
156+
template <typename... Args>
157+
static Result<std::shared_ptr<Expression>> MakeFolded(std::shared_ptr<Expression> left,
158+
std::shared_ptr<Expression> right,
159+
Args&&... args)
160+
requires std::conjunction_v<std::is_same<Args, std::shared_ptr<Expression>>...>
161+
{
162+
if constexpr (sizeof...(args) == 0) {
163+
if (left->op() == Expression::Operation::kFalse ||
164+
right->op() == Expression::Operation::kFalse) {
165+
return False::Instance();
166+
}
167+
168+
if (left->op() == Expression::Operation::kTrue) {
169+
return right;
170+
}
171+
172+
if (right->op() == Expression::Operation::kTrue) {
173+
return left;
174+
}
175+
176+
return And::Make(std::move(left), std::move(right));
177+
} else {
178+
ICEBERG_ASSIGN_OR_THROW(auto and_expr,
179+
And::Make(std::move(left), std::move(right)));
180+
181+
return And::MakeFolded(std::move(and_expr), std::forward<Args>(args)...);
182+
}
183+
}
184+
140185
/// \brief Returns the left operand of the AND expression.
141186
///
142187
/// \return The left operand of the AND expression
@@ -175,6 +220,44 @@ class ICEBERG_EXPORT Or : public Expression {
175220
static Result<std::unique_ptr<Or>> Make(std::shared_ptr<Expression> left,
176221
std::shared_ptr<Expression> right);
177222

223+
/// \brief Creates a folded Or expression from two sub-expressions.
224+
///
225+
/// \param left The left operand of the OR expression
226+
/// \param right The right operand of the OR expression
227+
/// \param args Additional operands of the OR expression
228+
/// \return A Result containing a shared pointer to the folded Or expression, or an
229+
/// error if left or right is nullptr
230+
/// \note A folded Or expression is an expression that is equivalent to the original
231+
/// that is equivalent to the original expression, but with the Or operation removed.
232+
/// For example, (false or x) = x.
233+
template <typename... Args>
234+
static Result<std::shared_ptr<Expression>> MakeFolded(std::shared_ptr<Expression> left,
235+
std::shared_ptr<Expression> right,
236+
Args&&... args)
237+
requires std::conjunction_v<std::is_same<Args, std::shared_ptr<Expression>>...>
238+
{
239+
if constexpr (sizeof...(args) == 0) {
240+
if (left->op() == Expression::Operation::kTrue ||
241+
right->op() == Expression::Operation::kTrue) {
242+
return True::Instance();
243+
}
244+
245+
if (left->op() == Expression::Operation::kFalse) {
246+
return right;
247+
}
248+
249+
if (right->op() == Expression::Operation::kFalse) {
250+
return left;
251+
}
252+
253+
return Or::Make(std::move(left), std::move(right));
254+
} else {
255+
ICEBERG_ASSIGN_OR_THROW(auto or_expr, Or::Make(std::move(left), std::move(right)));
256+
257+
return Or::MakeFolded(std::move(or_expr), std::forward<Args>(args)...);
258+
}
259+
}
260+
178261
/// \brief Returns the left operand of the OR expression.
179262
///
180263
/// \return The left operand of the OR expression
@@ -211,6 +294,16 @@ class ICEBERG_EXPORT Not : public Expression {
211294
/// \return A Result containing a unique pointer to Not, or an error if child is nullptr
212295
static Result<std::unique_ptr<Not>> Make(std::shared_ptr<Expression> child);
213296

297+
/// \brief Creates a folded Not expression from a child expression.
298+
///
299+
/// \param child The expression to negate
300+
/// \return A Result containing a shared pointer to the folded Not expression, or an
301+
/// error if child is nullptr
/// \note A folded Not expression is an expression that is
302+
/// equivalent to the original expression, but with the Not operation removed. For
303+
/// example, not(not(x)) = x.
304+
static Result<std::shared_ptr<Expression>> MakeFolded(
305+
std::shared_ptr<Expression> child);
306+
214307
/// \brief Returns the child expression.
215308
///
216309
/// \return The child expression being negated

0 commit comments

Comments
 (0)