Skip to content

Commit 8c33130

Browse files
committed
feat: add eval support to bound term
- add struct like accessor - support schema to find accessor by field id - bound term can evaluate struct-like
1 parent acd62e3 commit 8c33130

File tree

12 files changed

+563
-13
lines changed

12 files changed

+563
-13
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ set(ICEBERG_SOURCES
4141
partition_spec.cc
4242
row/arrow_array_wrapper.cc
4343
row/manifest_wrapper.cc
44+
row/struct_like.cc
4445
schema.cc
4546
schema_field.cc
4647
schema_internal.cc

src/iceberg/expression/term.cc

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
#include <format>
2323

24-
#include "iceberg/exception.h"
2524
#include "iceberg/result.h"
25+
#include "iceberg/row/struct_like.h"
2626
#include "iceberg/schema.h"
2727
#include "iceberg/transform.h"
2828
#include "iceberg/util/checked_cast.h"
@@ -64,25 +64,37 @@ Result<std::shared_ptr<BoundReference>> NamedReference::Bind(const Schema& schem
6464
return InvalidExpression("Cannot find field '{}' in struct: {}", field_name_,
6565
schema.ToString());
6666
}
67-
return BoundReference::Make(field_opt.value().get());
67+
68+
int32_t field_id = field_opt.value().get().field_id();
69+
ICEBERG_ASSIGN_OR_RAISE(auto accessor, schema.GetAccessorById(field_id));
70+
71+
return BoundReference::Make(field_opt.value().get(), std::move(accessor));
6872
}
6973

7074
/// \brief Render this unbound reference as `ref(name="<field name>")`.
std::string NamedReference::ToString() const {
  std::string rendered = std::format("ref(name=\"{}\")", field_name_);
  return rendered;
}
7377

7478
// BoundReference implementation
75-
/// \brief Validating factory for BoundReference.
///
/// \param field The schema field the reference is bound to.
/// \param accessor Accessor used to read the field's value; must not be null.
/// \return The new reference, or an InvalidExpression error when the field fails
/// validation or no accessor is supplied.
Result<std::unique_ptr<BoundReference>> BoundReference::Make(
    SchemaField field, std::unique_ptr<StructLikeAccessor> accessor) {
  auto validation = field.Validate();
  if (!validation.has_value()) [[unlikely]] {
    return InvalidExpression("Cannot create BoundReference with invalid field: {}",
                             validation.error().message);
  }

  if (accessor == nullptr) [[unlikely]] {
    return InvalidExpression("Cannot create BoundReference without accessor");
  }

  // The constructor is private, so std::make_unique cannot be used here.
  auto* reference = new BoundReference(std::move(field), std::move(accessor));
  return std::unique_ptr<BoundReference>(reference);
}
8291

83-
// Private constructor used by Make(): takes ownership of the field description and
// of the accessor that Evaluate() reads through.
BoundReference::BoundReference(SchemaField field,
                               std::unique_ptr<StructLikeAccessor> accessor)
    : field_(std::move(field)), accessor_(std::move(accessor)) {
  // Sanity checks mirroring the validation already performed by Make().
  ICEBERG_DCHECK(field_.Validate().has_value(),
                 "Cannot create BoundReference with invalid field");
  ICEBERG_DCHECK(accessor_ != nullptr, "Cannot create BoundReference without accessor");
}
8799

88100
BoundReference::~BoundReference() = default;
@@ -92,7 +104,7 @@ std::string BoundReference::ToString() const {
92104
}
93105

94106
/// \brief Read the referenced field out of `data` as a Literal.
///
/// Delegates to the accessor supplied when this reference was created; any error
/// it reports is returned unchanged.
Result<Literal> BoundReference::Evaluate(const StructLike& data) const {
  const StructLikeAccessor& accessor = *accessor_;
  return accessor.GetLiteral(data);
}
97109

98110
bool BoundReference::Equals(const BoundTerm& other) const {
@@ -167,14 +179,14 @@ std::string BoundTransform::ToString() const {
167179
}
168180

169181
/// \brief Evaluate the underlying reference against `data`, then apply the
/// transform function to the resulting literal.
///
/// An error from evaluating the reference is propagated without invoking the
/// transform.
Result<Literal> BoundTransform::Evaluate(const StructLike& data) const {
  ICEBERG_ASSIGN_OR_RAISE(auto source_value, ref_->Evaluate(data));
  return transform_func_->Transform(source_value);
}
172185

173186
bool BoundTransform::MayProduceNull() const {
174187
// transforms must produce null for null input values
175188
// transforms may produce null for non-null inputs when not order-preserving
176-
// FIXME: add Transform::is_order_preserving()
177-
return ref_->MayProduceNull(); // || !transform_->is_order_preserving();
189+
return ref_->MayProduceNull() || !transform_->PreservesOrder();
178190
}
179191

180192
std::shared_ptr<Type> BoundTransform::type() const {

src/iceberg/expression/term.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,8 @@ class ICEBERG_EXPORT BoundReference
163163
/// \brief Create a bound reference.
164164
///
165165
/// \param field The schema field
166-
static Result<std::unique_ptr<BoundReference>> Make(SchemaField field);
166+
static Result<std::unique_ptr<BoundReference>> Make(
167+
SchemaField field, std::unique_ptr<StructLikeAccessor> accessor);
167168

168169
~BoundReference() override;
169170

@@ -186,9 +187,10 @@ class ICEBERG_EXPORT BoundReference
186187
Kind kind() const override { return Kind::kReference; }
187188

188189
private:
189-
explicit BoundReference(SchemaField field);
190+
BoundReference(SchemaField field, std::unique_ptr<StructLikeAccessor> accessor);
190191

191192
SchemaField field_;
193+
std::unique_ptr<StructLikeAccessor> accessor_;
192194
};
193195

194196
/// \brief An unbound transform expression.

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ iceberg_sources = files(
6363
'partition_spec.cc',
6464
'row/arrow_array_wrapper.cc',
6565
'row/manifest_wrapper.cc',
66+
'row/struct_like.cc',
6667
'schema.cc',
6768
'schema_field.cc',
6869
'schema_internal.cc',

src/iceberg/row/struct_like.cc

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/row/struct_like.h"
21+
22+
#include <utility>
23+
24+
#include "iceberg/result.h"
25+
#include "iceberg/util/checked_cast.h"
26+
#include "iceberg/util/formatter_internal.h"
27+
#include "iceberg/util/macros.h"
28+
29+
namespace iceberg {
30+
31+
/// \brief Build an accessor that reads the value found by following
/// `position_path` through nested struct-like values.
///
/// Each entry of `position_path` is a field position passed to
/// StructLike::GetField(); every entry except the last must resolve to a nested
/// StructLike, otherwise the accessor reports an InvalidSchema error when invoked.
/// An empty path yields an accessor that always fails.
///
/// \param type Type of the value the accessor reads.
/// \param position_path Field positions from the outermost struct to the target
/// value. The positions are copied, so the caller's storage does not need to
/// outlive the accessor.
StructLikeAccessor::StructLikeAccessor(std::shared_ptr<Type> type,
                                       std::span<const size_t> position_path)
    : type_(std::move(type)) {
  if (position_path.size() == 1) {
    // Fast path: direct field of the top-level struct.
    accessor_ = [pos =
                     position_path[0]](const StructLike& struct_like) -> Result<Scalar> {
      return struct_like.GetField(pos);
    };
  } else if (position_path.size() == 2) {
    // Fast path: field of a struct nested one level deep.
    accessor_ = [pos0 = position_path[0], pos1 = position_path[1]](
                    const StructLike& struct_like) -> Result<Scalar> {
      ICEBERG_ASSIGN_OR_RAISE(auto first_level_field, struct_like.GetField(pos0));
      if (!std::holds_alternative<std::shared_ptr<StructLike>>(first_level_field)) {
        return InvalidSchema("Encountered non-struct in the position path [{},{}]", pos0,
                             pos1);
      }
      return std::get<std::shared_ptr<StructLike>>(first_level_field)->GetField(pos1);
    };
  } else if (!position_path.empty()) {
    // std::span is a non-owning view, while the callable stored in accessor_ lives
    // as long as this object. Copy the positions into storage owned by the closure
    // so it never refers to memory that the caller has already released.
    accessor_ = [owned_path = std::vector<size_t>(position_path.begin(),
                                                  position_path.end())](
                    const StructLike& struct_like) -> Result<Scalar> {
      const std::span<const size_t> path(owned_path);
      // Keep every intermediate struct alive while descending through the path.
      std::vector<std::shared_ptr<StructLike>> backups;
      const StructLike* current_struct_like = &struct_like;
      for (size_t i = 0; i < path.size() - 1; ++i) {
        ICEBERG_ASSIGN_OR_RAISE(auto field, current_struct_like->GetField(path[i]));
        if (!std::holds_alternative<std::shared_ptr<StructLike>>(field)) {
          return InvalidSchema("Encountered non-struct in the position path [{}]", path);
        }
        backups.push_back(std::get<std::shared_ptr<StructLike>>(field));
        current_struct_like = backups.back().get();
      }
      return current_struct_like->GetField(path.back());
    };
  } else {
    accessor_ = [](const StructLike&) -> Result<Scalar> {
      return Invalid("Cannot read StructLike with empty position path");
    };
  }
}
71+
72+
/// \brief Read the bound value from `struct_like` and convert it to a Literal.
///
/// \param struct_like The row to read from.
/// \return A typed null literal when the scalar is std::monostate, otherwise a
/// literal of the accessor's type. Returns NotSupported for non-primitive types
/// and for primitive types without a conversion (currently UUID), and
/// InvalidSchema when the scalar does not hold the representation expected for
/// the accessor's type. Errors from reading the scalar are propagated.
Result<Literal> StructLikeAccessor::GetLiteral(const StructLike& struct_like) const {
  if (!type_->is_primitive()) {
    return NotSupported("Cannot get literal value for non-primitive type {}",
                        type_->ToString());
  }

  ICEBERG_ASSIGN_OR_RAISE(auto scalar, Get(struct_like));

  if (std::holds_alternative<std::monostate>(scalar)) {
    return Literal::Null(internal::checked_pointer_cast<PrimitiveType>(type_));
  }

  // std::get_if is used instead of std::get so that a scalar whose alternative
  // does not match the declared type is reported through Result rather than by
  // throwing std::bad_variant_access. A mismatching case breaks out of the switch
  // and reaches the error return at the end of the function.
  switch (type_->type_id()) {
    case TypeId::kBoolean:
      if (const auto* value = std::get_if<bool>(&scalar)) {
        return Literal::Boolean(*value);
      }
      break;
    case TypeId::kInt:
      if (const auto* value = std::get_if<int32_t>(&scalar)) {
        return Literal::Int(*value);
      }
      break;
    case TypeId::kLong:
      if (const auto* value = std::get_if<int64_t>(&scalar)) {
        return Literal::Long(*value);
      }
      break;
    case TypeId::kFloat:
      if (const auto* value = std::get_if<float>(&scalar)) {
        return Literal::Float(*value);
      }
      break;
    case TypeId::kDouble:
      if (const auto* value = std::get_if<double>(&scalar)) {
        return Literal::Double(*value);
      }
      break;
    case TypeId::kString:
      if (const auto* value = std::get_if<std::string_view>(&scalar)) {
        return Literal::String(std::string(*value));
      }
      break;
    case TypeId::kBinary:
      if (const auto* value = std::get_if<std::string_view>(&scalar)) {
        return Literal::Binary(std::vector<uint8_t>(value->cbegin(), value->cend()));
      }
      break;
    case TypeId::kDecimal:
      if (const auto* value = std::get_if<Decimal>(&scalar)) {
        const auto& decimal_type = internal::checked_cast<const DecimalType&>(*type_);
        return Literal::Decimal(value->value(), decimal_type.precision(),
                                decimal_type.scale());
      }
      break;
    case TypeId::kDate:
      if (const auto* value = std::get_if<int32_t>(&scalar)) {
        return Literal::Date(*value);
      }
      break;
    case TypeId::kTime:
      if (const auto* value = std::get_if<int64_t>(&scalar)) {
        return Literal::Time(*value);
      }
      break;
    case TypeId::kTimestamp:
      if (const auto* value = std::get_if<int64_t>(&scalar)) {
        return Literal::Timestamp(*value);
      }
      break;
    case TypeId::kTimestampTz:
      if (const auto* value = std::get_if<int64_t>(&scalar)) {
        return Literal::TimestampTz(*value);
      }
      break;
    case TypeId::kFixed:
      if (const auto* value = std::get_if<std::string_view>(&scalar)) {
        return Literal::Fixed(std::vector<uint8_t>(value->cbegin(), value->cend()));
      }
      break;
    case TypeId::kUuid:
      // TODO(gangwu): Implement UUID type
    default:
      return NotSupported("Cannot convert scalar to literal of type {}",
                          type_->ToString());
  }

  return InvalidSchema("Scalar value does not match the expected type {}",
                       type_->ToString());
}
128+
129+
} // namespace iceberg

src/iceberg/row/struct_like.h

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@
2626
/// ManifestEntry. Note that they do not carry type information and should be
2727
/// used in conjunction with the schema to get the type information.
2828

29+
#include <functional>
2930
#include <memory>
31+
#include <span>
3032
#include <string_view>
3133
#include <variant>
32-
#include <vector>
3334

35+
#include "iceberg/expression/literal.h"
3436
#include "iceberg/result.h"
3537
#include "iceberg/type_fwd.h"
3638
#include "iceberg/util/decimal.h"
@@ -96,4 +98,29 @@ class ICEBERG_EXPORT MapLike {
9698
virtual size_t size() const = 0;
9799
};
98100

101+
/// \brief An accessor for a struct-like object.
102+
class ICEBERG_EXPORT StructLikeAccessor {
103+
public:
104+
explicit StructLikeAccessor(std::shared_ptr<Type> type,
105+
std::span<const size_t> position_path);
106+
107+
/// \brief Get the scalar value at the given position.
108+
Result<Scalar> Get(const StructLike& struct_like) const {
109+
return accessor_(struct_like);
110+
}
111+
112+
/// \brief Get the literal value at the given position.
113+
///
114+
/// \return The literal value at the given position, or an error if it is
115+
/// not a primitive type.
116+
Result<Literal> GetLiteral(const StructLike& struct_like) const;
117+
118+
/// \brief Get the type of the value that this accessor is bound to.
119+
const Type& type() const { return *type_; }
120+
121+
private:
122+
std::shared_ptr<Type> type_;
123+
std::function<Result<Scalar>(const StructLike&)> accessor_;
124+
};
125+
99126
} // namespace iceberg

src/iceberg/schema.cc

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#include <format>
2323
#include <functional>
2424

25+
#include "iceberg/result.h"
26+
#include "iceberg/row/struct_like.h"
2527
#include "iceberg/schema_internal.h"
2628
#include "iceberg/type.h"
2729
#include "iceberg/util/formatter.h" // IWYU pragma: keep
@@ -69,6 +71,48 @@ class NameToIdVisitor {
6971
std::function<std::string(std::string_view)> quoting_func_;
7072
};
7173

74+
class PositionPathVisitor {
75+
public:
76+
Status Visit(const PrimitiveType& type) {
77+
if (current_field_id_ == kUnassignedFieldId) {
78+
return InvalidSchema("Current field id is not assigned, type: {}", type.ToString());
79+
}
80+
81+
if (auto ret = position_path_.try_emplace(current_field_id_, current_path_);
82+
!ret.second) {
83+
return InvalidSchema("Duplicate field id found: {}, prev path: {}, curr path: {}",
84+
current_field_id_, ret.first->second, current_path_);
85+
}
86+
87+
return {};
88+
}
89+
90+
Status Visit(const StructType& type) {
91+
for (size_t i = 0; i < type.fields().size(); ++i) {
92+
const auto& field = type.fields()[i];
93+
current_field_id_ = field.field_id();
94+
current_path_.push_back(i);
95+
ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(*field.type(), this));
96+
current_path_.pop_back();
97+
}
98+
return {};
99+
}
100+
101+
// Non-struct types are not supported yet, but it is not an error.
102+
Status Visit(const ListType& type) { return {}; }
103+
Status Visit(const MapType& type) { return {}; }
104+
105+
std::unordered_map<int32_t, std::vector<size_t>> Finish() {
106+
return std::move(position_path_);
107+
}
108+
109+
private:
110+
constexpr static int32_t kUnassignedFieldId = -1;
111+
int32_t current_field_id_ = kUnassignedFieldId;
112+
std::vector<size_t> current_path_;
113+
std::unordered_map<int32_t, std::vector<size_t>> position_path_;
114+
};
115+
72116
Schema::Schema(std::vector<SchemaField> fields, std::optional<int32_t> schema_id)
73117
: StructType(std::move(fields)), schema_id_(schema_id) {}
74118

@@ -144,6 +188,27 @@ Result<std::optional<std::reference_wrapper<const SchemaField>>> Schema::FindFie
144188
return it->second;
145189
}
146190

191+
/// \brief Compute the field-id -> position-path mapping for `self`.
///
/// Runs a PositionPathVisitor over the schema and returns what it collected; any
/// error raised during the traversal is returned instead.
Result<std::unordered_map<int32_t, std::vector<size_t>>> Schema::InitIdToPositionPath(
    const Schema& self) {
  PositionPathVisitor path_visitor;
  ICEBERG_RETURN_UNEXPECTED(VisitTypeInline(self, &path_visitor));
  return path_visitor.Finish();
}
197+
198+
/// \brief Create an accessor for the field with the given id.
///
/// \param field_id Id of the field to access.
/// \return A new accessor built from the field's type and its position path, or
/// NotFound when the id has no recorded position path or no matching field.
/// Errors from building the position-path index or from the field lookup are
/// propagated.
Result<std::unique_ptr<StructLikeAccessor>> Schema::GetAccessorById(
    int32_t field_id) const {
  ICEBERG_ASSIGN_OR_RAISE(auto path_index, id_to_position_path_.Get(*this));
  const auto& paths = path_index.get();

  const auto entry = paths.find(field_id);
  if (entry == paths.cend()) {
    return NotFound("Cannot get accessor for field id: {}", field_id);
  }

  ICEBERG_ASSIGN_OR_RAISE(auto field, FindFieldById(field_id));
  if (!field.has_value()) {
    return NotFound("Cannot get accessor for field id: {}", field_id);
  }

  return std::make_unique<StructLikeAccessor>(field.value().get().type(), entry->second);
}
211+
147212
IdToFieldVisitor::IdToFieldVisitor(
148213
std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& id_to_field)
149214
: id_to_field_(id_to_field) {}

0 commit comments

Comments
 (0)