Skip to content

Commit 11667a9

Browse files
authored
feat: add name mapping (#93)
1 parent 408e1a4 commit 11667a9

File tree

6 files changed

+646
-3
lines changed

6 files changed

+646
-3
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ set(ICEBERG_SOURCES
2323
expression/expression.cc
2424
file_reader.cc
2525
json_internal.cc
26+
name_mapping.cc
2627
partition_field.cc
2728
partition_spec.cc
2829
schema.cc
@@ -36,7 +37,6 @@ set(ICEBERG_SOURCES
3637
transform.cc
3738
transform_function.cc
3839
type.cc
39-
snapshot.cc
4040
util/murmurhash3_internal.cc
4141
util/timepoint.cc)
4242

src/iceberg/name_mapping.cc

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/name_mapping.h"
21+
22+
#include <format>
23+
#include <sstream>
24+
25+
#include "iceberg/util/formatter_internal.h"
26+
27+
namespace iceberg {
28+
29+
namespace {
30+
31+
// Helper function to join a list of field names with a dot
32+
std::string JoinByDot(std::span<const std::string> parts) {
33+
std::stringstream ss;
34+
for (size_t i = 0; i < parts.size(); ++i) {
35+
if (i > 0) {
36+
ss << ".";
37+
}
38+
ss << parts[i];
39+
}
40+
return ss.str();
41+
}
42+
43+
// Helper class to recursively index MappedField by field id
44+
struct IndexByIdVisitor {
45+
std::unordered_map<int32_t, MappedFieldConstRef> field_by_id;
46+
47+
void Visit(const MappedField& field) {
48+
field_by_id.emplace(field.field_id, std::cref(field));
49+
if (field.nested_mapping != nullptr) {
50+
Visit(*field.nested_mapping);
51+
}
52+
}
53+
54+
void Visit(const MappedFields& fields) {
55+
for (const auto& field : fields.fields()) {
56+
Visit(field);
57+
}
58+
}
59+
60+
void Visit(const NameMapping& name_mapping) { Visit(name_mapping.AsMappedFields()); }
61+
};
62+
63+
// Helper class to recursively index MappedField by field name
64+
struct IndexByNameVisitor {
65+
std::unordered_map<std::string, MappedFieldConstRef> field_by_name;
66+
67+
void Visit(const MappedField& field) {
68+
for (const auto& name : field.names) {
69+
field_by_name.emplace(name, std::cref(field));
70+
}
71+
72+
if (field.nested_mapping != nullptr) {
73+
IndexByNameVisitor nested_visitor;
74+
nested_visitor.Visit(*field.nested_mapping);
75+
76+
for (const auto& [name, mapped_field] : nested_visitor.field_by_name) {
77+
for (const auto& prefix : field.names) {
78+
std::vector<std::string> parts = {prefix, name};
79+
field_by_name.emplace(JoinByDot(parts), std::cref(mapped_field));
80+
}
81+
}
82+
}
83+
}
84+
85+
void Visit(const MappedFields& fields) {
86+
for (const auto& field : fields.fields()) {
87+
Visit(field);
88+
}
89+
}
90+
91+
void Visit(const NameMapping& name_mapping) { Visit(name_mapping.AsMappedFields()); }
92+
};
93+
94+
} // namespace
95+
96+
// Stores `fields` by moving them into fields_. No lookup index is built here.
MappedFields::MappedFields(std::vector<MappedField> fields)
    : fields_(std::move(fields)) {
}
98+
99+
std::unique_ptr<MappedFields> MappedFields::Make(std::vector<MappedField> fields) {
100+
return std::unique_ptr<MappedFields>(new MappedFields(std::move(fields)));
101+
}
102+
103+
std::optional<MappedFieldConstRef> MappedFields::Field(int32_t id) const {
104+
const auto& id_to_field = LazyIdToField();
105+
if (auto it = id_to_field.find(id); it != id_to_field.cend()) {
106+
return it->second;
107+
}
108+
return std::nullopt;
109+
}
110+
111+
std::optional<int32_t> MappedFields::Id(std::string_view name) const {
112+
const auto& name_to_id = LazyNameToId();
113+
if (auto it = name_to_id.find(name); it != name_to_id.cend()) {
114+
return it->second;
115+
}
116+
return std::nullopt;
117+
}
118+
119+
// Number of fields held directly by this object (nested fields not counted).
size_t MappedFields::Size() const {
  return fields_.size();
}
120+
121+
// Read-only view over the stored fields. The span refers to fields_ and does
// not own the elements.
std::span<const MappedField> MappedFields::fields() const {
  return fields_;
}
122+
123+
const std::unordered_map<std::string_view, int32_t>& MappedFields::LazyNameToId() const {
124+
if (name_to_id_.empty() && !fields_.empty()) {
125+
for (const auto& field : fields_) {
126+
for (const auto& name : field.names) {
127+
name_to_id_.emplace(name, field.field_id);
128+
}
129+
}
130+
}
131+
return name_to_id_;
132+
}
133+
134+
const std::unordered_map<int32_t, MappedFieldConstRef>& MappedFields::LazyIdToField()
135+
const {
136+
if (id_to_field_.empty() && !fields_.empty()) {
137+
for (const auto& field : fields_) {
138+
id_to_field_.emplace(field.field_id, std::cref(field));
139+
}
140+
}
141+
return id_to_field_;
142+
}
143+
144+
// Takes ownership of `mapping`. A null pointer is accepted; AsMappedFields()
// substitutes an empty MappedFields in that case.
NameMapping::NameMapping(std::unique_ptr<MappedFields> mapping)
    : mapping_(std::move(mapping)) {
}
146+
147+
std::optional<MappedFieldConstRef> NameMapping::Find(int32_t id) {
148+
const auto& fields_by_id = LazyFieldsById();
149+
if (auto iter = fields_by_id.find(id); iter != fields_by_id.cend()) {
150+
return iter->second;
151+
}
152+
return std::nullopt;
153+
}
154+
155+
// Finds a field by its name path, given one element per nesting level. The
// parts are joined with "." and looked up as a single name. An empty path
// yields std::nullopt.
std::optional<MappedFieldConstRef> NameMapping::Find(std::span<const std::string> names) {
  if (!names.empty()) {
    const std::string dotted_name = JoinByDot(names);
    return Find(dotted_name);
  }
  return std::nullopt;
}
161+
162+
std::optional<MappedFieldConstRef> NameMapping::Find(const std::string& name) {
163+
const auto& fields_by_name = LazyFieldsByName();
164+
if (auto iter = fields_by_name.find(name); iter != fields_by_name.cend()) {
165+
return iter->second;
166+
}
167+
return std::nullopt;
168+
}
169+
170+
// Returns the top-level MappedFields of this mapping. When mapping_ is null, a
// function-local static empty MappedFields is returned instead, so callers
// always receive a valid reference.
const MappedFields& NameMapping::AsMappedFields() const {
  if (mapping_ != nullptr) {
    return *mapping_;
  }
  static const std::unique_ptr<MappedFields> kEmptyFields = MappedFields::Make({});
  return *kEmptyFields;
}
177+
178+
const std::unordered_map<int32_t, MappedFieldConstRef>& NameMapping::LazyFieldsById()
179+
const {
180+
if (fields_by_id_.empty()) {
181+
IndexByIdVisitor visitor;
182+
visitor.Visit(AsMappedFields());
183+
fields_by_id_ = std::move(visitor.field_by_id);
184+
}
185+
return fields_by_id_;
186+
}
187+
188+
const std::unordered_map<std::string, MappedFieldConstRef>&
189+
NameMapping::LazyFieldsByName() const {
190+
if (fields_by_name_.empty()) {
191+
IndexByNameVisitor visitor;
192+
visitor.Visit(AsMappedFields());
193+
fields_by_name_ = std::move(visitor.field_by_name);
194+
}
195+
return fields_by_name_;
196+
}
197+
198+
std::unique_ptr<NameMapping> NameMapping::MakeEmpty() {
199+
return std::unique_ptr<NameMapping>(new NameMapping(MappedFields::Make({})));
200+
}
201+
202+
std::unique_ptr<NameMapping> NameMapping::Make(std::unique_ptr<MappedFields> fields) {
203+
return std::unique_ptr<NameMapping>(new NameMapping(std::move(fields)));
204+
}
205+
206+
std::unique_ptr<NameMapping> NameMapping::Make(std::vector<MappedField> fields) {
207+
return Make(MappedFields::Make(std::move(fields)));
208+
}
209+
210+
// Two MappedFields are equal when their ids and name sets match and their
// nested mappings are either both absent or both present and equal by value.
bool operator==(const MappedField& lhs, const MappedField& rhs) {
  if (lhs.field_id != rhs.field_id || lhs.names != rhs.names) {
    return false;
  }

  const bool lhs_has_nested = lhs.nested_mapping != nullptr;
  const bool rhs_has_nested = rhs.nested_mapping != nullptr;
  if (lhs_has_nested != rhs_has_nested) {
    return false;
  }

  // Both absent -> equal; both present -> compare the pointed-to mappings.
  return !lhs_has_nested || *lhs.nested_mapping == *rhs.nested_mapping;
}
225+
226+
// Two MappedFields objects are equal when they hold the same number of fields
// and the fields compare equal position by position.
bool operator==(const MappedFields& lhs, const MappedFields& rhs) {
  if (lhs.Size() != rhs.Size()) {
    return false;
  }

  const auto left = lhs.fields();
  const auto right = rhs.fields();
  auto right_it = right.begin();
  // Sizes match, so right_it stays in range for every element of `left`.
  for (const auto& left_field : left) {
    if (left_field != *right_it) {
      return false;
    }
    ++right_it;
  }
  return true;
}
239+
240+
// NameMappings compare equal when their top-level MappedFields compare equal.
// A null mapping is treated as an empty one via AsMappedFields().
bool operator==(const NameMapping& lhs, const NameMapping& rhs) {
  const MappedFields& left = lhs.AsMappedFields();
  const MappedFields& right = rhs.AsMappedFields();
  return left == right;
}
243+
244+
// Renders a field as "(<names> -> <id>)", or "(<names> -> <id>, <nested>)"
// when the field carries a nested mapping.
std::string ToString(const MappedField& field) {
  std::string nested_suffix;
  if (field.nested_mapping) {
    nested_suffix = std::format(", {}", ToString(*field.nested_mapping));
  }
  return std::format("({} -> {}{})", field.names, field.field_id, nested_suffix);
}
249+
250+
// Renders the contained fields by formatting the span returned by fields()
// with std::format.
std::string ToString(const MappedFields& fields) {
  const auto entries = fields.fields();
  return std::format("{}", entries);
}
253+
254+
// Renders a NameMapping as a bracketed list with one top-level field per line.
// A mapping without fields is rendered as "[]".
std::string ToString(const NameMapping& name_mapping) {
  const MappedFields& top_level = name_mapping.AsMappedFields();
  if (top_level.Size() == 0) {
    return "[]";
  }

  std::string out = "[\n";
  for (const auto& entry : top_level.fields()) {
    out += std::format(" {}\n", ToString(entry));
  }
  out += "]";
  return out;
}
266+
267+
} // namespace iceberg

0 commit comments

Comments
 (0)