Skip to content

Commit ee6d8e2

Browse files
committed
feat: create name mapping from schema
1 parent a994322 commit ee6d8e2

File tree

5 files changed

+184
-4
lines changed

5 files changed

+184
-4
lines changed

src/iceberg/name_mapping.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <sstream>
2424

2525
#include "iceberg/util/formatter_internal.h"
26+
#include "iceberg/util/macros.h"
27+
#include "iceberg/util/visit_type.h"
2628

2729
namespace iceberg {
2830

@@ -264,4 +266,65 @@ std::string ToString(const NameMapping& name_mapping) {
264266
return repr;
265267
}
266268

269+
namespace {
270+
271+
// Visitor class for creating name mappings from schema types
272+
class CreateMappingVisitor {
273+
public:
274+
Result<std::unique_ptr<MappedFields>> Visit(const StructType& type) const {
275+
std::vector<MappedField> fields;
276+
fields.reserve(type.fields().size());
277+
for (const auto& field : type.fields()) {
278+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, std::string(field.name()), field));
279+
}
280+
return MappedFields::Make(std::move(fields));
281+
}
282+
283+
Result<std::unique_ptr<MappedFields>> Visit(const ListType& type) const {
284+
std::vector<MappedField> fields;
285+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "element", type.fields().back()));
286+
return MappedFields::Make(std::move(fields));
287+
}
288+
289+
Result<std::unique_ptr<MappedFields>> Visit(const MapType& type) const {
290+
std::vector<MappedField> fields;
291+
fields.reserve(2);
292+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "key", type.key()));
293+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "value", type.value()));
294+
return MappedFields::Make(std::move(fields));
295+
}
296+
297+
template <typename T>
298+
Result<std::unique_ptr<MappedFields>> Visit(const T& type) const {
299+
return nullptr;
300+
}
301+
302+
private:
303+
Status AddMappedField(std::vector<MappedField>& fields, const std::string& name,
304+
const SchemaField& field) const {
305+
auto visit_result =
306+
VisitType(*field.type(), [this](const auto& type) { return this->Visit(type); });
307+
ICEBERG_RETURN_UNEXPECTED(visit_result);
308+
309+
fields.emplace_back(MappedField{
310+
.names = {name},
311+
.field_id = field.field_id(),
312+
.nested_mapping = std::move(visit_result.value()),
313+
});
314+
return {};
315+
}
316+
};
317+
318+
} // namespace
319+
320+
// Builds the name mapping for `schema` by running CreateMappingVisitor over the schema
// itself; a failed visit is returned to the caller unchanged.
Result<std::unique_ptr<NameMapping>> CreateMapping(const Schema& schema) {
  CreateMappingVisitor visitor;
  auto root_fields = VisitType(
      schema,
      [&visitor](const auto& visited) -> Result<std::unique_ptr<MappedFields>> {
        return visitor.Visit(visited);
      });
  ICEBERG_RETURN_UNEXPECTED(root_fields);

  // The top-level mapped fields become the content of the resulting NameMapping.
  return NameMapping::Make(std::move(root_fields.value()));
}
329+
267330
} // namespace iceberg

src/iceberg/name_mapping.h

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include <vector>
3030

3131
#include "iceberg/iceberg_export.h"
32+
#include "iceberg/result.h"
33+
#include "iceberg/schema.h"
3234

3335
namespace iceberg {
3436

@@ -43,7 +45,7 @@ struct ICEBERG_EXPORT MappedField {
4345
int32_t field_id;
4446
/// \brief An optional list of field mappings for child field of structs, maps, and
4547
/// lists.
46-
std::unique_ptr<class MappedFields> nested_mapping;
48+
std::shared_ptr<class MappedFields> nested_mapping;
4749

4850
friend bool operator==(const MappedField& lhs, const MappedField& rhs);
4951
};
@@ -140,4 +142,24 @@ ICEBERG_EXPORT std::string ToString(const MappedField& field);
140142
ICEBERG_EXPORT std::string ToString(const MappedFields& fields);
141143
ICEBERG_EXPORT std::string ToString(const NameMapping& mapping);
142144

145+
/// \brief Create a name-based mapping for a schema.
146+
///
147+
/// The mapping returned by this method will use the schema's name for each field.
/// Struct fields are mapped under their own names; the element of a list is mapped
/// under the name "element", and the key and value of a map under "key" and "value".
/// Fields whose type is not a struct, list or map carry no nested mapping.
148+
///
149+
/// \param schema The schema to create the mapping for.
150+
/// \return A new NameMapping instance initialized with the schema's fields and names,
/// or the error produced while visiting the schema's types.
151+
ICEBERG_EXPORT Result<std::unique_ptr<NameMapping>> CreateMapping(const Schema& schema);
152+
153+
/// TODO(gangwu): implement this function once SchemaUpdate is supported
154+
///
155+
/// \brief Update a name-based mapping using changes to a schema.
156+
/// \param mapping a name-based mapping
157+
/// \param updates a map from field ID to updated field definitions
158+
/// \param adds a map from parent field ID to nested fields to be added
159+
/// \return an updated mapping with names added to renamed fields and the mapping extended
160+
/// for new fields
161+
// ICEBERG_EXPORT Result<std::unique_ptr<NameMapping>> UpdateMapping(
162+
// const NameMapping& mapping, const std::map<int32_t, SchemaField>& updates,
163+
// const std::multimap<int32_t, int32_t>& adds);
164+
143165
} // namespace iceberg

src/iceberg/schema.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ class ICEBERG_EXPORT Schema : public StructType {
4343
public:
4444
static constexpr int32_t kInitialSchemaId = 0;
4545

46-
Schema(std::vector<SchemaField> fields, std::optional<int32_t> schema_id);
46+
/// \brief Construct a schema from its fields and an optional schema ID.
///
/// `schema_id` defaults to std::nullopt, so a schema can be built from fields alone;
/// the constructor is explicit, so a field vector never converts to a Schema implicitly.
explicit Schema(std::vector<SchemaField> fields,
47+
std::optional<int32_t> schema_id = std::nullopt);
4748

4849
/// \brief Get the schema ID.
4950
///

test/name_mapping_test.cc

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,4 +202,99 @@ TEST_F(NameMappingTest, ToString) {
202202
}
203203
}
204204

205+
// A schema made only of primitive columns maps to one entry per column, each without
// a nested mapping.
TEST(CreateMappingTest, FlatSchemaToMapping) {
  std::vector<SchemaField> columns{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
  };
  Schema schema(std::move(columns));

  std::vector<MappedField> expected_fields{
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
  };
  auto expected = MappedFields::Make(std::move(expected_fields));

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ(mapping.value()->AsMappedFields(), *expected);
}
220+
221+
// A struct column maps to an entry whose nested mapping lists the struct's own fields.
TEST(CreateMappingTest, NestedStructSchemaToMapping) {
  auto location_type = std::make_shared<StructType>(std::vector<SchemaField>{
      SchemaField::MakeRequired(4, "latitude", std::make_shared<FloatType>()),
      SchemaField::MakeRequired(5, "longitude", std::make_shared<FloatType>()),
  });
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "location", std::move(location_type)),
  });

  auto location_mapping = MappedFields::Make({
      MappedField{.names = {"latitude"}, .field_id = 4},
      MappedField{.names = {"longitude"}, .field_id = 5},
  });
  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"location"},
                  .field_id = 3,
                  .nested_mapping = std::move(location_mapping)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ(mapping.value()->AsMappedFields(), *expected);
}
248+
249+
// A map column maps to an entry whose nested mapping holds the "key" and "value"
// fields.
TEST(CreateMappingTest, MapSchemaToMapping) {
  auto map_type = std::make_shared<MapType>(
      SchemaField::MakeRequired(4, "key", std::make_shared<StringType>()),
      SchemaField::MakeRequired(5, "value", std::make_shared<DoubleType>()));
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "map", std::move(map_type)),
  });

  auto key_value_mapping = MappedFields::Make({
      MappedField{.names = {"key"}, .field_id = 4},
      MappedField{.names = {"value"}, .field_id = 5},
  });
  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"map"},
                  .field_id = 3,
                  .nested_mapping = std::move(key_value_mapping)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ(mapping.value()->AsMappedFields(), *expected);
}
275+
276+
// A list column maps to an entry whose nested mapping holds the single "element"
// field.
TEST(CreateMappingTest, ListSchemaToMapping) {
  auto list_type = std::make_shared<ListType>(
      SchemaField::MakeRequired(4, "element", std::make_shared<StringType>()));
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "list", std::move(list_type)),
  });

  auto element_mapping = MappedFields::Make({
      MappedField{.names = {"element"}, .field_id = 4},
  });
  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"list"},
                  .field_id = 3,
                  .nested_mapping = std::move(element_mapping)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ(mapping.value()->AsMappedFields(), *expected);
}
299+
205300
} // namespace iceberg

test/visit_type_test.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@
2323

2424
#include <gmock/gmock.h>
2525
#include <gtest/gtest.h>
26-
#include <iceberg/type.h>
2726

28-
#include "gmock/gmock.h"
2927
#include "iceberg/result.h"
28+
#include "iceberg/type.h"
3029
#include "matchers.h"
3130

3231
namespace iceberg {

0 commit comments

Comments
 (0)