Skip to content

Commit 29b887d

Browse files
authored
feat: create name mapping from schema (#99)
1 parent 27318f5 commit 29b887d

File tree

5 files changed

+183
-3
lines changed

5 files changed

+183
-3
lines changed

src/iceberg/name_mapping.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
#include <sstream>
2424

2525
#include "iceberg/util/formatter_internal.h"
26+
#include "iceberg/util/macros.h"
27+
#include "iceberg/util/visit_type.h"
2628

2729
namespace iceberg {
2830

@@ -271,4 +273,65 @@ std::string ToString(const NameMapping& name_mapping) {
271273
return repr;
272274
}
273275

276+
namespace {
277+
278+
// Visitor class for creating name mappings from schema types
279+
class CreateMappingVisitor {
280+
public:
281+
Result<std::unique_ptr<MappedFields>> Visit(const StructType& type) const {
282+
std::vector<MappedField> fields;
283+
fields.reserve(type.fields().size());
284+
for (const auto& field : type.fields()) {
285+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, std::string(field.name()), field));
286+
}
287+
return MappedFields::Make(std::move(fields));
288+
}
289+
290+
Result<std::unique_ptr<MappedFields>> Visit(const ListType& type) const {
291+
std::vector<MappedField> fields;
292+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "element", type.fields().back()));
293+
return MappedFields::Make(std::move(fields));
294+
}
295+
296+
Result<std::unique_ptr<MappedFields>> Visit(const MapType& type) const {
297+
std::vector<MappedField> fields;
298+
fields.reserve(2);
299+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "key", type.key()));
300+
ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "value", type.value()));
301+
return MappedFields::Make(std::move(fields));
302+
}
303+
304+
template <typename T>
305+
Result<std::unique_ptr<MappedFields>> Visit(const T& type) const {
306+
return nullptr;
307+
}
308+
309+
private:
310+
Status AddMappedField(std::vector<MappedField>& fields, const std::string& name,
311+
const SchemaField& field) const {
312+
auto visit_result =
313+
VisitType(*field.type(), [this](const auto& type) { return this->Visit(type); });
314+
ICEBERG_RETURN_UNEXPECTED(visit_result);
315+
316+
fields.emplace_back(MappedField{
317+
.names = {name},
318+
.field_id = field.field_id(),
319+
.nested_mapping = std::move(visit_result.value()),
320+
});
321+
return {};
322+
}
323+
};
324+
325+
} // namespace
326+
327+
// Builds a NameMapping for `schema` by visiting it with CreateMappingVisitor
// and wrapping the resulting top-level MappedFields. Any error produced while
// visiting is returned to the caller instead of a mapping.
Result<std::unique_ptr<NameMapping>> CreateMapping(const Schema& schema) {
  auto root_fields = VisitType(
      schema, [mapper = CreateMappingVisitor{}](
                  const auto& type) -> Result<std::unique_ptr<MappedFields>> {
        return mapper.Visit(type);
      });
  ICEBERG_RETURN_UNEXPECTED(root_fields);
  return NameMapping::Make(std::move(root_fields.value()));
}
336+
274337
} // namespace iceberg

src/iceberg/name_mapping.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
#include <vector>
3030

3131
#include "iceberg/iceberg_export.h"
32+
#include "iceberg/result.h"
33+
#include "iceberg/schema.h"
3234

3335
namespace iceberg {
3436

@@ -133,4 +135,24 @@ ICEBERG_EXPORT std::string ToString(const MappedField& field);
133135
ICEBERG_EXPORT std::string ToString(const MappedFields& fields);
134136
ICEBERG_EXPORT std::string ToString(const NameMapping& mapping);
135137

138+
/// \brief Create a name-based mapping for a schema.
139+
///
140+
/// Struct fields are mapped under their names in the schema. List elements and
/// map keys/values are mapped under the names "element", "key" and "value".
141+
///
142+
/// \param schema The schema to create the mapping for.
143+
/// \return A new NameMapping instance initialized with the schema's fields and names.
144+
ICEBERG_EXPORT Result<std::unique_ptr<NameMapping>> CreateMapping(const Schema& schema);
145+
146+
/// TODO(gangwu): implement this function once SchemaUpdate is supported
147+
///
148+
/// \brief Update a name-based mapping using changes to a schema.
149+
/// \param mapping a name-based mapping
150+
/// \param updates a map from field ID to updated field definitions
151+
/// \param adds a map from parent field ID to nested fields to be added
152+
/// \return an updated mapping with names added to renamed fields and the mapping extended
153+
/// for new fields
154+
// ICEBERG_EXPORT Result<std::unique_ptr<NameMapping>> UpdateMapping(
155+
// const NameMapping& mapping, const std::map<int32_t, SchemaField>& updates,
156+
// const std::multimap<int32_t, int32_t>& adds);
157+
136158
} // namespace iceberg

src/iceberg/schema.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ class ICEBERG_EXPORT Schema : public StructType {
4343
public:
4444
static constexpr int32_t kInitialSchemaId = 0;
4545

46-
Schema(std::vector<SchemaField> fields, std::optional<int32_t> schema_id);
46+
explicit Schema(std::vector<SchemaField> fields,
47+
std::optional<int32_t> schema_id = std::nullopt);
4748

4849
/// \brief Get the schema ID.
4950
///

test/name_mapping_test.cc

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,99 @@ TEST_F(NameMappingTest, ToString) {
203203
}
204204
}
205205

206+
// A schema with two top-level fields maps to two entries that are expected to
// carry only a name and a field id.
TEST(CreateMappingTest, FlatSchemaToMapping) {
  std::vector<SchemaField> columns{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
  };
  Schema schema(std::move(columns));

  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ((*mapping)->AsMappedFields(), *expected);
}
221+
222+
// A struct-typed field is expected to carry a nested mapping with one entry
// per field of the struct.
TEST(CreateMappingTest, NestedStructSchemaToMapping) {
  auto location_type = std::make_shared<StructType>(std::vector<SchemaField>{
      SchemaField::MakeRequired(4, "latitude", std::make_shared<FloatType>()),
      SchemaField::MakeRequired(5, "longitude", std::make_shared<FloatType>()),
  });
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "location", std::move(location_type)),
  });

  auto expected_location = MappedFields::Make({
      MappedField{.names = {"latitude"}, .field_id = 4},
      MappedField{.names = {"longitude"}, .field_id = 5},
  });
  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"location"},
                  .field_id = 3,
                  .nested_mapping = std::move(expected_location)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ((*mapping)->AsMappedFields(), *expected);
}
249+
250+
// A map-typed field is expected to carry a nested mapping holding its key
// entry followed by its value entry.
TEST(CreateMappingTest, MapSchemaToMapping) {
  auto map_type = std::make_shared<MapType>(
      SchemaField::MakeRequired(4, "key", std::make_shared<StringType>()),
      SchemaField::MakeRequired(5, "value", std::make_shared<DoubleType>()));
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "map", std::move(map_type)),
  });

  auto expected_entries = MappedFields::Make({
      MappedField{.names = {"key"}, .field_id = 4},
      MappedField{.names = {"value"}, .field_id = 5},
  });
  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"map"},
                  .field_id = 3,
                  .nested_mapping = std::move(expected_entries)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ((*mapping)->AsMappedFields(), *expected);
}
276+
277+
// A list-typed field is expected to carry a nested mapping holding a single
// entry for its element.
TEST(CreateMappingTest, ListSchemaToMapping) {
  auto list_type = std::make_shared<ListType>(
      SchemaField::MakeRequired(4, "element", std::make_shared<StringType>()));
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "list", std::move(list_type)),
  });

  auto expected_element = MappedFields::Make({
      MappedField{.names = {"element"}, .field_id = 4},
  });
  auto expected = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"list"},
                  .field_id = 3,
                  .nested_mapping = std::move(expected_element)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  EXPECT_EQ((*mapping)->AsMappedFields(), *expected);
}
300+
206301
} // namespace iceberg

test/visit_type_test.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,9 @@
2323

2424
#include <gmock/gmock.h>
2525
#include <gtest/gtest.h>
26-
#include <iceberg/type.h>
2726

28-
#include "gmock/gmock.h"
2927
#include "iceberg/result.h"
28+
#include "iceberg/type.h"
3029
#include "matchers.h"
3130

3231
namespace iceberg {

0 commit comments

Comments
 (0)