Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions src/iceberg/name_mapping.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <sstream>

#include "iceberg/util/formatter_internal.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/visit_type.h"

namespace iceberg {

Expand Down Expand Up @@ -264,4 +266,65 @@ std::string ToString(const NameMapping& name_mapping) {
return repr;
}

namespace {

/// \brief Type visitor that derives the MappedFields of a type from its children.
///
/// Struct, list and map types yield a MappedFields instance with one entry per
/// child field; every other type yields a null pointer, i.e. no nested mapping.
class CreateMappingVisitor {
 public:
  /// Struct: one entry per field, keyed by the field's own name.
  Result<std::unique_ptr<MappedFields>> Visit(const StructType& type) const {
    const auto& children = type.fields();
    std::vector<MappedField> mapped;
    mapped.reserve(children.size());
    for (const auto& child : children) {
      ICEBERG_RETURN_UNEXPECTED(AddMappedField(mapped, std::string(child.name()), child));
    }
    return MappedFields::Make(std::move(mapped));
  }

  /// List: a single entry named "element" built from the last field of the list type.
  Result<std::unique_ptr<MappedFields>> Visit(const ListType& type) const {
    std::vector<MappedField> mapped;
    ICEBERG_RETURN_UNEXPECTED(AddMappedField(mapped, "element", type.fields().back()));
    return MappedFields::Make(std::move(mapped));
  }

  /// Map: two entries, "key" followed by "value".
  Result<std::unique_ptr<MappedFields>> Visit(const MapType& type) const {
    std::vector<MappedField> mapped;
    mapped.reserve(2);
    ICEBERG_RETURN_UNEXPECTED(AddMappedField(mapped, "key", type.key()));
    ICEBERG_RETURN_UNEXPECTED(AddMappedField(mapped, "value", type.value()));
    return MappedFields::Make(std::move(mapped));
  }

  /// Fallback for every type without a dedicated overload: no nested mapping.
  template <typename T>
  Result<std::unique_ptr<MappedFields>> Visit(const T& type) const {
    return nullptr;
  }

 private:
  /// Visits the type of `field` to build its nested mapping, then appends a
  /// MappedField with the single name `name` and the field's ID to `fields`.
  /// An error produced while visiting the field's type is returned unchanged
  /// and nothing is appended.
  Status AddMappedField(std::vector<MappedField>& fields, const std::string& name,
                        const SchemaField& field) const {
    auto nested = VisitType(*field.type(),
                            [this](const auto& type) { return this->Visit(type); });
    ICEBERG_RETURN_UNEXPECTED(nested);

    MappedField mapped_field{
        .names = {name},
        .field_id = field.field_id(),
        .nested_mapping = std::move(nested.value()),
    };
    fields.push_back(std::move(mapped_field));
    return {};
  }
};

} // namespace

/// Builds a NameMapping for `schema` by visiting it with CreateMappingVisitor and
/// wrapping the resulting top-level MappedFields. An error from the visit is
/// returned unchanged.
Result<std::unique_ptr<NameMapping>> CreateMapping(const Schema& schema) {
  auto root_fields = VisitType(
      schema,
      [visitor = CreateMappingVisitor{}](
          const auto& type) -> Result<std::unique_ptr<MappedFields>> {
        return visitor.Visit(type);
      });
  ICEBERG_RETURN_UNEXPECTED(root_fields);
  return NameMapping::Make(std::move(root_fields.value()));
}

} // namespace iceberg
24 changes: 23 additions & 1 deletion src/iceberg/name_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
#include <vector>

#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/schema.h"

namespace iceberg {

Expand All @@ -43,7 +45,7 @@ struct ICEBERG_EXPORT MappedField {
int32_t field_id;
/// \brief An optional list of field mappings for child field of structs, maps, and
/// lists.
std::unique_ptr<class MappedFields> nested_mapping;
std::shared_ptr<class MappedFields> nested_mapping;

friend bool operator==(const MappedField& lhs, const MappedField& rhs);
};
Expand Down Expand Up @@ -140,4 +142,24 @@ ICEBERG_EXPORT std::string ToString(const MappedField& field);
ICEBERG_EXPORT std::string ToString(const MappedFields& fields);
ICEBERG_EXPORT std::string ToString(const NameMapping& mapping);

/// \brief Create a name-based mapping for a schema.
///
/// The returned mapping uses the name that the schema assigns to each field.
///
/// \param schema The schema to create the mapping for.
/// \return A new NameMapping instance initialized with the schema's fields and names.
ICEBERG_EXPORT Result<std::unique_ptr<NameMapping>> CreateMapping(const Schema& schema);

/// TODO(gangwu): implement this function once SchemaUpdate is supported
///
/// \brief Update a name-based mapping using changes to a schema.
/// \param mapping a name-based mapping
/// \param updates a map from field ID to updated field definitions
/// \param adds a map from parent field ID to nested fields to be added
/// \return an updated mapping with names added to renamed fields and the mapping extended
/// for new fields
// ICEBERG_EXPORT Result<std::unique_ptr<NameMapping>> UpdateMapping(
// const NameMapping& mapping, const std::map<int32_t, SchemaField>& updates,
// const std::multimap<int32_t, int32_t>& adds);

} // namespace iceberg
3 changes: 2 additions & 1 deletion src/iceberg/schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ class ICEBERG_EXPORT Schema : public StructType {
public:
static constexpr int32_t kInitialSchemaId = 0;

Schema(std::vector<SchemaField> fields, std::optional<int32_t> schema_id);
explicit Schema(std::vector<SchemaField> fields,
std::optional<int32_t> schema_id = std::nullopt);

/// \brief Get the schema ID.
///
Expand Down
95 changes: 95 additions & 0 deletions test/name_mapping_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,4 +202,99 @@ TEST_F(NameMappingTest, ToString) {
}
}

// A schema with only primitive top-level fields maps each field by name and ID,
// without any nested mapping.
TEST(CreateMappingTest, FlatSchemaToMapping) {
  std::vector<SchemaField> columns{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
  };
  Schema schema(std::move(columns));

  auto expected_fields = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  const auto& actual_fields = (*mapping)->AsMappedFields();
  EXPECT_EQ(actual_fields, *expected_fields);
}

// A struct-typed field carries a nested mapping that lists the struct's own fields.
TEST(CreateMappingTest, NestedStructSchemaToMapping) {
  auto location_type = std::make_shared<StructType>(std::vector<SchemaField>{
      SchemaField::MakeRequired(4, "latitude", std::make_shared<FloatType>()),
      SchemaField::MakeRequired(5, "longitude", std::make_shared<FloatType>()),
  });
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "location", std::move(location_type)),
  });

  auto location_mapping = MappedFields::Make({
      MappedField{.names = {"latitude"}, .field_id = 4},
      MappedField{.names = {"longitude"}, .field_id = 5},
  });
  auto expected_fields = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"location"},
                  .field_id = 3,
                  .nested_mapping = std::move(location_mapping)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  const auto& actual_fields = (*mapping)->AsMappedFields();
  EXPECT_EQ(actual_fields, *expected_fields);
}

// A map-typed field carries a nested mapping with its "key" and "value" entries.
TEST(CreateMappingTest, MapSchemaToMapping) {
  auto map_type = std::make_shared<MapType>(
      SchemaField::MakeRequired(4, "key", std::make_shared<StringType>()),
      SchemaField::MakeRequired(5, "value", std::make_shared<DoubleType>()));
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "map", std::move(map_type)),
  });

  auto entry_mapping = MappedFields::Make({
      MappedField{.names = {"key"}, .field_id = 4},
      MappedField{.names = {"value"}, .field_id = 5},
  });
  auto expected_fields = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"map"},
                  .field_id = 3,
                  .nested_mapping = std::move(entry_mapping)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  const auto& actual_fields = (*mapping)->AsMappedFields();
  EXPECT_EQ(actual_fields, *expected_fields);
}

// A list-typed field carries a nested mapping with a single "element" entry.
TEST(CreateMappingTest, ListSchemaToMapping) {
  auto list_type = std::make_shared<ListType>(
      SchemaField::MakeRequired(4, "element", std::make_shared<StringType>()));
  Schema schema(std::vector<SchemaField>{
      SchemaField::MakeRequired(1, "id", std::make_shared<LongType>()),
      SchemaField::MakeRequired(2, "data", std::make_shared<StringType>()),
      SchemaField::MakeRequired(3, "list", std::move(list_type)),
  });

  auto element_mapping = MappedFields::Make({
      MappedField{.names = {"element"}, .field_id = 4},
  });
  auto expected_fields = MappedFields::Make({
      MappedField{.names = {"id"}, .field_id = 1},
      MappedField{.names = {"data"}, .field_id = 2},
      MappedField{.names = {"list"},
                  .field_id = 3,
                  .nested_mapping = std::move(element_mapping)},
  });

  auto mapping = CreateMapping(schema);
  ASSERT_TRUE(mapping.has_value());
  const auto& actual_fields = (*mapping)->AsMappedFields();
  EXPECT_EQ(actual_fields, *expected_fields);
}

} // namespace iceberg
3 changes: 1 addition & 2 deletions test/visit_type_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@

#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <iceberg/type.h>

#include "gmock/gmock.h"
#include "iceberg/result.h"
#include "iceberg/type.h"
#include "matchers.h"

namespace iceberg {
Expand Down
Loading