diff --git a/src/iceberg/name_mapping.cc b/src/iceberg/name_mapping.cc
index 75a048260..b9674238f 100644
--- a/src/iceberg/name_mapping.cc
+++ b/src/iceberg/name_mapping.cc
@@ -23,6 +23,8 @@
 #include

 #include "iceberg/util/formatter_internal.h"
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"

 namespace iceberg {

@@ -264,4 +266,65 @@ std::string ToString(const NameMapping& name_mapping) {
   return repr;
 }

+namespace {
+
+// Visitor that builds MappedFields for struct, list and map types; nullptr otherwise.
+class CreateMappingVisitor {
+ public:
+  Result> Visit(const StructType& type) const {
+    std::vector fields;
+    fields.reserve(type.fields().size());
+    for (const auto& field : type.fields()) {
+      ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, std::string(field.name()), field));
+    }
+    return MappedFields::Make(std::move(fields));
+  }
+
+  Result> Visit(const ListType& type) const {
+    std::vector fields;
+    ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "element", type.fields().back()));
+    return MappedFields::Make(std::move(fields));
+  }
+
+  Result> Visit(const MapType& type) const {
+    std::vector fields;
+    fields.reserve(2);  // exactly two children: key and value
+    ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "key", type.key()));
+    ICEBERG_RETURN_UNEXPECTED(AddMappedField(fields, "value", type.value()));
+    return MappedFields::Make(std::move(fields));
+  }
+
+  template
+  Result> Visit(const T& type) const {
+    return nullptr;  // no nested mapping for any other type
+  }
+
+ private:
+  Status AddMappedField(std::vector& fields, const std::string& name,
+                        const SchemaField& field) const {
+    auto visit_result =
+        VisitType(*field.type(), [this](const auto& type) { return this->Visit(type); });
+    ICEBERG_RETURN_UNEXPECTED(visit_result);
+
+    fields.emplace_back(MappedField{
+        .names = {name},
+        .field_id = field.field_id(),
+        .nested_mapping = std::move(visit_result.value()),
+    });
+    return {};
+  }
+};
+
+}  // namespace
+
+Result> CreateMapping(const Schema& schema) {
+  CreateMappingVisitor visitor;
+  auto result = VisitType(schema, [&visitor](const auto& type) -> Result> {
+    return visitor.Visit(type);
+  });
+  ICEBERG_RETURN_UNEXPECTED(result);
+  return NameMapping::Make(std::move(*result));
+}
+
 }  // namespace iceberg
diff --git a/src/iceberg/name_mapping.h b/src/iceberg/name_mapping.h
index c6f389cd6..5d6f6c709 100644
--- a/src/iceberg/name_mapping.h
+++ b/src/iceberg/name_mapping.h
@@ -29,6 +29,8 @@
 #include

 #include "iceberg/iceberg_export.h"
+#include "iceberg/result.h"
+#include "iceberg/schema.h"

 namespace iceberg {

@@ -43,7 +45,7 @@ struct ICEBERG_EXPORT MappedField {
   int32_t field_id;
   /// \brief An optional list of field mappings for child field of structs, maps, and
   /// lists.
-  std::unique_ptr nested_mapping;
+  std::shared_ptr nested_mapping;

   friend bool operator==(const MappedField& lhs, const MappedField& rhs);
 };
@@ -140,4 +142,24 @@ ICEBERG_EXPORT std::string ToString(const MappedField& field);
 ICEBERG_EXPORT std::string ToString(const MappedFields& fields);
 ICEBERG_EXPORT std::string ToString(const NameMapping& mapping);

+/// \brief Create a name-based mapping for a schema.
+///
+/// Each field is mapped from its schema name to its field ID. Struct, list ("element")
+/// and map ("key"/"value") children are added recursively as nested mappings.
+/// \param schema The schema to create the mapping for.
+/// \return A new NameMapping built from the schema's fields and names, or an error.
+ICEBERG_EXPORT Result> CreateMapping(const Schema& schema);
+
+/// TODO(gangwu): implement this function once SchemaUpdate is supported
+///
+/// \brief Update a name-based mapping using changes to a schema.
+/// \param mapping the name-based mapping to update
+/// \param updates a map from field ID to updated field definitions
+/// \param adds a map from parent field ID to nested fields to be added
+/// \return an updated mapping with names added to renamed fields and the mapping extended
+/// for new fields
+// ICEBERG_EXPORT Result> UpdateMapping(
+//     const NameMapping& mapping, const std::map& updates,
+//     const std::multimap& adds);
+
 }  // namespace iceberg
diff --git a/src/iceberg/schema.h b/src/iceberg/schema.h
index a23e3e4be..edc25d6bc 100644
--- a/src/iceberg/schema.h
+++ b/src/iceberg/schema.h
@@ -43,7 +43,8 @@ class ICEBERG_EXPORT Schema : public StructType {
  public:
   static constexpr int32_t kInitialSchemaId = 0;

-  Schema(std::vector fields, std::optional schema_id);
+  explicit Schema(std::vector fields,
+                  std::optional schema_id = std::nullopt);

   /// \brief Get the schema ID.
   ///
diff --git a/test/name_mapping_test.cc b/test/name_mapping_test.cc
index d9d054823..e0b068f92 100644
--- a/test/name_mapping_test.cc
+++ b/test/name_mapping_test.cc
@@ -202,4 +202,99 @@ TEST_F(NameMappingTest, ToString) {
   }
 }

+TEST(CreateMappingTest, FlatSchemaToMapping) {
+  Schema schema(std::vector{
+      SchemaField::MakeRequired(1, "id", std::make_shared()),
+      SchemaField::MakeRequired(2, "data", std::make_shared()),
+  });
+
+  auto expected = MappedFields::Make({
+      MappedField{.names = {"id"}, .field_id = 1},
+      MappedField{.names = {"data"}, .field_id = 2},
+  });
+
+  auto result = CreateMapping(schema);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result.value()->AsMappedFields(), *expected);
+}
+
+TEST(CreateMappingTest, NestedStructSchemaToMapping) {
+  Schema schema(std::vector{
+      SchemaField::MakeRequired(1, "id", std::make_shared()),
+      SchemaField::MakeRequired(2, "data", std::make_shared()),
+      SchemaField::MakeRequired(
+          3, "location",
+          std::make_shared(std::vector{
+              SchemaField::MakeRequired(4, "latitude", std::make_shared()),
+              SchemaField::MakeRequired(5, "longitude", std::make_shared()),
+          })),
+  });
+
+  auto expected = MappedFields::Make({
+      MappedField{.names = {"id"}, .field_id = 1},
+      MappedField{.names = {"data"}, .field_id = 2},
+      MappedField{.names = {"location"},
+                  .field_id = 3,
+                  .nested_mapping = MappedFields::Make({
+                      MappedField{.names = {"latitude"}, .field_id = 4},
+                      MappedField{.names = {"longitude"}, .field_id = 5},
+                  })},
+  });
+
+  auto result = CreateMapping(schema);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result.value()->AsMappedFields(), *expected);
+}
+
+TEST(CreateMappingTest, MapSchemaToMapping) {
+  Schema schema(std::vector{
+      SchemaField::MakeRequired(1, "id", std::make_shared()),
+      SchemaField::MakeRequired(2, "data", std::make_shared()),
+      SchemaField::MakeRequired(
+          3, "map",
+          std::make_shared(
+              SchemaField::MakeRequired(4, "key", std::make_shared()),
+              SchemaField::MakeRequired(5, "value", std::make_shared()))),
+  });
+
+  auto expected = MappedFields::Make({
+      MappedField{.names = {"id"}, .field_id = 1},
+      MappedField{.names = {"data"}, .field_id = 2},
+      MappedField{.names = {"map"},
+                  .field_id = 3,
+                  .nested_mapping = MappedFields::Make({
+                      MappedField{.names = {"key"}, .field_id = 4},
+                      MappedField{.names = {"value"}, .field_id = 5},
+                  })},
+  });
+
+  auto result = CreateMapping(schema);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result.value()->AsMappedFields(), *expected);
+}
+
+TEST(CreateMappingTest, ListSchemaToMapping) {
+  Schema schema(std::vector{
+      SchemaField::MakeRequired(1, "id", std::make_shared()),
+      SchemaField::MakeRequired(2, "data", std::make_shared()),
+      SchemaField::MakeRequired(3, "list",
+                                std::make_shared(SchemaField::MakeRequired(
+                                    4, "element", std::make_shared()))),
+  });
+
+  auto expected = MappedFields::Make({
+      MappedField{.names = {"id"}, .field_id = 1},
+      MappedField{.names = {"data"}, .field_id = 2},
+      MappedField{.names = {"list"},
+                  .field_id = 3,
+                  .nested_mapping = MappedFields::Make({
+                      MappedField{.names = {"element"}, .field_id = 4},
+                  })},
+  });
+
+  auto result = CreateMapping(schema);
+  ASSERT_TRUE(result.has_value());
+  EXPECT_EQ(result.value()->AsMappedFields(), *expected);
+}
+
 }  // namespace iceberg
diff --git a/test/visit_type_test.cc b/test/visit_type_test.cc
index 2215ae514..afd5bf013 100644
--- a/test/visit_type_test.cc
+++ b/test/visit_type_test.cc
@@ -23,10 +23,9 @@
 #include
 #include

-#include
-#include "gmock/gmock.h"
 #include "iceberg/result.h"
+#include "iceberg/type.h"
 #include "matchers.h"

 namespace iceberg {