From 39fbe3072337792a3450244b115c67aa824fa675 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 13 May 2025 10:04:04 +0800 Subject: [PATCH 1/2] feat: add json serde to name mapping - add roundtrip json conversion to name mapping - change MappedField to use std::shared_ptr for better usability - use optional field_id in MappedField --- src/iceberg/json_internal.cc | 80 +++++++++++++++++++++++++++++++++++- src/iceberg/json_internal.h | 36 ++++++++++++++++ src/iceberg/name_mapping.cc | 15 +++++-- src/iceberg/name_mapping.h | 10 +---- src/iceberg/type_fwd.h | 4 ++ test/json_internal_test.cc | 32 ++++++++++++++- test/name_mapping_test.cc | 1 + 7 files changed, 164 insertions(+), 14 deletions(-) diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index 2ffc543fd..3614ed230 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -28,6 +28,7 @@ #include +#include "iceberg/name_mapping.h" #include "iceberg/partition_field.h" #include "iceberg/partition_spec.h" #include "iceberg/result.h" @@ -71,9 +72,11 @@ constexpr std::string_view kKey = "key"; constexpr std::string_view kValue = "value"; constexpr std::string_view kDoc = "doc"; constexpr std::string_view kName = "name"; +constexpr std::string_view kNames = "names"; constexpr std::string_view kId = "id"; constexpr std::string_view kInitialDefault = "initial-default"; constexpr std::string_view kWriteDefault = "write-default"; +constexpr std::string_view kFieldId = "field-id"; constexpr std::string_view kElementId = "element-id"; constexpr std::string_view kKeyId = "key-id"; constexpr std::string_view kValueId = "value-id"; @@ -82,7 +85,6 @@ constexpr std::string_view kElementRequired = "element-required"; constexpr std::string_view kValueRequired = "value-required"; // Snapshot constants -constexpr std::string_view kFieldId = "field-id"; constexpr std::string_view kSpecId = "spec-id"; constexpr std::string_view kSnapshotId = "snapshot-id"; constexpr std::string_view 
kParentSnapshotId = "parent-snapshot-id"; @@ -1232,4 +1234,80 @@ Result ToJsonString(const nlohmann::json& json) { } } +nlohmann::json ToJson(const MappedField& field) { + nlohmann::json json; + if (field.field_id.has_value()) { + json[kFieldId] = field.field_id.value(); + } + + nlohmann::json names = nlohmann::json::array(); + for (const auto& name : field.names) { + names.push_back(name); + } + json[kNames] = names; + + if (field.nested_mapping != nullptr) { + json[kFields] = ToJson(*field.nested_mapping); + } + return json; +} + +Result MappedFieldFromJson(const nlohmann::json& json) { + if (!json.is_object()) [[unlikely]] { + return JsonParseError("Cannot parse non-object mapping field: {}", + SafeDumpJson(json)); + } + + ICEBERG_ASSIGN_OR_RAISE(std::optional field_id, + GetJsonValueOptional(json, kFieldId)); + + std::vector names; + if (json.contains(kNames)) { + ICEBERG_ASSIGN_OR_RAISE(names, GetJsonValue>(json, kNames)); + } + + std::unique_ptr nested_mapping; + if (json.contains(kFields)) { + ICEBERG_ASSIGN_OR_RAISE(auto fields_json, + GetJsonValue(json, kFields)); + ICEBERG_ASSIGN_OR_RAISE(nested_mapping, MappedFieldsFromJson(fields_json)); + } + + return MappedField{.names = {names.cbegin(), names.cend()}, + .field_id = field_id, + .nested_mapping = std::move(nested_mapping)}; +} + +nlohmann::json ToJson(const MappedFields& mapped_fields) { + nlohmann::json array = nlohmann::json::array(); + for (const auto& field : mapped_fields.fields()) { + array.push_back(ToJson(field)); + } + return array; +} + +Result> MappedFieldsFromJson(const nlohmann::json& json) { + if (!json.is_array()) [[unlikely]] { + return JsonParseError("Cannot parse non-array mapping fields: {}", + SafeDumpJson(json)); + } + + std::vector fields; + for (const auto& field_json : json) { + ICEBERG_ASSIGN_OR_RAISE(auto field, MappedFieldFromJson(field_json)); + fields.push_back(std::move(field)); + } + + return MappedFields::Make(std::move(fields)); +} + +nlohmann::json ToJson(const 
NameMapping& name_mapping) { + return ToJson(name_mapping.AsMappedFields()); +} + +Result> NameMappingFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto mapped_fields, MappedFieldsFromJson(json)); + return NameMapping::Make(std::move(mapped_fields)); +} + } // namespace iceberg diff --git a/src/iceberg/json_internal.h b/src/iceberg/json_internal.h index ce7afd010..4479a31fc 100644 --- a/src/iceberg/json_internal.h +++ b/src/iceberg/json_internal.h @@ -256,4 +256,40 @@ Result FromJsonString(const std::string& json_string); /// \return A JSON string or an error if the serialization fails. Result ToJsonString(const nlohmann::json& json); +/// \brief Serializes a `MappedField` object to JSON. +/// +/// \param[in] field The `MappedField` object to be serialized. +/// \return A JSON object representing the `MappedField`. +nlohmann::json ToJson(const MappedField& field); + +/// \brief Deserializes a JSON object into a `MappedField` object. +/// +/// \param[in] json The JSON object representing a `MappedField`. +/// \return A `MappedField` object or an error if the conversion fails. +Result MappedFieldFromJson(const nlohmann::json& json); + +/// \brief Serializes a `MappedFields` object to JSON. +/// +/// \param[in] mapped_fields The `MappedFields` object to be serialized. +/// \return A JSON array representing the `MappedFields`. +nlohmann::json ToJson(const MappedFields& mapped_fields); + +/// \brief Deserializes a JSON array into a `MappedFields` object. +/// +/// \param[in] json The JSON array representing a `MappedFields`. +/// \return A `MappedFields` object or an error if the conversion fails. +Result> MappedFieldsFromJson(const nlohmann::json& json); + +/// \brief Serializes a `NameMapping` object to JSON. +/// +/// \param[in] name_mapping The `NameMapping` object to be serialized. +/// \return A JSON array representing the `NameMapping`. 
+nlohmann::json ToJson(const NameMapping& name_mapping); + +/// \brief Deserializes a JSON array into a `NameMapping` object. +/// +/// \param[in] json The JSON array representing a `NameMapping`. +/// \return A `NameMapping` object or an error if the conversion fails. +Result> NameMappingFromJson(const nlohmann::json& json); + } // namespace iceberg diff --git a/src/iceberg/name_mapping.cc b/src/iceberg/name_mapping.cc index 75a048260..f4c37bb77 100644 --- a/src/iceberg/name_mapping.cc +++ b/src/iceberg/name_mapping.cc @@ -45,7 +45,9 @@ struct IndexByIdVisitor { std::unordered_map field_by_id; void Visit(const MappedField& field) { - field_by_id.emplace(field.field_id, std::cref(field)); + if (field.field_id.has_value()) { + field_by_id.emplace(field.field_id.value(), std::cref(field)); + } if (field.nested_mapping != nullptr) { Visit(*field.nested_mapping); } @@ -124,7 +126,9 @@ const std::unordered_map& MappedFields::LazyNameToId( if (name_to_id_.empty() && !fields_.empty()) { for (const auto& field : fields_) { for (const auto& name : field.names) { - name_to_id_.emplace(name, field.field_id); + if (field.field_id.has_value()) { + name_to_id_.emplace(name, field.field_id.value()); + } } } } @@ -135,7 +139,9 @@ const std::unordered_map& MappedFields::LazyIdToFi const { if (id_to_field_.empty() && !fields_.empty()) { for (const auto& field : fields_) { - id_to_field_.emplace(field.field_id, std::cref(field)); + if (field.field_id.has_value()) { + id_to_field_.emplace(field.field_id.value(), std::cref(field)); + } } } return id_to_field_; @@ -243,7 +249,8 @@ bool operator==(const NameMapping& lhs, const NameMapping& rhs) { std::string ToString(const MappedField& field) { return std::format( - "({} -> {}{})", field.names, field.field_id, + "({} -> {}{})", field.names, + field.field_id.has_value() ? std::to_string(field.field_id.value()) : "null", field.nested_mapping ? 
std::format(", {}", ToString(*field.nested_mapping)) : ""); } diff --git a/src/iceberg/name_mapping.h b/src/iceberg/name_mapping.h index c6f389cd6..8090f725a 100644 --- a/src/iceberg/name_mapping.h +++ b/src/iceberg/name_mapping.h @@ -40,10 +40,10 @@ struct ICEBERG_EXPORT MappedField { std::unordered_set names; /// \brief An optional Iceberg field ID used when a field's name is present in `names`. /// TODO(gangwu): check if we need to make it optional - int32_t field_id; + std::optional field_id; /// \brief An optional list of field mappings for child field of structs, maps, and /// lists. - std::unique_ptr nested_mapping; + std::shared_ptr nested_mapping; friend bool operator==(const MappedField& lhs, const MappedField& rhs); }; @@ -76,12 +76,6 @@ class ICEBERG_EXPORT MappedFields { friend bool operator==(const MappedFields& lhs, const MappedFields& rhs); - MappedFields(const MappedFields& other) = delete; - MappedFields& operator=(const MappedFields& other) = delete; - - MappedFields(MappedFields&& other) noexcept = default; - MappedFields& operator=(MappedFields&& other) noexcept = default; - private: explicit MappedFields(std::vector fields); diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index ed8cc5f23..39e46883e 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -101,6 +101,10 @@ struct SnapshotRef; struct StatisticsFile; struct TableMetadata; +struct MappedField; +class MappedFields; +class NameMapping; + enum class SnapshotRefType; enum class TransformType; diff --git a/test/json_internal_test.cc b/test/json_internal_test.cc index fc08d486b..75d142246 100644 --- a/test/json_internal_test.cc +++ b/test/json_internal_test.cc @@ -21,10 +21,11 @@ #include +#include #include #include -#include "gmock/gmock.h" +#include "iceberg/name_mapping.h" #include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/snapshot.h" @@ -32,6 +33,7 @@ #include "iceberg/sort_order.h" #include "iceberg/transform.h" #include 
"iceberg/util/formatter.h" // IWYU pragma: keep +#include "iceberg/util/macros.h" // IWYU pragma: keep #include "iceberg/util/timepoint.h" #include "matchers.h" @@ -67,6 +69,11 @@ Result> FromJsonHelper(const nlohmann::json& json) { return SnapshotFromJson(json); } +template <> +Result> FromJsonHelper(const nlohmann::json& json) { + return NameMappingFromJson(json); +} + // Helper function to reduce duplication in testing template void TestJsonConversion(const T& obj, const nlohmann::json& expected_json) { @@ -257,4 +264,27 @@ TEST(JsonInternalTest, SnapshotFromJsonSummaryWithNoOperation) { ASSERT_EQ(result.value()->operation(), DataOperation::kOverwrite); } +TEST(JsonInternalTest, NameMapping) { + auto mapping = NameMapping::Make( + {MappedField{.names = {"id"}, .field_id = 1}, + MappedField{.names = {"data"}, .field_id = 2}, + MappedField{.names = {"location"}, + .field_id = 3, + .nested_mapping = MappedFields::Make( + {MappedField{.names = {"latitude"}, .field_id = 4}, + MappedField{.names = {"longitude"}, .field_id = 5}})}}); + + nlohmann::json expected_json = + R"([ + {"field-id": 1, "names": ["id"]}, + {"field-id": 2, "names": ["data"]}, + {"field-id": 3, "names": ["location"], "fields": [ + {"field-id": 4, "names": ["latitude"]}, + {"field-id": 5, "names": ["longitude"]} + ]} + ])"_json; + + TestJsonConversion(*mapping, expected_json); +} + } // namespace iceberg diff --git a/test/name_mapping_test.cc b/test/name_mapping_test.cc index d9d054823..1e861a682 100644 --- a/test/name_mapping_test.cc +++ b/test/name_mapping_test.cc @@ -26,6 +26,7 @@ #include #include + namespace iceberg { class NameMappingTest : public ::testing::Test { From 669e0e7eb031d78c02a8f58564ffba3f6045203d Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 13 May 2025 20:47:34 +0800 Subject: [PATCH 2/2] Update src/iceberg/name_mapping.h --- src/iceberg/name_mapping.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/iceberg/name_mapping.h b/src/iceberg/name_mapping.h index 
8090f725a..fc8fcbf73 100644 --- a/src/iceberg/name_mapping.h +++ b/src/iceberg/name_mapping.h @@ -39,7 +39,6 @@ struct ICEBERG_EXPORT MappedField { /// \brief A required list of 0 or more names for a field. std::unordered_set names; /// \brief An optional Iceberg field ID used when a field's name is present in `names`. - /// TODO(gangwu): check if we need to make it optional std::optional field_id; /// \brief An optional list of field mappings for child field of structs, maps, and /// lists.