From 58b7c582bbde8eea3d7dfcc976db311e3eecf964 Mon Sep 17 00:00:00 2001 From: Guotao Yu Date: Fri, 11 Apr 2025 10:48:33 +0800 Subject: [PATCH] feat: add json serialization for partition spec --- src/iceberg/json_internal.cc | 50 +++++++++++++++++++++++++++++++++ src/iceberg/json_internal.h | 47 +++++++++++++++++++++++++++++++ src/iceberg/type_fwd.h | 3 +- test/json_internal_test.cc | 54 ++++++++++++++++++++++++++++++++++++ 4 files changed, 153 insertions(+), 1 deletion(-) diff --git a/src/iceberg/json_internal.cc b/src/iceberg/json_internal.cc index 5307aa562..0905dfc6c 100644 --- a/src/iceberg/json_internal.cc +++ b/src/iceberg/json_internal.cc @@ -25,6 +25,7 @@ #include #include "iceberg/expected.h" +#include "iceberg/partition_spec.h" #include "iceberg/result.h" #include "iceberg/schema.h" #include "iceberg/schema_internal.h" @@ -68,6 +69,9 @@ constexpr std::string_view kRequired = "required"; constexpr std::string_view kElementRequired = "element-required"; constexpr std::string_view kValueRequired = "value-required"; +constexpr std::string_view kFieldId = "field-id"; +constexpr std::string_view kSpecId = "spec-id"; + template Result GetJsonValue(const nlohmann::json& json, std::string_view key) { if (!json.contains(key)) { @@ -370,4 +374,50 @@ Result> SchemaFromJson(const nlohmann::json& json) { return FromStructType(std::move(struct_type), schema_id); } +nlohmann::json ToJson(const PartitionField& partition_field) { + nlohmann::json json; + json[kSourceId] = partition_field.source_id(); + json[kFieldId] = partition_field.field_id(); + json[kTransform] = std::format("{}", *partition_field.transform()); + json[kName] = partition_field.name(); + return json; +} + +nlohmann::json ToJson(const PartitionSpec& partition_spec) { + nlohmann::json json; + json[kSpecId] = partition_spec.spec_id(); + + nlohmann::json fields_json = nlohmann::json::array(); + for (const auto& field : partition_spec.fields()) { + fields_json.push_back(ToJson(field)); + } + json[kFields] = fields_json; + return json; +} + +Result> PartitionFieldFromJson( + const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto source_id, GetJsonValue(json, kSourceId)); + ICEBERG_ASSIGN_OR_RAISE(auto field_id, GetJsonValue(json, kFieldId)); + ICEBERG_ASSIGN_OR_RAISE( + auto transform, + GetJsonValue(json, kTransform).and_then(TransformFunctionFromString)); + ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue(json, kName)); + return std::make_unique(source_id, field_id, name, + std::move(transform)); +} + +Result> PartitionSpecFromJson( + const std::shared_ptr& schema, const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto spec_id, GetJsonValue(json, kSpecId)); + ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue(json, kFields)); + + std::vector partition_fields; + for (const auto& field_json : fields) { + ICEBERG_ASSIGN_OR_RAISE(auto partition_field, PartitionFieldFromJson(field_json)); + partition_fields.push_back(std::move(*partition_field)); + } + return std::make_unique(schema, spec_id, std::move(partition_fields)); +} + } // namespace iceberg diff --git a/src/iceberg/json_internal.h b/src/iceberg/json_internal.h index 5ea4d4750..005b05fab 100644 --- a/src/iceberg/json_internal.h +++ b/src/iceberg/json_internal.h @@ -108,4 +108,51 @@ Result> TypeFromJson(const nlohmann::json& json); /// \return The Iceberg field or an error if the conversion fails. Result> FieldFromJson(const nlohmann::json& json); +/// \brief Serializes a `PartitionField` object to JSON. +/// +/// This function converts a `PartitionField` object into a JSON representation. +/// The resulting JSON object includes the transform type, source ID, field ID, and +/// name. +/// +/// \param partition_field The `PartitionField` object to be serialized. +/// \return A JSON object representing the `PartitionField` in the form of key-value +/// pairs. +nlohmann::json ToJson(const PartitionField& partition_field); + +/// \brief Serializes a `PartitionSpec` object to JSON. +/// +/// This function converts a `PartitionSpec` object into a JSON representation. +/// The resulting JSON includes the spec ID and a list of `PartitionField` objects. +/// Each `PartitionField` is serialized as described in the `ToJson(PartitionField)` +/// function. +/// +/// \param partition_spec The `PartitionSpec` object to be serialized. +/// \return A JSON object representing the `PartitionSpec` with its order ID and fields +/// array. +nlohmann::json ToJson(const PartitionSpec& partition_spec); + +/// \brief Deserializes a JSON object into a `PartitionField` object. +/// +/// This function parses the provided JSON and creates a `PartitionField` object. +/// It expects the JSON object to contain keys for the transform, source ID, field ID, +/// and name. +/// +/// \param json The JSON object representing a `PartitionField`. +/// \return An `expected` value containing either a `PartitionField` object or an error. +/// If the JSON is malformed or missing expected fields, an error will be returned. +Result> PartitionFieldFromJson( + const nlohmann::json& json); + +/// \brief Deserializes a JSON object into a `PartitionSpec` object. +/// +/// This function parses the provided JSON and creates a `PartitionSpec` object. +/// It expects the JSON object to contain the spec ID and a list of `PartitionField` +/// objects. Each `PartitionField` will be parsed using the `PartitionFieldFromJson` +/// function. +/// +/// \param json The JSON object representing a `PartitionSpec`. +/// \return An `expected` value containing either a `PartitionSpec` object or an error. If +/// the JSON is malformed or missing expected fields, an error will be returned. +Result> PartitionSpecFromJson( + const std::shared_ptr& schema, const nlohmann::json& json); } // namespace iceberg diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h index 215e8f272..bc55aebb7 100644 --- a/src/iceberg/type_fwd.h +++ b/src/iceberg/type_fwd.h @@ -69,6 +69,8 @@ class LongType; class ListType; class MapType; class NestedType; +class PartitionField; +class PartitionSpec; class PrimitiveType; class Schema; class SchemaField; @@ -94,7 +96,6 @@ class Transaction; /// ---------------------------------------------------------------------------- class HistoryEntry; -class PartitionSpec; class Snapshot; class SortField; class SortOrder; diff --git a/test/json_internal_test.cc b/test/json_internal_test.cc index 95e4b8bec..75c020a0d 100644 --- a/test/json_internal_test.cc +++ b/test/json_internal_test.cc @@ -25,6 +25,7 @@ #include #include +#include "iceberg/partition_spec.h" #include "iceberg/schema.h" #include "iceberg/sort_field.h" #include "iceberg/sort_order.h" @@ -48,6 +49,12 @@ expected, Error> FromJsonHelper(const nlohmann::json& return SortOrderFromJson(json); } +template <> +expected, Error> FromJsonHelper( + const nlohmann::json& json) { + return PartitionFieldFromJson(json); +} + // Helper function to reduce duplication in testing template void TestJsonConversion(const T& obj, const nlohmann::json& expected_json) { @@ -94,4 +101,51 @@ TEST(JsonInternalTest, SortOrder) { TestJsonConversion(sort_order, expected_sort_order); } +TEST(JsonInternalTest, PartitionField) { + auto identity_transform = std::make_shared(); + PartitionField field(3, 101, "region", identity_transform); + nlohmann::json expected_json = + R"({"source-id":3,"field-id":101,"transform":"identity","name":"region"})"_json; + TestJsonConversion(field, expected_json); +} + +TEST(JsonPartitionTest, PartitionFieldFromJsonMissingField) { + nlohmann::json invalid_json = + R"({"field-id":101,"transform":"identity","name":"region"})"_json; + // missing source-id + + auto result = PartitionFieldFromJson(invalid_json); + EXPECT_FALSE(result.has_value()); + EXPECT_EQ(result.error().kind, ErrorKind::kJsonParseError); +} + +TEST(JsonPartitionTest, PartitionSpec) { + auto schema = std::make_shared( + 100, std::vector{ + SchemaField(3, "region", std::make_shared(), false), + SchemaField(5, "ts", std::make_shared(), false)}); + + auto identity_transform = std::make_shared(); + PartitionSpec spec(schema, 1, + {PartitionField(3, 101, "region", identity_transform), + PartitionField(5, 102, "ts", identity_transform)}); + auto json = ToJson(spec); + nlohmann::json expected_json = R"({"spec-id": 1, + "fields": [ + {"source-id": 3, + "field-id": 101, + "transform": "identity", + "name": "region"}, + {"source-id": 5, + "field-id": 102, + "transform": "identity", + "name": "ts"}]})"_json; + + EXPECT_EQ(json, expected_json); + + auto parsed_spec_result = PartitionSpecFromJson(schema, json); + ASSERT_TRUE(parsed_spec_result.has_value()) << parsed_spec_result.error().message; + EXPECT_EQ(spec, *parsed_spec_result.value()); +} + } // namespace iceberg