Skip to content

Commit 00b96cd

Browse files
committed
feat: add json serialization for partition spec (apache#68)
1 parent ba97a83 commit 00b96cd

File tree

4 files changed

+153
-1
lines changed

4 files changed

+153
-1
lines changed

src/iceberg/json_internal.cc

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <nlohmann/json.hpp>
2626

2727
#include "iceberg/expected.h"
28+
#include "iceberg/partition_spec.h"
2829
#include "iceberg/result.h"
2930
#include "iceberg/schema.h"
3031
#include "iceberg/schema_internal.h"
@@ -68,6 +69,9 @@ constexpr std::string_view kRequired = "required";
6869
constexpr std::string_view kElementRequired = "element-required";
6970
constexpr std::string_view kValueRequired = "value-required";
7071

72+
constexpr std::string_view kFieldId = "field-id";
73+
constexpr std::string_view kSpecId = "spec-id";
74+
7175
template <typename T>
7276
Result<T> GetJsonValue(const nlohmann::json& json, std::string_view key) {
7377
if (!json.contains(key)) {
@@ -370,4 +374,50 @@ Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json) {
370374
return FromStructType(std::move(struct_type), schema_id);
371375
}
372376

377+
nlohmann::json ToJson(const PartitionField& partition_field) {
378+
nlohmann::json json;
379+
json[kSourceId] = partition_field.source_id();
380+
json[kFieldId] = partition_field.field_id();
381+
json[kTransform] = std::format("{}", *partition_field.transform());
382+
json[kName] = partition_field.name();
383+
return json;
384+
}
385+
386+
nlohmann::json ToJson(const PartitionSpec& partition_spec) {
387+
nlohmann::json json;
388+
json[kSpecId] = partition_spec.spec_id();
389+
390+
nlohmann::json fields_json = nlohmann::json::array();
391+
for (const auto& field : partition_spec.fields()) {
392+
fields_json.push_back(ToJson(field));
393+
}
394+
json[kFields] = fields_json;
395+
return json;
396+
}
397+
398+
Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
399+
const nlohmann::json& json) {
400+
ICEBERG_ASSIGN_OR_RAISE(auto source_id, GetJsonValue<int32_t>(json, kSourceId));
401+
ICEBERG_ASSIGN_OR_RAISE(auto field_id, GetJsonValue<int32_t>(json, kFieldId));
402+
ICEBERG_ASSIGN_OR_RAISE(
403+
auto transform,
404+
GetJsonValue<std::string>(json, kTransform).and_then(TransformFunctionFromString));
405+
ICEBERG_ASSIGN_OR_RAISE(auto name, GetJsonValue<std::string>(json, kName));
406+
return std::make_unique<PartitionField>(source_id, field_id, name,
407+
std::move(transform));
408+
}
409+
410+
Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
411+
const std::shared_ptr<Schema>& schema, const nlohmann::json& json) {
412+
ICEBERG_ASSIGN_OR_RAISE(auto spec_id, GetJsonValue<int32_t>(json, kSpecId));
413+
ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue<nlohmann::json>(json, kFields));
414+
415+
std::vector<PartitionField> partition_fields;
416+
for (const auto& field_json : fields) {
417+
ICEBERG_ASSIGN_OR_RAISE(auto partition_field, PartitionFieldFromJson(field_json));
418+
partition_fields.push_back(std::move(*partition_field));
419+
}
420+
return std::make_unique<PartitionSpec>(schema, spec_id, std::move(partition_fields));
421+
}
422+
373423
} // namespace iceberg

src/iceberg/json_internal.h

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,51 @@ Result<std::unique_ptr<Type>> TypeFromJson(const nlohmann::json& json);
108108
/// \return The Iceberg field or an error if the conversion fails.
109109
Result<std::unique_ptr<SchemaField>> FieldFromJson(const nlohmann::json& json);
110110

111+
/// \brief Serializes a `PartitionField` object to JSON.
112+
///
113+
/// This function converts a `PartitionField` object into a JSON representation.
114+
/// The resulting JSON object includes the transform type, source ID, field ID, and
115+
/// name.
116+
///
117+
/// \param partition_field The `PartitionField` object to be serialized.
118+
/// \return A JSON object representing the `PartitionField` in the form of key-value
119+
/// pairs.
120+
nlohmann::json ToJson(const PartitionField& partition_field);
121+
122+
/// \brief Serializes a `PartitionSpec` object to JSON.
123+
///
124+
/// This function converts a `PartitionSpec` object into a JSON representation.
125+
/// The resulting JSON includes the spec ID and a list of `PartitionField` objects.
126+
/// Each `PartitionField` is serialized as described in the `ToJson(PartitionField)`
127+
/// function.
128+
///
129+
/// \param partition_spec The `PartitionSpec` object to be serialized.
130+
/// \return A JSON object representing the `PartitionSpec` with its order ID and fields
131+
/// array.
132+
nlohmann::json ToJson(const PartitionSpec& partition_spec);
133+
134+
/// \brief Deserializes a JSON object into a `PartitionField` object.
135+
///
136+
/// This function parses the provided JSON and creates a `PartitionField` object.
137+
/// It expects the JSON object to contain keys for the transform, source ID, field ID,
138+
/// and name.
139+
///
140+
/// \param json The JSON object representing a `PartitionField`.
141+
/// \return An `expected` value containing either a `PartitionField` object or an error.
142+
/// If the JSON is malformed or missing expected fields, an error will be returned.
143+
Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
144+
const nlohmann::json& json);
145+
146+
/// \brief Deserializes a JSON object into a `PartitionSpec` object.
147+
///
148+
/// This function parses the provided JSON and creates a `PartitionSpec` object.
149+
/// It expects the JSON object to contain the spec ID and a list of `PartitionField`
150+
/// objects. Each `PartitionField` will be parsed using the `PartitionFieldFromJson`
151+
/// function.
152+
///
153+
/// \param json The JSON object representing a `PartitionSpec`.
154+
/// \return An `expected` value containing either a `PartitionSpec` object or an error. If
155+
/// the JSON is malformed or missing expected fields, an error will be returned.
156+
Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
157+
const std::shared_ptr<Schema>& schema, const nlohmann::json& json);
111158
} // namespace iceberg

src/iceberg/type_fwd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ class LongType;
6969
class ListType;
7070
class MapType;
7171
class NestedType;
72+
class PartitionField;
73+
class PartitionSpec;
7274
class PrimitiveType;
7375
class Schema;
7476
class SchemaField;
@@ -94,7 +96,6 @@ class Transaction;
9496
/// ----------------------------------------------------------------------------
9597

9698
class HistoryEntry;
97-
class PartitionSpec;
9899
class Snapshot;
99100
class SortField;
100101
class SortOrder;

test/json_internal_test.cc

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <gtest/gtest.h>
2626
#include <nlohmann/json.hpp>
2727

28+
#include "iceberg/partition_spec.h"
2829
#include "iceberg/schema.h"
2930
#include "iceberg/sort_field.h"
3031
#include "iceberg/sort_order.h"
@@ -48,6 +49,12 @@ expected<std::unique_ptr<SortOrder>, Error> FromJsonHelper(const nlohmann::json&
4849
return SortOrderFromJson(json);
4950
}
5051

52+
template <>
53+
expected<std::unique_ptr<PartitionField>, Error> FromJsonHelper(
54+
const nlohmann::json& json) {
55+
return PartitionFieldFromJson(json);
56+
}
57+
5158
// Helper function to reduce duplication in testing
5259
template <typename T>
5360
void TestJsonConversion(const T& obj, const nlohmann::json& expected_json) {
@@ -94,4 +101,51 @@ TEST(JsonInternalTest, SortOrder) {
94101
TestJsonConversion(sort_order, expected_sort_order);
95102
}
96103

104+
TEST(JsonInternalTest, PartitionField) {
105+
auto identity_transform = std::make_shared<IdentityTransformFunction>();
106+
PartitionField field(3, 101, "region", identity_transform);
107+
nlohmann::json expected_json =
108+
R"({"source-id":3,"field-id":101,"transform":"identity","name":"region"})"_json;
109+
TestJsonConversion(field, expected_json);
110+
}
111+
112+
TEST(JsonPartitionTest, PartitionFieldFromJsonMissingField) {
113+
nlohmann::json invalid_json =
114+
R"({"field-id":101,"transform":"identity","name":"region"})"_json;
115+
// missing source-id
116+
117+
auto result = PartitionFieldFromJson(invalid_json);
118+
EXPECT_FALSE(result.has_value());
119+
EXPECT_EQ(result.error().kind, ErrorKind::kJsonParseError);
120+
}
121+
122+
TEST(JsonPartitionTest, PartitionSpec) {
123+
auto schema = std::make_shared<Schema>(
124+
100, std::vector<SchemaField>{
125+
SchemaField(3, "region", std::make_shared<StringType>(), false),
126+
SchemaField(5, "ts", std::make_shared<LongType>(), false)});
127+
128+
auto identity_transform = std::make_shared<IdentityTransformFunction>();
129+
PartitionSpec spec(schema, 1,
130+
{PartitionField(3, 101, "region", identity_transform),
131+
PartitionField(5, 102, "ts", identity_transform)});
132+
auto json = ToJson(spec);
133+
nlohmann::json expected_json = R"({"spec-id": 1,
134+
"fields": [
135+
{"source-id": 3,
136+
"field-id": 101,
137+
"transform": "identity",
138+
"name": "region"},
139+
{"source-id": 5,
140+
"field-id": 102,
141+
"transform": "identity",
142+
"name": "ts"}]})"_json;
143+
144+
EXPECT_EQ(json, expected_json);
145+
146+
auto parsed_spec_result = PartitionSpecFromJson(schema, json);
147+
ASSERT_TRUE(parsed_spec_result.has_value()) << parsed_spec_result.error().message;
148+
EXPECT_EQ(spec, *parsed_spec_result.value());
149+
}
150+
97151
} // namespace iceberg

0 commit comments

Comments
 (0)