Skip to content

Commit 1f4447a

Browse files
committed
feat: add json serde for table metadata
1 parent a5bcd45 commit 1f4447a

File tree

9 files changed

+896
-172
lines changed

9 files changed

+896
-172
lines changed

src/iceberg/json_internal.cc

Lines changed: 633 additions & 55 deletions
Large diffs are not rendered by default.

src/iceberg/json_internal.h

Lines changed: 112 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
#include <nlohmann/json_fwd.hpp>
2525

2626
#include "iceberg/result.h"
27+
#include "iceberg/snapshot.h"
28+
#include "iceberg/statistics_file.h"
29+
#include "iceberg/table_metadata.h"
2730
#include "iceberg/type_fwd.h"
2831

2932
namespace iceberg {
@@ -38,16 +41,6 @@ namespace iceberg {
3841
/// \return A JSON object representing the `SortField` in the form of key-value pairs.
3942
nlohmann::json ToJson(const SortField& sort_field);
4043

41-
/// \brief Serializes a `SortOrder` object to JSON.
42-
///
43-
/// This function converts a `SortOrder` object into a JSON representation.
44-
/// The resulting JSON includes the order ID and a list of `SortField` objects.
45-
/// Each `SortField` is serialized as described in the `ToJson(SortField)` function.
46-
///
47-
/// \param sort_order The `SortOrder` object to be serialized.
48-
/// \return A JSON object representing the `SortOrder` with its order ID and fields array.
49-
nlohmann::json ToJson(const SortOrder& sort_order);
50-
5144
/// \brief Deserializes a JSON object into a `SortField` object.
5245
///
5346
/// This function parses the provided JSON and creates a `SortField` object.
@@ -59,6 +52,16 @@ nlohmann::json ToJson(const SortOrder& sort_order);
5952
/// JSON is malformed or missing expected fields, an error will be returned.
6053
Result<std::unique_ptr<SortField>> SortFieldFromJson(const nlohmann::json& json);
6154

55+
/// \brief Serializes a `SortOrder` object to JSON.
56+
///
57+
/// This function converts a `SortOrder` object into a JSON representation.
58+
/// The resulting JSON includes the order ID and a list of `SortField` objects.
59+
/// Each `SortField` is serialized as described in the `ToJson(SortField)` function.
60+
///
61+
/// \param sort_order The `SortOrder` object to be serialized.
62+
/// \return A JSON object representing the `SortOrder` with its order ID and fields array.
63+
nlohmann::json ToJson(const SortOrder& sort_order);
64+
6265
/// \brief Deserializes a JSON object into a `SortOrder` object.
6366
///
6467
/// This function parses the provided JSON and creates a `SortOrder` object.
@@ -74,44 +77,32 @@ Result<std::unique_ptr<SortOrder>> SortOrderFromJson(const nlohmann::json& json)
7477
///
7578
/// \param[in] schema The Iceberg schema to convert.
7679
/// \return The JSON representation of the schema.
77-
nlohmann::json SchemaToJson(const Schema& schema);
78-
79-
/// \brief Convert an Iceberg Type to JSON.
80-
///
81-
/// \param[in] type The Iceberg type to convert.
82-
/// \return The JSON representation of the type.
83-
nlohmann::json TypeToJson(const Type& type);
84-
85-
/// \brief Convert an Iceberg SchemaField to JSON.
86-
///
87-
/// \param[in] field The Iceberg field to convert.
88-
/// \return The JSON representation of the field.
89-
nlohmann::json FieldToJson(const SchemaField& field);
90-
91-
/// \brief Serializes a `SnapshotRef` object to JSON.
92-
///
93-
/// \param[in] snapshot_ref The `SnapshotRef` object to be serialized.
94-
/// \return A JSON object representing the `SnapshotRef`.
95-
nlohmann::json ToJson(const SnapshotRef& snapshot_ref);
96-
97-
/// \brief Serializes a `Snapshot` object to JSON.
98-
///
99-
/// \param[in] snapshot The `Snapshot` object to be serialized.
100-
/// \return A JSON object representing the `snapshot`.
101-
nlohmann::json ToJson(const Snapshot& snapshot);
80+
nlohmann::json ToJson(const Schema& schema);
10281

10382
/// \brief Convert JSON to an Iceberg Schema.
10483
///
10584
/// \param[in] json The JSON representation of the schema.
10685
/// \return The Iceberg schema or an error if the conversion fails.
10786
Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json);
10887

88+
/// \brief Convert an Iceberg Type to JSON.
89+
///
90+
/// \param[in] type The Iceberg type to convert.
91+
/// \return The JSON representation of the type.
92+
nlohmann::json ToJson(const Type& type);
93+
10994
/// \brief Convert JSON to an Iceberg Type.
11095
///
11196
/// \param[in] json The JSON representation of the type.
11297
/// \return The Iceberg type or an error if the conversion fails.
11398
Result<std::unique_ptr<Type>> TypeFromJson(const nlohmann::json& json);
11499

100+
/// \brief Convert an Iceberg SchemaField to JSON.
101+
///
102+
/// \param[in] field The Iceberg field to convert.
103+
/// \return The JSON representation of the field.
104+
nlohmann::json ToJson(const SchemaField& field);
105+
115106
/// \brief Convert JSON to an Iceberg SchemaField.
116107
///
117108
/// \param[in] json The JSON representation of the field.
@@ -129,18 +120,6 @@ Result<std::unique_ptr<SchemaField>> FieldFromJson(const nlohmann::json& json);
129120
/// pairs.
130121
nlohmann::json ToJson(const PartitionField& partition_field);
131122

132-
/// \brief Serializes a `PartitionSpec` object to JSON.
133-
///
134-
/// This function converts a `PartitionSpec` object into a JSON representation.
135-
/// The resulting JSON includes the spec ID and a list of `PartitionField` objects.
136-
/// Each `PartitionField` is serialized as described in the `ToJson(PartitionField)`
137-
/// function.
138-
///
139-
/// \param partition_spec The `PartitionSpec` object to be serialized.
140-
/// \return A JSON object representing the `PartitionSpec` with its order ID and fields
141-
/// array.
142-
nlohmann::json ToJson(const PartitionSpec& partition_spec);
143-
144123
/// \brief Deserializes a JSON object into a `PartitionField` object.
145124
///
146125
/// This function parses the provided JSON and creates a `PartitionField` object.
@@ -153,6 +132,18 @@ nlohmann::json ToJson(const PartitionSpec& partition_spec);
153132
Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
154133
const nlohmann::json& json);
155134

135+
/// \brief Serializes a `PartitionSpec` object to JSON.
136+
///
137+
/// This function converts a `PartitionSpec` object into a JSON representation.
138+
/// The resulting JSON includes the spec ID and a list of `PartitionField` objects.
139+
/// Each `PartitionField` is serialized as described in the `ToJson(PartitionField)`
140+
/// function.
141+
///
142+
/// \param partition_spec The `PartitionSpec` object to be serialized.
143+
/// \return A JSON object representing the `PartitionSpec` with its spec ID and fields
144+
/// array.
145+
nlohmann::json ToJson(const PartitionSpec& partition_spec);
146+
156147
/// \brief Deserializes a JSON object into a `PartitionSpec` object.
157148
///
158149
/// This function parses the provided JSON and creates a `PartitionSpec` object.
@@ -166,16 +157,90 @@ Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
166157
Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
167158
const std::shared_ptr<Schema>& schema, const nlohmann::json& json);
168159

160+
/// \brief Serializes a `SnapshotRef` object to JSON.
161+
///
162+
/// \param[in] snapshot_ref The `SnapshotRef` object to be serialized.
163+
/// \return A JSON object representing the `SnapshotRef`.
164+
nlohmann::json ToJson(const SnapshotRef& snapshot_ref);
165+
169166
/// \brief Deserializes a JSON object into a `SnapshotRef` object.
170167
///
171168
/// \param[in] json The JSON object representing a `SnapshotRef`.
172169
/// \return A `SnapshotRef` object or an error if the conversion fails.
173170
Result<std::unique_ptr<SnapshotRef>> SnapshotRefFromJson(const nlohmann::json& json);
174171

172+
/// \brief Serializes a `Snapshot` object to JSON.
173+
///
174+
/// \param[in] snapshot The `Snapshot` object to be serialized.
175+
/// \return A JSON object representing the `Snapshot`.
176+
nlohmann::json ToJson(const Snapshot& snapshot);
177+
175178
/// \brief Deserializes a JSON object into a `Snapshot` object.
176179
///
177180
/// \param[in] json The JSON representation of the snapshot.
178181
/// \return A `Snapshot` object or an error if the conversion fails.
179182
Result<std::unique_ptr<Snapshot>> SnapshotFromJson(const nlohmann::json& json);
180183

184+
/// \brief Serializes a `StatisticsFile` object to JSON.
185+
///
186+
/// \param statistics_file The `StatisticsFile` object to be serialized.
187+
/// \return A JSON object representing the `StatisticsFile`.
188+
nlohmann::json ToJson(const StatisticsFile& statistics_file);
189+
190+
/// \brief Deserializes a JSON object into a `StatisticsFile` object.
191+
///
192+
/// \param json The JSON object representing a `StatisticsFile`.
193+
/// \return A `StatisticsFile` object or an error if the conversion fails.
194+
Result<std::unique_ptr<StatisticsFile>> StatisticsFileFromJson(
195+
const nlohmann::json& json);
196+
197+
/// \brief Serializes a `PartitionStatisticsFile` object to JSON.
198+
///
199+
/// \param partition_statistics_file The `PartitionStatisticsFile` to be serialized.
200+
/// \return A JSON object representing the `PartitionStatisticsFile`.
201+
nlohmann::json ToJson(const PartitionStatisticsFile& partition_statistics_file);
202+
203+
/// \brief Deserializes a JSON object into a `PartitionStatisticsFile` object.
204+
///
205+
/// \param json The JSON object representing a `PartitionStatisticsFile`.
206+
/// \return A `PartitionStatisticsFile` object or an error if the conversion fails.
207+
Result<std::unique_ptr<PartitionStatisticsFile>> PartitionStatisticsFileFromJson(
208+
const nlohmann::json& json);
209+
210+
/// \brief Serializes a `SnapshotLogEntry` object to JSON.
211+
///
212+
/// \param snapshot_log_entry The `SnapshotLogEntry` object to be serialized.
213+
/// \return A JSON object representing the `SnapshotLogEntry`.
214+
nlohmann::json ToJson(const SnapshotLogEntry& snapshot_log_entry);
215+
216+
/// \brief Deserializes a JSON object into a `SnapshotLogEntry` object.
217+
///
218+
/// \param json The JSON object representing a `SnapshotLogEntry`.
219+
/// \return A `SnapshotLogEntry` object or an error if the conversion fails.
220+
Result<SnapshotLogEntry> SnapshotLogEntryFromJson(const nlohmann::json& json);
221+
222+
/// \brief Serializes a `MetadataLogEntry` object to JSON.
223+
///
224+
/// \param metadata_log_entry The `MetadataLogEntry` object to be serialized.
225+
/// \return A JSON object representing the `MetadataLogEntry`.
226+
nlohmann::json ToJson(const MetadataLogEntry& metadata_log_entry);
227+
228+
/// \brief Deserializes a JSON object into a `MetadataLogEntry` object.
229+
///
230+
/// \param json The JSON object representing a `MetadataLogEntry`.
231+
/// \return A `MetadataLogEntry` object or an error if the conversion fails.
232+
Result<MetadataLogEntry> MetadataLogEntryFromJson(const nlohmann::json& json);
233+
234+
/// \brief Serializes a `TableMetadata` object to JSON.
235+
///
236+
/// \param table_metadata The `TableMetadata` object to be serialized.
237+
/// \return A JSON object representing the `TableMetadata`.
238+
nlohmann::json ToJson(const TableMetadata& table_metadata);
239+
240+
/// \brief Deserializes a JSON object into a `TableMetadata` object.
241+
///
242+
/// \param json The JSON object representing a `TableMetadata`.
243+
/// \return A `TableMetadata` object or an error if the conversion fails.
244+
Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::json& json);
245+
181246
} // namespace iceberg

src/iceberg/result.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#pragma once
2121

22+
#include <format>
2223
#include <string>
2324

2425
#include "iceberg/expected.h"
@@ -60,4 +61,20 @@ using Result = expected<T, E>;
6061

6162
using Status = Result<void>;
6263

64+
/// \brief Build an `unexpected<Error>` tagged with `ErrorKind::kNotImplemented`.
///
/// \param fmt Format string for the error message, checked against `Args`.
/// \param args Values substituted into `fmt` via `std::format`.
/// \return An `unexpected<Error>` whose message is the formatted text.
template <typename... Args>
unexpected<Error> NotImplementedError(const std::format_string<Args...> fmt,
                                      Args&&... args) {
  Error error{.kind = ErrorKind::kNotImplemented,
              .message = std::format(fmt, std::forward<Args>(args)...)};
  return unexpected<Error>(std::move(error));
}
71+
72+
/// \brief Build an `unexpected<Error>` tagged with `ErrorKind::kJsonParseError`.
///
/// \param fmt Format string for the error message, checked against `Args`.
/// \param args Values substituted into `fmt` via `std::format`.
/// \return An `unexpected<Error>` whose message is the formatted text.
template <typename... Args>
unexpected<Error> JsonParseError(const std::format_string<Args...> fmt,
                                 Args&&... args) {
  Error error{.kind = ErrorKind::kJsonParseError,
              .message = std::format(fmt, std::forward<Args>(args)...)};
  return unexpected<Error>(std::move(error));
}
79+
6380
} // namespace iceberg

src/iceberg/statistics_file.cc

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,26 @@
2323

2424
namespace iceberg {
2525

26-
bool BlobMetadata::Equals(const BlobMetadata& other) const {
27-
return type == other.type && source_snapshot_id == other.source_snapshot_id &&
28-
source_snapshot_sequence_number == other.source_snapshot_sequence_number &&
29-
fields == other.fields && properties == other.properties;
30-
}
31-
32-
std::string BlobMetadata::ToString() const {
26+
std::string ToString(const BlobMetadata& blob_metadata) {
3327
std::string repr = "BlobMetadata[";
3428
std::format_to(std::back_inserter(repr),
35-
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},", type,
36-
source_snapshot_id, source_snapshot_sequence_number);
29+
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},",
30+
blob_metadata.type, blob_metadata.source_snapshot_id,
31+
blob_metadata.source_snapshot_sequence_number);
3732
std::format_to(std::back_inserter(repr), "fields=[");
38-
for (auto iter = fields.cbegin(); iter != fields.cend(); ++iter) {
39-
if (iter != fields.cbegin()) {
33+
for (auto iter = blob_metadata.fields.cbegin(); iter != blob_metadata.fields.cend();
34+
++iter) {
35+
if (iter != blob_metadata.fields.cbegin()) {
4036
std::format_to(std::back_inserter(repr), ",{}", *iter);
4137
} else {
4238
std::format_to(std::back_inserter(repr), "{}", *iter);
4339
}
4440
}
4541
std::format_to(std::back_inserter(repr), "],properties=[");
46-
for (auto iter = properties.cbegin(); iter != properties.cend(); ++iter) {
42+
for (auto iter = blob_metadata.properties.cbegin();
43+
iter != blob_metadata.properties.cend(); ++iter) {
4744
const auto& [key, value] = *iter;
48-
if (iter != properties.cbegin()) {
45+
if (iter != blob_metadata.properties.cbegin()) {
4946
std::format_to(std::back_inserter(repr), ",{}:{}", key, value);
5047
} else {
5148
std::format_to(std::back_inserter(repr), "{}:{}", key, value);
@@ -55,28 +52,32 @@ std::string BlobMetadata::ToString() const {
5552
return repr;
5653
}
5754

58-
bool StatisticsFile::Equals(const StatisticsFile& other) const {
59-
return snapshot_id == other.snapshot_id && path == other.path &&
60-
file_size_in_bytes == other.file_size_in_bytes &&
61-
file_footer_size_in_bytes == other.file_footer_size_in_bytes &&
62-
blob_metadata == other.blob_metadata;
63-
}
64-
65-
std::string StatisticsFile::ToString() const {
55+
std::string ToString(const StatisticsFile& statistics_file) {
6656
std::string repr = "StatisticsFile[";
6757
std::format_to(std::back_inserter(repr),
6858
"snapshotId={},path={},fileSizeInBytes={},fileFooterSizeInBytes={},",
69-
snapshot_id, path, file_size_in_bytes, file_footer_size_in_bytes);
59+
statistics_file.snapshot_id, statistics_file.path,
60+
statistics_file.file_size_in_bytes,
61+
statistics_file.file_footer_size_in_bytes);
7062
std::format_to(std::back_inserter(repr), "blobMetadata=[");
71-
for (auto iter = blob_metadata.cbegin(); iter != blob_metadata.cend(); ++iter) {
72-
if (iter != blob_metadata.cbegin()) {
73-
std::format_to(std::back_inserter(repr), ",{}", iter->ToString());
63+
for (auto iter = statistics_file.blob_metadata.cbegin();
64+
iter != statistics_file.blob_metadata.cend(); ++iter) {
65+
if (iter != statistics_file.blob_metadata.cbegin()) {
66+
std::format_to(std::back_inserter(repr), ",{}", ToString(*iter));
7467
} else {
75-
std::format_to(std::back_inserter(repr), "{}", iter->ToString());
68+
std::format_to(std::back_inserter(repr), "{}", ToString(*iter));
7669
}
7770
}
7871
repr += "]]";
7972
return repr;
8073
}
8174

75+
/// \brief Render a `PartitionStatisticsFile` as a single-line string.
///
/// The result has the form
/// `PartitionStatisticsFile[snapshotId=<id>,path=<path>,fileSizeInBytes=<size>]`.
///
/// \param partition_statistics_file The object to describe.
/// \return The formatted representation.
std::string ToString(const PartitionStatisticsFile& partition_statistics_file) {
  // fileSizeInBytes is the last field, so no trailing comma; the closing ']'
  // balances the '[' that follows the type name.
  return std::format("PartitionStatisticsFile[snapshotId={},path={},fileSizeInBytes={}]",
                     partition_statistics_file.snapshot_id,
                     partition_statistics_file.path,
                     partition_statistics_file.file_size_in_bytes);
}
82+
8283
} // namespace iceberg

0 commit comments

Comments
 (0)