Skip to content

Commit 9c74fd2

Browse files
authored
feat: add json serde for table metadata (#75)
1 parent 32fde8e commit 9c74fd2

File tree

9 files changed

+892
-172
lines changed

9 files changed

+892
-172
lines changed

src/iceberg/json_internal.cc

Lines changed: 630 additions & 55 deletions
Large diffs are not rendered by default.

src/iceberg/json_internal.h

Lines changed: 111 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#include <nlohmann/json_fwd.hpp>
2525

2626
#include "iceberg/result.h"
27+
#include "iceberg/statistics_file.h"
28+
#include "iceberg/table_metadata.h"
2729
#include "iceberg/type_fwd.h"
2830

2931
namespace iceberg {
@@ -38,16 +40,6 @@ namespace iceberg {
3840
/// \return A JSON object representing the `SortField` in the form of key-value pairs.
3941
nlohmann::json ToJson(const SortField& sort_field);
4042

41-
/// \brief Serializes a `SortOrder` object to JSON.
42-
///
43-
/// This function converts a `SortOrder` object into a JSON representation.
44-
/// The resulting JSON includes the order ID and a list of `SortField` objects.
45-
/// Each `SortField` is serialized as described in the `ToJson(SortField)` function.
46-
///
47-
/// \param sort_order The `SortOrder` object to be serialized.
48-
/// \return A JSON object representing the `SortOrder` with its order ID and fields array.
49-
nlohmann::json ToJson(const SortOrder& sort_order);
50-
5143
/// \brief Deserializes a JSON object into a `SortField` object.
5244
///
5345
/// This function parses the provided JSON and creates a `SortField` object.
@@ -59,6 +51,16 @@ nlohmann::json ToJson(const SortOrder& sort_order);
5951
/// JSON is malformed or missing expected fields, an error will be returned.
6052
Result<std::unique_ptr<SortField>> SortFieldFromJson(const nlohmann::json& json);
6153

54+
/// \brief Serializes a `SortOrder` object to JSON.
55+
///
56+
/// This function converts a `SortOrder` object into a JSON representation.
57+
/// The resulting JSON includes the order ID and a list of `SortField` objects.
58+
/// Each `SortField` is serialized as described in the `ToJson(SortField)` function.
59+
///
60+
/// \param sort_order The `SortOrder` object to be serialized.
61+
/// \return A JSON object representing the `SortOrder` with its order ID and fields array.
62+
nlohmann::json ToJson(const SortOrder& sort_order);
63+
6264
/// \brief Deserializes a JSON object into a `SortOrder` object.
6365
///
6466
/// This function parses the provided JSON and creates a `SortOrder` object.
@@ -74,44 +76,32 @@ Result<std::unique_ptr<SortOrder>> SortOrderFromJson(const nlohmann::json& json)
7476
///
7577
/// \param[in] schema The Iceberg schema to convert.
7678
/// \return The JSON representation of the schema.
77-
nlohmann::json SchemaToJson(const Schema& schema);
78-
79-
/// \brief Convert an Iceberg Type to JSON.
80-
///
81-
/// \param[in] type The Iceberg type to convert.
82-
/// \return The JSON representation of the type.
83-
nlohmann::json TypeToJson(const Type& type);
84-
85-
/// \brief Convert an Iceberg SchemaField to JSON.
86-
///
87-
/// \param[in] field The Iceberg field to convert.
88-
/// \return The JSON representation of the field.
89-
nlohmann::json FieldToJson(const SchemaField& field);
90-
91-
/// \brief Serializes a `SnapshotRef` object to JSON.
92-
///
93-
/// \param[in] snapshot_ref The `SnapshotRef` object to be serialized.
94-
/// \return A JSON object representing the `SnapshotRef`.
95-
nlohmann::json ToJson(const SnapshotRef& snapshot_ref);
96-
97-
/// \brief Serializes a `Snapshot` object to JSON.
98-
///
99-
/// \param[in] snapshot The `Snapshot` object to be serialized.
100-
/// \return A JSON object representing the `snapshot`.
101-
nlohmann::json ToJson(const Snapshot& snapshot);
79+
nlohmann::json ToJson(const Schema& schema);
10280

10381
/// \brief Convert JSON to an Iceberg Schema.
10482
///
10583
/// \param[in] json The JSON representation of the schema.
10684
/// \return The Iceberg schema or an error if the conversion fails.
10785
Result<std::unique_ptr<Schema>> SchemaFromJson(const nlohmann::json& json);
10886

87+
/// \brief Convert an Iceberg Type to JSON.
88+
///
89+
/// \param[in] type The Iceberg type to convert.
90+
/// \return The JSON representation of the type.
91+
nlohmann::json ToJson(const Type& type);
92+
10993
/// \brief Convert JSON to an Iceberg Type.
11094
///
11195
/// \param[in] json The JSON representation of the type.
11296
/// \return The Iceberg type or an error if the conversion fails.
11397
Result<std::unique_ptr<Type>> TypeFromJson(const nlohmann::json& json);
11498

99+
/// \brief Convert an Iceberg SchemaField to JSON.
100+
///
101+
/// \param[in] field The Iceberg field to convert.
102+
/// \return The JSON representation of the field.
103+
nlohmann::json ToJson(const SchemaField& field);
104+
115105
/// \brief Convert JSON to an Iceberg SchemaField.
116106
///
117107
/// \param[in] json The JSON representation of the field.
@@ -129,18 +119,6 @@ Result<std::unique_ptr<SchemaField>> FieldFromJson(const nlohmann::json& json);
129119
/// pairs.
130120
nlohmann::json ToJson(const PartitionField& partition_field);
131121

132-
/// \brief Serializes a `PartitionSpec` object to JSON.
133-
///
134-
/// This function converts a `PartitionSpec` object into a JSON representation.
135-
/// The resulting JSON includes the spec ID and a list of `PartitionField` objects.
136-
/// Each `PartitionField` is serialized as described in the `ToJson(PartitionField)`
137-
/// function.
138-
///
139-
/// \param partition_spec The `PartitionSpec` object to be serialized.
140-
/// \return A JSON object representing the `PartitionSpec` with its order ID and fields
141-
/// array.
142-
nlohmann::json ToJson(const PartitionSpec& partition_spec);
143-
144122
/// \brief Deserializes a JSON object into a `PartitionField` object.
145123
///
146124
/// This function parses the provided JSON and creates a `PartitionField` object.
@@ -153,6 +131,18 @@ nlohmann::json ToJson(const PartitionSpec& partition_spec);
153131
Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
154132
const nlohmann::json& json);
155133

134+
/// \brief Serializes a `PartitionSpec` object to JSON.
135+
///
136+
/// This function converts a `PartitionSpec` object into a JSON representation.
137+
/// The resulting JSON includes the spec ID and a list of `PartitionField` objects.
138+
/// Each `PartitionField` is serialized as described in the `ToJson(PartitionField)`
139+
/// function.
140+
///
141+
/// \param partition_spec The `PartitionSpec` object to be serialized.
142+
/// \return A JSON object representing the `PartitionSpec` with its spec ID and fields
143+
/// array.
144+
nlohmann::json ToJson(const PartitionSpec& partition_spec);
145+
156146
/// \brief Deserializes a JSON object into a `PartitionSpec` object.
157147
///
158148
/// This function parses the provided JSON and creates a `PartitionSpec` object.
@@ -166,16 +156,90 @@ Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
166156
Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
167157
const std::shared_ptr<Schema>& schema, const nlohmann::json& json);
168158

159+
/// \brief Serializes a `SnapshotRef` object to JSON.
160+
///
161+
/// \param[in] snapshot_ref The `SnapshotRef` object to be serialized.
162+
/// \return A JSON object representing the `SnapshotRef`.
163+
nlohmann::json ToJson(const SnapshotRef& snapshot_ref);
164+
169165
/// \brief Deserializes a JSON object into a `SnapshotRef` object.
170166
///
171167
/// \param[in] json The JSON object representing a `SnapshotRef`.
172168
/// \return A `SnapshotRef` object or an error if the conversion fails.
173169
Result<std::unique_ptr<SnapshotRef>> SnapshotRefFromJson(const nlohmann::json& json);
174170

171+
/// \brief Serializes a `Snapshot` object to JSON.
172+
///
173+
/// \param[in] snapshot The `Snapshot` object to be serialized.
174+
/// \return A JSON object representing the `Snapshot`.
175+
nlohmann::json ToJson(const Snapshot& snapshot);
176+
175177
/// \brief Deserializes a JSON object into a `Snapshot` object.
176178
///
177179
/// \param[in] json The JSON representation of the snapshot.
178180
/// \return A `Snapshot` object or an error if the conversion fails.
179181
Result<std::unique_ptr<Snapshot>> SnapshotFromJson(const nlohmann::json& json);
180182

183+
/// \brief Serializes a `StatisticsFile` object to JSON.
184+
///
185+
/// \param statistics_file The `StatisticsFile` object to be serialized.
186+
/// \return A JSON object representing the `StatisticsFile`.
187+
nlohmann::json ToJson(const StatisticsFile& statistics_file);
188+
189+
/// \brief Deserializes a JSON object into a `StatisticsFile` object.
190+
///
191+
/// \param json The JSON object representing a `StatisticsFile`.
192+
/// \return A `StatisticsFile` object or an error if the conversion fails.
193+
Result<std::unique_ptr<StatisticsFile>> StatisticsFileFromJson(
194+
const nlohmann::json& json);
195+
196+
/// \brief Serializes a `PartitionStatisticsFile` object to JSON.
197+
///
198+
/// \param partition_statistics_file The `PartitionStatisticsFile` object to be
199+
/// serialized.
/// \return A JSON object representing the `PartitionStatisticsFile`.
200+
nlohmann::json ToJson(const PartitionStatisticsFile& partition_statistics_file);
201+
202+
/// \brief Deserializes a JSON object into a `PartitionStatisticsFile` object.
203+
///
204+
/// \param json The JSON object representing a `PartitionStatisticsFile`.
205+
/// \return A `PartitionStatisticsFile` object or an error if the conversion fails.
206+
Result<std::unique_ptr<PartitionStatisticsFile>> PartitionStatisticsFileFromJson(
207+
const nlohmann::json& json);
208+
209+
/// \brief Serializes a `SnapshotLogEntry` object to JSON.
210+
///
211+
/// \param snapshot_log_entry The `SnapshotLogEntry` object to be serialized.
212+
/// \return A JSON object representing the `SnapshotLogEntry`.
213+
nlohmann::json ToJson(const SnapshotLogEntry& snapshot_log_entry);
214+
215+
/// \brief Deserializes a JSON object into a `SnapshotLogEntry` object.
216+
///
217+
/// \param json The JSON object representing a `SnapshotLogEntry`.
218+
/// \return A `SnapshotLogEntry` object or an error if the conversion fails.
219+
Result<SnapshotLogEntry> SnapshotLogEntryFromJson(const nlohmann::json& json);
220+
221+
/// \brief Serializes a `MetadataLogEntry` object to JSON.
222+
///
223+
/// \param metadata_log_entry The `MetadataLogEntry` object to be serialized.
224+
/// \return A JSON object representing the `MetadataLogEntry`.
225+
nlohmann::json ToJson(const MetadataLogEntry& metadata_log_entry);
226+
227+
/// \brief Deserializes a JSON object into a `MetadataLogEntry` object.
228+
///
229+
/// \param json The JSON object representing a `MetadataLogEntry`.
230+
/// \return A `MetadataLogEntry` object or an error if the conversion fails.
231+
Result<MetadataLogEntry> MetadataLogEntryFromJson(const nlohmann::json& json);
232+
233+
/// \brief Serializes a `TableMetadata` object to JSON.
234+
///
235+
/// \param table_metadata The `TableMetadata` object to be serialized.
236+
/// \return A JSON object representing the `TableMetadata`.
237+
nlohmann::json ToJson(const TableMetadata& table_metadata);
238+
239+
/// \brief Deserializes a JSON object into a `TableMetadata` object.
240+
///
241+
/// \param json The JSON object representing a `TableMetadata`.
242+
/// \return A `TableMetadata` object or an error if the conversion fails.
243+
Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::json& json);
244+
181245
} // namespace iceberg

src/iceberg/result.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#pragma once
2121

22+
#include <format>
2223
#include <string>
2324

2425
#include "iceberg/expected.h"
@@ -61,4 +62,20 @@ using Result = expected<T, E>;
6162

6263
using Status = Result<void>;
6364

65+
/// \brief Create an unexpected error with kNotImplemented
///
/// The error message is produced by `std::format(fmt, args...)`.
///
/// \tparam Args Types of the format arguments.
/// \param fmt Format string; `std::format_string<Args...>` ties it to `Args`.
/// \param args Arguments forwarded to `std::format`.
/// \return An `unexpected<Error>` whose `kind` is `ErrorKind::kNotImplemented` and
/// whose `message` is the formatted text.
66+
template <typename... Args>
67+
auto NotImplementedError(const std::format_string<Args...> fmt, Args&&... args)
68+
-> unexpected<Error> {
69+
return unexpected<Error>({.kind = ErrorKind::kNotImplemented,
70+
.message = std::format(fmt, std::forward<Args>(args)...)});
71+
}
72+
73+
/// \brief Create an unexpected error with kJsonParseError
///
/// The error message is produced by `std::format(fmt, args...)`.
///
/// \tparam Args Types of the format arguments.
/// \param fmt Format string; `std::format_string<Args...>` ties it to `Args`.
/// \param args Arguments forwarded to `std::format`.
/// \return An `unexpected<Error>` whose `kind` is `ErrorKind::kJsonParseError` and
/// whose `message` is the formatted text.
74+
template <typename... Args>
75+
auto JsonParseError(const std::format_string<Args...> fmt, Args&&... args)
76+
-> unexpected<Error> {
77+
return unexpected<Error>({.kind = ErrorKind::kJsonParseError,
78+
.message = std::format(fmt, std::forward<Args>(args)...)});
79+
}
80+
6481
} // namespace iceberg

src/iceberg/statistics_file.cc

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,26 @@
2323

2424
namespace iceberg {
2525

26-
bool BlobMetadata::Equals(const BlobMetadata& other) const {
27-
return type == other.type && source_snapshot_id == other.source_snapshot_id &&
28-
source_snapshot_sequence_number == other.source_snapshot_sequence_number &&
29-
fields == other.fields && properties == other.properties;
30-
}
31-
32-
std::string BlobMetadata::ToString() const {
26+
std::string ToString(const BlobMetadata& blob_metadata) {
3327
std::string repr = "BlobMetadata[";
3428
std::format_to(std::back_inserter(repr),
35-
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},", type,
36-
source_snapshot_id, source_snapshot_sequence_number);
29+
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},",
30+
blob_metadata.type, blob_metadata.source_snapshot_id,
31+
blob_metadata.source_snapshot_sequence_number);
3732
std::format_to(std::back_inserter(repr), "fields=[");
38-
for (auto iter = fields.cbegin(); iter != fields.cend(); ++iter) {
39-
if (iter != fields.cbegin()) {
33+
for (auto iter = blob_metadata.fields.cbegin(); iter != blob_metadata.fields.cend();
34+
++iter) {
35+
if (iter != blob_metadata.fields.cbegin()) {
4036
std::format_to(std::back_inserter(repr), ",{}", *iter);
4137
} else {
4238
std::format_to(std::back_inserter(repr), "{}", *iter);
4339
}
4440
}
4541
std::format_to(std::back_inserter(repr), "],properties=[");
46-
for (auto iter = properties.cbegin(); iter != properties.cend(); ++iter) {
42+
for (auto iter = blob_metadata.properties.cbegin();
43+
iter != blob_metadata.properties.cend(); ++iter) {
4744
const auto& [key, value] = *iter;
48-
if (iter != properties.cbegin()) {
45+
if (iter != blob_metadata.properties.cbegin()) {
4946
std::format_to(std::back_inserter(repr), ",{}:{}", key, value);
5047
} else {
5148
std::format_to(std::back_inserter(repr), "{}:{}", key, value);
@@ -55,28 +52,32 @@ std::string BlobMetadata::ToString() const {
5552
return repr;
5653
}
5754

58-
bool StatisticsFile::Equals(const StatisticsFile& other) const {
59-
return snapshot_id == other.snapshot_id && path == other.path &&
60-
file_size_in_bytes == other.file_size_in_bytes &&
61-
file_footer_size_in_bytes == other.file_footer_size_in_bytes &&
62-
blob_metadata == other.blob_metadata;
63-
}
64-
65-
std::string StatisticsFile::ToString() const {
55+
std::string ToString(const StatisticsFile& statistics_file) {
6656
std::string repr = "StatisticsFile[";
6757
std::format_to(std::back_inserter(repr),
6858
"snapshotId={},path={},fileSizeInBytes={},fileFooterSizeInBytes={},",
69-
snapshot_id, path, file_size_in_bytes, file_footer_size_in_bytes);
59+
statistics_file.snapshot_id, statistics_file.path,
60+
statistics_file.file_size_in_bytes,
61+
statistics_file.file_footer_size_in_bytes);
7062
std::format_to(std::back_inserter(repr), "blobMetadata=[");
71-
for (auto iter = blob_metadata.cbegin(); iter != blob_metadata.cend(); ++iter) {
72-
if (iter != blob_metadata.cbegin()) {
73-
std::format_to(std::back_inserter(repr), ",{}", iter->ToString());
63+
for (auto iter = statistics_file.blob_metadata.cbegin();
64+
iter != statistics_file.blob_metadata.cend(); ++iter) {
65+
if (iter != statistics_file.blob_metadata.cbegin()) {
66+
std::format_to(std::back_inserter(repr), ",{}", ToString(*iter));
7467
} else {
75-
std::format_to(std::back_inserter(repr), "{}", iter->ToString());
68+
std::format_to(std::back_inserter(repr), "{}", ToString(*iter));
7669
}
7770
}
7871
repr += "]]";
7972
return repr;
8073
}
8174

75+
/// \brief Render a `PartitionStatisticsFile` as a human-readable string.
///
/// The output has the form
/// `PartitionStatisticsFile[snapshotId=<id>,path=<path>,fileSizeInBytes=<size>]`.
///
/// \param partition_statistics_file The object to describe.
/// \return The string representation, with a balanced closing bracket and no
/// separator after the last field.
std::string ToString(const PartitionStatisticsFile& partition_statistics_file) {
  // Build the whole representation in one format call: the previous version
  // appended a trailing "," after the last field and never emitted the closing "]".
  return std::format(
      "PartitionStatisticsFile[snapshotId={},path={},fileSizeInBytes={}]",
      partition_statistics_file.snapshot_id, partition_statistics_file.path,
      partition_statistics_file.file_size_in_bytes);
}
82+
8283
} // namespace iceberg

0 commit comments

Comments
 (0)