Skip to content

Commit 2d55ea2

Browse files
committed
feat: add json serde for table metadata
1 parent 5ba0a84 commit 2d55ea2

File tree

9 files changed

+864
-84
lines changed

9 files changed

+864
-84
lines changed

src/iceberg/json_internal.cc

Lines changed: 643 additions & 16 deletions
Large diffs are not rendered by default.

src/iceberg/json_internal.h

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@
2424
#include <nlohmann/json_fwd.hpp>
2525

2626
#include "iceberg/result.h"
27+
#include "iceberg/snapshot.h"
28+
#include "iceberg/statistics_file.h"
29+
#include "iceberg/table_metadata.h"
2730
#include "iceberg/type_fwd.h"
2831

2932
namespace iceberg {
@@ -74,19 +77,19 @@ Result<std::unique_ptr<SortOrder>> SortOrderFromJson(const nlohmann::json& json)
7477
///
7578
/// \param[in] schema The Iceberg schema to convert.
7679
/// \return The JSON representation of the schema.
77-
nlohmann::json SchemaToJson(const Schema& schema);
80+
nlohmann::json ToJson(const Schema& schema);
7881

7982
/// \brief Convert an Iceberg Type to JSON.
8083
///
8184
/// \param[in] type The Iceberg type to convert.
8285
/// \return The JSON representation of the type.
83-
nlohmann::json TypeToJson(const Type& type);
86+
nlohmann::json ToJson(const Type& type);
8487

8588
/// \brief Convert an Iceberg SchemaField to JSON.
8689
///
8790
/// \param[in] field The Iceberg field to convert.
8891
/// \return The JSON representation of the field.
89-
nlohmann::json FieldToJson(const SchemaField& field);
92+
nlohmann::json ToJson(const SchemaField& field);
9093

9194
/// \brief Convert JSON to an Iceberg Schema.
9295
///
@@ -153,4 +156,73 @@ Result<std::unique_ptr<PartitionField>> PartitionFieldFromJson(
153156
/// the JSON is malformed or missing expected fields, an error will be returned.
154157
Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
155158
const std::shared_ptr<Schema>& schema, const nlohmann::json& json);
159+
160+
/// \brief Serializes a `StatisticsFile` object to JSON.
161+
///
162+
/// \param statistics_file The `StatisticsFile` object to be serialized.
163+
/// \return A JSON object representing the `StatisticsFile`.
164+
nlohmann::json ToJson(const StatisticsFile& statistics_file);
165+
166+
/// \brief Deserializes a JSON object into a `StatisticsFile` object.
167+
///
168+
/// \param json The JSON object representing a `StatisticsFile`.
169+
/// \return A `StatisticsFile` object or an error if the conversion fails.
170+
Result<std::unique_ptr<StatisticsFile>> StatisticsFileFromJson(
171+
const nlohmann::json& json);
172+
173+
/// \brief Serializes a `PartitionStatisticsFile` object to JSON.
174+
///
175+
/// \param partition_statistics_file The `PartitionStatisticsFile` object to serialize.
176+
/// \return A JSON object representing the `PartitionStatisticsFile`.
177+
nlohmann::json ToJson(const PartitionStatisticsFile& partition_statistics_file);
178+
179+
/// \brief Deserializes a JSON object into a `PartitionStatisticsFile` object.
180+
///
181+
/// \param json The JSON object representing a `PartitionStatisticsFile`.
182+
/// \return A `PartitionStatisticsFile` object or an error if the conversion fails.
183+
Result<std::unique_ptr<PartitionStatisticsFile>> PartitionStatisticsFileFromJson(
184+
const nlohmann::json& json);
185+
186+
/// \brief Serializes a `SnapshotLogEntry` object to JSON.
187+
///
188+
/// \param snapshot_log_entry The `SnapshotLogEntry` object to be serialized.
189+
/// \return A JSON object representing the `SnapshotLogEntry`.
190+
nlohmann::json ToJson(const SnapshotLogEntry& snapshot_log_entry);
191+
192+
/// \brief Deserializes a JSON object into a `SnapshotLogEntry` object.
193+
///
194+
/// \param json The JSON object representing a `SnapshotLogEntry`.
195+
/// \return A `SnapshotLogEntry` object or an error if the conversion fails.
196+
Result<SnapshotLogEntry> SnapshotLogEntryFromJson(const nlohmann::json& json);
197+
198+
/// \brief Serializes a `MetadataLogEntry` object to JSON.
199+
///
200+
/// \param metadata_log_entry The `MetadataLogEntry` object to be serialized.
201+
/// \return A JSON object representing the `MetadataLogEntry`.
202+
nlohmann::json ToJson(const MetadataLogEntry& metadata_log_entry);
203+
204+
/// \brief Deserializes a JSON object into a `MetadataLogEntry` object.
205+
///
206+
/// \param json The JSON object representing a `MetadataLogEntry`.
207+
/// \return A `MetadataLogEntry` object or an error if the conversion fails.
208+
Result<MetadataLogEntry> MetadataLogEntryFromJson(const nlohmann::json& json);
209+
210+
/// \brief Serializes a `TableMetadata` object to JSON.
211+
///
212+
/// \param table_metadata The `TableMetadata` object to be serialized.
213+
/// \return A JSON object representing the `TableMetadata`.
214+
nlohmann::json ToJson(const TableMetadata& table_metadata);
215+
216+
/// \brief Deserializes a JSON object into a `TableMetadata` object.
217+
///
218+
/// \param json The JSON object representing a `TableMetadata`.
219+
/// \return A `TableMetadata` object or an error if the conversion fails.
220+
Result<std::unique_ptr<TableMetadata>> TableMetadataFromJson(const nlohmann::json& json);
221+
222+
// Functions below will be available after https://github.com/apache/iceberg-cpp/pull/74
223+
nlohmann::json ToJson(const Snapshot& snapshot);
224+
nlohmann::json ToJson(const SnapshotRef& snapshot_ref);
225+
Result<std::unique_ptr<Snapshot>> SnapshotFromJson(const nlohmann::json& json);
226+
Result<std::unique_ptr<SnapshotRef>> SnapshotRefFromJson(const nlohmann::json& json);
227+
156228
} // namespace iceberg

src/iceberg/result.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#pragma once
2121

22+
#include <format>
2223
#include <string>
2324

2425
#include "iceberg/expected.h"
@@ -60,4 +61,20 @@ using Result = expected<T, E>;
6061

6162
using Status = Result<void>;
6263

64+
/// \brief Build an `unexpected<Error>` whose kind is ErrorKind::kNotImplemented.
///
/// \param fmt Format string for the error message, checked against `Args`.
/// \param args Values substituted into `fmt`.
/// \return An `unexpected<Error>` carrying the formatted message.
template <typename... Args>
unexpected<Error> NotImplementedError(const std::format_string<Args...> fmt,
                                      Args&&... args) {
  Error error{.kind = ErrorKind::kNotImplemented,
              .message = std::format(fmt, std::forward<Args>(args)...)};
  return unexpected<Error>(std::move(error));
}
71+
72+
/// \brief Build an `unexpected<Error>` whose kind is ErrorKind::kJsonParseError.
///
/// \param fmt Format string for the error message, checked against `Args`.
/// \param args Values substituted into `fmt`.
/// \return An `unexpected<Error>` carrying the formatted message.
template <typename... Args>
unexpected<Error> JsonParseError(const std::format_string<Args...> fmt,
                                 Args&&... args) {
  Error error{.kind = ErrorKind::kJsonParseError,
              .message = std::format(fmt, std::forward<Args>(args)...)};
  return unexpected<Error>(std::move(error));
}
79+
6380
} // namespace iceberg

src/iceberg/statistics_file.cc

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,26 @@
2323

2424
namespace iceberg {
2525

26-
bool BlobMetadata::Equals(const BlobMetadata& other) const {
27-
return type == other.type && source_snapshot_id == other.source_snapshot_id &&
28-
source_snapshot_sequence_number == other.source_snapshot_sequence_number &&
29-
fields == other.fields && properties == other.properties;
30-
}
31-
32-
std::string BlobMetadata::ToString() const {
26+
std::string ToString(const BlobMetadata& blob_metadata) {
3327
std::string repr = "BlobMetadata[";
3428
std::format_to(std::back_inserter(repr),
35-
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},", type,
36-
source_snapshot_id, source_snapshot_sequence_number);
29+
"type='{}',sourceSnapshotId={},sourceSnapshotSequenceNumber={},",
30+
blob_metadata.type, blob_metadata.source_snapshot_id,
31+
blob_metadata.source_snapshot_sequence_number);
3732
std::format_to(std::back_inserter(repr), "fields=[");
38-
for (auto iter = fields.cbegin(); iter != fields.cend(); ++iter) {
39-
if (iter != fields.cbegin()) {
33+
for (auto iter = blob_metadata.fields.cbegin(); iter != blob_metadata.fields.cend();
34+
++iter) {
35+
if (iter != blob_metadata.fields.cbegin()) {
4036
std::format_to(std::back_inserter(repr), ",{}", *iter);
4137
} else {
4238
std::format_to(std::back_inserter(repr), "{}", *iter);
4339
}
4440
}
4541
std::format_to(std::back_inserter(repr), "],properties=[");
46-
for (auto iter = properties.cbegin(); iter != properties.cend(); ++iter) {
42+
for (auto iter = blob_metadata.properties.cbegin();
43+
iter != blob_metadata.properties.cend(); ++iter) {
4744
const auto& [key, value] = *iter;
48-
if (iter != properties.cbegin()) {
45+
if (iter != blob_metadata.properties.cbegin()) {
4946
std::format_to(std::back_inserter(repr), ",{}:{}", key, value);
5047
} else {
5148
std::format_to(std::back_inserter(repr), "{}:{}", key, value);
@@ -55,28 +52,32 @@ std::string BlobMetadata::ToString() const {
5552
return repr;
5653
}
5754

58-
bool StatisticsFile::Equals(const StatisticsFile& other) const {
59-
return snapshot_id == other.snapshot_id && path == other.path &&
60-
file_size_in_bytes == other.file_size_in_bytes &&
61-
file_footer_size_in_bytes == other.file_footer_size_in_bytes &&
62-
blob_metadata == other.blob_metadata;
63-
}
64-
65-
std::string StatisticsFile::ToString() const {
55+
/// \brief Returns a string representation of a StatisticsFile.
///
/// The snapshot id, path and size fields are written first, followed by the
/// comma-separated string forms of every entry in `blob_metadata`.
std::string ToString(const StatisticsFile& statistics_file) {
  std::string repr = "StatisticsFile[";
  auto sink = std::back_inserter(repr);
  std::format_to(sink,
                 "snapshotId={},path={},fileSizeInBytes={},fileFooterSizeInBytes={},",
                 statistics_file.snapshot_id, statistics_file.path,
                 statistics_file.file_size_in_bytes,
                 statistics_file.file_footer_size_in_bytes);
  std::format_to(sink, "blobMetadata=[");
  // Only entries after the first are preceded by a comma.
  bool is_first = true;
  for (const auto& blob : statistics_file.blob_metadata) {
    if (is_first) {
      std::format_to(sink, "{}", ToString(blob));
      is_first = false;
    } else {
      std::format_to(sink, ",{}", ToString(blob));
    }
  }
  // Closes both the blobMetadata list and the enclosing StatisticsFile bracket.
  repr += "]]";
  return repr;
}
8174

75+
/// \brief Returns a string representation of a PartitionStatisticsFile.
///
/// The result has the form
/// `PartitionStatisticsFile[snapshotId=...,path=...,fileSizeInBytes=...]`.
///
/// \param partition_statistics_file The partition statistics file to describe.
/// \return The bracketed, comma-separated field listing.
std::string ToString(const PartitionStatisticsFile& partition_statistics_file) {
  // The last field is followed by the closing bracket rather than a separator, so the
  // output is well-formed like the other ToString overloads.
  return std::format("PartitionStatisticsFile[snapshotId={},path={},fileSizeInBytes={}]",
                     partition_statistics_file.snapshot_id,
                     partition_statistics_file.path,
                     partition_statistics_file.file_size_in_bytes);
}
82+
8283
} // namespace iceberg

src/iceberg/statistics_file.h

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,11 @@
2828
#include <vector>
2929

3030
#include "iceberg/iceberg_export.h"
31-
#include "iceberg/util/formattable.h"
3231

3332
namespace iceberg {
3433

3534
/// \brief Metadata about a statistics or index blob
36-
struct ICEBERG_EXPORT BlobMetadata : public util::Formattable {
35+
struct ICEBERG_EXPORT BlobMetadata {
3736
/// Type of the blob
3837
std::string type;
3938
/// ID of the Iceberg table's snapshot the blob was computed from
@@ -47,22 +46,19 @@ struct ICEBERG_EXPORT BlobMetadata : public util::Formattable {
4746

4847
/// \brief Compare two BlobMetadatas for equality.
4948
friend bool operator==(const BlobMetadata& lhs, const BlobMetadata& rhs) {
50-
return lhs.Equals(rhs);
49+
return lhs.type == rhs.type && lhs.source_snapshot_id == rhs.source_snapshot_id &&
50+
lhs.source_snapshot_sequence_number == rhs.source_snapshot_sequence_number &&
51+
lhs.fields == rhs.fields && lhs.properties == rhs.properties;
5152
}
5253

5354
/// \brief Compare two BlobMetadatas for inequality.
5455
friend bool operator!=(const BlobMetadata& lhs, const BlobMetadata& rhs) {
5556
return !(lhs == rhs);
5657
}
57-
58-
std::string ToString() const override;
59-
60-
private:
61-
bool Equals(const BlobMetadata& other) const;
6258
};
6359

6460
/// \brief Represents a statistics file in the Puffin format
65-
struct ICEBERG_EXPORT StatisticsFile : public util::Formattable {
61+
struct ICEBERG_EXPORT StatisticsFile {
6662
/// ID of the Iceberg table's snapshot the statistics file is associated with
6763
int64_t snapshot_id;
6864
/// Fully qualified path to the file
@@ -76,18 +72,16 @@ struct ICEBERG_EXPORT StatisticsFile : public util::Formattable {
7672

7773
/// \brief Compare two StatisticsFiles for equality.
7874
friend bool operator==(const StatisticsFile& lhs, const StatisticsFile& rhs) {
79-
return lhs.Equals(rhs);
75+
return lhs.snapshot_id == rhs.snapshot_id && lhs.path == rhs.path &&
76+
lhs.file_size_in_bytes == rhs.file_size_in_bytes &&
77+
lhs.file_footer_size_in_bytes == rhs.file_footer_size_in_bytes &&
78+
lhs.blob_metadata == rhs.blob_metadata;
8079
}
8180

8281
/// \brief Compare two StatisticsFiles for inequality.
8382
friend bool operator!=(const StatisticsFile& lhs, const StatisticsFile& rhs) {
8483
return !(lhs == rhs);
8584
}
86-
87-
std::string ToString() const override;
88-
89-
private:
90-
bool Equals(const StatisticsFile& other) const;
9185
};
9286

9387
/// \brief Represents a partition statistics file
@@ -99,6 +93,29 @@ struct ICEBERG_EXPORT PartitionStatisticsFile {
9993
std::string path;
10094
/// The size of the partition statistics file in bytes
10195
int64_t file_size_in_bytes;
96+
97+
/// \brief Compare two PartitionStatisticsFiles for equality.
98+
friend bool operator==(const PartitionStatisticsFile& lhs,
99+
const PartitionStatisticsFile& rhs) {
100+
return lhs.snapshot_id == rhs.snapshot_id && lhs.path == rhs.path &&
101+
lhs.file_size_in_bytes == rhs.file_size_in_bytes;
102+
}
103+
104+
/// \brief Compare two PartitionStatisticsFiles for inequality.
105+
friend bool operator!=(const PartitionStatisticsFile& lhs,
106+
const PartitionStatisticsFile& rhs) {
107+
return !(lhs == rhs);
108+
}
102109
};
103110

111+
/// \brief Returns a string representation of a BlobMetadata
112+
ICEBERG_EXPORT std::string ToString(const BlobMetadata& blob_metadata);
113+
114+
/// \brief Returns a string representation of a StatisticsFile
115+
ICEBERG_EXPORT std::string ToString(const StatisticsFile& statistics_file);
116+
117+
/// \brief Returns a string representation of a PartitionStatisticsFile
118+
ICEBERG_EXPORT std::string ToString(
119+
const PartitionStatisticsFile& partition_statistics_file);
120+
104121
} // namespace iceberg

src/iceberg/table_metadata.cc

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,26 @@
2222
#include <format>
2323
#include <string>
2424

25-
#include "iceberg/statistics_file.h"
26-
2725
namespace iceberg {
2826

29-
std::string SnapshotLogEntry::ToString() const {
30-
return std::format("SnapshotLogEntry[timestampMillis={},snapshotId={}]", timestamp_ms,
31-
snapshot_id);
27+
/// \brief Returns a string representation of a SnapshotLogEntry.
///
/// The result has the form `SnapshotLogEntry[timestampMillis=...,snapshotId=...]`.
std::string ToString(const SnapshotLogEntry& entry) {
  const auto& timestamp = entry.timestamp_ms;
  const auto& snapshot_id = entry.snapshot_id;
  return std::format("SnapshotLogEntry[timestampMillis={},snapshotId={}]", timestamp,
                     snapshot_id);
}
31+
32+
/// \brief Returns a string representation of a MetadataLogEntry.
///
/// The result has the form `MetadataLogEntry[timestampMillis=...,file=...]`.
std::string ToString(const MetadataLogEntry& entry) {
  const auto& timestamp = entry.timestamp_ms;
  const auto& file = entry.metadata_file;
  return std::format("MetadataLogEntry[timestampMillis={},file={}]", timestamp, file);
}
36+
37+
/// \brief Convert a millisecond count into a TimePointMs.
///
/// \param unix_ms Number of milliseconds used as the time point's offset from its epoch.
/// \return The corresponding TimePointMs; this implementation has no error path.
Result<TimePointMs> TimePointMsFromUnixMs(int64_t unix_ms) {
  const std::chrono::milliseconds since_epoch(unix_ms);
  return TimePointMs{since_epoch};
}
3340

34-
std::string MetadataLogEntry::ToString() const {
35-
return std::format("MetadataLogEntry[timestampMillis={},file={}]", timestamp_ms, file);
41+
int64_t UnixMsFromTimePointMs(const TimePointMs& time_point_ms) {
42+
return std::chrono::duration_cast<std::chrono::milliseconds>(
43+
time_point_ms.time_since_epoch())
44+
.count();
3645
}
3746

3847
} // namespace iceberg

0 commit comments

Comments
 (0)