2121
2222#include < format>
2323#include < regex>
24+ #include < unordered_set>
2425
2526#include < nlohmann/json.hpp>
2627
2728#include " iceberg/partition_spec.h"
2829#include " iceberg/result.h"
2930#include " iceberg/schema.h"
3031#include " iceberg/schema_internal.h"
32+ #include " iceberg/snapshot.h"
3133#include " iceberg/sort_order.h"
3234#include " iceberg/transform.h"
3335#include " iceberg/type.h"
@@ -70,6 +72,53 @@ constexpr std::string_view kValueRequired = "value-required";
7072
// JSON field names shared by the partition-spec, snapshot and snapshot-ref
// (de)serializers in this file. Each literal must match the serialized field
// name exactly (no surrounding whitespace), since it is used verbatim as the
// object key.
constexpr std::string_view kFieldId = "field-id";
constexpr std::string_view kSpecId = "spec-id";

// Snapshot fields.
constexpr std::string_view kSnapshotId = "snapshot-id";
constexpr std::string_view kParentSnapshotId = "parent-snapshot-id";
constexpr std::string_view kSequenceNumber = "sequence-number";
constexpr std::string_view kTimestampMs = "timestamp-ms";
constexpr std::string_view kManifestList = "manifest-list";
constexpr std::string_view kSummary = "summary";

// Snapshot reference retention fields.
constexpr std::string_view kMinSnapshotsToKeep = "min-snapshots-to-keep";
constexpr std::string_view kMaxSnapshotAgeMs = "max-snapshot-age-ms";
constexpr std::string_view kMaxRefAgeMs = "max-ref-age-ms";
84+
85+ const std::unordered_set<std::string_view> kValidSnapshotSummaryFields = {
86+ SnapshotSummaryFields::kOperation ,
87+ SnapshotSummaryFields::kAddedDataFiles ,
88+ SnapshotSummaryFields::kDeletedDataFiles ,
89+ SnapshotSummaryFields::kTotalDataFiles ,
90+ SnapshotSummaryFields::kAddedDeleteFiles ,
91+ SnapshotSummaryFields::kAddedEqDeleteFiles ,
92+ SnapshotSummaryFields::kRemovedEqDeleteFiles ,
93+ SnapshotSummaryFields::kAddedPosDeleteFiles ,
94+ SnapshotSummaryFields::kRemovedPosDeleteFiles ,
95+ SnapshotSummaryFields::kAddedDVs ,
96+ SnapshotSummaryFields::kRemovedDVs ,
97+ SnapshotSummaryFields::kRemovedDeleteFiles ,
98+ SnapshotSummaryFields::kTotalDeleteFiles ,
99+ SnapshotSummaryFields::kAddedRecords ,
100+ SnapshotSummaryFields::kDeletedRecords ,
101+ SnapshotSummaryFields::kTotalRecords ,
102+ SnapshotSummaryFields::kAddedFileSize ,
103+ SnapshotSummaryFields::kRemovedFileSize ,
104+ SnapshotSummaryFields::kTotalFileSize ,
105+ SnapshotSummaryFields::kAddedPosDeletes ,
106+ SnapshotSummaryFields::kRemovedPosDeletes ,
107+ SnapshotSummaryFields::kTotalPosDeletes ,
108+ SnapshotSummaryFields::kAddedEqDeletes ,
109+ SnapshotSummaryFields::kRemovedEqDeletes ,
110+ SnapshotSummaryFields::kTotalEqDeletes ,
111+ SnapshotSummaryFields::kDeletedDuplicatedFiles ,
112+ SnapshotSummaryFields::kChangedPartitionCountProp ,
113+ SnapshotSummaryFields::kWAPID ,
114+ SnapshotSummaryFields::kPublishedWAPID ,
115+ SnapshotSummaryFields::kSourceSnapshotID ,
116+ SnapshotSummaryFields::kEngineName ,
117+ SnapshotSummaryFields::kEngineVersion };
118+
119+ const std::unordered_set<std::string_view> kValidDataOperation = {
120+ DataOperation::kAppend , DataOperation::kReplace , DataOperation::kOverwrite ,
121+ DataOperation::kDelete };
73122
74123template <typename T>
75124Result<T> GetJsonValue (const nlohmann::json& json, std::string_view key) {
@@ -89,6 +138,22 @@ Result<T> GetJsonValue(const nlohmann::json& json, std::string_view key) {
89138 }
90139}
91140
/// \brief Read an optional field of type T from a JSON object.
///
/// \tparam T Type the stored value is converted to.
/// \param json JSON value in which `key` is looked up.
/// \param key Name of the field to read.
/// \return std::nullopt when `key` is absent or its value is JSON null, the
/// converted value when it is present, or a kJsonParseError when the stored
/// value cannot be converted to T.
template <typename T>
Result<std::optional<T>> GetJsonValueOptional(const nlohmann::json& json,
                                              std::string_view key) {
  // An explicit null carries no value; report it like a missing key rather
  // than failing the conversion to T.
  if (!json.contains(key) || json.at(key).is_null()) {
    return std::nullopt;
  }
  try {
    return json.at(key).get<T>();
  } catch (const std::exception&) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Failed to parse key '{}' in {}", key, json.dump()),
    });
  }
}
156+
92157} // namespace
93158
94159nlohmann::json ToJson (const SortField& sort_field) {
@@ -231,6 +296,53 @@ nlohmann::json SchemaToJson(const Schema& schema) {
231296 return json;
232297}
233298
299+ nlohmann::json SnapshotRefToJson (const SnapshotRef& ref) {
300+ nlohmann::json json;
301+ json[kSnapshotId ] = ref.snapshot_id ;
302+ json[kType ] = SnapshotRefTypeToString (ref.type ());
303+ if (ref.type () == SnapshotRefType::kBranch ) {
304+ const auto & branch = std::get<SnapshotRef::Branch>(ref.retention );
305+ if (branch.min_snapshots_to_keep .has_value ()) {
306+ json[kMinSnapshotsToKeep ] = *branch.min_snapshots_to_keep ;
307+ }
308+ if (branch.max_snapshot_age_ms .has_value ()) {
309+ json[kMaxSnapshotAgeMs ] = *branch.max_snapshot_age_ms ;
310+ }
311+ if (branch.max_ref_age_ms .has_value ()) {
312+ json[kMaxRefAgeMs ] = *branch.max_ref_age_ms ;
313+ }
314+ } else if (ref.type () == SnapshotRefType::kTag ) {
315+ const auto & tag = std::get<SnapshotRef::Tag>(ref.retention );
316+ if (tag.max_ref_age_ms .has_value ()) {
317+ json[kMaxRefAgeMs ] = *tag.max_ref_age_ms ;
318+ }
319+ }
320+ return json;
321+ }
322+
323+ nlohmann::json SnapshotToJson (const Snapshot& snapshot) {
324+ nlohmann::json json;
325+ json[kSnapshotId ] = snapshot.snapshot_id ;
326+ if (snapshot.parent_snapshot_id .has_value ()) {
327+ json[kParentSnapshotId ] = *snapshot.parent_snapshot_id ;
328+ }
329+ json[kSequenceNumber ] = snapshot.sequence_number ;
330+ json[kTimestampMs ] = snapshot.timestamp_ms ;
331+ json[kManifestList ] = snapshot.manifest_list ;
332+
333+ nlohmann::json summary_json;
334+ for (const auto & [key, value] : snapshot.summary ) {
335+ summary_json[key] = value;
336+ }
337+ json[kSummary ] = summary_json;
338+
339+ if (snapshot.schema_id .has_value ()) {
340+ json[kSchemaId ] = *snapshot.schema_id ;
341+ }
342+
343+ return json;
344+ }
345+
234346namespace {
235347
236348Result<std::unique_ptr<Type>> StructTypeFromJson (const nlohmann::json& json) {
@@ -419,4 +531,75 @@ Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
419531 return std::make_unique<PartitionSpec>(schema, spec_id, std::move (partition_fields));
420532}
421533
534+ Result<std::unique_ptr<SnapshotRef>> SnapshotRefFromJson (const nlohmann::json& json) {
535+ ICEBERG_ASSIGN_OR_RAISE (auto snapshot_id, GetJsonValue<int64_t >(json, kSnapshotId ));
536+ ICEBERG_ASSIGN_OR_RAISE (
537+ auto type,
538+ GetJsonValue<std::string>(json, kType ).and_then (SnapshotRefTypeFromString));
539+ if (type == SnapshotRefType::kBranch ) {
540+ ICEBERG_ASSIGN_OR_RAISE (auto min_snapshots_to_keep,
541+ GetJsonValueOptional<int32_t >(json, kMinSnapshotsToKeep ));
542+ ICEBERG_ASSIGN_OR_RAISE (auto max_snapshot_age_ms,
543+ GetJsonValueOptional<int64_t >(json, kMaxSnapshotAgeMs ));
544+ ICEBERG_ASSIGN_OR_RAISE (auto max_ref_age_ms,
545+ GetJsonValueOptional<int64_t >(json, kMaxRefAgeMs ));
546+
547+ return std::make_unique<SnapshotRef>(
548+ snapshot_id, SnapshotRef::Branch{.min_snapshots_to_keep = min_snapshots_to_keep,
549+ .max_snapshot_age_ms = max_snapshot_age_ms,
550+ .max_ref_age_ms = max_ref_age_ms});
551+ } else {
552+ ICEBERG_ASSIGN_OR_RAISE (auto max_ref_age_ms,
553+ GetJsonValueOptional<int64_t >(json, kMaxRefAgeMs ));
554+
555+ return std::make_unique<SnapshotRef>(
556+ snapshot_id, SnapshotRef::Tag{.max_ref_age_ms = max_ref_age_ms});
557+ }
558+ }
559+
/// \brief Build a snapshot from its JSON form.
///
/// The snapshot id, sequence number, timestamp, manifest list and summary
/// fields are required; the parent snapshot id and schema id are optional.
/// Every summary entry must use a key from kValidSnapshotSummaryFields and hold
/// a string value, and the operation entry must be one of kValidDataOperation.
///
/// \param json JSON object describing the snapshot.
/// \return The parsed snapshot, or a kJsonParseError describing the first
/// invalid field.
Result<std::unique_ptr<Snapshot>> SnapshotFromJson(const nlohmann::json& json) {
  ICEBERG_ASSIGN_OR_RAISE(auto snapshot_id, GetJsonValue<int64_t>(json, kSnapshotId));
  ICEBERG_ASSIGN_OR_RAISE(auto sequence_number,
                          GetJsonValue<int64_t>(json, kSequenceNumber));
  ICEBERG_ASSIGN_OR_RAISE(auto timestamp_ms, GetJsonValue<int64_t>(json, kTimestampMs));
  ICEBERG_ASSIGN_OR_RAISE(auto manifest_list,
                          GetJsonValue<std::string>(json, kManifestList));

  ICEBERG_ASSIGN_OR_RAISE(auto parent_snapshot_id,
                          GetJsonValueOptional<int64_t>(json, kParentSnapshotId));

  ICEBERG_ASSIGN_OR_RAISE(auto summary_json,
                          GetJsonValue<nlohmann::json>(json, kSummary));
  // Iterating a JSON array or scalar with items() yields synthetic keys, which
  // would surface as a confusing "invalid field" error (or, for an empty array,
  // no error at all). Reject those shapes up front. A null summary is still
  // accepted and produces an empty map.
  if (!summary_json.is_null() && !summary_json.is_object()) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format("Invalid snapshot summary, expected an object: {}",
                               summary_json.dump()),
    });
  }

  std::unordered_map<std::string, std::string> summary;
  for (const auto& [key, value] : summary_json.items()) {
    // NOTE(review): writers may attach custom summary properties; confirm that
    // rejecting keys outside kValidSnapshotSummaryFields is intended.
    if (!kValidSnapshotSummaryFields.contains(key)) {
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Invalid snapshot summary field: {}", key),
      });
    }
    if (!value.is_string()) {
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message =
              std::format("Invalid snapshot summary field value: {}", value.dump()),
      });
    }
    // Extract the string once; it is both validated and stored below.
    auto text = value.get<std::string>();
    if (key == SnapshotSummaryFields::kOperation &&
        !kValidDataOperation.contains(text)) {
      return unexpected<Error>({
          .kind = ErrorKind::kJsonParseError,
          .message = std::format("Invalid snapshot operation: {}", value.dump()),
      });
    }
    summary[key] = std::move(text);
  }

  ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValueOptional<int32_t>(json, kSchemaId));

  return std::make_unique<Snapshot>(snapshot_id, parent_snapshot_id, sequence_number,
                                    timestamp_ms, std::move(manifest_list),
                                    std::move(summary), schema_id);
}
604+
422605} // namespace iceberg
0 commit comments