1919
2020#include " iceberg/json_internal.h"
2121
22+ #include < cstdint>
2223#include < format>
2324#include < regex>
25+ #include < unordered_set>
2426
2527#include < nlohmann/json.hpp>
2628
2729#include " iceberg/partition_spec.h"
2830#include " iceberg/result.h"
2931#include " iceberg/schema.h"
3032#include " iceberg/schema_internal.h"
33+ #include " iceberg/snapshot.h"
3134#include " iceberg/sort_order.h"
3235#include " iceberg/transform.h"
3336#include " iceberg/type.h"
@@ -70,6 +73,55 @@ constexpr std::string_view kValueRequired = "value-required";
7073
// JSON property names shared by the serializers and parsers in this file.
constexpr std::string_view kFieldId = "field-id";
constexpr std::string_view kSpecId = "spec-id";
// Properties of a serialized snapshot.
constexpr std::string_view kSnapshotId = "snapshot-id";
constexpr std::string_view kParentSnapshotId = "parent-snapshot-id";
constexpr std::string_view kSequenceNumber = "sequence-number";
constexpr std::string_view kTimestampMs = "timestamp-ms";
constexpr std::string_view kManifestList = "manifest-list";
constexpr std::string_view kSummary = "summary";
// Retention properties of a serialized snapshot reference.
constexpr std::string_view kMinSnapshotsToKeep = "min-snapshots-to-keep";
constexpr std::string_view kMaxSnapshotAgeMs = "max-snapshot-age-ms";
constexpr std::string_view kMaxRefAgeMs = "max-ref-age-ms";

// Sequence number used when a snapshot's JSON carries no "sequence-number";
// values not greater than this are also omitted when a snapshot is written.
constexpr int64_t kInitialSequenceNumber = 0;
87+
88+ const std::unordered_set<std::string_view> kValidSnapshotSummaryFields = {
89+ SnapshotSummaryFields::kOperation ,
90+ SnapshotSummaryFields::kAddedDataFiles ,
91+ SnapshotSummaryFields::kDeletedDataFiles ,
92+ SnapshotSummaryFields::kTotalDataFiles ,
93+ SnapshotSummaryFields::kAddedDeleteFiles ,
94+ SnapshotSummaryFields::kAddedEqDeleteFiles ,
95+ SnapshotSummaryFields::kRemovedEqDeleteFiles ,
96+ SnapshotSummaryFields::kAddedPosDeleteFiles ,
97+ SnapshotSummaryFields::kRemovedPosDeleteFiles ,
98+ SnapshotSummaryFields::kAddedDVs ,
99+ SnapshotSummaryFields::kRemovedDVs ,
100+ SnapshotSummaryFields::kRemovedDeleteFiles ,
101+ SnapshotSummaryFields::kTotalDeleteFiles ,
102+ SnapshotSummaryFields::kAddedRecords ,
103+ SnapshotSummaryFields::kDeletedRecords ,
104+ SnapshotSummaryFields::kTotalRecords ,
105+ SnapshotSummaryFields::kAddedFileSize ,
106+ SnapshotSummaryFields::kRemovedFileSize ,
107+ SnapshotSummaryFields::kTotalFileSize ,
108+ SnapshotSummaryFields::kAddedPosDeletes ,
109+ SnapshotSummaryFields::kRemovedPosDeletes ,
110+ SnapshotSummaryFields::kTotalPosDeletes ,
111+ SnapshotSummaryFields::kAddedEqDeletes ,
112+ SnapshotSummaryFields::kRemovedEqDeletes ,
113+ SnapshotSummaryFields::kTotalEqDeletes ,
114+ SnapshotSummaryFields::kDeletedDuplicatedFiles ,
115+ SnapshotSummaryFields::kChangedPartitionCountProp ,
116+ SnapshotSummaryFields::kWAPId ,
117+ SnapshotSummaryFields::kPublishedWAPId ,
118+ SnapshotSummaryFields::kSourceSnapshotId ,
119+ SnapshotSummaryFields::kEngineName ,
120+ SnapshotSummaryFields::kEngineVersion };
121+
122+ const std::unordered_set<std::string_view> kValidDataOperation = {
123+ DataOperation::kAppend , DataOperation::kReplace , DataOperation::kOverwrite ,
124+ DataOperation::kDelete };
73125
74126template <typename T>
75127Result<T> GetJsonValue (const nlohmann::json& json, std::string_view key) {
@@ -89,6 +141,30 @@ Result<T> GetJsonValue(const nlohmann::json& json, std::string_view key) {
89141 }
90142}
91143
// Reads an optional member of a JSON value.
//
// T is the type the member is converted to via nlohmann's get<T>().
//
// Returns:
//   - std::nullopt when `key` is absent or holds an explicit JSON null,
//   - the converted value when the conversion succeeds,
//   - a kJsonParseError when the member cannot be converted to T.
template <typename T>
Result<std::optional<T>> GetJsonValueOptional(const nlohmann::json& json,
                                              std::string_view key) {
  // find() yields end() both for a missing key and for a non-object `json`,
  // so a single lookup covers the "absent" cases.
  const auto member = json.find(key);
  if (member == json.end() || member->is_null()) {
    return std::nullopt;
  }
  try {
    return member->get<T>();
  } catch (const std::exception&) {
    // dump() with the default (strict) handler throws on invalid UTF-8, which
    // would escape this handler; `replace` keeps error reporting exception-free.
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::format(
            "Failed to parse key '{}' in {}", key,
            json.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace)),
    });
  }
}
159+
160+ template <typename T>
161+ void SetOptionalField (nlohmann::json& json, std::string_view key,
162+ const std::optional<T>& value) {
163+ if (value.has_value ()) {
164+ json[key] = *value;
165+ }
166+ }
167+
92168} // namespace
93169
94170nlohmann::json ToJson (const SortField& sort_field) {
@@ -231,6 +307,39 @@ nlohmann::json SchemaToJson(const Schema& schema) {
231307 return json;
232308}
233309
310+ nlohmann::json ToJson (const SnapshotRef& ref) {
311+ nlohmann::json json;
312+ json[kSnapshotId ] = ref.snapshot_id ;
313+ json[kType ] = SnapshotRefTypeToString (ref.type ());
314+ if (ref.type () == SnapshotRefType::kBranch ) {
315+ const auto & branch = std::get<SnapshotRef::Branch>(ref.retention );
316+ SetOptionalField (json, kMinSnapshotsToKeep , branch.min_snapshots_to_keep );
317+ SetOptionalField (json, kMaxSnapshotAgeMs , branch.max_snapshot_age_ms );
318+ SetOptionalField (json, kMaxRefAgeMs , branch.max_ref_age_ms );
319+ } else if (ref.type () == SnapshotRefType::kTag ) {
320+ const auto & tag = std::get<SnapshotRef::Tag>(ref.retention );
321+ SetOptionalField (json, kMaxRefAgeMs , tag.max_ref_age_ms );
322+ }
323+ return json;
324+ }
325+
326+ nlohmann::json ToJson (const Snapshot& snapshot) {
327+ nlohmann::json json;
328+ json[kSnapshotId ] = snapshot.snapshot_id ;
329+ SetOptionalField (json, kParentSnapshotId , snapshot.parent_snapshot_id );
330+ if (snapshot.sequence_number > kInitialSequenceNumber ) {
331+ json[kSequenceNumber ] = snapshot.sequence_number ;
332+ }
333+ json[kTimestampMs ] = snapshot.timestamp_ms ;
334+ json[kManifestList ] = snapshot.manifest_list ;
335+ // If there is an operation, write the summary map
336+ if (snapshot.operation ().has_value ()) {
337+ json[kSummary ] = snapshot.summary ;
338+ }
339+ SetOptionalField (json, kSchemaId , snapshot.schema_id );
340+ return json;
341+ }
342+
234343namespace {
235344
236345Result<std::unique_ptr<Type>> StructTypeFromJson (const nlohmann::json& json) {
@@ -419,4 +528,82 @@ Result<std::unique_ptr<PartitionSpec>> PartitionSpecFromJson(
419528 return std::make_unique<PartitionSpec>(schema, spec_id, std::move (partition_fields));
420529}
421530
531+ Result<std::unique_ptr<SnapshotRef>> SnapshotRefFromJson (const nlohmann::json& json) {
532+ ICEBERG_ASSIGN_OR_RAISE (auto snapshot_id, GetJsonValue<int64_t >(json, kSnapshotId ));
533+ ICEBERG_ASSIGN_OR_RAISE (
534+ auto type,
535+ GetJsonValue<std::string>(json, kType ).and_then (SnapshotRefTypeFromString));
536+ if (type == SnapshotRefType::kBranch ) {
537+ ICEBERG_ASSIGN_OR_RAISE (auto min_snapshots_to_keep,
538+ GetJsonValueOptional<int32_t >(json, kMinSnapshotsToKeep ));
539+ ICEBERG_ASSIGN_OR_RAISE (auto max_snapshot_age_ms,
540+ GetJsonValueOptional<int64_t >(json, kMaxSnapshotAgeMs ));
541+ ICEBERG_ASSIGN_OR_RAISE (auto max_ref_age_ms,
542+ GetJsonValueOptional<int64_t >(json, kMaxRefAgeMs ));
543+
544+ return std::make_unique<SnapshotRef>(
545+ snapshot_id, SnapshotRef::Branch{.min_snapshots_to_keep = min_snapshots_to_keep,
546+ .max_snapshot_age_ms = max_snapshot_age_ms,
547+ .max_ref_age_ms = max_ref_age_ms});
548+ } else {
549+ ICEBERG_ASSIGN_OR_RAISE (auto max_ref_age_ms,
550+ GetJsonValueOptional<int64_t >(json, kMaxRefAgeMs ));
551+
552+ return std::make_unique<SnapshotRef>(
553+ snapshot_id, SnapshotRef::Tag{.max_ref_age_ms = max_ref_age_ms});
554+ }
555+ }
556+
// Parses a snapshot from JSON.
//
// Required members: snapshot id, timestamp and manifest list. Optional
// members: sequence number (falls back to kInitialSequenceNumber), parent
// snapshot id, summary and schema id.
//
// When a summary is present it must be a JSON object whose keys are listed in
// kValidSnapshotSummaryFields and whose values are strings; an operation
// entry must be one of kValidDataOperation. A summary without an operation
// entry gets DataOperation::kOverwrite.
//
// Returns the parsed snapshot, or a kJsonParseError describing the first
// problem encountered.
Result<std::unique_ptr<Snapshot>> SnapshotFromJson(const nlohmann::json& json) {
  // Builds the parse error shared by every validation failure below.
  const auto parse_error = [](std::string message) {
    return unexpected<Error>({
        .kind = ErrorKind::kJsonParseError,
        .message = std::move(message),
    });
  };
  // dump() with the default handler throws on invalid UTF-8; `replace` keeps
  // error reporting from raising an exception of its own.
  const auto safe_dump = [](const nlohmann::json& node) {
    return node.dump(-1, ' ', false, nlohmann::json::error_handler_t::replace);
  };

  ICEBERG_ASSIGN_OR_RAISE(auto snapshot_id, GetJsonValue<int64_t>(json, kSnapshotId));
  ICEBERG_ASSIGN_OR_RAISE(auto sequence_number,
                          GetJsonValueOptional<int64_t>(json, kSequenceNumber));
  ICEBERG_ASSIGN_OR_RAISE(auto timestamp_ms, GetJsonValue<int64_t>(json, kTimestampMs));
  ICEBERG_ASSIGN_OR_RAISE(auto manifest_list,
                          GetJsonValue<std::string>(json, kManifestList));

  ICEBERG_ASSIGN_OR_RAISE(auto parent_snapshot_id,
                          GetJsonValueOptional<int64_t>(json, kParentSnapshotId));

  ICEBERG_ASSIGN_OR_RAISE(auto summary_json,
                          GetJsonValueOptional<nlohmann::json>(json, kSummary));
  std::unordered_map<std::string, std::string> summary;
  if (summary_json.has_value()) {
    // items() on a null, array or scalar would either iterate nothing (and
    // silently synthesize an "overwrite" summary) or yield synthetic keys, so
    // reject anything that is not an object up front.
    if (!summary_json->is_object()) {
      return parse_error(
          std::format("Cannot parse snapshot summary from non-object value: {}",
                      safe_dump(*summary_json)));
    }
    for (const auto& [key, value] : summary_json->items()) {
      // NOTE(review): keys outside the known set are rejected here; writers
      // that add their own summary properties would fail to parse. Confirm
      // this strictness is intended.
      if (!kValidSnapshotSummaryFields.contains(key)) {
        return parse_error(std::format("Invalid snapshot summary field: {}", key));
      }
      if (!value.is_string()) {
        return parse_error(std::format("Invalid snapshot summary field value: {}",
                                       safe_dump(value)));
      }
      // Extract once and move into the map instead of converting twice.
      auto text = value.get<std::string>();
      if (key == SnapshotSummaryFields::kOperation &&
          !kValidDataOperation.contains(text)) {
        return parse_error(
            std::format("Invalid snapshot operation: {}", safe_dump(value)));
      }
      summary[key] = std::move(text);
    }
    // If summary is available but operation is missing, set operation to overwrite.
    if (!summary.contains(SnapshotSummaryFields::kOperation)) {
      summary[SnapshotSummaryFields::kOperation] = DataOperation::kOverwrite;
    }
  }

  ICEBERG_ASSIGN_OR_RAISE(auto schema_id, GetJsonValueOptional<int32_t>(json, kSchemaId));

  return std::make_unique<Snapshot>(
      snapshot_id, parent_snapshot_id, sequence_number.value_or(kInitialSequenceNumber),
      timestamp_ms, manifest_list, std::move(summary), schema_id);
}
608+
422609} // namespace iceberg
0 commit comments