#include <cstdint>
#include <functional>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>
#include <variant>
#include <vector>

#include "iceberg/iceberg_export.h"
#include "iceberg/util/formattable.h"
2830
2931namespace iceberg {
3032
/// \brief The type of snapshot reference
enum class SnapshotRefType {
  /// Branches are mutable named references that can be updated by committing a new
  /// snapshot as the branch's referenced snapshot using the Commit Conflict Resolution
  /// and Retry procedures.
  kBranch,
  /// Tags are labels for individual snapshots
  kTag,
};
42+
43+ // / \brief A reference to a snapshot, either a branch or a tag.
44+ struct ICEBERG_EXPORT SnapshotRef {
45+ // / A reference's snapshot ID. The tagged snapshot or latest snapshot of a branch.
46+ int64_t snapshot_id;
47+ // / Type of the reference, tag or branch
48+ SnapshotRefType type;
49+ // / For branch type only, a positive number for the minimum number of snapshots to keep
50+ // / in a branch while expiring snapshots. Defaults to table property
51+ // / history.expire.min-snapshots-to-keep.
52+ std::optional<int32_t > min_snapshots_to_keep;
53+ // / For branch type only, a positive number for the max age of snapshots to keep when
54+ // / expiring, including the latest snapshot. Defaults to table property
55+ // / history.expire.max-snapshot-age-ms.
56+ std::optional<int64_t > max_snapshot_age_ms;
57+ // / For snapshot references except the main branch, a positive number for the max age of
58+ // / the snapshot reference to keep while expiring snapshots. Defaults to table property
59+ // / history.expire.max-ref-age-ms. The main branch never expires.
60+ std::optional<int64_t > max_ref_age_ms;
61+ };
62+
3163// / \brief Optional Snapshot Summary Fields
3264struct SnapshotSummaryFields {
3365 // / \brief The operation field key
@@ -109,40 +141,23 @@ struct SnapshotSummaryFields {
109141 constexpr static std::string_view kEngineVersion = " engine-version" ;
110142};
111143
112- // / \brief Summarises the changes in the snapshot.
113- class ICEBERG_EXPORT Summary : public iceberg::util::Formattable {
114- public:
115- // / \brief The operation field is used by some operations, like snapshot expiration, to
116- // / skip processing certain snapshots.
117- enum class Operation {
118- // / Only data files were added and no files were removed.
119- kAppend ,
120- // / Data and delete files were added and removed without changing table data; i.e.
121- // / compaction, change the data file format, or relocating data files.
122- kReplace ,
123- // / Data and delete files were added and removed in a logical overwrite operation.
124- kOverwrite ,
125- // / Data files were removed and their contents logically deleted and/or delete files
126- // / were added to delete rows.
127- kDelete ,
128- };
129- Summary () = default ;
130- // / \brief Construct a summary with the given operation and properties.
131- Summary (Operation op, std::unordered_map<std::string, std::string> props);
132-
133- // / \brief Get the operation type of the snapshot.
134- Operation operation () const ;
135-
136- // / \brief Get the additional properties of the snapshot.
137- const std::unordered_map<std::string, std::string>& properties () const ;
138-
139- std::string ToString () const override ;
140-
141- private:
142- // / The type of operation in the snapshot
143- Operation operation_{Operation::kAppend };
144- // / Other summary data.
145- std::unordered_map<std::string, std::string> additional_properties_;
144+ // / \brief Data operation that produce snapshots.
145+ // /
146+ // / A snapshot can return the operation that created the snapshot to help other components
147+ // / ignore snapshots that are not needed for some tasks. For example, snapshot expiration
148+ // / does not need to clean up deleted files for appends, which have no deleted files.
149+ struct ICEBERG_EXPORT DataOperation {
150+ // / \brief Only data files were added and no files were removed.
151+ static constexpr std::string_view kAppend = " append" ;
152+ // / \brief Data and delete files were added and removed without changing table data;
153+ // / i.e. compaction, change the data file format, or relocating data files.
154+ static constexpr std::string_view kReplace = " replace" ;
155+ // / \brief Data and delete files were added and removed in a logical overwrite
156+ // / operation.
157+ static constexpr std::string_view kOverwrite = " overwrite" ;
158+ // / \brief Data files were removed and their contents logically deleted and/or delete
159+ // / files were added to delete rows.
160+ static constexpr std::string_view kDelete = " delete" ;
146161};
147162
/// \brief A snapshot of the data in a table at a point in time.
@@ -151,63 +166,59 @@ class ICEBERG_EXPORT Summary : public iceberg::util::Formattable {
/// the union of all the data files in those manifests.
///
/// Snapshots are created by table operations.
154- class ICEBERG_EXPORT Snapshot : public iceberg::util::Formattable {
155- public:
156- Snapshot (int64_t snapshot_id, std::optional<int64_t > parent_snapshot_id,
157- int64_t sequence_number, int64_t timestamp_ms, std::string manifest_list,
158- Summary summary, std::optional<int64_t > schema_id);
159-
160- // / \brief Get the id of the snapshot.
161- int64_t snapshot_id () const ;
162-
163- // / \brief Get parent snapshot id.
164- std::optional<int64_t > parent_snapshot_id () const ;
169+ struct ICEBERG_EXPORT Snapshot {
170+ using manifest_list_t = std::string;
171+ using manifests_t = std::vector<std::string>;
165172
166- // / \brief Get the sequence number of the snapshot.
167- int64_t sequence_number () const ;
168-
169- // / \brief Get the timestamp of the snapshot.
170- int64_t timestamp_ms () const ;
171-
172- // / \brief Get the manifest list of the snapshot.
173- const std::string& manifest_list () const ;
174-
175- // / \brief Get the summary of the snapshot.
176- const Summary& summary () const ;
177-
178- // / \brief Get the schema ID of the snapshot.
179- std::optional<int32_t > schema_id () const ;
180-
181- std::string ToString () const override ;
173+ // / A unqiue long ID.
174+ int64_t snapshot_id;
175+ // / The snapshot ID of the snapshot's parent. Omitted for any snapshot with no parent.
176+ std::optional<int64_t > parent_snapshot_id;
177+ // / A monotonically increasing long that tracks the order of changes to a table.
178+ int64_t sequence_number;
179+ // / A timestamp when the snapshot was created, used for garbage collection and table
180+ // / inspection.
181+ int64_t timestamp_ms;
182+ // / The location of a manifest list for this snapshot that tracks manifest files with
183+ // / additional metadata.
184+ std::variant<manifest_list_t , manifests_t > manifest_list;
185+ // / A string map that summaries the snapshot changes, including operation.
186+ std::unordered_map<std::string, std::string> summary;
187+ // / ID of the table's current schema when the snapshot was created.
188+ std::optional<int32_t > schema_id;
189+
190+ // / \brief Return the name of the DataOperations data operation that produced this
191+ // / snapshot.
192+ // /
193+ // / \return the operation that produced this snapshot, or nullopt if the operation is
194+ // / unknown.
195+ std::optional<std::string> operation () const ;
196+
197+ // / \brief Get the manifest list for this snapshot.
198+ // /
199+ // / \return the manifest list for this snapshot, or nullopt if the snapshot has no
200+ // / manifest list.
201+ std::optional<std::reference_wrapper<const manifest_list_t >> ManifestList () const ;
202+
203+ // / \brief Get the manifests for this snapshot.
204+ // /
205+ // / \return the manifests for this snapshot, or nullopt if the snapshot has no
206+ // / manifests.
207+ std::optional<std::reference_wrapper<const manifests_t >> Manifests () const ;
182208
209+ // / \brief Compare two snapshots for equality.
183210 friend bool operator ==(const Snapshot& lhs, const Snapshot& rhs) {
184211 return lhs.Equals (rhs);
185212 }
186213
214+ // / \brief Compare two snapshots for inequality.
187215 friend bool operator !=(const Snapshot& lhs, const Snapshot& rhs) {
188216 return !(lhs == rhs);
189217 }
190218
191219 private:
192220 // / \brief Compare two snapshots for equality.
193221 bool Equals (const Snapshot& other) const ;
194-
195- // / A unqiue long ID.
196- int64_t snapshot_id_;
197- // / The snapshot ID of the snapshot's parent. Omitted for any snapshot with no parent.
198- std::optional<int64_t > parent_snapshot_id_;
199- // / A monotonically increasing long that tracks the order of changes to a table.
200- int64_t sequence_number_;
201- // / A timestamp when the snapshot was created, used for garbage collection and table
202- // / inspection.
203- int64_t timestamp_ms_;
204- // / The location of a manifest list for this snapshot that tracks manifest files with
205- // / additional metadata.
206- std::string manifest_list_;
207- // / A string map that summaries the snapshot changes, including operation.
208- Summary summary_;
209- // / ID of the table's current schema when the snapshot was created.
210- std::optional<int32_t > schema_id_;
211222};
212223
213224} // namespace iceberg
0 commit comments