Skip to content

Commit 144de7b

Browse files
committed
resolve more review comments
1 parent 2c46184 commit 144de7b

File tree

4 files changed

+157
-294
lines changed

4 files changed

+157
-294
lines changed

src/iceberg/snapshot.cc

Lines changed: 19 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -25,98 +25,38 @@
2525

2626
namespace iceberg {
2727

28-
namespace {
29-
/// \brief Get the relative Operation name
30-
constexpr std::string_view ToString(Summary::Operation operation) {
31-
switch (operation) {
32-
case Summary::Operation::kAppend:
33-
return "append";
34-
case Summary::Operation::kOverwrite:
35-
return "overwrite";
36-
case Summary::Operation::kReplace:
37-
return "replace";
38-
case Summary::Operation::kDelete:
39-
return "delete";
40-
default:
41-
return "invalid";
28+
std::optional<std::string> Snapshot::operation() const {
29+
auto it = summary.find(std::string(SnapshotSummaryFields::kOperation));
30+
if (it != summary.end()) {
31+
return it->second;
4232
}
33+
return std::nullopt;
4334
}
44-
} // namespace
4535

46-
Summary::Summary(Operation op, std::unordered_map<std::string, std::string> props)
47-
: operation_(op), additional_properties_(std::move(props)) {}
48-
49-
Summary::Operation Summary::operation() const { return operation_; }
50-
51-
const std::unordered_map<std::string, std::string>& Summary::properties() const {
52-
return additional_properties_;
53-
}
54-
55-
std::string Summary::ToString() const {
56-
std::string repr = std::format("summary<operation: {}", iceberg::ToString(operation_));
57-
for (const auto& [key, value] : additional_properties_) {
58-
std::format_to(std::back_inserter(repr), ", {}: {}", key, value);
36+
std::optional<std::reference_wrapper<const Snapshot::manifest_list_t>>
37+
Snapshot::ManifestList() const {
38+
if (std::holds_alternative<manifest_list_t>(manifest_list)) {
39+
return std::cref(std::get<manifest_list_t>(manifest_list));
5940
}
60-
repr += ">";
61-
return repr;
62-
}
63-
64-
Snapshot::Snapshot(int64_t snapshot_id, std::optional<int64_t> parent_snapshot_id,
65-
int64_t sequence_number, int64_t timestamp_ms,
66-
std::string manifest_list, Summary summary,
67-
std::optional<int64_t> schema_id)
68-
: snapshot_id_(snapshot_id),
69-
parent_snapshot_id_(parent_snapshot_id),
70-
sequence_number_(sequence_number),
71-
timestamp_ms_(timestamp_ms),
72-
manifest_list_(std::move(manifest_list)),
73-
summary_(std::move(summary)),
74-
schema_id_(schema_id) {}
75-
76-
int64_t Snapshot::snapshot_id() const { return snapshot_id_; }
77-
78-
std::optional<int64_t> Snapshot::parent_snapshot_id() const {
79-
return parent_snapshot_id_;
41+
return std::nullopt;
8042
}
8143

82-
int64_t Snapshot::sequence_number() const { return sequence_number_; }
83-
84-
int64_t Snapshot::timestamp_ms() const { return timestamp_ms_; }
85-
86-
const std::string& Snapshot::manifest_list() const { return manifest_list_; }
87-
88-
const Summary& Snapshot::summary() const { return summary_; }
89-
90-
std::optional<int32_t> Snapshot::schema_id() const { return schema_id_; }
91-
92-
std::string Snapshot::ToString() const {
93-
std::string repr;
94-
std::format_to(std::back_inserter(repr), "snapshot<\n id: {}\n", snapshot_id_);
95-
if (parent_snapshot_id_.has_value()) {
96-
std::format_to(std::back_inserter(repr), " parent_id: {}\n",
97-
parent_snapshot_id_.value());
98-
}
99-
std::format_to(std::back_inserter(repr), " sequence_number: {}\n", sequence_number_);
100-
std::format_to(std::back_inserter(repr), " timestamp_ms: {}\n", timestamp_ms_);
101-
std::format_to(std::back_inserter(repr), " manifest_list: {}\n", manifest_list_);
102-
std::format_to(std::back_inserter(repr), " summary: {}\n", summary_);
103-
104-
if (schema_id_.has_value()) {
105-
std::format_to(std::back_inserter(repr), " schema_id: {}\n", schema_id_.value());
44+
std::optional<std::reference_wrapper<const Snapshot::manifests_t>> Snapshot::Manifests()
45+
const {
46+
if (std::holds_alternative<manifests_t>(manifest_list)) {
47+
return std::cref(std::get<manifests_t>(manifest_list));
10648
}
107-
108-
repr += ">";
109-
return repr;
49+
return std::nullopt;
11050
}
11151

11252
bool Snapshot::Equals(const Snapshot& other) const {
11353
if (this == &other) {
11454
return true;
11555
}
116-
return snapshot_id_ == other.snapshot_id_ &&
117-
parent_snapshot_id_ == other.parent_snapshot_id_ &&
118-
sequence_number_ == other.sequence_number_ &&
119-
timestamp_ms_ == other.timestamp_ms_ && schema_id_ == other.schema_id_;
56+
return snapshot_id == other.snapshot_id &&
57+
parent_snapshot_id == other.parent_snapshot_id &&
58+
sequence_number == other.sequence_number && timestamp_ms == other.timestamp_ms &&
59+
schema_id == other.schema_id;
12060
}
12161

12262
} // namespace iceberg

src/iceberg/snapshot.h

Lines changed: 89 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,44 @@
2222
#include <optional>
2323
#include <string>
2424
#include <unordered_map>
25+
#include <variant>
26+
#include <vector>
2527

2628
#include "iceberg/iceberg_export.h"
2729
#include "iceberg/util/formattable.h"
2830

2931
namespace iceberg {
3032

33+
/// \brief The type of snapshot reference
34+
enum class SnapshotRefType {
35+
/// Branches are mutable named references that can be updated by committing a new
36+
/// snapshot as the branch’s referenced snapshot using the Commit Conflict Resolution
37+
/// and Retry procedures.
38+
kBranch,
39+
/// Tags are labels for individual snapshots
40+
kTag,
41+
};
42+
43+
/// \brief A reference to a snapshot, either a branch or a tag.
44+
struct ICEBERG_EXPORT SnapshotRef {
45+
/// A reference's snapshot ID. The tagged snapshot or latest snapshot of a branch.
46+
int64_t snapshot_id;
47+
/// Type of the reference, tag or branch
48+
SnapshotRefType type;
49+
/// For branch type only, a positive number for the minimum number of snapshots to keep
50+
/// in a branch while expiring snapshots. Defaults to table property
51+
/// history.expire.min-snapshots-to-keep.
52+
std::optional<int32_t> min_snapshots_to_keep;
53+
/// For branch type only, a positive number for the max age of snapshots to keep when
54+
/// expiring, including the latest snapshot. Defaults to table property
55+
/// history.expire.max-snapshot-age-ms.
56+
std::optional<int64_t> max_snapshot_age_ms;
57+
/// For snapshot references except the main branch, a positive number for the max age of
58+
/// the snapshot reference to keep while expiring snapshots. Defaults to table property
59+
/// history.expire.max-ref-age-ms. The main branch never expires.
60+
std::optional<int64_t> max_ref_age_ms;
61+
};
62+
3163
/// \brief Optional Snapshot Summary Fields
3264
struct SnapshotSummaryFields {
3365
/// \brief The operation field key
@@ -109,40 +141,23 @@ struct SnapshotSummaryFields {
109141
constexpr static std::string_view kEngineVersion = "engine-version";
110142
};
111143

112-
/// \brief Summarises the changes in the snapshot.
113-
class ICEBERG_EXPORT Summary : public iceberg::util::Formattable {
114-
public:
115-
/// \brief The operation field is used by some operations, like snapshot expiration, to
116-
/// skip processing certain snapshots.
117-
enum class Operation {
118-
/// Only data files were added and no files were removed.
119-
kAppend,
120-
/// Data and delete files were added and removed without changing table data; i.e.
121-
/// compaction, change the data file format, or relocating data files.
122-
kReplace,
123-
/// Data and delete files were added and removed in a logical overwrite operation.
124-
kOverwrite,
125-
/// Data files were removed and their contents logically deleted and/or delete files
126-
/// were added to delete rows.
127-
kDelete,
128-
};
129-
Summary() = default;
130-
/// \brief Construct a summary with the given operation and properties.
131-
Summary(Operation op, std::unordered_map<std::string, std::string> props);
132-
133-
/// \brief Get the operation type of the snapshot.
134-
Operation operation() const;
135-
136-
/// \brief Get the additional properties of the snapshot.
137-
const std::unordered_map<std::string, std::string>& properties() const;
138-
139-
std::string ToString() const override;
140-
141-
private:
142-
/// The type of operation in the snapshot
143-
Operation operation_{Operation::kAppend};
144-
/// Other summary data.
145-
std::unordered_map<std::string, std::string> additional_properties_;
144+
/// \brief Data operations that produce snapshots.
145+
///
146+
/// A snapshot can return the operation that created the snapshot to help other components
147+
/// ignore snapshots that are not needed for some tasks. For example, snapshot expiration
148+
/// does not need to clean up deleted files for appends, which have no deleted files.
149+
struct ICEBERG_EXPORT DataOperation {
150+
/// \brief Only data files were added and no files were removed.
151+
static constexpr std::string_view kAppend = "append";
152+
/// \brief Data and delete files were added and removed without changing table data;
153+
/// i.e. compaction, change the data file format, or relocating data files.
154+
static constexpr std::string_view kReplace = "replace";
155+
/// \brief Data and delete files were added and removed in a logical overwrite
156+
/// operation.
157+
static constexpr std::string_view kOverwrite = "overwrite";
158+
/// \brief Data files were removed and their contents logically deleted and/or delete
159+
/// files were added to delete rows.
160+
static constexpr std::string_view kDelete = "delete";
146161
};
147162

148163
/// \brief A snapshot of the data in a table at a point in time.
@@ -151,63 +166,59 @@ class ICEBERG_EXPORT Summary : public iceberg::util::Formattable {
151166
/// the union of all the data files in those manifests.
152167
///
153168
/// Snapshots are created by table operations.
154-
class ICEBERG_EXPORT Snapshot : public iceberg::util::Formattable {
155-
public:
156-
Snapshot(int64_t snapshot_id, std::optional<int64_t> parent_snapshot_id,
157-
int64_t sequence_number, int64_t timestamp_ms, std::string manifest_list,
158-
Summary summary, std::optional<int64_t> schema_id);
159-
160-
/// \brief Get the id of the snapshot.
161-
int64_t snapshot_id() const;
162-
163-
/// \brief Get parent snapshot id.
164-
std::optional<int64_t> parent_snapshot_id() const;
169+
struct ICEBERG_EXPORT Snapshot {
170+
using manifest_list_t = std::string;
171+
using manifests_t = std::vector<std::string>;
165172

166-
/// \brief Get the sequence number of the snapshot.
167-
int64_t sequence_number() const;
168-
169-
/// \brief Get the timestamp of the snapshot.
170-
int64_t timestamp_ms() const;
171-
172-
/// \brief Get the manifest list of the snapshot.
173-
const std::string& manifest_list() const;
174-
175-
/// \brief Get the summary of the snapshot.
176-
const Summary& summary() const;
177-
178-
/// \brief Get the schema ID of the snapshot.
179-
std::optional<int32_t> schema_id() const;
180-
181-
std::string ToString() const override;
173+
/// A unique long ID.
174+
int64_t snapshot_id;
175+
/// The snapshot ID of the snapshot's parent. Omitted for any snapshot with no parent.
176+
std::optional<int64_t> parent_snapshot_id;
177+
/// A monotonically increasing long that tracks the order of changes to a table.
178+
int64_t sequence_number;
179+
/// A timestamp when the snapshot was created, used for garbage collection and table
180+
/// inspection.
181+
int64_t timestamp_ms;
182+
/// The location of a manifest list for this snapshot that tracks manifest files with
183+
/// additional metadata. The variant may instead hold a list of manifests (`manifests_t`).
184+
std::variant<manifest_list_t, manifests_t> manifest_list;
185+
/// A string map that summarises the snapshot changes, including operation.
186+
std::unordered_map<std::string, std::string> summary;
187+
/// ID of the table's current schema when the snapshot was created.
188+
std::optional<int32_t> schema_id;
189+
190+
/// \brief Return the name of the DataOperation data operation that produced this
191+
/// snapshot.
192+
///
193+
/// \return the operation that produced this snapshot, or nullopt if the operation is
194+
/// unknown.
195+
std::optional<std::string> operation() const;
196+
197+
/// \brief Get the manifest list for this snapshot.
198+
///
199+
/// \return the manifest list for this snapshot, or nullopt if the snapshot has no
200+
/// manifest list.
201+
std::optional<std::reference_wrapper<const manifest_list_t>> ManifestList() const;
202+
203+
/// \brief Get the manifests for this snapshot.
204+
///
205+
/// \return the manifests for this snapshot, or nullopt if the snapshot has no
206+
/// manifests.
207+
std::optional<std::reference_wrapper<const manifests_t>> Manifests() const;
182208

209+
/// \brief Compare two snapshots for equality.
183210
friend bool operator==(const Snapshot& lhs, const Snapshot& rhs) {
184211
return lhs.Equals(rhs);
185212
}
186213

214+
/// \brief Compare two snapshots for inequality.
187215
friend bool operator!=(const Snapshot& lhs, const Snapshot& rhs) {
188216
return !(lhs == rhs);
189217
}
190218

191219
private:
192220
/// \brief Compare two snapshots for equality.
193221
bool Equals(const Snapshot& other) const;
194-
195-
/// A unqiue long ID.
196-
int64_t snapshot_id_;
197-
/// The snapshot ID of the snapshot's parent. Omitted for any snapshot with no parent.
198-
std::optional<int64_t> parent_snapshot_id_;
199-
/// A monotonically increasing long that tracks the order of changes to a table.
200-
int64_t sequence_number_;
201-
/// A timestamp when the snapshot was created, used for garbage collection and table
202-
/// inspection.
203-
int64_t timestamp_ms_;
204-
/// The location of a manifest list for this snapshot that tracks manifest files with
205-
/// additional metadata.
206-
std::string manifest_list_;
207-
/// A string map that summaries the snapshot changes, including operation.
208-
Summary summary_;
209-
/// ID of the table's current schema when the snapshot was created.
210-
std::optional<int32_t> schema_id_;
211222
};
212223

213224
} // namespace iceberg

src/iceberg/snapshot_ref.h

Lines changed: 0 additions & 59 deletions
This file was deleted.

0 commit comments

Comments
 (0)