Skip to content

Commit b50ea70

Browse files
committed
changes to the review comments
1 parent ef95c69 commit b50ea70

File tree

4 files changed

+171
-187
lines changed

4 files changed

+171
-187
lines changed

src/iceberg/snapshot.cc

Lines changed: 27 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,23 @@
1919

2020
#include "iceberg/snapshot.h"
2121

22+
#include <format>
23+
2224
#include "iceberg/util/formatter.h"
2325

2426
namespace iceberg {
2527

2628
namespace {
2729
/// \brief Get the relative Operation name
28-
constexpr std::string_view ToString(Operation operation) {
30+
constexpr std::string_view ToString(Summary::Operation operation) {
2931
switch (operation) {
30-
case Operation::kAppend:
32+
case Summary::Operation::kAppend:
3133
return "append";
32-
case Operation::kOverwrite:
34+
case Summary::Operation::kOverwrite:
3335
return "overwrite";
34-
case Operation::kReplace:
36+
case Summary::Operation::kReplace:
3537
return "replace";
36-
case Operation::kDelete:
38+
case Summary::Operation::kDelete:
3739
return "delete";
3840
default:
3941
return "invalid";
@@ -44,30 +46,24 @@ constexpr std::string_view ToString(Operation operation) {
4446
Summary::Summary(Operation op, std::unordered_map<std::string, std::string> props)
4547
: operation_(op), additional_properties_(std::move(props)) {}
4648

47-
Operation Summary::operation() const { return operation_; }
49+
Summary::Operation Summary::operation() const { return operation_; }
4850

4951
const std::unordered_map<std::string, std::string>& Summary::properties() const {
5052
return additional_properties_;
5153
}
5254

5355
std::string Summary::ToString() const {
54-
std::string repr =
55-
"summary: { operation: " + std::string(iceberg::ToString(operation_));
56+
std::string repr = std::format("summary<operation: {}", iceberg::ToString(operation_));
5657
for (const auto& [key, value] : additional_properties_) {
57-
repr += ", " + key + ": " + value;
58+
std::format_to(std::back_inserter(repr), ", {}: {}", key, value);
5859
}
59-
repr += "}";
60+
repr += ">";
6061
return repr;
6162
}
6263

63-
bool Summary::Equals(const Summary& other) const {
64-
return operation_ == other.operation_ &&
65-
additional_properties_ == other.additional_properties_;
66-
}
67-
6864
Snapshot::Snapshot(int64_t snapshot_id, std::optional<int64_t> parent_snapshot_id,
6965
int64_t sequence_number, int64_t timestamp_ms,
70-
std::string manifest_list, std::shared_ptr<Summary> summary,
66+
std::string manifest_list, Summary summary,
7167
std::optional<int64_t> schema_id)
7268
: snapshot_id_(snapshot_id),
7369
parent_snapshot_id_(parent_snapshot_id),
@@ -89,35 +85,38 @@ int64_t Snapshot::timestamp_ms() const { return timestamp_ms_; }
8985

9086
const std::string& Snapshot::manifest_list() const { return manifest_list_; }
9187

92-
const std::shared_ptr<Summary>& Snapshot::summary() const { return summary_; }
88+
const Summary& Snapshot::summary() const { return summary_; }
9389

9490
std::optional<int32_t> Snapshot::schema_id() const { return schema_id_; }
9591

9692
std::string Snapshot::ToString() const {
97-
std::string repr = "snapshot: { id: " + std::to_string(snapshot_id_);
93+
std::string repr;
94+
std::format_to(std::back_inserter(repr), "snapshot<\n id: {}\n", snapshot_id_);
9895
if (parent_snapshot_id_.has_value()) {
99-
repr += ", parent_id: " + std::to_string(parent_snapshot_id_.value());
96+
std::format_to(std::back_inserter(repr), " parent_id: {}\n",
97+
parent_snapshot_id_.value());
10098
}
101-
repr += ", sequence_number: " + std::to_string(sequence_number_);
102-
repr += ", timestamp_ms: " + std::to_string(timestamp_ms_);
103-
repr += ", manifest_list: " + manifest_list_;
104-
repr += ", summary: " + summary_->ToString();
99+
std::format_to(std::back_inserter(repr), " sequence_number: {}\n", sequence_number_);
100+
std::format_to(std::back_inserter(repr), " timestamp_ms: {}\n", timestamp_ms_);
101+
std::format_to(std::back_inserter(repr), " manifest_list: {}\n", manifest_list_);
102+
std::format_to(std::back_inserter(repr), " summary: {}\n", summary_);
105103

106104
if (schema_id_.has_value()) {
107-
repr += ", schema_id: " + std::to_string(schema_id_.value());
105+
std::format_to(std::back_inserter(repr), " schema_id: {}\n", schema_id_.value());
108106
}
109107

110-
repr += " }";
111-
108+
repr += ">";
112109
return repr;
113110
}
114111

115112
bool Snapshot::Equals(const Snapshot& other) const {
113+
if (this == &other) {
114+
return true;
115+
}
116116
return snapshot_id_ == other.snapshot_id_ &&
117117
parent_snapshot_id_ == other.parent_snapshot_id_ &&
118118
sequence_number_ == other.sequence_number_ &&
119-
timestamp_ms_ == other.timestamp_ms_ && manifest_list_ == other.manifest_list_ &&
120-
*summary_ == *other.summary_ && schema_id_ == other.schema_id_;
119+
timestamp_ms_ == other.timestamp_ms_ && schema_id_ == other.schema_id_;
121120
}
122121

123122
} // namespace iceberg

src/iceberg/snapshot.h

Lines changed: 105 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -28,121 +28,117 @@
2828

2929
namespace iceberg {
3030

31-
/// Optional Snapshot Summary Fields
32-
/// Metrics
33-
/// See https://iceberg.apache.org/spec/#metrics
34-
35-
/// \brief Number of data files added in the snapshot
36-
constexpr std::string_view kAddedDataFilesKey = "added-data-files";
37-
/// \brief Number of data files deleted in the snapshot
38-
constexpr std::string_view kDeletedDataFilesKey = "deleted-data-files";
39-
/// \brief Total number of live data files in the snapshot
40-
constexpr std::string_view kTotalDataFilesKey = "total-data-files";
41-
/// \brief Number of positional/equality delete files and deletion vectors added in the
42-
/// snapshot
43-
constexpr std::string_view kAddedDeleteFilesKey = "added-delete-files";
44-
/// \brief Number of equality delete files added in the snapshot
45-
constexpr std::string_view kAddedEqDeleteFilesKey = "added-equality-delete-files";
46-
/// \brief Number of equality delete files removed in the snapshot
47-
constexpr std::string_view kRemovedEqDeleteFilesKey = "removed-equality-delete-files";
48-
/// \brief Number of position delete files added in the snapshot
49-
constexpr std::string_view kAddedPosDeleteFilesKey = "added-position-delete-files";
50-
/// \brief Number of position delete files removed in the snapshot
51-
constexpr std::string_view kRemovedPosDeleteFilesKey = "removed-position-delete-files";
52-
/// \brief Number of deletion vectors added in the snapshot
53-
constexpr std::string_view kAddedDVSKey = "added-dvs";
54-
/// \brief Number of deletion vectors removed in the snapshot
55-
constexpr std::string_view kRemovedDVSKey = "removed-dvs";
56-
/// \brief Number of positional/equality delete files and deletion vectors removed in the
57-
/// snapshot
58-
constexpr std::string_view kRemovedDeleteFilesKey = "removed-delete-files";
59-
/// \brief Total number of live positional/equality delete files and deletion vectors in
60-
/// the snapshot
61-
constexpr std::string_view kTotalDeleteFilesKey = "total-delete-files";
62-
/// \brief Number of records added in the snapshot
63-
constexpr std::string_view kAddedRecordsKey = "added-records";
64-
/// \brief Number of records deleted in the snapshot
65-
constexpr std::string_view kDeletedRecordsKey = "deleted-records";
66-
/// \brief Total number of records in the snapshot
67-
constexpr std::string_view kTotalRecordsKey = "total-records";
68-
/// \brief The size of files added in the snapshot
69-
constexpr std::string_view kAddedFileSizeKey = "added-files-size";
70-
/// \brief The size of files removed in the snapshot
71-
constexpr std::string_view kRemovedFileSizeKey = "removed-files-size";
72-
/// \brief Total size of live files in the snapshot
73-
constexpr std::string_view kTotalFileSizeKey = "total-files-size";
74-
/// \brief Number of position delete records added in the snapshot
75-
constexpr std::string_view kAddedPosDeletesKey = "added-position-deletes";
76-
/// \brief Number of position delete records removed in the snapshot
77-
constexpr std::string_view kRemovedPosDeletesKey = "removed-position-deletes";
78-
/// \brief Total number of position delete records in the snapshot
79-
constexpr std::string_view kTotalPosDeletesKey = "total-position-deletes";
80-
/// \brief Number of equality delete records added in the snapshot
81-
constexpr std::string_view kAddedEqDeletesKey = "added-equality-deletes";
82-
/// \brief Number of equality delete records removed in the snapshot
83-
constexpr std::string_view kRemovedEqDeletesKey = "removed-equality-deletes";
84-
/// \brief Total number of equality delete records in the snapshot
85-
constexpr std::string_view kTotalEqDeletesKey = "total-equality-deletes";
86-
/// \brief Number of duplicate files deleted (duplicates are files recorded more than once
87-
/// in the manifest)
88-
constexpr std::string_view kDeletedDuplicatedFilesKey = "deleted-duplicate-files";
89-
/// \brief Number of partitions with files added or removed in the snapshot
90-
constexpr std::string_view kChangedPartitionCountProp = "changed-partition-count";
91-
92-
/// Other Fields
93-
/// See https://iceberg.apache.org/spec/#other-fields
94-
95-
/// \brief The Write-Audit-Publish id of a staged snapshot
96-
constexpr std::string_view kWAPIDKey = "wap.id";
97-
/// \brief The Write-Audit-Publish id of a snapshot already been published
98-
constexpr std::string_view kPublishedWAPIDKey = "published-wap-id";
99-
/// \brief The original id of a cherry-picked snapshot
100-
constexpr std::string_view kSourceSnapshotIDKey = "source-snapshot-id";
101-
/// \brief Name of the engine that created the snapshot
102-
constexpr std::string_view kEngineNameKey = "engine-name";
103-
/// \brief Version of the engine that created the snapshot
104-
constexpr std::string_view kEngineVersionKey = "engine-version";
105-
106-
/// \brief The operation field is used by some operations, like snapshot expiration, to
107-
/// skip processing certain snapshots.
108-
enum class Operation {
109-
/// Only data files were added and no files were removed.
110-
kAppend,
111-
/// Data and delete files were added and removed without changing table data; i.e.
112-
/// compaction, change the data file format, or relocating data files.
113-
kReplace,
114-
/// Data and delete files were added and removed in a logical overwrite operation.
115-
kOverwrite,
116-
/// Data files were removed and their contents logically deleted and/or delete files
117-
/// were added to delete rows.
118-
kDelete,
31+
/// \brief Optional Snapshot Summary Fields
32+
struct SnapshotSummaryFields {
33+
/// \brief The operation field key
34+
constexpr static std::string_view kOperation = "operation";
35+
36+
/// Metrics, see https://iceberg.apache.org/spec/#metrics
37+
38+
/// \brief Number of data files added in the snapshot
39+
constexpr static std::string_view kAddedDataFiles = "added-data-files";
40+
/// \brief Number of data files deleted in the snapshot
41+
constexpr static std::string_view kDeletedDataFiles = "deleted-data-files";
42+
/// \brief Total number of live data files in the snapshot
43+
constexpr static std::string_view kTotalDataFiles = "total-data-files";
44+
/// \brief Number of positional/equality delete files and deletion vectors added in the
45+
/// snapshot
46+
constexpr static std::string_view kAddedDeleteFiles = "added-delete-files";
47+
/// \brief Number of equality delete files added in the snapshot
48+
constexpr static std::string_view kAddedEqDeleteFiles = "added-equality-delete-files";
49+
/// \brief Number of equality delete files removed in the snapshot
50+
constexpr static std::string_view kRemovedEqDeleteFiles =
51+
"removed-equality-delete-files";
52+
/// \brief Number of position delete files added in the snapshot
53+
constexpr static std::string_view kAddedPosDeleteFiles = "added-position-delete-files";
54+
/// \brief Number of position delete files removed in the snapshot
55+
constexpr static std::string_view kRemovedPosDeleteFiles =
56+
"removed-position-delete-files";
57+
/// \brief Number of deletion vectors added in the snapshot
58+
constexpr static std::string_view kAddedDVS = "added-dvs";
59+
/// \brief Number of deletion vectors removed in the snapshot
60+
constexpr static std::string_view kRemovedDVS = "removed-dvs";
61+
/// \brief Number of positional/equality delete files and deletion vectors removed in
62+
/// the snapshot
63+
constexpr static std::string_view kRemovedDeleteFiles = "removed-delete-files";
64+
/// \brief Total number of live positional/equality delete files and deletion vectors in
65+
/// the snapshot
66+
constexpr static std::string_view kTotalDeleteFiles = "total-delete-files";
67+
/// \brief Number of records added in the snapshot
68+
constexpr static std::string_view kAddedRecords = "added-records";
69+
/// \brief Number of records deleted in the snapshot
70+
constexpr static std::string_view kDeletedRecords = "deleted-records";
71+
/// \brief Total number of records in the snapshot
72+
constexpr static std::string_view kTotalRecords = "total-records";
73+
/// \brief The size of files added in the snapshot
74+
constexpr static std::string_view kAddedFileSize = "added-files-size";
75+
/// \brief The size of files removed in the snapshot
76+
constexpr static std::string_view kRemovedFileSize = "removed-files-size";
77+
/// \brief Total size of live files in the snapshot
78+
constexpr static std::string_view kTotalFileSize = "total-files-size";
79+
/// \brief Number of position delete records added in the snapshot
80+
constexpr static std::string_view kAddedPosDeletes = "added-position-deletes";
81+
/// \brief Number of position delete records removed in the snapshot
82+
constexpr static std::string_view kRemovedPosDeletes = "removed-position-deletes";
83+
/// \brief Total number of position delete records in the snapshot
84+
constexpr static std::string_view kTotalPosDeletes = "total-position-deletes";
85+
/// \brief Number of equality delete records added in the snapshot
86+
constexpr static std::string_view kAddedEqDeletes = "added-equality-deletes";
87+
/// \brief Number of equality delete records removed in the snapshot
88+
constexpr static std::string_view kRemovedEqDeletes = "removed-equality-deletes";
89+
/// \brief Total number of equality delete records in the snapshot
90+
constexpr static std::string_view kTotalEqDeletes = "total-equality-deletes";
91+
/// \brief Number of duplicate files deleted (duplicates are files recorded more than
92+
/// once in the manifest)
93+
constexpr static std::string_view kDeletedDuplicatedFiles = "deleted-duplicate-files";
94+
/// \brief Number of partitions with files added or removed in the snapshot
95+
constexpr static std::string_view kChangedPartitionCountProp =
96+
"changed-partition-count";
97+
98+
/// Other Fields, see https://iceberg.apache.org/spec/#other-fields
99+
100+
/// \brief The Write-Audit-Publish id of a staged snapshot
101+
constexpr static std::string_view kWAPID = "wap.id";
102+
/// \brief The Write-Audit-Publish id of a snapshot already been published
103+
constexpr static std::string_view kPublishedWAPID = "published-wap-id";
104+
/// \brief The original id of a cherry-picked snapshot
105+
constexpr static std::string_view kSourceSnapshotID = "source-snapshot-id";
106+
/// \brief Name of the engine that created the snapshot
107+
constexpr static std::string_view kEngineName = "engine-name";
108+
/// \brief Version of the engine that created the snapshot
109+
constexpr static std::string_view kEngineVersion = "engine-version";
119110
};
120111

121112
/// \brief Summarises the changes in the snapshot.
122113
class ICEBERG_EXPORT Summary : public iceberg::util::Formattable {
123114
public:
115+
/// \brief The operation field is used by some operations, like snapshot expiration, to
116+
/// skip processing certain snapshots.
117+
enum class Operation {
118+
/// Only data files were added and no files were removed.
119+
kAppend,
120+
/// Data and delete files were added and removed without changing table data; i.e.
121+
/// compaction, change the data file format, or relocating data files.
122+
kReplace,
123+
/// Data and delete files were added and removed in a logical overwrite operation.
124+
kOverwrite,
125+
/// Data files were removed and their contents logically deleted and/or delete files
126+
/// were added to delete rows.
127+
kDelete,
128+
};
124129
Summary() = default;
125130
/// \brief Construct a summary with the given operation and properties.
126131
Summary(Operation op, std::unordered_map<std::string, std::string> props);
127132

128133
/// \brief Get the operation type of the snapshot.
129-
[[nodiscard]] Operation operation() const;
134+
Operation operation() const;
130135

131136
/// \brief Get the additional properties of the snapshot.
132-
[[nodiscard]] const std::unordered_map<std::string, std::string>& properties() const;
137+
const std::unordered_map<std::string, std::string>& properties() const;
133138

134139
std::string ToString() const override;
135140

136-
friend bool operator==(const Summary& lhs, const Summary& rhs) {
137-
return lhs.Equals(rhs);
138-
}
139-
140-
friend bool operator!=(const Summary& lhs, const Summary& rhs) { return !(lhs == rhs); }
141-
142141
private:
143-
/// \brief Compare two Summaries for equality.
144-
[[nodiscard]] bool Equals(const Summary& other) const;
145-
146142
/// The type of operation in the snapshot
147143
Operation operation_{Operation::kAppend};
148144
/// Other summary data.
@@ -159,28 +155,28 @@ class ICEBERG_EXPORT Snapshot : public iceberg::util::Formattable {
159155
public:
160156
Snapshot(int64_t snapshot_id, std::optional<int64_t> parent_snapshot_id,
161157
int64_t sequence_number, int64_t timestamp_ms, std::string manifest_list,
162-
std::shared_ptr<Summary> summary, std::optional<int64_t> schema_id);
158+
Summary summary, std::optional<int64_t> schema_id);
163159

164160
/// \brief Get the id of the snapshot.
165-
[[nodiscard]] int64_t snapshot_id() const;
161+
int64_t snapshot_id() const;
166162

167163
/// \brief Get parent snapshot id.
168-
[[nodiscard]] std::optional<int64_t> parent_snapshot_id() const;
164+
std::optional<int64_t> parent_snapshot_id() const;
169165

170166
/// \brief Get the sequence number of the snapshot.
171-
[[nodiscard]] int64_t sequence_number() const;
167+
int64_t sequence_number() const;
172168

173169
/// \brief Get the timestamp of the snapshot.
174-
[[nodiscard]] int64_t timestamp_ms() const;
170+
int64_t timestamp_ms() const;
175171

176172
/// \brief Get the manifest list of the snapshot.
177-
[[nodiscard]] const std::string& manifest_list() const;
173+
const std::string& manifest_list() const;
178174

179175
/// \brief Get the summary of the snapshot.
180-
[[nodiscard]] const std::shared_ptr<Summary>& summary() const;
176+
const Summary& summary() const;
181177

182178
/// \brief Get the schema ID of the snapshot.
183-
[[nodiscard]] std::optional<int32_t> schema_id() const;
179+
std::optional<int32_t> schema_id() const;
184180

185181
std::string ToString() const override;
186182

@@ -194,7 +190,7 @@ class ICEBERG_EXPORT Snapshot : public iceberg::util::Formattable {
194190

195191
private:
196192
/// \brief Compare two snapshots for equality.
197-
[[nodiscard]] bool Equals(const Snapshot& other) const;
193+
bool Equals(const Snapshot& other) const;
198194

199195
/// A unqiue long ID.
200196
int64_t snapshot_id_;
@@ -209,7 +205,7 @@ class ICEBERG_EXPORT Snapshot : public iceberg::util::Formattable {
209205
/// additional metadata.
210206
std::string manifest_list_;
211207
/// A string map that summaries the snapshot changes, including operation.
212-
std::shared_ptr<Summary> summary_;
208+
Summary summary_;
213209
/// ID of the table's current schema when the snapshot was created.
214210
std::optional<int32_t> schema_id_;
215211
};

0 commit comments

Comments
 (0)