Skip to content

Commit a9c69fb

Browse files
committed
resolve some review comments
1 parent 144de7b commit a9c69fb

File tree

3 files changed

+133
-82
lines changed

3 files changed

+133
-82
lines changed

src/iceberg/snapshot.cc

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,34 +19,82 @@
1919

2020
#include "iceberg/snapshot.h"
2121

22-
#include <format>
22+
namespace iceberg {
2323

24-
#include "iceberg/util/formatter.h"
24+
const std::string SnapshotSummaryFields::kOperation = "operation";
25+
const std::string SnapshotSummaryFields::kAddedDataFiles = "added-data-files";
26+
const std::string SnapshotSummaryFields::kDeletedDataFiles = "deleted-data-files";
27+
const std::string SnapshotSummaryFields::kTotalDataFiles = "total-data-files";
28+
const std::string SnapshotSummaryFields::kAddedDeleteFiles = "added-delete-files";
29+
const std::string SnapshotSummaryFields::kAddedEqDeleteFiles =
30+
"added-equality-delete-files";
31+
const std::string SnapshotSummaryFields::kRemovedEqDeleteFiles =
32+
"removed-equality-delete-files";
33+
const std::string SnapshotSummaryFields::kAddedPosDeleteFiles =
34+
"added-position-delete-files";
35+
const std::string SnapshotSummaryFields::kRemovedPosDeleteFiles =
36+
"removed-position-delete-files";
37+
const std::string SnapshotSummaryFields::kAddedDVS = "added-dvs";
38+
const std::string SnapshotSummaryFields::kRemovedDVS = "removed-dvs";
39+
const std::string SnapshotSummaryFields::kRemovedDeleteFiles = "removed-delete-files";
40+
const std::string SnapshotSummaryFields::kTotalDeleteFiles = "total-delete-files";
41+
const std::string SnapshotSummaryFields::kAddedRecords = "added-records";
42+
const std::string SnapshotSummaryFields::kDeletedRecords = "deleted-records";
43+
const std::string SnapshotSummaryFields::kTotalRecords = "total-records";
44+
const std::string SnapshotSummaryFields::kAddedFileSize = "added-files-size";
45+
const std::string SnapshotSummaryFields::kRemovedFileSize = "removed-files-size";
46+
const std::string SnapshotSummaryFields::kTotalFileSize = "total-files-size";
47+
const std::string SnapshotSummaryFields::kAddedPosDeletes = "added-position-deletes";
48+
const std::string SnapshotSummaryFields::kRemovedPosDeletes = "removed-position-deletes";
49+
const std::string SnapshotSummaryFields::kTotalPosDeletes = "total-position-deletes";
50+
const std::string SnapshotSummaryFields::kAddedEqDeletes = "added-equality-deletes";
51+
const std::string SnapshotSummaryFields::kRemovedEqDeletes = "removed-equality-deletes";
52+
const std::string SnapshotSummaryFields::kTotalEqDeletes = "total-equality-deletes";
53+
const std::string SnapshotSummaryFields::kDeletedDuplicatedFiles =
54+
"deleted-duplicate-files";
55+
const std::string SnapshotSummaryFields::kChangedPartitionCountProp =
56+
"changed-partition-count";
2557

26-
namespace iceberg {
58+
const std::string SnapshotSummaryFields::kWAPID = "wap.id";
59+
const std::string SnapshotSummaryFields::kPublishedWAPID = "published-wap-id";
60+
const std::string SnapshotSummaryFields::kSourceSnapshotID = "source-snapshot-id";
61+
const std::string SnapshotSummaryFields::kEngineName = "engine-name";
62+
const std::string SnapshotSummaryFields::kEngineVersion = "engine-version";
2763

28-
std::optional<std::string> Snapshot::operation() const {
29-
auto it = summary.find(std::string(SnapshotSummaryFields::kOperation));
64+
std::optional<std::string_view> Snapshot::operation() const {
65+
auto it = summary.find(SnapshotSummaryFields::kOperation);
3066
if (it != summary.end()) {
3167
return it->second;
3268
}
3369
return std::nullopt;
3470
}
3571

36-
std::optional<std::reference_wrapper<const Snapshot::manifest_list_t>>
37-
Snapshot::ManifestList() const {
38-
if (std::holds_alternative<manifest_list_t>(manifest_list)) {
39-
return std::cref(std::get<manifest_list_t>(manifest_list));
40-
}
41-
return std::nullopt;
72+
std::optional<std::reference_wrapper<const ManifestList>> Snapshot::ManifestList() const {
73+
return std::visit(
74+
[&](const auto& manifest_list)
75+
-> std::optional<std::reference_wrapper<const struct ManifestList>> {
76+
using T = std::decay_t<decltype(manifest_list)>;
77+
if constexpr (std::is_same_v<T, struct ManifestList>) {
78+
return std::cref(manifest_list);
79+
} else {
80+
return std::nullopt;
81+
}
82+
},
83+
manifest_list);
4284
}
4385

44-
std::optional<std::reference_wrapper<const Snapshot::manifests_t>> Snapshot::Manifests()
45-
const {
46-
if (std::holds_alternative<manifests_t>(manifest_list)) {
47-
return std::cref(std::get<manifests_t>(manifest_list));
48-
}
49-
return std::nullopt;
86+
std::optional<std::reference_wrapper<const Manifests>> Snapshot::Manifests() const {
87+
return std::visit(
88+
[&](const auto& manifest_list)
89+
-> std::optional<std::reference_wrapper<const struct Manifests>> {
90+
using T = std::decay_t<decltype(manifest_list)>;
91+
if constexpr (std::is_same_v<T, struct Manifests>) {
92+
return std::cref(manifest_list);
93+
} else {
94+
return std::nullopt;
95+
}
96+
},
97+
manifest_list);
5098
}
5199

52100
bool Snapshot::Equals(const Snapshot& other) const {

src/iceberg/snapshot.h

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@
2121

2222
#include <optional>
2323
#include <string>
24+
#include <string_view>
2425
#include <unordered_map>
2526
#include <variant>
2627
#include <vector>
2728

2829
#include "iceberg/iceberg_export.h"
29-
#include "iceberg/util/formattable.h"
3030

3131
namespace iceberg {
3232

@@ -63,82 +63,79 @@ struct ICEBERG_EXPORT SnapshotRef {
6363
/// \brief Optional Snapshot Summary Fields
6464
struct SnapshotSummaryFields {
6565
/// \brief The operation field key
66-
constexpr static std::string_view kOperation = "operation";
66+
static const std::string kOperation;
6767

6868
/// Metrics, see https://iceberg.apache.org/spec/#metrics
6969

7070
/// \brief Number of data files added in the snapshot
71-
constexpr static std::string_view kAddedDataFiles = "added-data-files";
71+
static const std::string kAddedDataFiles;
7272
/// \brief Number of data files deleted in the snapshot
73-
constexpr static std::string_view kDeletedDataFiles = "deleted-data-files";
73+
static const std::string kDeletedDataFiles;
7474
/// \brief Total number of live data files in the snapshot
75-
constexpr static std::string_view kTotalDataFiles = "total-data-files";
75+
static const std::string kTotalDataFiles;
7676
/// \brief Number of positional/equality delete files and deletion vectors added in the
7777
/// snapshot
78-
constexpr static std::string_view kAddedDeleteFiles = "added-delete-files";
78+
static const std::string kAddedDeleteFiles;
7979
/// \brief Number of equality delete files added in the snapshot
80-
constexpr static std::string_view kAddedEqDeleteFiles = "added-equality-delete-files";
80+
static const std::string kAddedEqDeleteFiles;
8181
/// \brief Number of equality delete files removed in the snapshot
82-
constexpr static std::string_view kRemovedEqDeleteFiles =
83-
"removed-equality-delete-files";
82+
static const std::string kRemovedEqDeleteFiles;
8483
/// \brief Number of position delete files added in the snapshot
85-
constexpr static std::string_view kAddedPosDeleteFiles = "added-position-delete-files";
84+
static const std::string kAddedPosDeleteFiles;
8685
/// \brief Number of position delete files removed in the snapshot
87-
constexpr static std::string_view kRemovedPosDeleteFiles =
88-
"removed-position-delete-files";
86+
static const std::string kRemovedPosDeleteFiles;
8987
/// \brief Number of deletion vectors added in the snapshot
90-
constexpr static std::string_view kAddedDVS = "added-dvs";
88+
static const std::string kAddedDVS;
9189
/// \brief Number of deletion vectors removed in the snapshot
92-
constexpr static std::string_view kRemovedDVS = "removed-dvs";
90+
static const std::string kRemovedDVS;
9391
/// \brief Number of positional/equality delete files and deletion vectors removed in
9492
/// the snapshot
95-
constexpr static std::string_view kRemovedDeleteFiles = "removed-delete-files";
93+
static const std::string kRemovedDeleteFiles;
9694
/// \brief Total number of live positional/equality delete files and deletion vectors in
9795
/// the snapshot
98-
constexpr static std::string_view kTotalDeleteFiles = "total-delete-files";
96+
static const std::string kTotalDeleteFiles;
9997
/// \brief Number of records added in the snapshot
100-
constexpr static std::string_view kAddedRecords = "added-records";
98+
static const std::string kAddedRecords;
10199
/// \brief Number of records deleted in the snapshot
102-
constexpr static std::string_view kDeletedRecords = "deleted-records";
100+
static const std::string kDeletedRecords;
103101
/// \brief Total number of records in the snapshot
104-
constexpr static std::string_view kTotalRecords = "total-records";
102+
static const std::string kTotalRecords;
105103
/// \brief The size of files added in the snapshot
106-
constexpr static std::string_view kAddedFileSize = "added-files-size";
104+
static const std::string kAddedFileSize;
107105
/// \brief The size of files removed in the snapshot
108-
constexpr static std::string_view kRemovedFileSize = "removed-files-size";
106+
static const std::string kRemovedFileSize;
109107
/// \brief Total size of live files in the snapshot
110-
constexpr static std::string_view kTotalFileSize = "total-files-size";
108+
static const std::string kTotalFileSize;
111109
/// \brief Number of position delete records added in the snapshot
112-
constexpr static std::string_view kAddedPosDeletes = "added-position-deletes";
110+
static const std::string kAddedPosDeletes;
113111
/// \brief Number of position delete records removed in the snapshot
114-
constexpr static std::string_view kRemovedPosDeletes = "removed-position-deletes";
112+
static const std::string kRemovedPosDeletes;
115113
/// \brief Total number of position delete records in the snapshot
116-
constexpr static std::string_view kTotalPosDeletes = "total-position-deletes";
114+
static const std::string kTotalPosDeletes;
117115
/// \brief Number of equality delete records added in the snapshot
118-
constexpr static std::string_view kAddedEqDeletes = "added-equality-deletes";
116+
static const std::string kAddedEqDeletes;
119117
/// \brief Number of equality delete records removed in the snapshot
120-
constexpr static std::string_view kRemovedEqDeletes = "removed-equality-deletes";
118+
static const std::string kRemovedEqDeletes;
121119
/// \brief Total number of equality delete records in the snapshot
122-
constexpr static std::string_view kTotalEqDeletes = "total-equality-deletes";
120+
static const std::string kTotalEqDeletes;
123121
/// \brief Number of duplicate files deleted (duplicates are files recorded more than
124122
/// once in the manifest)
125-
constexpr static std::string_view kDeletedDuplicatedFiles = "deleted-duplicate-files";
123+
static const std::string kDeletedDuplicatedFiles;
126124
/// \brief Number of partitions with files added or removed in the snapshot
127-
constexpr static std::string_view kChangedPartitionCountProp =
128-
"changed-partition-count";
125+
static const std::string kChangedPartitionCountProp;
129126

130127
/// Other Fields, see https://iceberg.apache.org/spec/#other-fields
131128

132129
/// \brief The Write-Audit-Publish id of a staged snapshot
133-
constexpr static std::string_view kWAPID = "wap.id";
130+
static const std::string kWAPID;
134131
/// \brief The Write-Audit-Publish id of a snapshot already been published
135-
constexpr static std::string_view kPublishedWAPID = "published-wap-id";
132+
static const std::string kPublishedWAPID;
136133
/// \brief The original id of a cherry-picked snapshot
137-
constexpr static std::string_view kSourceSnapshotID = "source-snapshot-id";
134+
static const std::string kSourceSnapshotID;
138135
/// \brief Name of the engine that created the snapshot
139-
constexpr static std::string_view kEngineName = "engine-name";
136+
static const std::string kEngineName;
140137
/// \brief Version of the engine that created the snapshot
141-
constexpr static std::string_view kEngineVersion = "engine-version";
138+
static const std::string kEngineVersion;
142139
};
143140

144141
/// \brief Data operation that produce snapshots.
@@ -148,16 +145,27 @@ struct SnapshotSummaryFields {
148145
/// does not need to clean up deleted files for appends, which have no deleted files.
149146
struct ICEBERG_EXPORT DataOperation {
150147
/// \brief Only data files were added and no files were removed.
151-
static constexpr std::string_view kAppend = "append";
148+
static constexpr std::string kAppend = "append";
152149
/// \brief Data and delete files were added and removed without changing table data;
153150
/// i.e. compaction, change the data file format, or relocating data files.
154-
static constexpr std::string_view kReplace = "replace";
151+
static constexpr std::string kReplace = "replace";
155152
/// \brief Data and delete files were added and removed in a logical overwrite
156153
/// operation.
157-
static constexpr std::string_view kOverwrite = "overwrite";
154+
static constexpr std::string kOverwrite = "overwrite";
158155
/// \brief Data files were removed and their contents logically deleted and/or delete
159156
/// files were added to delete rows.
160-
static constexpr std::string_view kDelete = "delete";
157+
static constexpr std::string kDelete = "delete";
158+
};
159+
160+
/// \brief The location of a manifest list for this snapshot that tracks manifest files
161+
/// with additional metadata
162+
struct ICEBERG_EXPORT ManifestList {
163+
std::string manifest_list_path;
164+
};
165+
166+
/// \brief A list of manifest file locations.
167+
struct ICEBERG_EXPORT Manifests {
168+
std::vector<std::string> manifest_paths;
161169
};
162170

163171
/// \brief A snapshot of the data in a table at a point in time.
@@ -167,9 +175,6 @@ struct ICEBERG_EXPORT DataOperation {
167175
///
168176
/// Snapshots are created by table operations.
169177
struct ICEBERG_EXPORT Snapshot {
170-
using manifest_list_t = std::string;
171-
using manifests_t = std::vector<std::string>;
172-
173178
/// A unqiue long ID.
174179
int64_t snapshot_id;
175180
/// The snapshot ID of the snapshot's parent. Omitted for any snapshot with no parent.
@@ -180,8 +185,8 @@ struct ICEBERG_EXPORT Snapshot {
180185
/// inspection.
181186
int64_t timestamp_ms;
182187
/// The location of a manifest list for this snapshot that tracks manifest files with
183-
/// additional metadata.
184-
std::variant<manifest_list_t, manifests_t> manifest_list;
188+
/// additional metadata(v2) or a list of manifest file locations(v1).
189+
std::variant<std::monostate, ManifestList, Manifests> manifest_list;
185190
/// A string map that summaries the snapshot changes, including operation.
186191
std::unordered_map<std::string, std::string> summary;
187192
/// ID of the table's current schema when the snapshot was created.
@@ -192,19 +197,19 @@ struct ICEBERG_EXPORT Snapshot {
192197
///
193198
/// \return the operation that produced this snapshot, or nullopt if the operation is
194199
/// unknown.
195-
std::optional<std::string> operation() const;
200+
std::optional<std::string_view> operation() const;
196201

197202
/// \brief Get the manifest list for this snapshot.
198203
///
199204
/// \return the manifest list for this snapshot, or nullopt if the snapshot has no
200205
/// manifest list.
201-
std::optional<std::reference_wrapper<const manifest_list_t>> ManifestList() const;
206+
std::optional<std::reference_wrapper<const ManifestList>> ManifestList() const;
202207

203208
/// \brief Get the manifests for this snapshot.
204209
///
205210
/// \return the manifests for this snapshot, or nullopt if the snapshot has no
206211
/// manifests.
207-
std::optional<std::reference_wrapper<const manifests_t>> Manifests() const;
212+
std::optional<std::reference_wrapper<const Manifests>> Manifests() const;
208213

209214
/// \brief Compare two snapshots for equality.
210215
friend bool operator==(const Snapshot& lhs, const Snapshot& rhs) {

test/snapshot_test.cc

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,14 @@ class SnapshotTest : public ::testing::Test {
2727
protected:
2828
void SetUp() override {
2929
// Initialize some common test data
30-
summary1 = {{std::string(SnapshotSummaryFields::kOperation),
31-
std::string(DataOperation::kAppend)},
32-
{std::string(SnapshotSummaryFields::kAddedDataFiles), "101"}};
30+
summary1 = {{SnapshotSummaryFields::kOperation, DataOperation::kAppend},
31+
{SnapshotSummaryFields::kAddedDataFiles, "101"}};
3332

34-
summary2 = {{std::string(SnapshotSummaryFields::kOperation),
35-
std::string(DataOperation::kAppend)},
36-
{std::string(SnapshotSummaryFields::kAddedDataFiles), "101"}};
33+
summary2 = {{SnapshotSummaryFields::kOperation, DataOperation::kAppend},
34+
{SnapshotSummaryFields::kAddedDataFiles, "101"}};
3735

38-
summary3 = {{std::string(SnapshotSummaryFields::kOperation),
39-
std::string(DataOperation::kDelete)},
40-
{std::string(SnapshotSummaryFields::kDeletedDataFiles), "20"}};
36+
summary3 = {{SnapshotSummaryFields::kOperation, DataOperation::kDelete},
37+
{SnapshotSummaryFields::kDeletedDataFiles, "20"}};
4138
}
4239

4340
std::unordered_map<std::string, std::string> summary1;
@@ -51,7 +48,7 @@ TEST_F(SnapshotTest, ConstructionAndFieldAccess) {
5148
.parent_snapshot_id = 54321,
5249
.sequence_number = 1,
5350
.timestamp_ms = 1615569200000,
54-
.manifest_list = "s3://example/manifest_list.avro",
51+
.manifest_list = ManifestList("s3://example/manifest_list.avro"),
5552
.summary = summary1,
5653
.schema_id = 10};
5754

@@ -60,7 +57,8 @@ TEST_F(SnapshotTest, ConstructionAndFieldAccess) {
6057
EXPECT_EQ(*snapshot.parent_snapshot_id, 54321);
6158
EXPECT_EQ(snapshot.sequence_number, 1);
6259
EXPECT_EQ(snapshot.timestamp_ms, 1615569200000);
63-
EXPECT_EQ(snapshot.ManifestList()->get(), "s3://example/manifest_list.avro");
60+
EXPECT_EQ(snapshot.ManifestList()->get().manifest_list_path,
61+
"s3://example/manifest_list.avro");
6462
EXPECT_EQ(snapshot.operation().value(), DataOperation::kAppend);
6563
EXPECT_EQ(snapshot.summary.at(std::string(SnapshotSummaryFields::kAddedDataFiles)),
6664
"101");
@@ -72,14 +70,14 @@ TEST_F(SnapshotTest, ConstructionAndFieldAccess) {
7270

7371
TEST_F(SnapshotTest, EqualityComparison) {
7472
// Test the == and != operators
75-
Snapshot snapshot1(12345, {}, 1, 1615569200000, "s3://example/manifest_list.avro",
76-
summary1, {});
73+
Snapshot snapshot1(12345, {}, 1, 1615569200000,
74+
ManifestList("s3://example/manifest_list.avro"), summary1, {});
7775

78-
Snapshot snapshot2(12345, {}, 1, 1615569200000, "s3://example/manifest_list.avro",
79-
summary2, {});
76+
Snapshot snapshot2(12345, {}, 1, 1615569200000,
77+
ManifestList("s3://example/manifest_list.avro"), summary2, {});
8078

81-
Snapshot snapshot3(67890, {}, 1, 1615569200000, "s3://example/manifest_list.avro",
82-
summary3, {});
79+
Snapshot snapshot3(67890, {}, 1, 1615569200000,
80+
ManifestList("s3://example/manifest_list.avro"), summary3, {});
8381

8482
EXPECT_EQ(snapshot1, snapshot2);
8583
EXPECT_NE(snapshot1, snapshot3);

0 commit comments

Comments
 (0)