Skip to content

Commit 2ef55aa

Browse files
author
nullccxsy
committed
test: add and refactor ManifestList V1 reader test functions
1. PartitionTest: tests partitions. 2. ComplexTypeTest: tests complex types. 3. PartitionComplexTypeTest: tests partitions combined with complex types.
1 parent fc10fd2 commit 2ef55aa

File tree

1 file changed

+163
-204
lines changed

1 file changed

+163
-204
lines changed

test/manifest_list_reader_test.cc

Lines changed: 163 additions & 204 deletions
Original file line numberDiff line numberDiff line change
@@ -32,226 +32,185 @@
3232

3333
namespace iceberg {
3434

35-
class ManifestListReaderTest : public TempFileTestBase {
35+
class ManifestListReaderV1Test : public ::testing::Test {
3636
protected:
3737
static void SetUpTestSuite() { avro::AvroReader::Register(); }
3838

3939
void SetUp() override {
40-
TempFileTestBase::SetUp();
4140
local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
4241
file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
4342
}
4443

45-
std::vector<ManifestFile> PrepareTestManifestList() {
46-
std::vector<ManifestFile> manifest_files;
47-
std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/";
48-
std::vector<std::string> paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro",
49-
"9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro",
50-
"2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro",
51-
"3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"};
52-
std::vector<int64_t> file_size = {7433, 7431, 7433, 7431};
53-
std::vector<int64_t> snapshot_id = {7412193043800610213, 5485972788975780755,
54-
1679468743751242972, 1579605567338877265};
55-
std::vector<std::vector<uint8_t>> bounds = {{'x', ';', 0x07, 0x00},
56-
{'(', 0x19, 0x07, 0x00},
57-
{0xd0, 0xd4, 0x06, 0x00},
58-
{0xb8, 0xd4, 0x06, 0x00}};
59-
for (int i = 0; i < 4; ++i) {
60-
ManifestFile manifest_file;
61-
manifest_file.manifest_path = test_dir_prefix + paths[i];
62-
manifest_file.manifest_length = file_size[i];
63-
manifest_file.partition_spec_id = 0;
64-
manifest_file.content = ManifestFile::Content::kData;
65-
manifest_file.sequence_number = 4 - i;
66-
manifest_file.min_sequence_number = 4 - i;
67-
manifest_file.added_snapshot_id = snapshot_id[i];
68-
manifest_file.added_files_count = 1;
69-
manifest_file.existing_files_count = 0;
70-
manifest_file.deleted_files_count = 0;
71-
manifest_file.added_rows_count = 1;
72-
manifest_file.existing_rows_count = 0;
73-
manifest_file.deleted_rows_count = 0;
74-
PartitionFieldSummary partition;
75-
partition.contains_null = false;
76-
partition.contains_nan = false;
77-
partition.lower_bound = bounds[i];
78-
partition.upper_bound = bounds[i];
79-
manifest_file.partitions.emplace_back(partition);
80-
manifest_files.emplace_back(manifest_file);
81-
}
82-
return manifest_files;
83-
}
84-
85-
std::vector<ManifestFile> PrepareTestManifestListPartition() {
86-
std::vector<ManifestFile> manifest_files;
87-
std::string test_dir_prefix = "iceberg-warehouse/db/v1_partition_test/metadata/";
88-
std::vector<std::string> paths = {"eafd2972-f58e-4185-9237-6378f564787e-m1.avro",
89-
"eafd2972-f58e-4185-9237-6378f564787e-m0.avro"};
90-
std::vector<int64_t> file_size = {6185, 6113};
91-
std::vector<int64_t> snapshot_id = {7532614258660258098, 7532614258660258098};
92-
93-
std::vector<std::vector<std::uint8_t>> lower_bounds = {
94-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32},
95-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}};
96-
97-
std::vector<std::vector<std::uint8_t>> upper_bounds = {
98-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33},
99-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}};
100-
101-
for (int i = 0; i < 2; ++i) {
102-
ManifestFile manifest_file;
103-
manifest_file.manifest_path = test_dir_prefix + paths[i];
104-
manifest_file.manifest_length = file_size[i];
105-
manifest_file.partition_spec_id = 0;
106-
manifest_file.added_snapshot_id = snapshot_id[i];
107-
manifest_file.added_files_count = 4 * (1 - i);
108-
manifest_file.existing_files_count = 0;
109-
manifest_file.deleted_files_count = 2 * i;
110-
manifest_file.added_rows_count = 6 * (1 - i);
111-
manifest_file.existing_rows_count = 0;
112-
manifest_file.deleted_rows_count = 6 * i;
113-
114-
PartitionFieldSummary partition;
115-
partition.contains_null = false;
116-
partition.contains_nan = false;
117-
partition.lower_bound = lower_bounds[i];
118-
partition.upper_bound = upper_bounds[i];
119-
manifest_file.partitions.emplace_back(partition);
120-
manifest_files.emplace_back(manifest_file);
121-
}
122-
return manifest_files;
123-
}
124-
125-
std::vector<ManifestFile> PrepareTestManifestListComplexType() {
126-
std::vector<ManifestFile> manifest_files;
127-
std::string test_dir_prefix = "iceberg-warehouse/db/v1_type_test/metadata/";
128-
std::vector<std::string> paths = {"aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro",
129-
"aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"};
130-
std::vector<int64_t> file_size = {6498, 6513};
131-
std::vector<int64_t> snapshot_id = {4134160420377642835, 4134160420377642835};
132-
133-
for (int i = 0; i < 2; ++i) {
134-
ManifestFile manifest_file;
135-
manifest_file.manifest_path = test_dir_prefix + paths[i];
136-
manifest_file.manifest_length = file_size[i];
137-
manifest_file.partition_spec_id = 0;
138-
manifest_file.added_snapshot_id = snapshot_id[i];
139-
manifest_file.added_files_count = 1 - i;
140-
manifest_file.existing_files_count = 0;
141-
manifest_file.deleted_files_count = i;
142-
manifest_file.added_rows_count = 2 * (1 - i);
143-
manifest_file.existing_rows_count = 0;
144-
manifest_file.deleted_rows_count = 3 * i;
145-
manifest_files.emplace_back(manifest_file);
146-
}
147-
return manifest_files;
148-
}
149-
150-
std::vector<ManifestFile> PrepareTestManifestListPartitionComplex() {
151-
std::vector<ManifestFile> manifest_files;
152-
std::string test_dir_prefix =
153-
"iceberg-warehouse/db2/v1_complex_partition_test/metadata/";
154-
std::vector<std::string> paths = {"5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro",
155-
"5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"};
156-
std::vector<int64_t> file_size = {6402, 6318};
157-
std::vector<int64_t> snapshot_id = {7522296285847100621, 7522296285847100621};
158-
159-
std::vector<std::vector<std::uint8_t>> lower_bounds = {
160-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32},
161-
{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
162-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32},
163-
{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
164-
165-
std::vector<std::vector<std::uint8_t>> upper_bounds = {
166-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34},
167-
{0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
168-
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33},
169-
{0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
170-
171-
for (int i = 0; i < 2; ++i) {
172-
ManifestFile manifest_file;
173-
manifest_file.manifest_path = test_dir_prefix + paths[i];
174-
manifest_file.manifest_length = file_size[i];
175-
manifest_file.partition_spec_id = 0;
176-
manifest_file.added_snapshot_id = snapshot_id[i];
177-
manifest_file.added_files_count = 0;
178-
manifest_file.existing_files_count = i == 0 ? 3 : 1;
179-
manifest_file.deleted_files_count = 1;
180-
manifest_file.added_rows_count = 0;
181-
manifest_file.existing_rows_count = i == 0 ? 4 : 1;
182-
manifest_file.deleted_rows_count = i == 0 ? 2 : 1;
183-
184-
PartitionFieldSummary partition;
185-
for (int j = 0; j < 2; ++j) {
186-
partition.contains_null = false;
187-
partition.contains_nan = false;
188-
partition.lower_bound = lower_bounds[2 * i + j];
189-
partition.upper_bound = upper_bounds[2 * i + j];
190-
manifest_file.partitions.emplace_back(partition);
191-
}
192-
manifest_files.emplace_back(manifest_file);
193-
}
194-
return manifest_files;
195-
}
196-
19744
std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
19845
std::shared_ptr<FileIO> file_io_;
199-
};
200-
201-
TEST_F(ManifestListReaderTest, BasicTest) {
202-
std::string path = GetResourcePath(
203-
"snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro");
204-
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
205-
ASSERT_EQ(manifest_reader_result.has_value(), true);
206-
auto manifest_reader = std::move(manifest_reader_result.value());
207-
auto read_result = manifest_reader->Files();
208-
ASSERT_EQ(read_result.has_value(), true);
209-
ASSERT_EQ(read_result.value().size(), 4);
21046

211-
auto expected_manifest_list = PrepareTestManifestList();
212-
ASSERT_EQ(read_result.value(), expected_manifest_list);
213-
}
214-
215-
TEST_F(ManifestListReaderTest, PartitionTest) {
216-
std::string path = GetResourcePath(
217-
"snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro");
218-
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
219-
ASSERT_EQ(manifest_reader_result.has_value(), true);
220-
auto manifest_reader = std::move(manifest_reader_result.value());
221-
auto read_result = manifest_reader->Files();
222-
ASSERT_EQ(read_result.has_value(), true);
223-
ASSERT_EQ(read_result.value().size(), 2);
47+
// Shared check used by the tests in this fixture: resolves `resource_name`
// through GetResourcePath, opens it with ManifestListReader::MakeReader using
// the fixture's file_io_, reads all entries via Files(), and asserts that the
// result matches `expected_manifest_list` (size first, then full equality).
//
// NOTE(review): the ASSERT_* macros are used inside a void helper; per
// GoogleTest a fatal failure returns from this helper only, not from the
// calling TEST_F body. The visible callers invoke it as their last statement.
void TestManifestListReading(const std::string& resource_name,
48+
const std::vector<ManifestFile>& expected_manifest_list) {
49+
std::string path = GetResourcePath(resource_name);
50+
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
51+
// Stop here if the reader could not be created; .value() below needs it.
ASSERT_EQ(manifest_reader_result.has_value(), true);
52+
53+
auto manifest_reader = std::move(manifest_reader_result.value());
54+
auto read_result = manifest_reader->Files();
55+
ASSERT_EQ(read_result.has_value(), true);
56+
// Size is compared before contents so a count mismatch is reported on its own.
ASSERT_EQ(read_result.value().size(), expected_manifest_list.size());
57+
ASSERT_EQ(read_result.value(), expected_manifest_list);
58+
}
59+
};
22460

225-
auto expected_manifest_list = PrepareTestManifestListPartition();
226-
ASSERT_EQ(read_result.value(), expected_manifest_list);
61+
// Builds the two ManifestFile entries expected from the manifest list
// snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro and
// hands them to TestManifestListReading for comparison. Each expected entry
// carries exactly one partition field summary.
TEST_F(ManifestListReaderV1Test, PartitionTest) {
62+
// Expected manifest paths, in the order the entries are compared (m1, then m0).
std::vector<std::string> paths = {
63+
"iceberg-warehouse/db/v1_partition_test/metadata/"
64+
"eafd2972-f58e-4185-9237-6378f564787e-m1.avro",
65+
"iceberg-warehouse/db/v1_partition_test/metadata/"
66+
"eafd2972-f58e-4185-9237-6378f564787e-m0.avro"};
67+
std::vector<int64_t> file_size = {6185, 6113};
68+
std::vector<int64_t> snapshot_id = {7532614258660258098, 7532614258660258098};
69+
70+
// Read as ASCII these bytes spell "2022-02-22" and "2022-2-22".
// NOTE(review): the first lower bound is 10 bytes while every other bound in
// this test is 9 bytes — presumably this mirrors the fixture file; confirm.
std::vector<std::vector<std::uint8_t>> lower_bounds = {
71+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32},
72+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}};
73+
74+
// Read as ASCII both upper bounds spell "2022-2-23".
std::vector<std::vector<std::uint8_t>> upper_bounds = {
75+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33},
76+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}};
77+
78+
// Entry 0 records only additions (4 files / 6 rows); entry 1 records only
// deletions (2 files / 6 rows).
std::vector<ManifestFile> expected_manifest_list = {
79+
{.manifest_path = paths[0],
80+
.manifest_length = file_size[0],
81+
.partition_spec_id = 0,
82+
.added_snapshot_id = snapshot_id[0],
83+
.added_files_count = 4,
84+
.existing_files_count = 0,
85+
.deleted_files_count = 0,
86+
.added_rows_count = 6,
87+
.existing_rows_count = 0,
88+
.deleted_rows_count = 0,
89+
.partitions = {{.contains_null = false,
90+
.contains_nan = false,
91+
.lower_bound = lower_bounds[0],
92+
.upper_bound = upper_bounds[0]}}},
93+
94+
{.manifest_path = paths[1],
95+
.manifest_length = file_size[1],
96+
.partition_spec_id = 0,
97+
.added_snapshot_id = snapshot_id[1],
98+
.added_files_count = 0,
99+
.existing_files_count = 0,
100+
.deleted_files_count = 2,
101+
.added_rows_count = 0,
102+
.existing_rows_count = 0,
103+
.deleted_rows_count = 6,
104+
.partitions = {{.contains_null = false,
105+
.contains_nan = false,
106+
.lower_bound = lower_bounds[1],
107+
.upper_bound = upper_bounds[1]}}}};
108+
109+
TestManifestListReading(
110+
"snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro",
111+
expected_manifest_list);
227112
}
228113

229-
TEST_F(ManifestListReaderTest, ComplexTypeTest) {
230-
std::string path = GetResourcePath(
231-
"snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro");
232-
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
233-
ASSERT_EQ(manifest_reader_result.has_value(), true);
234-
auto manifest_reader = std::move(manifest_reader_result.value());
235-
auto read_result = manifest_reader->Files();
236-
ASSERT_EQ(read_result.has_value(), true);
237-
ASSERT_EQ(read_result.value().size(), 2);
238-
239-
auto expected_manifest_list = PrepareTestManifestListComplexType();
240-
ASSERT_EQ(read_result.value(), expected_manifest_list);
114+
// Builds the two ManifestFile entries expected from the manifest list
// snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro and
// hands them to TestManifestListReading for comparison. Neither expected
// entry sets `.partitions`, so that member keeps its default value.
TEST_F(ManifestListReaderV1Test, ComplexTypeTest) {
115+
// Expected manifest paths, in the order the entries are compared (m1, then m0).
std::vector<std::string> paths = {
116+
"iceberg-warehouse/db/v1_type_test/metadata/"
117+
"aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro",
118+
"iceberg-warehouse/db/v1_type_test/metadata/"
119+
"aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"};
120+
std::vector<int64_t> file_size = {6498, 6513};
121+
std::vector<int64_t> snapshot_id = {4134160420377642835, 4134160420377642835};
122+
123+
// Entry 0 records only additions (1 file / 2 rows); entry 1 records only
// deletions (1 file / 3 rows).
std::vector<ManifestFile> expected_manifest_list = {
124+
{
125+
.manifest_path = paths[0],
126+
.manifest_length = file_size[0],
127+
.partition_spec_id = 0,
128+
.added_snapshot_id = snapshot_id[0],
129+
.added_files_count = 1,
130+
.existing_files_count = 0,
131+
.deleted_files_count = 0,
132+
.added_rows_count = 2,
133+
.existing_rows_count = 0,
134+
.deleted_rows_count = 0,
135+
},
136+
137+
{.manifest_path = paths[1],
138+
.manifest_length = file_size[1],
139+
.partition_spec_id = 0,
140+
.added_snapshot_id = snapshot_id[1],
141+
.added_files_count = 0,
142+
.existing_files_count = 0,
143+
.deleted_files_count = 1,
144+
.added_rows_count = 0,
145+
.existing_rows_count = 0,
146+
.deleted_rows_count = 3}};
147+
148+
TestManifestListReading(
149+
"snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro",
150+
expected_manifest_list);
241151
}
242152

243-
TEST_F(ManifestListReaderTest, PartitionComplexTypeTest) {
244-
std::string path = GetResourcePath(
245-
"snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro");
246-
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
247-
ASSERT_EQ(manifest_reader_result.has_value(), true);
248-
auto manifest_reader = std::move(manifest_reader_result.value());
249-
auto read_result = manifest_reader->Files();
250-
ASSERT_EQ(read_result.has_value(), true);
251-
ASSERT_EQ(read_result.value().size(), 2);
252-
253-
auto expected_manifest_list = PrepareTestManifestListPartitionComplex();
254-
ASSERT_EQ(read_result.value(), expected_manifest_list);
153+
// Builds the two ManifestFile entries expected from the manifest list
// snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro and
// hands them to TestManifestListReading for comparison. Each expected entry
// carries two partition field summaries.
TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) {
154+
// Expected manifest paths, in the order the entries are compared (m0, then m1).
std::vector<std::string> paths = {
155+
"iceberg-warehouse/db2/v1_complex_partition_test/metadata/"
156+
"5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro",
157+
"iceberg-warehouse/db2/v1_complex_partition_test/metadata/"
158+
"5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"};
159+
std::vector<int64_t> file_size = {6402, 6318};
160+
std::vector<int64_t> snapshot_id = {7522296285847100621, 7522296285847100621};
161+
162+
// Bounds are laid out as [manifest 0 field 0, manifest 0 field 1,
// manifest 1 field 0, manifest 1 field 1]. The 9-byte entries spell
// "2022-2-22" when read as ASCII; the 8-byte entries are presumably
// little-endian 64-bit integers (value 2 here) — TODO confirm against the
// partition spec of the fixture table.
std::vector<std::vector<std::uint8_t>> lower_bounds = {
163+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32},
164+
{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
165+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32},
166+
{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
167+
168+
// Same layout as lower_bounds. Read as ASCII the 9-byte entries spell
// "2022-2-24" and "2022-2-23"; the 8-byte entries have first byte 5 and 4.
std::vector<std::vector<std::uint8_t>> upper_bounds = {
169+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34},
170+
{0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
171+
{0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33},
172+
{0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}};
173+
// Both entries record no additions, only existing and deleted files/rows.
std::vector<ManifestFile> expected_manifest_list = {
174+
{.manifest_path = paths[0],
175+
.manifest_length = file_size[0],
176+
.partition_spec_id = 0,
177+
.added_snapshot_id = snapshot_id[0],
178+
.added_files_count = 0,
179+
.existing_files_count = 3,
180+
.deleted_files_count = 1,
181+
.added_rows_count = 0,
182+
.existing_rows_count = 4,
183+
.deleted_rows_count = 2,
184+
.partitions = {{.contains_null = false,
185+
.contains_nan = false,
186+
.lower_bound = lower_bounds[0],
187+
.upper_bound = upper_bounds[0]},
188+
{.contains_null = false,
189+
.contains_nan = false,
190+
.lower_bound = lower_bounds[1],
191+
.upper_bound = upper_bounds[1]}}},
192+
193+
{.manifest_path = paths[1],
194+
.manifest_length = file_size[1],
195+
.partition_spec_id = 0,
196+
.added_snapshot_id = snapshot_id[1],
197+
.added_files_count = 0,
198+
.existing_files_count = 1,
199+
.deleted_files_count = 1,
200+
.added_rows_count = 0,
201+
.existing_rows_count = 1,
202+
.deleted_rows_count = 1,
203+
.partitions = {{.contains_null = false,
204+
.contains_nan = false,
205+
.lower_bound = lower_bounds[2],
206+
.upper_bound = upper_bounds[2]},
207+
{.contains_null = false,
208+
.contains_nan = false,
209+
.lower_bound = lower_bounds[3],
210+
.upper_bound = upper_bounds[3]}}}};
211+
TestManifestListReading(
212+
"snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro",
213+
expected_manifest_list);
255214
}
256215

257216
} // namespace iceberg

0 commit comments

Comments
 (0)