Skip to content

Commit 5e1a80d

Browse files
nullccxsy and nullccxsy authored
test: add test of manifestlistv1 (#171)
1. PartitionTest: tests a partitioned table.
2. ComplexTypeTest: tests complex types.
3. PartitionComplexTypeTest: tests a partitioned table with complex types.

---------

Co-authored-by: nullccxsy <[email protected]>
1 parent 00b71ad commit 5e1a80d

4 files changed

+167
-54
lines changed

test/manifest_list_reader_test.cc

Lines changed: 167 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -32,72 +32,185 @@
3232

3333
namespace iceberg {
3434

35-
class ManifestListReaderTest : public TempFileTestBase {
35+
class ManifestListReaderV1Test : public ::testing::Test {
3636
protected:
3737
// Suite-level setup hook: GoogleTest invokes it once, before the first test
// of this fixture runs. It only calls avro::AvroReader::Register().
static void SetUpTestSuite() {
  avro::AvroReader::Register();
}
3838

3939
// Per-test setup: creates the local Arrow filesystem and wraps it in the
// ArrowFileSystemFileIO that is handed to the manifest list reader.
void SetUp() override {
40-
// Marked as removed ("-") in this diff: the fixture now derives from
// ::testing::Test instead of TempFileTestBase, so there is no base SetUp to chain.
TempFileTestBase::SetUp();
4140
local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
4241
// file_io_ shares ownership of local_fs_.
file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
4342
}
4443

45-
// NOTE: every line of this helper is marked as removed ("-") in this diff.
// Builds the four ManifestFile entries that the old BasicTest expected the
// reader to return. The entries differ only in path, length, snapshot id,
// sequence numbers and partition bounds; all other fields are constant.
std::vector<ManifestFile> PrepareTestManifestList() {
46-
std::vector<ManifestFile> manifest_files;
47-
// Prefix that is prepended to each manifest file name below.
std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/";
48-
std::vector<std::string> paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro",
49-
"9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro",
50-
"2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro",
51-
"3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"};
52-
std::vector<int64_t> file_size = {7433, 7431, 7433, 7431};
53-
std::vector<int64_t> snapshot_id = {7412193043800610213, 5485972788975780755,
54-
1679468743751242972, 1579605567338877265};
55-
// Four-byte bound values, one per manifest.
// NOTE(review): presumably little-endian 32-bit partition values - confirm.
std::vector<std::vector<uint8_t>> bounds = {{'x', ';', 0x07, 0x00},
56-
{'(', 0x19, 0x07, 0x00},
57-
{0xd0, 0xd4, 0x06, 0x00},
58-
{0xb8, 0xd4, 0x06, 0x00}};
59-
// The literal 4 must stay in sync with the sizes of the vectors above.
for (int i = 0; i < 4; ++i) {
60-
ManifestFile manifest_file;
61-
manifest_file.manifest_path = test_dir_prefix + paths[i];
62-
manifest_file.manifest_length = file_size[i];
63-
manifest_file.partition_spec_id = 0;
64-
manifest_file.content = ManifestFile::Content::kData;
65-
// Sequence numbers count down from 4 to 1 as i goes from 0 to 3.
manifest_file.sequence_number = 4 - i;
66-
manifest_file.min_sequence_number = 4 - i;
67-
manifest_file.added_snapshot_id = snapshot_id[i];
68-
manifest_file.added_files_count = 1;
69-
manifest_file.existing_files_count = 0;
70-
manifest_file.deleted_files_count = 0;
71-
manifest_file.added_rows_count = 1;
72-
manifest_file.existing_rows_count = 0;
73-
manifest_file.deleted_rows_count = 0;
74-
PartitionFieldSummary partition;
75-
partition.contains_null = false;
76-
partition.contains_nan = false;
77-
// Lower and upper bound are set to the same bytes for each manifest.
partition.lower_bound = bounds[i];
78-
partition.upper_bound = bounds[i];
79-
manifest_file.partitions.emplace_back(partition);
80-
manifest_files.emplace_back(manifest_file);
81-
}
82-
return manifest_files;
83-
}
84-
8544
std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
8645
std::shared_ptr<FileIO> file_io_;
46+
47+
void TestManifestListReading(const std::string& resource_name,
48+
const std::vector<ManifestFile>& expected_manifest_list) {
49+
std::string path = GetResourcePath(resource_name);
50+
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
51+
ASSERT_EQ(manifest_reader_result.has_value(), true);
52+
53+
auto manifest_reader = std::move(manifest_reader_result.value());
54+
auto read_result = manifest_reader->Files();
55+
ASSERT_EQ(read_result.has_value(), true);
56+
ASSERT_EQ(read_result.value().size(), expected_manifest_list.size());
57+
ASSERT_EQ(read_result.value(), expected_manifest_list);
58+
}
8759
};
8860

89-
TEST_F(ManifestListReaderTest, BasicTest) {
90-
std::string path = GetResourcePath(
91-
"snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro");
92-
auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_);
93-
ASSERT_EQ(manifest_reader_result.has_value(), true);
94-
auto manifest_reader = std::move(manifest_reader_result.value());
95-
auto read_result = manifest_reader->Files();
96-
ASSERT_EQ(read_result.has_value(), true);
97-
ASSERT_EQ(read_result.value().size(), 4);
98-
99-
auto expected_manifest_list = PrepareTestManifestList();
100-
ASSERT_EQ(read_result.value(), expected_manifest_list);
61+
// Checks that a V1 manifest list whose manifests each carry one partition
// field summary is read back as the expected ManifestFile entries.
TEST_F(ManifestListReaderV1Test, PartitionTest) {
  // Both manifests were added by the same snapshot.
  constexpr int64_t kSnapshotId = 7532614258660258098;

  // Partition bounds as raw bytes; the trailing comments give their ASCII reading.
  const std::vector<std::uint8_t> m1_lower_bound = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32};  // "2022-02-22"
  const std::vector<std::uint8_t> m0_lower_bound = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32};  // "2022-2-22"
  // Both manifests share the same upper bound.
  const std::vector<std::uint8_t> upper_bound = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33};  // "2022-2-23"

  // Manifest m1: four added files holding six rows.
  const ManifestFile m1_manifest{
      .manifest_path = "iceberg-warehouse/db/v1_partition_test/metadata/"
                       "eafd2972-f58e-4185-9237-6378f564787e-m1.avro",
      .manifest_length = 6185,
      .partition_spec_id = 0,
      .added_snapshot_id = kSnapshotId,
      .added_files_count = 4,
      .existing_files_count = 0,
      .deleted_files_count = 0,
      .added_rows_count = 6,
      .existing_rows_count = 0,
      .deleted_rows_count = 0,
      .partitions = {{.contains_null = false,
                      .contains_nan = false,
                      .lower_bound = m1_lower_bound,
                      .upper_bound = upper_bound}}};

  // Manifest m0: two deleted files holding six rows.
  const ManifestFile m0_manifest{
      .manifest_path = "iceberg-warehouse/db/v1_partition_test/metadata/"
                       "eafd2972-f58e-4185-9237-6378f564787e-m0.avro",
      .manifest_length = 6113,
      .partition_spec_id = 0,
      .added_snapshot_id = kSnapshotId,
      .added_files_count = 0,
      .existing_files_count = 0,
      .deleted_files_count = 2,
      .added_rows_count = 0,
      .existing_rows_count = 0,
      .deleted_rows_count = 6,
      .partitions = {{.contains_null = false,
                      .contains_nan = false,
                      .lower_bound = m0_lower_bound,
                      .upper_bound = upper_bound}}};

  // The reader is expected to return m1 first, then m0.
  const std::vector<ManifestFile> expected = {m1_manifest, m0_manifest};

  TestManifestListReading(
      "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro", expected);
}
113+
114+
// Checks that the V1 manifest list of the "v1_type_test" table is read back as
// the expected ManifestFile entries. Neither expected entry sets any partition
// field summaries.
TEST_F(ManifestListReaderV1Test, ComplexTypeTest) {
  // Both manifests were added by the same snapshot.
  constexpr int64_t kSnapshotId = 4134160420377642835;

  // Manifest m1: one added file holding two rows.
  const ManifestFile m1_manifest{
      .manifest_path = "iceberg-warehouse/db/v1_type_test/metadata/"
                       "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro",
      .manifest_length = 6498,
      .partition_spec_id = 0,
      .added_snapshot_id = kSnapshotId,
      .added_files_count = 1,
      .existing_files_count = 0,
      .deleted_files_count = 0,
      .added_rows_count = 2,
      .existing_rows_count = 0,
      .deleted_rows_count = 0};

  // Manifest m0: one deleted file holding three rows.
  const ManifestFile m0_manifest{
      .manifest_path = "iceberg-warehouse/db/v1_type_test/metadata/"
                       "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro",
      .manifest_length = 6513,
      .partition_spec_id = 0,
      .added_snapshot_id = kSnapshotId,
      .added_files_count = 0,
      .existing_files_count = 0,
      .deleted_files_count = 1,
      .added_rows_count = 0,
      .existing_rows_count = 0,
      .deleted_rows_count = 3};

  // The reader is expected to return m1 first, then m0.
  const std::vector<ManifestFile> expected = {m1_manifest, m0_manifest};

  TestManifestListReading(
      "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro", expected);
}
152+
153+
// Checks that a V1 manifest list whose manifests each carry two partition
// field summaries is read back as the expected ManifestFile entries.
TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) {
  // Both manifests were added by the same snapshot.
  constexpr int64_t kSnapshotId = 7522296285847100621;

  // Bounds of the first partition field as raw bytes; the trailing comments
  // give their ASCII reading.
  const std::vector<std::uint8_t> m0_first_lower = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32};  // "2022-2-22"
  const std::vector<std::uint8_t> m0_first_upper = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34};  // "2022-2-24"
  const std::vector<std::uint8_t> m1_first_lower = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32};  // "2022-2-22"
  const std::vector<std::uint8_t> m1_first_upper = {
      0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33};  // "2022-2-23"

  // Bounds of the second partition field: eight bytes each.
  // NOTE(review): presumably little-endian 64-bit integers - confirm.
  const std::vector<std::uint8_t> m0_second_lower = {0x02, 0x00, 0x00, 0x00,
                                                     0x00, 0x00, 0x00, 0x00};
  const std::vector<std::uint8_t> m0_second_upper = {0x05, 0x00, 0x00, 0x00,
                                                     0x00, 0x00, 0x00, 0x00};
  const std::vector<std::uint8_t> m1_second_lower = {0x02, 0x00, 0x00, 0x00,
                                                     0x00, 0x00, 0x00, 0x00};
  const std::vector<std::uint8_t> m1_second_upper = {0x04, 0x00, 0x00, 0x00,
                                                     0x00, 0x00, 0x00, 0x00};

  // Manifest m0: three existing files (four rows) and one deleted file (two rows).
  const ManifestFile m0_manifest{
      .manifest_path = "iceberg-warehouse/db2/v1_complex_partition_test/metadata/"
                       "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro",
      .manifest_length = 6402,
      .partition_spec_id = 0,
      .added_snapshot_id = kSnapshotId,
      .added_files_count = 0,
      .existing_files_count = 3,
      .deleted_files_count = 1,
      .added_rows_count = 0,
      .existing_rows_count = 4,
      .deleted_rows_count = 2,
      .partitions = {{.contains_null = false,
                      .contains_nan = false,
                      .lower_bound = m0_first_lower,
                      .upper_bound = m0_first_upper},
                     {.contains_null = false,
                      .contains_nan = false,
                      .lower_bound = m0_second_lower,
                      .upper_bound = m0_second_upper}}};

  // Manifest m1: one existing file (one row) and one deleted file (one row).
  const ManifestFile m1_manifest{
      .manifest_path = "iceberg-warehouse/db2/v1_complex_partition_test/metadata/"
                       "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro",
      .manifest_length = 6318,
      .partition_spec_id = 0,
      .added_snapshot_id = kSnapshotId,
      .added_files_count = 0,
      .existing_files_count = 1,
      .deleted_files_count = 1,
      .added_rows_count = 0,
      .existing_rows_count = 1,
      .deleted_rows_count = 1,
      .partitions = {{.contains_null = false,
                      .contains_nan = false,
                      .lower_bound = m1_first_lower,
                      .upper_bound = m1_first_upper},
                     {.contains_null = false,
                      .contains_nan = false,
                      .lower_bound = m1_second_lower,
                      .upper_bound = m1_second_upper}}};

  // The reader is expected to return m0 first, then m1.
  const std::vector<ManifestFile> expected = {m0_manifest, m1_manifest};

  TestManifestListReading(
      "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro", expected);
}
102215

103216
} // namespace iceberg
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)