|
32 | 32 |
|
33 | 33 | namespace iceberg { |
34 | 34 |
|
35 | | -class ManifestListReaderTest : public TempFileTestBase { |
| 35 | +class ManifestListReaderV1Test : public ::testing::Test { |
36 | 36 | protected: |
37 | 37 | static void SetUpTestSuite() { avro::AvroReader::Register(); } |
38 | 38 |
|
39 | 39 | void SetUp() override { |
40 | | - TempFileTestBase::SetUp(); |
41 | 40 | local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); |
42 | 41 | file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_); |
43 | 42 | } |
44 | 43 |
|
45 | | - std::vector<ManifestFile> PrepareTestManifestList() { |
46 | | - std::vector<ManifestFile> manifest_files; |
47 | | - std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/"; |
48 | | - std::vector<std::string> paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro", |
49 | | - "9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro", |
50 | | - "2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro", |
51 | | - "3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"}; |
52 | | - std::vector<int64_t> file_size = {7433, 7431, 7433, 7431}; |
53 | | - std::vector<int64_t> snapshot_id = {7412193043800610213, 5485972788975780755, |
54 | | - 1679468743751242972, 1579605567338877265}; |
55 | | - std::vector<std::vector<uint8_t>> bounds = {{'x', ';', 0x07, 0x00}, |
56 | | - {'(', 0x19, 0x07, 0x00}, |
57 | | - {0xd0, 0xd4, 0x06, 0x00}, |
58 | | - {0xb8, 0xd4, 0x06, 0x00}}; |
59 | | - for (int i = 0; i < 4; ++i) { |
60 | | - ManifestFile manifest_file; |
61 | | - manifest_file.manifest_path = test_dir_prefix + paths[i]; |
62 | | - manifest_file.manifest_length = file_size[i]; |
63 | | - manifest_file.partition_spec_id = 0; |
64 | | - manifest_file.content = ManifestFile::Content::kData; |
65 | | - manifest_file.sequence_number = 4 - i; |
66 | | - manifest_file.min_sequence_number = 4 - i; |
67 | | - manifest_file.added_snapshot_id = snapshot_id[i]; |
68 | | - manifest_file.added_files_count = 1; |
69 | | - manifest_file.existing_files_count = 0; |
70 | | - manifest_file.deleted_files_count = 0; |
71 | | - manifest_file.added_rows_count = 1; |
72 | | - manifest_file.existing_rows_count = 0; |
73 | | - manifest_file.deleted_rows_count = 0; |
74 | | - PartitionFieldSummary partition; |
75 | | - partition.contains_null = false; |
76 | | - partition.contains_nan = false; |
77 | | - partition.lower_bound = bounds[i]; |
78 | | - partition.upper_bound = bounds[i]; |
79 | | - manifest_file.partitions.emplace_back(partition); |
80 | | - manifest_files.emplace_back(manifest_file); |
81 | | - } |
82 | | - return manifest_files; |
83 | | - } |
84 | | - |
85 | 44 | std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; |
86 | 45 | std::shared_ptr<FileIO> file_io_; |
| 46 | + |
| 47 | + void TestManifestListReading(const std::string& resource_name, |
| 48 | + const std::vector<ManifestFile>& expected_manifest_list) { |
| 49 | + std::string path = GetResourcePath(resource_name); |
| 50 | + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); |
| 51 | + ASSERT_EQ(manifest_reader_result.has_value(), true); |
| 52 | + |
| 53 | + auto manifest_reader = std::move(manifest_reader_result.value()); |
| 54 | + auto read_result = manifest_reader->Files(); |
| 55 | + ASSERT_EQ(read_result.has_value(), true); |
| 56 | + ASSERT_EQ(read_result.value().size(), expected_manifest_list.size()); |
| 57 | + ASSERT_EQ(read_result.value(), expected_manifest_list); |
| 58 | + } |
87 | 59 | }; |
88 | 60 |
|
89 | | -TEST_F(ManifestListReaderTest, BasicTest) { |
90 | | - std::string path = GetResourcePath( |
91 | | - "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro"); |
92 | | - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); |
93 | | - ASSERT_EQ(manifest_reader_result.has_value(), true); |
94 | | - auto manifest_reader = std::move(manifest_reader_result.value()); |
95 | | - auto read_result = manifest_reader->Files(); |
96 | | - ASSERT_EQ(read_result.has_value(), true); |
97 | | - ASSERT_EQ(read_result.value().size(), 4); |
98 | | - |
99 | | - auto expected_manifest_list = PrepareTestManifestList(); |
100 | | - ASSERT_EQ(read_result.value(), expected_manifest_list); |
| 61 | +TEST_F(ManifestListReaderV1Test, PartitionTest) { |
| 62 | + std::vector<std::string> paths = { |
| 63 | + "iceberg-warehouse/db/v1_partition_test/metadata/" |
| 64 | + "eafd2972-f58e-4185-9237-6378f564787e-m1.avro", |
| 65 | + "iceberg-warehouse/db/v1_partition_test/metadata/" |
| 66 | + "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; |
| 67 | + std::vector<int64_t> file_size = {6185, 6113}; |
| 68 | + std::vector<int64_t> snapshot_id = {7532614258660258098, 7532614258660258098}; |
| 69 | + |
| 70 | + std::vector<std::vector<std::uint8_t>> lower_bounds = { |
| 71 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, |
| 72 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; |
| 73 | + |
| 74 | + std::vector<std::vector<std::uint8_t>> upper_bounds = { |
| 75 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, |
| 76 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; |
| 77 | + |
| 78 | + std::vector<ManifestFile> expected_manifest_list = { |
| 79 | + {.manifest_path = paths[0], |
| 80 | + .manifest_length = file_size[0], |
| 81 | + .partition_spec_id = 0, |
| 82 | + .added_snapshot_id = snapshot_id[0], |
| 83 | + .added_files_count = 4, |
| 84 | + .existing_files_count = 0, |
| 85 | + .deleted_files_count = 0, |
| 86 | + .added_rows_count = 6, |
| 87 | + .existing_rows_count = 0, |
| 88 | + .deleted_rows_count = 0, |
| 89 | + .partitions = {{.contains_null = false, |
| 90 | + .contains_nan = false, |
| 91 | + .lower_bound = lower_bounds[0], |
| 92 | + .upper_bound = upper_bounds[0]}}}, |
| 93 | + |
| 94 | + {.manifest_path = paths[1], |
| 95 | + .manifest_length = file_size[1], |
| 96 | + .partition_spec_id = 0, |
| 97 | + .added_snapshot_id = snapshot_id[1], |
| 98 | + .added_files_count = 0, |
| 99 | + .existing_files_count = 0, |
| 100 | + .deleted_files_count = 2, |
| 101 | + .added_rows_count = 0, |
| 102 | + .existing_rows_count = 0, |
| 103 | + .deleted_rows_count = 6, |
| 104 | + .partitions = {{.contains_null = false, |
| 105 | + .contains_nan = false, |
| 106 | + .lower_bound = lower_bounds[1], |
| 107 | + .upper_bound = upper_bounds[1]}}}}; |
| 108 | + |
| 109 | + TestManifestListReading( |
| 110 | + "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro", |
| 111 | + expected_manifest_list); |
| 112 | +} |
| 113 | + |
| 114 | +TEST_F(ManifestListReaderV1Test, ComplexTypeTest) { |
| 115 | + std::vector<std::string> paths = { |
| 116 | + "iceberg-warehouse/db/v1_type_test/metadata/" |
| 117 | + "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", |
| 118 | + "iceberg-warehouse/db/v1_type_test/metadata/" |
| 119 | + "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; |
| 120 | + std::vector<int64_t> file_size = {6498, 6513}; |
| 121 | + std::vector<int64_t> snapshot_id = {4134160420377642835, 4134160420377642835}; |
| 122 | + |
| 123 | + std::vector<ManifestFile> expected_manifest_list = { |
| 124 | + { |
| 125 | + .manifest_path = paths[0], |
| 126 | + .manifest_length = file_size[0], |
| 127 | + .partition_spec_id = 0, |
| 128 | + .added_snapshot_id = snapshot_id[0], |
| 129 | + .added_files_count = 1, |
| 130 | + .existing_files_count = 0, |
| 131 | + .deleted_files_count = 0, |
| 132 | + .added_rows_count = 2, |
| 133 | + .existing_rows_count = 0, |
| 134 | + .deleted_rows_count = 0, |
| 135 | + }, |
| 136 | + |
| 137 | + {.manifest_path = paths[1], |
| 138 | + .manifest_length = file_size[1], |
| 139 | + .partition_spec_id = 0, |
| 140 | + .added_snapshot_id = snapshot_id[1], |
| 141 | + .added_files_count = 0, |
| 142 | + .existing_files_count = 0, |
| 143 | + .deleted_files_count = 1, |
| 144 | + .added_rows_count = 0, |
| 145 | + .existing_rows_count = 0, |
| 146 | + .deleted_rows_count = 3}}; |
| 147 | + |
| 148 | + TestManifestListReading( |
| 149 | + "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro", |
| 150 | + expected_manifest_list); |
| 151 | +} |
| 152 | + |
| 153 | +TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { |
| 154 | + std::vector<std::string> paths = { |
| 155 | + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" |
| 156 | + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", |
| 157 | + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" |
| 158 | + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; |
| 159 | + std::vector<int64_t> file_size = {6402, 6318}; |
| 160 | + std::vector<int64_t> snapshot_id = {7522296285847100621, 7522296285847100621}; |
| 161 | + |
| 162 | + std::vector<std::vector<std::uint8_t>> lower_bounds = { |
| 163 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, |
| 164 | + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, |
| 165 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, |
| 166 | + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; |
| 167 | + |
| 168 | + std::vector<std::vector<std::uint8_t>> upper_bounds = { |
| 169 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, |
| 170 | + {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, |
| 171 | + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, |
| 172 | + {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; |
| 173 | + std::vector<ManifestFile> expected_manifest_list = { |
| 174 | + {.manifest_path = paths[0], |
| 175 | + .manifest_length = file_size[0], |
| 176 | + .partition_spec_id = 0, |
| 177 | + .added_snapshot_id = snapshot_id[0], |
| 178 | + .added_files_count = 0, |
| 179 | + .existing_files_count = 3, |
| 180 | + .deleted_files_count = 1, |
| 181 | + .added_rows_count = 0, |
| 182 | + .existing_rows_count = 4, |
| 183 | + .deleted_rows_count = 2, |
| 184 | + .partitions = {{.contains_null = false, |
| 185 | + .contains_nan = false, |
| 186 | + .lower_bound = lower_bounds[0], |
| 187 | + .upper_bound = upper_bounds[0]}, |
| 188 | + {.contains_null = false, |
| 189 | + .contains_nan = false, |
| 190 | + .lower_bound = lower_bounds[1], |
| 191 | + .upper_bound = upper_bounds[1]}}}, |
| 192 | + |
| 193 | + {.manifest_path = paths[1], |
| 194 | + .manifest_length = file_size[1], |
| 195 | + .partition_spec_id = 0, |
| 196 | + .added_snapshot_id = snapshot_id[1], |
| 197 | + .added_files_count = 0, |
| 198 | + .existing_files_count = 1, |
| 199 | + .deleted_files_count = 1, |
| 200 | + .added_rows_count = 0, |
| 201 | + .existing_rows_count = 1, |
| 202 | + .deleted_rows_count = 1, |
| 203 | + .partitions = {{.contains_null = false, |
| 204 | + .contains_nan = false, |
| 205 | + .lower_bound = lower_bounds[2], |
| 206 | + .upper_bound = upper_bounds[2]}, |
| 207 | + {.contains_null = false, |
| 208 | + .contains_nan = false, |
| 209 | + .lower_bound = lower_bounds[3], |
| 210 | + .upper_bound = upper_bounds[3]}}}}; |
| 211 | + TestManifestListReading( |
| 212 | + "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro", |
| 213 | + expected_manifest_list); |
101 | 214 | } |
102 | 215 |
|
103 | 216 | } // namespace iceberg |
0 commit comments