3535
3636namespace iceberg {
3737
38- class ManifestReaderV1Test : public TempFileTestBase {
38+ class ManifestReaderTestBase : public TempFileTestBase {
3939 protected:
4040 static void SetUpTestSuite () { avro::AvroReader::Register (); }
4141
@@ -47,7 +47,44 @@ class ManifestReaderV1Test : public TempFileTestBase {
4747 avro::RegisterLogicalTypes ();
4848 }
4949
50- std::vector<ManifestEntry> PrepareV1ManifestEntries () {
50+ void TestManifestReading (const std::string& resource_name,
51+ const std::vector<ManifestEntry>& expected_entries,
52+ std::shared_ptr<Schema> partition_schema = nullptr ) {
53+ std::string path = GetResourcePath (resource_name);
54+ auto manifest_reader_result = ManifestReader::Make (path, file_io_, partition_schema);
55+ ASSERT_EQ (manifest_reader_result.has_value (), true )
56+ << manifest_reader_result.error ().message ;
57+
58+ auto manifest_reader = std::move (manifest_reader_result.value ());
59+ auto read_result = manifest_reader->Entries ();
60+ ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
61+ ASSERT_EQ (read_result.value ().size (), expected_entries.size ());
62+ ASSERT_EQ (read_result.value (), expected_entries);
63+ }
64+
65+ void TestManifestReadingWithManifestFile (
66+ const ManifestFile& manifest_file,
67+ const std::vector<ManifestEntry>& expected_entries,
68+ std::shared_ptr<Schema> partition_schema = nullptr ) {
69+ auto manifest_reader_result =
70+ ManifestReader::Make (manifest_file, file_io_, partition_schema);
71+ ASSERT_EQ (manifest_reader_result.has_value (), true )
72+ << manifest_reader_result.error ().message ;
73+
74+ auto manifest_reader = std::move (manifest_reader_result.value ());
75+ auto read_result = manifest_reader->Entries ();
76+ ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
77+ ASSERT_EQ (read_result.value ().size (), expected_entries.size ());
78+ ASSERT_EQ (read_result.value (), expected_entries);
79+ }
80+
81+ std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
82+ std::shared_ptr<FileIO> file_io_;
83+ };
84+
85+ class ManifestReaderV1Test : public ManifestReaderTestBase {
86+ protected:
87+ std::vector<ManifestEntry> PreparePartitionedTestData () {
5188 std::vector<ManifestEntry> manifest_entries;
5289 std::string test_dir_prefix = " /tmp/db/db/iceberg_test/data/" ;
5390 std::vector<std::string> paths = {
@@ -99,40 +136,22 @@ class ManifestReaderV1Test : public TempFileTestBase {
99136 }
100137 return manifest_entries;
101138 }
102-
103- std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
104- std::shared_ptr<FileIO> file_io_;
105139};
106140
107- TEST_F (ManifestReaderV1Test, V1PartitionedBasicTest ) {
// Builds a partition schema with a single int32 field "order_ts_hour"
// (field id 1000) and checks, via TestManifestReading, that the manifest
// resource read by path yields the entries from PreparePartitionedTestData().
141+ TEST_F (ManifestReaderV1Test, PartitionedTest ) {
108142 iceberg::SchemaField partition_field (1000 , " order_ts_hour" , iceberg::int32 (), true );
109143 auto partition_schema =
110144 std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
111- std::string path = GetResourcePath (" 56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro" );
112- auto manifest_reader_result = ManifestReader::Make (path, file_io_, partition_schema);
113- ASSERT_EQ (manifest_reader_result.has_value (), true )
114- << manifest_reader_result.error ().message ;
115- auto manifest_reader = std::move (manifest_reader_result.value ());
116- auto read_result = manifest_reader->Entries ();
117- ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
118-
119- auto expected_entries = PrepareV1ManifestEntries ();
120- ASSERT_EQ (read_result.value (), expected_entries);
145+ auto expected_entries = PreparePartitionedTestData ();
146+ TestManifestReading (" 56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro" , expected_entries,
147+ partition_schema);
121148}
122149
123- class ManifestReaderV2Test : public TempFileTestBase {
150+ class ManifestReaderV2Test : public ManifestReaderTestBase {
124151 protected:
125- static void SetUpTestSuite () { avro::AvroReader::Register (); }
126-
127- void SetUp () override {
128- TempFileTestBase::SetUp ();
129- local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
130- file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
131-
132- avro::RegisterLogicalTypes ();
133- }
134-
135- std::vector<ManifestEntry> PrepareV2NonPartitionedManifestEntries () {
152+ std::vector<ManifestEntry> CreateV2TestData (
153+ std::optional<int64_t > sequence_number = std::nullopt ,
154+ std::optional<int32_t > partition_spec_id = std::nullopt ) {
136155 std::vector<ManifestEntry> manifest_entries;
137156 std::string test_dir_prefix = " /tmp/db/db/v2_manifest_non_partitioned/data/" ;
138157
@@ -154,104 +173,53 @@ class ManifestReaderV2Test : public TempFileTestBase {
154173 {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 4' }},
155174 {4 , {0x14 , 0xae , 0x47 , 0xe1 , 0x7a , 0x8c , 0x7c , 0x40 }}}};
156175
176+ DataFile data_file{.file_path = test_dir_prefix + paths[0 ],
177+ .file_format = FileFormatType::kParquet ,
178+ .record_count = record_counts[0 ],
179+ .file_size_in_bytes = file_sizes[0 ],
180+ .column_sizes = {{1 , 56 }, {2 , 73 }, {3 , 66 }, {4 , 67 }},
181+ .value_counts = {{1 , 4 }, {2 , 4 }, {3 , 4 }, {4 , 4 }},
182+ .null_value_counts = {{1 , 0 }, {2 , 0 }, {3 , 0 }, {4 , 0 }},
183+ .nan_value_counts = {{4 , 0 }},
184+ .lower_bounds = lower_bounds[0 ],
185+ .upper_bounds = upper_bounds[0 ],
186+ .key_metadata = {},
187+ .split_offsets = {4 },
188+ .equality_ids = {},
189+ .sort_order_id = 0 ,
190+ .first_row_id = std::nullopt ,
191+ .referenced_data_file = std::nullopt ,
192+ .content_offset = std::nullopt ,
193+ .content_size_in_bytes = std::nullopt };
194+
195+ if (partition_spec_id.has_value ()) {
196+ data_file.partition_spec_id = partition_spec_id.value ();
197+ }
198+
157199 manifest_entries.emplace_back (
158200 ManifestEntry{.status = ManifestStatus::kAdded ,
159201 .snapshot_id = 679879563479918846LL ,
160- .sequence_number = std::nullopt ,
161- .file_sequence_number = std::nullopt ,
162- .data_file = std::make_shared<DataFile>(
163- DataFile{.file_path = test_dir_prefix + paths[0 ],
164- .file_format = FileFormatType::kParquet ,
165- .record_count = record_counts[0 ],
166- .file_size_in_bytes = file_sizes[0 ],
167- .column_sizes = {{1 , 56 }, {2 , 73 }, {3 , 66 }, {4 , 67 }},
168- .value_counts = {{1 , 4 }, {2 , 4 }, {3 , 4 }, {4 , 4 }},
169- .null_value_counts = {{1 , 0 }, {2 , 0 }, {3 , 0 }, {4 , 0 }},
170- .nan_value_counts = {{4 , 0 }},
171- .lower_bounds = lower_bounds[0 ],
172- .upper_bounds = upper_bounds[0 ],
173- .key_metadata = {},
174- .split_offsets = {4 },
175- .equality_ids = {},
176- .sort_order_id = 0 ,
177- .first_row_id = std::nullopt ,
178- .referenced_data_file = std::nullopt ,
179- .content_offset = std::nullopt ,
180- .content_size_in_bytes = std::nullopt })});
202+ .sequence_number = sequence_number,
203+ .file_sequence_number = sequence_number,
204+ .data_file = std::make_shared<DataFile>(data_file)});
181205 return manifest_entries;
182206 }
183207
184- std::vector<ManifestEntry> prepareV2ManifestEntryMetadataInheritance () {
185- std::vector<ManifestEntry> manifest_entries;
186- std::string test_dir_prefix = " /tmp/db/db/v2_manifest_non_partitioned/data/" ;
187-
188- std::vector<std::string> paths = {
189- " 00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet" };
190-
191- std::vector<int64_t > file_sizes = {1344 };
192- std::vector<int64_t > record_counts = {4 };
193-
194- std::vector<std::map<int32_t , std::vector<uint8_t >>> lower_bounds = {
195- {{1 , {0x01 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 }},
196- {2 , {' r' , ' e' , ' c' , ' o' , ' r' , ' d' , ' _' , ' f' , ' o' , ' u' , ' r' }},
197- {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 1' }},
198- {4 , {0xcd , 0xcc , 0xcc , 0xcc , 0xcc , 0xdc , 0x5e , 0x40 }}}};
199-
200- std::vector<std::map<int32_t , std::vector<uint8_t >>> upper_bounds = {
201- {{1 , {0x04 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 }},
202- {2 , {' r' , ' e' , ' c' , ' o' , ' r' , ' d' , ' _' , ' t' , ' w' , ' o' }},
203- {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 4' }},
204- {4 , {0x14 , 0xae , 0x47 , 0xe1 , 0x7a , 0x8c , 0x7c , 0x40 }}}};
205-
206- manifest_entries.emplace_back (
207- ManifestEntry{.status = ManifestStatus::kAdded ,
208- .snapshot_id = 679879563479918846LL ,
209- .sequence_number = 15 ,
210- .file_sequence_number = 15 ,
211- .data_file = std::make_shared<DataFile>(
212- DataFile{.file_path = test_dir_prefix + paths[0 ],
213- .file_format = FileFormatType::kParquet ,
214- .record_count = record_counts[0 ],
215- .file_size_in_bytes = file_sizes[0 ],
216- .column_sizes = {{1 , 56 }, {2 , 73 }, {3 , 66 }, {4 , 67 }},
217- .value_counts = {{1 , 4 }, {2 , 4 }, {3 , 4 }, {4 , 4 }},
218- .null_value_counts = {{1 , 0 }, {2 , 0 }, {3 , 0 }, {4 , 0 }},
219- .nan_value_counts = {{4 , 0 }},
220- .lower_bounds = lower_bounds[0 ],
221- .upper_bounds = upper_bounds[0 ],
222- .key_metadata = {},
223- .split_offsets = {4 },
224- .equality_ids = {},
225- .sort_order_id = 0 ,
226- .partition_spec_id = 12 , // inherit from manifest
227- .first_row_id = std::nullopt ,
228- .referenced_data_file = std::nullopt ,
229- .content_offset = std::nullopt ,
230- .content_size_in_bytes = std::nullopt })});
231- return manifest_entries;
208+ std::vector<ManifestEntry> PrepareNonPartitionedTestData () {
209+ return CreateV2TestData ();
232210 }
233211
234- std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
235- std::shared_ptr<FileIO> file_io_;
212+ std::vector<ManifestEntry> PrepareMetadataInheritanceTestData () {
213+ return CreateV2TestData (15 , 12 );
214+ }
236215};
237216
238- TEST_F (ManifestReaderV2Test, V2NonPartitionedBasicTest) {
239- std::string path = GetResourcePath (" 2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro" );
240-
241- auto manifest_reader_result = ManifestReader::Make (path, file_io_, nullptr );
242- ASSERT_EQ (manifest_reader_result.has_value (), true )
243- << manifest_reader_result.error ().message ;
244-
245- auto manifest_reader = std::move (manifest_reader_result.value ());
246- auto read_result = manifest_reader->Entries ();
247- ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
248- ASSERT_EQ (read_result.value ().size (), 1 );
249-
250- auto expected_entries = PrepareV2NonPartitionedManifestEntries ();
251- ASSERT_EQ (read_result.value (), expected_entries);
// Reads the manifest resource by path through TestManifestReading without
// passing a partition schema (the helper's default) and expects the entries
// from PrepareNonPartitionedTestData().
217+ TEST_F (ManifestReaderV2Test, NonPartitionedTest) {
218+ auto expected_entries = PrepareNonPartitionedTestData ();
219+ TestManifestReading (" 2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro" , expected_entries);
252220}
253221
254- TEST_F (ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest ) {
222+ TEST_F (ManifestReaderV2Test, MetadataInheritanceTest ) {
255223 std::string path = GetResourcePath (" 2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro" );
256224 ManifestFile manifest_file{
257225 .manifest_path = path,
@@ -261,17 +229,8 @@ TEST_F(ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest) {
261229 .sequence_number = 15 ,
262230 .added_snapshot_id = 679879563479918846LL ,
263231 };
264- auto manifest_reader_result = ManifestReader::Make (manifest_file, file_io_, nullptr );
265- ASSERT_EQ (manifest_reader_result.has_value (), true )
266- << manifest_reader_result.error ().message ;
267-
268- auto manifest_reader = std::move (manifest_reader_result.value ());
269- auto read_result = manifest_reader->Entries ();
270- ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
271- ASSERT_EQ (read_result.value ().size (), 1 );
272-
273- auto expected_entries = prepareV2ManifestEntryMetadataInheritance ();
274- ASSERT_EQ (read_result.value (), expected_entries);
232+ auto expected_entries = PrepareMetadataInheritanceTestData ();
233+ TestManifestReadingWithManifestFile (manifest_file, expected_entries);
275234}
276235
277236} // namespace iceberg
0 commit comments