1919
2020#include " iceberg/manifest_reader.h"
2121
22+ #include < cstddef>
23+
2224#include < arrow/filesystem/localfs.h>
2325#include < gtest/gtest.h>
2426
2527#include " iceberg/arrow/arrow_fs_file_io_internal.h"
2628#include " iceberg/avro/avro_reader.h"
2729#include " iceberg/avro/avro_register.h"
28- #include " iceberg/avro/avro_schema_util_internal.h"
2930#include " iceberg/manifest_entry.h"
3031#include " iceberg/schema.h"
3132#include " temp_file_test_base.h"
3233#include " test_common.h"
3334
3435namespace iceberg {
3536
36- class ManifestReaderTest : public TempFileTestBase {
37+ class ManifestReaderV1Test : public TempFileTestBase {
3738 protected:
3839 static void SetUpTestSuite () { avro::AvroReader::Register (); }
3940
@@ -45,7 +46,7 @@ class ManifestReaderTest : public TempFileTestBase {
4546 avro::RegisterLogicalTypes ();
4647 }
4748
48- std::vector<ManifestEntry> prepare_manifest_entries () {
49+ std::vector<ManifestEntry> prepareV1ManifestEntries () {
4950 std::vector<ManifestEntry> manifest_entries;
5051 std::string test_dir_prefix = " /tmp/db/db/iceberg_test/data/" ;
5152 std::vector<std::string> paths = {
@@ -102,7 +103,7 @@ class ManifestReaderTest : public TempFileTestBase {
102103 std::shared_ptr<FileIO> file_io_;
103104};
104105
105- TEST_F (ManifestReaderTest, BasicTest ) {
106+ TEST_F (ManifestReaderV1Test, V1PartitionedBasicTest ) {
106107 iceberg::SchemaField partition_field (1000 , " order_ts_hour" , iceberg::int32 (), true );
107108 auto partition_schema =
108109 std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
@@ -115,7 +116,89 @@ TEST_F(ManifestReaderTest, BasicTest) {
115116 auto read_result = manifest_reader->Entries ();
116117 ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
117118
118- auto expected_entries = prepare_manifest_entries ();
119+ auto expected_entries = prepareV1ManifestEntries ();
120+ ASSERT_EQ (read_result.value (), expected_entries);
121+ }
122+
// Test fixture for the manifest reader test that uses
// prepareV2NonPartitionedManifestEntries() as its expected data.
// Extends TempFileTestBase and owns a local Arrow filesystem plus a FileIO
// wrapper around it.
123+ class ManifestReaderV2Test : public TempFileTestBase {
124+ protected:
     // Runs once for the whole suite: registers the Avro reader.
125+ static void SetUpTestSuite () { avro::AvroReader::Register (); }
126+
     // Runs before every test: base-class setup first, then a fresh
     // LocalFileSystem wrapped in an ArrowFileSystemFileIO, and finally the
     // Avro logical types are registered.
127+ void SetUp () override {
128+ TempFileTestBase::SetUp ();
129+ local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
130+ file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
131+
132+ avro::RegisterLogicalTypes ();
133+ }
134+
     // Builds the expected result of reading the test manifest: a vector holding
     // exactly one ManifestEntry (status kAdded) that points at a single Parquet
     // data file with 4 records and a size of 1344 bytes.
     // Returns the vector by value.
135+ std::vector<ManifestEntry> prepareV2NonPartitionedManifestEntries () {
136+ std::vector<ManifestEntry> manifest_entries;
137+ std::string test_dir_prefix = " /tmp/db/db/v2_manifest_non_partitioned/data/" ;
138+
     // Single-element vectors; only index 0 is read below.
139+ std::vector<std::string> paths = {
140+ " 00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet" };
141+
142+ std::vector<int64_t > file_sizes = {1344 };
143+ std::vector<int64_t > record_counts = {4 };
144+
145+ // Real bounds data extracted from the manifest
     // NOTE(review): the int32 keys 1..4 look like column field ids and the
     // 8-byte values look like little-endian encoded numbers, while keys 2 and 3
     // hold raw character bytes - confirm against the table schema.
146+ std::vector<std::map<int32_t , std::vector<uint8_t >>> lower_bounds = {
147+ {{1 , {0x01 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 }},
148+ {2 , {' r' , ' e' , ' c' , ' o' , ' r' , ' d' , ' _' , ' f' , ' o' , ' u' , ' r' }},
149+ {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 1' }},
150+ {4 , {0xcd , 0xcc , 0xcc , 0xcc , 0xcc , 0xdc , 0x5e , 0x40 }}}};
151+
     // Same key layout as lower_bounds, holding the per-key upper bound bytes.
152+ std::vector<std::map<int32_t , std::vector<uint8_t >>> upper_bounds = {
153+ {{1 , {0x04 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 , 0x00 }},
154+ {2 , {' r' , ' e' , ' c' , ' o' , ' r' , ' d' , ' _' , ' t' , ' w' , ' o' }},
155+ {3 , {' d' , ' a' , ' t' , ' a' , ' _' , ' c' , ' o' , ' n' , ' t' , ' e' , ' n' , ' t' , ' _' , ' 4' }},
156+ {4 , {0x14 , 0xae , 0x47 , 0xe1 , 0x7a , 0x8c , 0x7c , 0x40 }}}};
157+
     // Both sequence numbers and the trailing optional DataFile fields are left
     // as std::nullopt; key_metadata and equality_ids are left empty.
158+ manifest_entries.emplace_back (
159+ ManifestEntry{.status = ManifestStatus::kAdded ,
160+ .snapshot_id = 679879563479918846LL ,
161+ .sequence_number = std::nullopt ,
162+ .file_sequence_number = std::nullopt ,
163+ .data_file = std::make_shared<DataFile>(
164+ DataFile{.file_path = test_dir_prefix + paths[0 ],
165+ .file_format = FileFormatType::kParquet ,
166+ .record_count = record_counts[0 ],
167+ .file_size_in_bytes = file_sizes[0 ],
168+ .column_sizes = {{1 , 56 }, {2 , 73 }, {3 , 66 }, {4 , 67 }},
169+ .value_counts = {{1 , 4 }, {2 , 4 }, {3 , 4 }, {4 , 4 }},
170+ .null_value_counts = {{1 , 0 }, {2 , 0 }, {3 , 0 }, {4 , 0 }},
171+ .nan_value_counts = {{4 , 0 }},
172+ .lower_bounds = lower_bounds[0 ],
173+ .upper_bounds = upper_bounds[0 ],
174+ .key_metadata = {},
175+ .split_offsets = {4 },
176+ .equality_ids = {},
177+ .sort_order_id = 0 ,
178+ .first_row_id = std::nullopt ,
179+ .referenced_data_file = std::nullopt ,
180+ .content_offset = std::nullopt ,
181+ .content_size_in_bytes = std::nullopt })});
182+ return manifest_entries;
183+ }
184+
     // Filesystem handle created in SetUp() and wrapped by file_io_.
185+ std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
     // FileIO handed to ManifestReader::MakeReader by the tests.
186+ std::shared_ptr<FileIO> file_io_;
187+ };
188+
// Reads the manifest resource file through ManifestReader and checks that the
// decoded entries equal the single entry built by
// prepareV2NonPartitionedManifestEntries().
189+ TEST_F (ManifestReaderV2Test, V2NonPartitionedBasicTest) {
190+ std::string path = GetResourcePath (" 2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro" );
191+
     // NOTE(review): the third argument is nullptr here; presumably it is the
     // partition schema, which this test does not supply - confirm against the
     // MakeReader signature.
192+ auto manifest_reader_result = ManifestReader::MakeReader (path, file_io_, nullptr );
     // Stop the test right away if the reader could not be created, reporting
     // the error message.
193+ ASSERT_EQ (manifest_reader_result.has_value (), true )
194+ << manifest_reader_result.error ().message ;
195+
196+ auto manifest_reader = std::move (manifest_reader_result.value ());
197+ auto read_result = manifest_reader->Entries ();
198+ ASSERT_EQ (read_result.has_value (), true ) << read_result.error ().message ;
     // NOTE(review): size() is unsigned while the literal 1 is a signed int; this
     // may trigger a sign-compare warning inside ASSERT_EQ - consider an unsigned
     // literal.
199+ ASSERT_EQ (read_result.value ().size (), 1 );
200+
     // Full structural comparison of the decoded entries against the expected
     // vector.
201+ auto expected_entries = prepareV2NonPartitionedManifestEntries ();
119202 ASSERT_EQ (read_result.value (), expected_entries);
120203}
121204
0 commit comments