diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 90c9fd8f4..7497a1203 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -32,72 +32,185 @@ namespace iceberg { -class ManifestListReaderTest : public TempFileTestBase { +class ManifestListReaderV1Test : public ::testing::Test { protected: static void SetUpTestSuite() { avro::AvroReader::Register(); } void SetUp() override { - TempFileTestBase::SetUp(); local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); file_io_ = std::make_shared(local_fs_); } - std::vector PrepareTestManifestList() { - std::vector manifest_files; - std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/"; - std::vector paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro", - "9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro", - "2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro", - "3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"}; - std::vector file_size = {7433, 7431, 7433, 7431}; - std::vector snapshot_id = {7412193043800610213, 5485972788975780755, - 1679468743751242972, 1579605567338877265}; - std::vector> bounds = {{'x', ';', 0x07, 0x00}, - {'(', 0x19, 0x07, 0x00}, - {0xd0, 0xd4, 0x06, 0x00}, - {0xb8, 0xd4, 0x06, 0x00}}; - for (int i = 0; i < 4; ++i) { - ManifestFile manifest_file; - manifest_file.manifest_path = test_dir_prefix + paths[i]; - manifest_file.manifest_length = file_size[i]; - manifest_file.partition_spec_id = 0; - manifest_file.content = ManifestFile::Content::kData; - manifest_file.sequence_number = 4 - i; - manifest_file.min_sequence_number = 4 - i; - manifest_file.added_snapshot_id = snapshot_id[i]; - manifest_file.added_files_count = 1; - manifest_file.existing_files_count = 0; - manifest_file.deleted_files_count = 0; - manifest_file.added_rows_count = 1; - manifest_file.existing_rows_count = 0; - manifest_file.deleted_rows_count = 0; - PartitionFieldSummary partition; - partition.contains_null = false; - partition.contains_nan = false; - partition.lower_bound = bounds[i]; - partition.upper_bound = bounds[i]; - manifest_file.partitions.emplace_back(partition); - manifest_files.emplace_back(manifest_file); - } - return manifest_files; - } - std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; std::shared_ptr file_io_; + + void TestManifestListReading(const std::string& resource_name, + const std::vector& expected_manifest_list) { + std::string path = GetResourcePath(resource_name); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), expected_manifest_list.size()); + ASSERT_EQ(read_result.value(), expected_manifest_list); + } }; -TEST_F(ManifestListReaderTest, BasicTest) { - std::string path = GetResourcePath( - "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 4); - - auto expected_manifest_list = PrepareTestManifestList(); - ASSERT_EQ(read_result.value(), expected_manifest_list); +TEST_F(ManifestListReaderV1Test, PartitionTest) { + std::vector paths = { + "iceberg-warehouse/db/v1_partition_test/metadata/" + "eafd2972-f58e-4185-9237-6378f564787e-m1.avro", + "iceberg-warehouse/db/v1_partition_test/metadata/" + "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; + std::vector file_size = {6185, 6113}; + std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; + + std::vector expected_manifest_list = { + {.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 4, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 6, + .existing_rows_count = 0, + .deleted_rows_count = 0, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[0], + .upper_bound = upper_bounds[0]}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 2, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 6, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[1], + .upper_bound = upper_bounds[1]}}}}; + + TestManifestListReading( + "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro", + expected_manifest_list); +} + +TEST_F(ManifestListReaderV1Test, ComplexTypeTest) { + std::vector paths = { + "iceberg-warehouse/db/v1_type_test/metadata/" + "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", + "iceberg-warehouse/db/v1_type_test/metadata/" + "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; + std::vector file_size = {6498, 6513}; + std::vector snapshot_id = {4134160420377642835, 4134160420377642835}; + + std::vector expected_manifest_list = { + { + .manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 1, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 2, + .existing_rows_count = 0, + .deleted_rows_count = 0, + }, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 3}}; + + TestManifestListReading( + "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro", + expected_manifest_list); +} + +TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { + std::vector paths = { + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; + std::vector file_size = {6402, 6318}; + std::vector snapshot_id = {7522296285847100621, 7522296285847100621}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, + {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + std::vector expected_manifest_list = { + {.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 0, + .existing_files_count = 3, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 4, + .deleted_rows_count = 2, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[0], + .upper_bound = upper_bounds[0]}, + {.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[1], + .upper_bound = upper_bounds[1]}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 1, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 1, + .deleted_rows_count = 1, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[2], + .upper_bound = upper_bounds[2]}, + {.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[3], + .upper_bound = upper_bounds[3]}}}}; + TestManifestListReading( + "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro", + expected_manifest_list); } } // namespace iceberg diff --git a/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro b/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro new file mode 100644 index 000000000..29584b8ce Binary files /dev/null and b/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro differ diff --git a/test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro b/test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro new file mode 100644 index 000000000..590edc1f8 Binary files /dev/null and b/test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro differ diff --git a/test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro b/test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro new file mode 100644 index 000000000..4fba684a2 Binary files /dev/null and b/test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro differ