From 5006936b4ccd176485ffcf80b42136d677533f93 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Mon, 11 Aug 2025 16:34:11 +0800 Subject: [PATCH 1/4] test: add V2NonPartitionedTest for ManifestListReader --- test/manifest_list_reader_test.cc | 105 ++++++++++++++++++ ...-ccb6dbcb-0611-48da-be68-bd506ea63188.avro | Bin 0 -> 4598 bytes 2 files changed, 105 insertions(+) create mode 100644 test/resources/snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 7497a1203..e5f08c387 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -41,6 +41,87 @@ class ManifestListReaderV1Test : public ::testing::Test { file_io_ = std::make_shared(local_fs_); } + std::vector PrepareTestManifestList() { + std::vector manifest_files; + std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/"; + std::vector paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro", + "9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro", + "2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro", + "3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"}; + std::vector file_size = {7433, 7431, 7433, 7431}; + std::vector snapshot_id = {7412193043800610213, 5485972788975780755, + 1679468743751242972, 1579605567338877265}; + std::vector> bounds = {{'x', ';', 0x07, 0x00}, + {'(', 0x19, 0x07, 0x00}, + {0xd0, 0xd4, 0x06, 0x00}, + {0xb8, 0xd4, 0x06, 0x00}}; + for (int i = 0; i < 4; ++i) { + ManifestFile manifest_file; + manifest_file.manifest_path = test_dir_prefix + paths[i]; + manifest_file.manifest_length = file_size[i]; + manifest_file.partition_spec_id = 0; + manifest_file.content = ManifestFile::Content::kData; + manifest_file.sequence_number = 4 - i; + manifest_file.min_sequence_number = 4 - i; + manifest_file.added_snapshot_id = snapshot_id[i]; + manifest_file.added_files_count = 1; + manifest_file.existing_files_count = 0; + manifest_file.deleted_files_count = 0; + manifest_file.added_rows_count = 1; + manifest_file.existing_rows_count = 0; + manifest_file.deleted_rows_count = 0; + PartitionFieldSummary partition; + partition.contains_null = false; + partition.contains_nan = false; + partition.lower_bound = bounds[i]; + partition.upper_bound = bounds[i]; + manifest_file.partitions.emplace_back(partition); + manifest_files.emplace_back(manifest_file); + } + return manifest_files; + } + + std::vector PrepareV2NonPartitionedTestManifestList() { + std::vector manifest_files; + std::string test_dir_prefix = + "/tmp/db/db/v2_non_partitioned_test/metadata/"; + + std::vector paths = { + "ccb6dbcb-0611-48da-be68-bd506ea63188-m0.avro", + "b89a10c9-a7a8-4526-99c5-5587a4ea7527-m0.avro", + "a74d20fa-c800-4706-9ddb-66be15a5ecb0-m0.avro", + "ae7d5fce-7245-4335-9b57-bc598c595c84-m0.avro" + }; + + std::vector file_size = {7169, 7170, 7169, 7170}; + + std::vector snapshot_id = { + 251167482216575399, + 4248697313956014690, + 281757490425433194, + 5521202581490753283 + }; + + for (int i = 0; i < 4; ++i) { + ManifestFile manifest_file; + manifest_file.manifest_path = test_dir_prefix + paths[i]; + manifest_file.manifest_length = file_size[i]; + manifest_file.partition_spec_id = 0; + manifest_file.content = ManifestFile::Content::kData; + manifest_file.sequence_number = 4 - i; + manifest_file.min_sequence_number = 4 - i; + manifest_file.added_snapshot_id = snapshot_id[i]; + manifest_file.added_files_count = 1; + manifest_file.existing_files_count = 0; + manifest_file.deleted_files_count = 0; + manifest_file.added_rows_count = 1; + manifest_file.existing_rows_count = 0; + manifest_file.deleted_rows_count = 0; + + manifest_files.emplace_back(manifest_file); + } + return manifest_files; + } std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; std::shared_ptr file_io_; @@ -213,4 +294,28 @@ TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { expected_manifest_list); } +TEST_F(ManifestListReaderV1Test, V2NonPartitionedTest) { + std::string path = GetResourcePath( + "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro"); + + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + + auto manifest_reader = +std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), 4); + + auto expected_manifest_list = PrepareV2NonPartitionedTestManifestList(); + ASSERT_EQ(read_result.value(), expected_manifest_list); + + // test all the manifest files are non-partitioned + for (const auto& manifest : read_result.value()) { + ASSERT_EQ(manifest.partition_spec_id, 0); + ASSERT_TRUE(manifest.partitions.empty()); // + ASSERT_EQ(manifest.content, ManifestFile::Content::kData); + } +} + } // namespace iceberg diff --git a/test/resources/snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro b/test/resources/snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro new file mode 100644 index 0000000000000000000000000000000000000000..d8621c6b319830709dd19ef823311b8c82f6d86e GIT binary patch literal 4598 zcmbVPe~1)S98Xg-EdLO>M67#_q6Bv}v$wl7KfP-ijb5^2XR4)a)eQ)nbldnnb~>o&F;?LZh!9Vo6qO_ zKHp!TS)N%tW^xl;ZyNVoat`I;rRI$SE*6lG6ig%=rXsWmI?Ox2^H5i_$inHYsv+_? zt0GOYgygD?6(jk8(80wptEvde zm?-wPRAOsL@8Z!lgTr27V5k8nKwzoQAfW0XQ^d&f!z}?z9B=`eSasq_TLmPi2{R2` z)iI4aMg9a!76RKs3cv>FF;!e1Z-j%D4ILx*{gMhi*aGiQ3m92|a|rm-CV@Clff%*` zX`X^KR1vw<&QVyX+eW&K(z>0`Ad|%%kR(+)1-w(BCx`R^Dd>R~fQo{go4ZN%RFYyG zS6(Ih)Wx^V=pjV{{KGyi$pnAXGO9R7<%W{BbXc%*1`Z@7c(Kw6Co4Dyv3G|WEMi*) zi&H69S~l#kOg-%A0!SjwBJhuK8i`R35^Yc|Oe)=NkeaBSrIR&)dGWncP)5gH=+;cF zcq@fAjh>2F13G&WILE3fAn`>{6&MwiDiDgxC(9RLR9IhtP*gr&wDX>R7dm(+!RK5(8Y$GPa4IMZZ4C=aC#K+_ z=-@)oNc)@V4CyE~NPSfWZx&Pj$NkX@+_!-H7RVYVfaKb4Z#n{n6Y~qgE)@O;vHMoz z#8U6--WGv$Tb_)D(o*T@r58)t`8*^O!?_BAa*oMa^FAU!-PfUFI*%}P_5rP%FeN*6 z*(?^^txDbl{(B&!8JU`tQi9xMs%rpbLqYOnqP7MxYVzkVx!kNsOp@a5(T+$&ipSbx zZHdGTae`7by;BWA%1QP2cFChU# z5TZ|@_$tf{4PIi&rusAtPxavp2xDOB_2p@pz?Y{QzR1P^oimQLVY+0YX3=p;=dng? zD>+uS&FBNp_0Y-9efxP+G(jtuc@WR=F!sQ#_RYv>UZk1?efV-4JydZ5v0 zt5_h$N|sgWn>grpLP=GfxB+JmyC1;y*d9j4_EXT|G)xw5bpmFF;X3&uF2@VHhNWS# zvMMNZFATIS9YJMk+87e$EK=8w;o{86>}Vv~5l^(YNo|Q(T$G~mgm}wv=T0GZc>Znn zvHQkvIeqe#i8pTDdq!G%;-P6@UWvXq_`uaYOU++~E}q@|$G~UvpWom2+TPBEzs|(( zt?NE?{+)eWlT*jtHRs7mUp;pB3tPWA_r<@jZkl`g>FGbk?UAAA!9CrnBhlOUiPPJE zSb4m8bN}^cXN-UJ0J=CBnfMX{J7%sqc*mgyeRJOx+9s?$y!`B`8=vfW^Y|Kj)1TKy zJ{p|WJ@sb)$jc|+-oI>c%HrK)wjF)$gZ_zI7wx{bU;6IY`6Y8^pBg%U`Nu04{<>Ot zWLG~vI&I2RGsi7%O@5u2@YZ10;^EfqKkvNx&*^i2U%qrbH}LWM!$(f^{r-lv4QwCV jr+oW))6%nd9@zDL^MxDu!%sKtef><=z=OX{DwpFw(Lp(B literal 0 HcmV?d00001 From 9e71384c7b83c99d9447f8295ad778fef4496c85 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Mon, 11 Aug 2025 16:48:44 +0800 Subject: [PATCH 2/4] style: format V2NonPartitionedTest with clang-format - Apply clang-format to fix code style issues - Remove trailing whitespaces - Fix indentation and alignment - Ensure consistent formatting with project standards --- test/manifest_list_reader_test.cc | 32 ++++++++++++------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index e5f08c387..ff8a925f6 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -83,32 +83,25 @@ class ManifestListReaderV1Test : public ::testing::Test { std::vector PrepareV2NonPartitionedTestManifestList() { std::vector manifest_files; - std::string test_dir_prefix = - "/tmp/db/db/v2_non_partitioned_test/metadata/"; + std::string test_dir_prefix = "/tmp/db/db/v2_non_partitioned_test/metadata/"; - std::vector paths = { - "ccb6dbcb-0611-48da-be68-bd506ea63188-m0.avro", - "b89a10c9-a7a8-4526-99c5-5587a4ea7527-m0.avro", - "a74d20fa-c800-4706-9ddb-66be15a5ecb0-m0.avro", - "ae7d5fce-7245-4335-9b57-bc598c595c84-m0.avro" - }; + std::vector paths = {"ccb6dbcb-0611-48da-be68-bd506ea63188-m0.avro", + "b89a10c9-a7a8-4526-99c5-5587a4ea7527-m0.avro", + "a74d20fa-c800-4706-9ddb-66be15a5ecb0-m0.avro", + "ae7d5fce-7245-4335-9b57-bc598c595c84-m0.avro"}; std::vector file_size = {7169, 7170, 7169, 7170}; - std::vector snapshot_id = { - 251167482216575399, - 4248697313956014690, - 281757490425433194, - 5521202581490753283 - }; + std::vector snapshot_id = {251167482216575399, 4248697313956014690, + 281757490425433194, 5521202581490753283}; for (int i = 0; i < 4; ++i) { ManifestFile manifest_file; manifest_file.manifest_path = test_dir_prefix + paths[i]; manifest_file.manifest_length = file_size[i]; - manifest_file.partition_spec_id = 0; + manifest_file.partition_spec_id = 0; manifest_file.content = ManifestFile::Content::kData; - manifest_file.sequence_number = 4 - i; + manifest_file.sequence_number = 4 - i; manifest_file.min_sequence_number = 4 - i; manifest_file.added_snapshot_id = snapshot_id[i]; manifest_file.added_files_count = 1; @@ -296,13 +289,12 @@ TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { TEST_F(ManifestListReaderV1Test, V2NonPartitionedTest) { std::string path = GetResourcePath( - "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro"); + "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro"); auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); ASSERT_EQ(manifest_reader_result.has_value(), true); - auto manifest_reader = -std::move(manifest_reader_result.value()); + auto manifest_reader = std::move(manifest_reader_result.value()); auto read_result = manifest_reader->Files(); ASSERT_EQ(read_result.has_value(), true); ASSERT_EQ(read_result.value().size(), 4); @@ -313,7 +305,7 @@ std::move(manifest_reader_result.value()); // test all the manifest files are non-partitioned for (const auto& manifest : read_result.value()) { ASSERT_EQ(manifest.partition_spec_id, 0); - ASSERT_TRUE(manifest.partitions.empty()); // + ASSERT_TRUE(manifest.partitions.empty()); // ASSERT_EQ(manifest.content, ManifestFile::Content::kData); } } From 23035ad70b0ed81cb99ab0494b49c7bc67e12e1e Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Wed, 13 Aug 2025 15:35:42 +0800 Subject: [PATCH 3/4] refactor: separate V1 and V2 ManifestListReader tests - Rename ManifestListReaderTest to ManifestListReaderV2Test - Rename PrepareTestManifestList to PrepareV2PartitionedTestManifestList - Rename BasicTest to PartitionedTest - Rename V2NonPartitionedTest to NonPartitionedTest - Prepare structure for adding V1 tests --- test/manifest_list_reader_test.cc | 49 +++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index ff8a925f6..088197468 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -25,8 +25,6 @@ #include "iceberg/avro/avro_reader.h" #include "iceberg/manifest_list.h" #include "iceberg/manifest_reader.h" -#include "iceberg/schema.h" -#include "matchers.h" #include "temp_file_test_base.h" #include "test_common.h" @@ -41,7 +39,33 @@ class ManifestListReaderV1Test : public ::testing::Test { file_io_ = std::make_shared(local_fs_); } - std::vector PrepareTestManifestList() { + std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; + std::shared_ptr file_io_; + + void TestManifestListReading(const std::string& resource_name, + const std::vector& expected_manifest_list) { + std::string path = GetResourcePath(resource_name); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), expected_manifest_list.size()); + ASSERT_EQ(read_result.value(), expected_manifest_list); + } +}; + +class ManifestListReaderV2Test : public TempFileTestBase { + protected: + static void SetUpTestSuite() { avro::AvroReader::Register(); } + + void SetUp() override { + local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); + file_io_ = std::make_shared(local_fs_); + } + + std::vector PrepareV2PartitionedTestManifestList() { std::vector manifest_files; std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/"; std::vector paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro", @@ -115,6 +139,7 @@ class ManifestListReaderV1Test : public ::testing::Test { } return manifest_files; } + std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; std::shared_ptr file_io_; @@ -287,10 +312,24 @@ TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { expected_manifest_list); } -TEST_F(ManifestListReaderV1Test, V2NonPartitionedTest) { +TEST_F(ManifestListReaderV2Test, V2PartitionedTest) { std::string path = GetResourcePath( - "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro"); + "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro"); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), 4); + + auto expected_manifest_list = PrepareV2PartitionedTestManifestList(); + ASSERT_EQ(read_result.value(), expected_manifest_list); +} + +TEST_F(ManifestListReaderV2Test, V2NonPartitionedTest) { + std::string path = GetResourcePath( + "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro"); auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); ASSERT_EQ(manifest_reader_result.has_value(), true); From fa0de355ab8061b326575d761432e94c2ef324e3 Mon Sep 17 00:00:00 2001 From: HeartLinked Date: Thu, 14 Aug 2025 12:36:58 +0800 Subject: [PATCH 4/4] refactor: improve ManifestListReader test structure - Introduce unified ManifestListReaderTestBase for common functionality - Unify V1 and V2 test class inheritance and setup patterns - Extract test data preparation into dedicated helper methods - Add TestNonPartitionedManifests() helper for validation - Standardize test naming: remove version prefixes, use consistent patterns - Improve code organization and reduce duplication - Maintain all existing test coverage while enhancing maintainability --- test/manifest_list_reader_test.cc | 377 ++++++++++++++---------------- 1 file changed, 177 insertions(+), 200 deletions(-) diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 088197468..75254801f 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -30,18 +30,16 @@ namespace iceberg { -class ManifestListReaderV1Test : public ::testing::Test { +class ManifestListReaderTestBase : public TempFileTestBase { protected: static void SetUpTestSuite() { avro::AvroReader::Register(); } void SetUp() override { + TempFileTestBase::SetUp(); local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); file_io_ = std::make_shared(local_fs_); } - std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; - std::shared_ptr file_io_; - void TestManifestListReading(const std::string& resource_name, const std::vector& expected_manifest_list) { std::string path = GetResourcePath(resource_name); @@ -54,18 +52,165 @@ class ManifestListReaderV1Test : public ::testing::Test { ASSERT_EQ(read_result.value().size(), expected_manifest_list.size()); ASSERT_EQ(read_result.value(), expected_manifest_list); } + + void TestNonPartitionedManifests(const std::vector& manifest_files) { + for (const auto& manifest : manifest_files) { + ASSERT_EQ(manifest.partition_spec_id, 0); + ASSERT_TRUE(manifest.partitions.empty()); + ASSERT_EQ(manifest.content, ManifestFile::Content::kData); + } + } + + std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; + std::shared_ptr file_io_; }; -class ManifestListReaderV2Test : public TempFileTestBase { +class ManifestListReaderV1Test : public ManifestListReaderTestBase { protected: - static void SetUpTestSuite() { avro::AvroReader::Register(); } + std::vector PreparePartitionedTestData() { + std::vector paths = { + "iceberg-warehouse/db/v1_partition_test/metadata/" + "eafd2972-f58e-4185-9237-6378f564787e-m1.avro", + "iceberg-warehouse/db/v1_partition_test/metadata/" + "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; + std::vector file_size = {6185, 6113}; + std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; + + return {{.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 4, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 6, + .existing_rows_count = 0, + .deleted_rows_count = 0, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[0], + .upper_bound = upper_bounds[0]}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 2, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 6, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[1], + .upper_bound = upper_bounds[1]}}}}; + } - void SetUp() override { - local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); - file_io_ = std::make_shared(local_fs_); + std::vector PrepareComplexTypeTestData() { + std::vector paths = { + "iceberg-warehouse/db/v1_type_test/metadata/" + "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", + "iceberg-warehouse/db/v1_type_test/metadata/" + "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; + std::vector file_size = {6498, 6513}; + std::vector snapshot_id = {4134160420377642835, 4134160420377642835}; + + return {{.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 1, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 2, + .existing_rows_count = 0, + .deleted_rows_count = 0}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 3}}; + } + + std::vector PrepareComplexPartitionedTestData() { + std::vector paths = { + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; + std::vector file_size = {6402, 6318}; + std::vector snapshot_id = {7522296285847100621, 7522296285847100621}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, + {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + + return {{.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 0, + .existing_files_count = 3, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 4, + .deleted_rows_count = 2, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[0], + .upper_bound = upper_bounds[0]}, + {.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[1], + .upper_bound = upper_bounds[1]}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 1, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 1, + .deleted_rows_count = 1, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[2], + .upper_bound = upper_bounds[2]}, + {.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[3], + .upper_bound = upper_bounds[3]}}}}; } +}; - std::vector PrepareV2PartitionedTestManifestList() { +class ManifestListReaderV2Test : public ManifestListReaderTestBase { + protected: + std::vector PreparePartitionedTestData() { std::vector manifest_files; std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/"; std::vector paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro", @@ -105,7 +250,7 @@ class ManifestListReaderV2Test : public TempFileTestBase { return manifest_files; } - std::vector PrepareV2NonPartitionedTestManifestList() { + std::vector PrepareNonPartitionedTestData() { std::vector manifest_files; std::string test_dir_prefix = "/tmp/db/db/v2_non_partitioned_test/metadata/"; @@ -134,219 +279,51 @@ class ManifestListReaderV2Test : public TempFileTestBase { manifest_file.added_rows_count = 1; manifest_file.existing_rows_count = 0; manifest_file.deleted_rows_count = 0; - + // Note: no partitions for non-partitioned test manifest_files.emplace_back(manifest_file); } return manifest_files; } - - std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; - std::shared_ptr file_io_; - - void TestManifestListReading(const std::string& resource_name, - const std::vector& expected_manifest_list) { - std::string path = GetResourcePath(resource_name); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), expected_manifest_list.size()); - ASSERT_EQ(read_result.value(), expected_manifest_list); - } }; -TEST_F(ManifestListReaderV1Test, PartitionTest) { - std::vector paths = { - "iceberg-warehouse/db/v1_partition_test/metadata/" - "eafd2972-f58e-4185-9237-6378f564787e-m1.avro", - "iceberg-warehouse/db/v1_partition_test/metadata/" - "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; - std::vector file_size = {6185, 6113}; - std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; - - std::vector> lower_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; - - std::vector> upper_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; - - std::vector expected_manifest_list = { - {.manifest_path = paths[0], - .manifest_length = file_size[0], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[0], - .added_files_count = 4, - .existing_files_count = 0, - .deleted_files_count = 0, - .added_rows_count = 6, - .existing_rows_count = 0, - .deleted_rows_count = 0, - .partitions = {{.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[0], - .upper_bound = upper_bounds[0]}}}, - - {.manifest_path = paths[1], - .manifest_length = file_size[1], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[1], - .added_files_count = 0, - .existing_files_count = 0, - .deleted_files_count = 2, - .added_rows_count = 0, - .existing_rows_count = 0, - .deleted_rows_count = 6, - .partitions = {{.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[1], - .upper_bound = upper_bounds[1]}}}}; - +// V1 Tests +TEST_F(ManifestListReaderV1Test, PartitionedTest) { + auto expected_manifest_list = PreparePartitionedTestData(); TestManifestListReading( "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro", expected_manifest_list); } TEST_F(ManifestListReaderV1Test, ComplexTypeTest) { - std::vector paths = { - "iceberg-warehouse/db/v1_type_test/metadata/" - "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", - "iceberg-warehouse/db/v1_type_test/metadata/" - "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; - std::vector file_size = {6498, 6513}; - std::vector snapshot_id = {4134160420377642835, 4134160420377642835}; - - std::vector expected_manifest_list = { - { - .manifest_path = paths[0], - .manifest_length = file_size[0], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[0], - .added_files_count = 1, - .existing_files_count = 0, - .deleted_files_count = 0, - .added_rows_count = 2, - .existing_rows_count = 0, - .deleted_rows_count = 0, - }, - - {.manifest_path = paths[1], - .manifest_length = file_size[1], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[1], - .added_files_count = 0, - .existing_files_count = 0, - .deleted_files_count = 1, - .added_rows_count = 0, - .existing_rows_count = 0, - .deleted_rows_count = 3}}; - + auto expected_manifest_list = PrepareComplexTypeTestData(); TestManifestListReading( "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro", expected_manifest_list); } -TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { - std::vector paths = { - "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" - "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", - "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" - "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; - std::vector file_size = {6402, 6318}; - std::vector snapshot_id = {7522296285847100621, 7522296285847100621}; - - std::vector> lower_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, - {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, - {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - - std::vector> upper_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, - {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, - {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - std::vector expected_manifest_list = { - {.manifest_path = paths[0], - .manifest_length = file_size[0], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[0], - .added_files_count = 0, - .existing_files_count = 3, - .deleted_files_count = 1, - .added_rows_count = 0, - .existing_rows_count = 4, - .deleted_rows_count = 2, - .partitions = {{.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[0], - .upper_bound = upper_bounds[0]}, - {.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[1], - .upper_bound = upper_bounds[1]}}}, - - {.manifest_path = paths[1], - .manifest_length = file_size[1], - .partition_spec_id = 0, - .added_snapshot_id = snapshot_id[1], - .added_files_count = 0, - .existing_files_count = 1, - .deleted_files_count = 1, - .added_rows_count = 0, - .existing_rows_count = 1, - .deleted_rows_count = 1, - .partitions = {{.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[2], - .upper_bound = upper_bounds[2]}, - {.contains_null = false, - .contains_nan = false, - .lower_bound = lower_bounds[3], - .upper_bound = upper_bounds[3]}}}}; +TEST_F(ManifestListReaderV1Test, ComplexPartitionedTest) { + auto expected_manifest_list = PrepareComplexPartitionedTestData(); TestManifestListReading( "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro", expected_manifest_list); } -TEST_F(ManifestListReaderV2Test, V2PartitionedTest) { - std::string path = GetResourcePath( - "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 4); - - auto expected_manifest_list = PrepareV2PartitionedTestManifestList(); - ASSERT_EQ(read_result.value(), expected_manifest_list); +// V2 Tests +TEST_F(ManifestListReaderV2Test, PartitionedTest) { + auto expected_manifest_list = PreparePartitionedTestData(); + TestManifestListReading( + "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro", + expected_manifest_list); } -TEST_F(ManifestListReaderV2Test, V2NonPartitionedTest) { - std::string path = GetResourcePath( - "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 4); - - auto expected_manifest_list = PrepareV2NonPartitionedTestManifestList(); - ASSERT_EQ(read_result.value(), expected_manifest_list); - - // test all the manifest files are non-partitioned - for (const auto& manifest : read_result.value()) { - ASSERT_EQ(manifest.partition_spec_id, 0); - ASSERT_TRUE(manifest.partitions.empty()); // - ASSERT_EQ(manifest.content, ManifestFile::Content::kData); - } +TEST_F(ManifestListReaderV2Test, NonPartitionedTest) { + auto expected_manifest_list = PrepareNonPartitionedTestData(); + TestManifestListReading( + "snap-251167482216575399-1-ccb6dbcb-0611-48da-be68-bd506ea63188.avro", + expected_manifest_list); + + // Additional verification: ensure all manifests are truly non-partitioned + TestNonPartitionedManifests(expected_manifest_list); } } // namespace iceberg