From fc10fd2f1d97c88c49ea699595a66ff05fc108c1 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Mon, 11 Aug 2025 16:20:14 +0800 Subject: [PATCH 1/2] test: add test of manifestlistv1 1. PartitionTest: test partition 2. ComplexTypeTest: test complex type 3. PartitionComplexTypeTest: test partition with complex type --- test/manifest_list_reader_test.cc | 154 ++++++++++++++++++ ...-aeffe099-3bac-4011-bc17-5875210d8dc0.avro | Bin 0 -> 3951 bytes ...-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro | Bin 0 -> 4003 bytes ...-eafd2972-f58e-4185-9237-6378f564787e.avro | Bin 0 -> 3986 bytes 4 files changed, 154 insertions(+) create mode 100644 test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro create mode 100644 test/resources/snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro create mode 100644 test/resources/snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 90c9fd8f4..4596f93c3 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -82,6 +82,118 @@ class ManifestListReaderTest : public TempFileTestBase { return manifest_files; } + std::vector PrepareTestManifestListPartition() { + std::vector manifest_files; + std::string test_dir_prefix = "iceberg-warehouse/db/v1_partition_test/metadata/"; + std::vector paths = {"eafd2972-f58e-4185-9237-6378f564787e-m1.avro", + "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; + std::vector file_size = {6185, 6113}; + std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; + + for (int i = 0; i < 2; ++i) { + ManifestFile manifest_file; + manifest_file.manifest_path = test_dir_prefix + paths[i]; + manifest_file.manifest_length = file_size[i]; + manifest_file.partition_spec_id = 0; + manifest_file.added_snapshot_id = snapshot_id[i]; + manifest_file.added_files_count = 4 * (1 - i); + manifest_file.existing_files_count = 0; + manifest_file.deleted_files_count = 2 * i; + manifest_file.added_rows_count = 6 * (1 - i); + manifest_file.existing_rows_count = 0; + manifest_file.deleted_rows_count = 6 * i; + + PartitionFieldSummary partition; + partition.contains_null = false; + partition.contains_nan = false; + partition.lower_bound = lower_bounds[i]; + partition.upper_bound = upper_bounds[i]; + manifest_file.partitions.emplace_back(partition); + manifest_files.emplace_back(manifest_file); + } + return manifest_files; + } + + std::vector PrepareTestManifestListComplexType() { + std::vector manifest_files; + std::string test_dir_prefix = "iceberg-warehouse/db/v1_type_test/metadata/"; + std::vector paths = {"aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", + "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; + std::vector file_size = {6498, 6513}; + std::vector snapshot_id = {4134160420377642835, 4134160420377642835}; + + for (int i = 0; i < 2; ++i) { + ManifestFile manifest_file; + manifest_file.manifest_path = test_dir_prefix + paths[i]; + manifest_file.manifest_length = file_size[i]; + manifest_file.partition_spec_id = 0; + manifest_file.added_snapshot_id = snapshot_id[i]; + manifest_file.added_files_count = 1 - i; + manifest_file.existing_files_count = 0; + manifest_file.deleted_files_count = i; + manifest_file.added_rows_count = 2 * (1 - i); + manifest_file.existing_rows_count = 0; + manifest_file.deleted_rows_count = 3 * i; + manifest_files.emplace_back(manifest_file); + } + return manifest_files; + } + + std::vector PrepareTestManifestListPartitionComplex() { + std::vector manifest_files; + std::string test_dir_prefix = + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/"; + std::vector paths = {"5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; + std::vector file_size = {6402, 6318}; + std::vector snapshot_id = {7522296285847100621, 7522296285847100621}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, + {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + + for (int i = 0; i < 2; ++i) { + ManifestFile manifest_file; + manifest_file.manifest_path = test_dir_prefix + paths[i]; + manifest_file.manifest_length = file_size[i]; + manifest_file.partition_spec_id = 0; + manifest_file.added_snapshot_id = snapshot_id[i]; + manifest_file.added_files_count = 0; + manifest_file.existing_files_count = i == 0 ? 3 : 1; + manifest_file.deleted_files_count = 1; + manifest_file.added_rows_count = 0; + manifest_file.existing_rows_count = i == 0 ? 4 : 1; + manifest_file.deleted_rows_count = i == 0 ? 2 : 1; + + PartitionFieldSummary partition; + for (int j = 0; j < 2; ++j) { + partition.contains_null = false; + partition.contains_nan = false; + partition.lower_bound = lower_bounds[2 * i + j]; + partition.upper_bound = upper_bounds[2 * i + j]; + manifest_file.partitions.emplace_back(partition); + } + manifest_files.emplace_back(manifest_file); + } + return manifest_files; + } + std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; std::shared_ptr file_io_; }; @@ -100,4 +212,46 @@ TEST_F(ManifestListReaderTest, BasicTest) { ASSERT_EQ(read_result.value(), expected_manifest_list); } +TEST_F(ManifestListReaderTest, PartitionTest) { + std::string path = GetResourcePath( + "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro"); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), 2); + + auto expected_manifest_list = PrepareTestManifestListPartition(); + ASSERT_EQ(read_result.value(), expected_manifest_list); +} + +TEST_F(ManifestListReaderTest, ComplexTypeTest) { + std::string path = GetResourcePath( + "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro"); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), 2); + + auto expected_manifest_list = PrepareTestManifestListComplexType(); + ASSERT_EQ(read_result.value(), expected_manifest_list); +} + +TEST_F(ManifestListReaderTest, PartitionComplexTypeTest) { + std::string path = GetResourcePath( + "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro"); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), 2); + + auto expected_manifest_list = PrepareTestManifestListPartitionComplex(); + ASSERT_EQ(read_result.value(), expected_manifest_list); +} + } // namespace iceberg diff --git a/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro b/test/resources/snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro new file mode 100644 index 0000000000000000000000000000000000000000..29584b8ce4f9156da570cf293587e846545c49b9 GIT binary patch literal 3951 zcmbW3O>Epm6vxwwV}*o@3W6eytcX*J?ET*5Qlu8C5)!B>C?Bnyu{|3*>yO1AXIZL< z3kPy*S4#*?5V4gvbm+OIcEU=+WOE^qnq>u-DS_FmSL9US9?YokzXoSW2g^syj>5 zv0dbE%F*3GihU!%&;>L>5IOH7;P}88Vid7-v%s1ZxDP|@(0uf$k4&AAdE$_>Khb76BCFJ~)gY-~del8xj~1X#f*lCMrt z=y@aE4B{l;wCd}MAG)=R}y? zV{-lFXjii><$-kxXdxG2Qp576yOQoGk84Oc-UqZ<@-^g=fME!Sw5Aw&r1SarNvVW& zwZ(2Nrw}vnF?9S$&n_oZh!F%Xf_-J*)=~{+qt!~q>$$Gh_W*2w08KxmSfL@L~!&d2%5Gi=c(sW zn^GauaS{fDLggh>g)>v3)Ac2EuE~Ro>%2US>%1&a9$`%8<#9~rWohyh>!G3UAq*{u zA*uePCK9jUMZD^p;jo{071;;aK47?kaq87+ZPF3XCbnt@7BZhEUF$-OE-*hJP4aS0 ztJbuJTC1qlX0uVNw5s)sq(wZ4FKweRB93%cyXct6K;dTQ&W~Q?BZru3Y*KqleLLwo zHD}71hfe!<=FTB{H+DiqoR)+_tQN`~ZcY0$rD)R>?$ZUFoC_BEwm?)kf=@unu?ryo z@8AMZ<$zRd?5NM$7Y+2+grp^op7J>6uk{b$BPAr0piNiITbHP&axke>B6bLb7m_+9% zFp16t2$@V+YfzI9IZSaOYvMELYo#ilWJ&Wctio~hQMxIfy?4_ee)IEFAD&(RasSh!gYnw<{`%)DN4vYbXU?r&Ui_i+_;+XF`@f%g@EQ1bP+1|2~_kN$> z`@P@y_kHiw*QSo0fa{L^x~n!&3;M5I6L6=EgayGts_kgPtYEZOPuy_G@F46$D!G>et;e6#pSg2`8Q(OzS-G+@*VN)4bqZKqZZa@rzjpR=R z;(`w)KQ}_5=XDj;_JVxFs;>x^XPRUyJ;ar`_HWRv0J5;t0r7eb>CiK=us}CN*BaHS zkU*iUhKtEP2G^gDb}`zLA6QHPHDn@8YMB3Y7s4I+ais)~^#N^`bPbs#pyR*}ttmz= z()sNBuvDC)Scz{fr4ZG&Ff=Szi7qEnNZqzg1TC^9l*p3fk>1q-v^rrkH=qk3pql{8 z6Rbni8*ah*#ECd82%SEY5HneCWI>@dhMCTA$sgus+vSOVXDwFXq$3g_b zE)eVjy6pf+jvaIXEl_5+O388IwcAes4t7lrmbzEEvjSDKdgzsD_9XPTB_4&2VOsB=@e;^7R2Y)k>e6anwCx(Dyk!AHFD?QUS=bQ zn5w5zdkTF!=sGcF%8`eT`gi2c4*J+L97LR!ghFvGgbCc5_Gd_uMk&mvGdL+1%=B%J zkQstaz=UIGK>pvsIbeJ(TglP z6ZGT&ChX*kfip^s5sP?oL@egiir8`w9erW+zJc8ra4Dw!P7D?jbnnNY(bjz(u-@TAXU z-5kb;McSn&hGpAz%8+?ndSAq;!=w+fWu7aaI%6D|2Tor}YAB32F%Qls4%2AL1#`t` z8coP7c zLQ#|or6WI8&;F_1wP!y4`)K$6PXD`^$J6H@Z13#tZq>H#tlV1O+U+Z~y{`wp_LZWa z9PABNu76Q`PPx9k_H+K2mtn>ByiU-l2|-MVgkWUbw9_P^BjoBm$?pxNJE zyVHcb<}JmXgFWS+R?5T5l4 z{SjQb1bhz}fVf}>TVyofaw;q|3}mRD13O;RC7H09j920nEH-IE0>X{-Uj&kZk0if1 zMq%gmRNeK%e50ywNRDsYbSpc=jimPPvaA4d2=0Ppy@5>V+eBJo8{%t?>r_dh(5IG1 z=skwlUyOD+-clS`O#uyLBSLFf{B)P19mR3A6pr@+YnE&cxg;RQu*+&nP@8r>|9(^| zxLQ(@Tgxa!cO3#P$5Z3Wi4~%`u8p8Ww?qMcuT7N|OGoMPgRS`OHl%`MM?p|;igKQK zo>Dzk$Y`8kcPCeQ;Z)(wNa$>R0iDa~!Nql69L9BCl$Rc1iOh@R5}6mJrKea6b=5W^ z(0~Ne>W^w7^a@_Y%Z`q_ozSc3K9KAKn(b;QUY*>a9r1c-tGa6-{bky-z zF4n7y<(gb6SCv|=ME{l=$_3gYZAccj5%#Df&B_-n9cc(}#_s(1JRdpKRDGSxW z*OfA+9DC@vf5+~O(I>uz5p`M`3MI9Wrf_T4pAkhKr*NOn;bdGe*S7_t%n^J7rW`v5 z^8XGl0E-+Di7mOS$h&g@nNt+;a>1*N@UlK9cqX(xaXsl1jk~#sUggobpr;3LVW(dV zf>Gm)M8wl05;0FE)H#7)IRPlfH;`3z!%dS(ly~ zmT%V?Ll$w_eNm^5l0LzfMXqA%OmJWkIC~|Tp)lseBDk11T%#EmEEJz>G-ZbZA%;vM z(dh|^L?;4-Oh&9VFL^E7IwyK8y~a1G@Y$;>na0dHmFi-t-YCmTrP`<~wTe_}kuap}KtPkJ*174)5IEAFL0KH}0(s_5*eO?$KfYzG|LRzgb)DU+>>J zd_(Q8fBfU&!Fqq#U+X_O+#i0Y8J2VZz}yOsT3QnxwJgIvZke_dm|Ldizp2ufi`sG^Z)(=0wc&dADEbph>wM^Fl literal 0 HcmV?d00001 From 2ef55aa06d29109c73facc9715476a17435a1003 Mon Sep 17 00:00:00 2001 From: nullccxsy <32149055912@qq.com> Date: Wed, 13 Aug 2025 17:34:30 +0800 Subject: [PATCH 2/2] test: add & refactor manifestlistv1 test functions 1. PartitionTest: test partition 2. ComplexTypeTest: test complex type 3. PartitionComplexTypeTest: test partition with complex type --- test/manifest_list_reader_test.cc | 367 +++++++++++++----------------- 1 file changed, 163 insertions(+), 204 deletions(-) diff --git a/test/manifest_list_reader_test.cc b/test/manifest_list_reader_test.cc index 4596f93c3..7497a1203 100644 --- a/test/manifest_list_reader_test.cc +++ b/test/manifest_list_reader_test.cc @@ -32,226 +32,185 @@ namespace iceberg { -class ManifestListReaderTest : public TempFileTestBase { +class ManifestListReaderV1Test : public ::testing::Test { protected: static void SetUpTestSuite() { avro::AvroReader::Register(); } void SetUp() override { - TempFileTestBase::SetUp(); local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>(); file_io_ = std::make_shared(local_fs_); } - std::vector PrepareTestManifestList() { - std::vector manifest_files; - std::string test_dir_prefix = "/tmp/db/db/iceberg_test/metadata/"; - std::vector paths = {"2bccd69e-d642-4816-bba0-261cd9bd0d93-m0.avro", - "9b6ffacd-ef10-4abf-a89c-01c733696796-m0.avro", - "2541e6b5-4923-4bd5-886d-72c6f7228400-m0.avro", - "3118c801-d2e0-4df6-8c7a-7d4eaade32f8-m0.avro"}; - std::vector file_size = {7433, 7431, 7433, 7431}; - std::vector snapshot_id = {7412193043800610213, 5485972788975780755, - 1679468743751242972, 1579605567338877265}; - std::vector> bounds = {{'x', ';', 0x07, 0x00}, - {'(', 0x19, 0x07, 0x00}, - {0xd0, 0xd4, 0x06, 0x00}, - {0xb8, 0xd4, 0x06, 0x00}}; - for (int i = 0; i < 4; ++i) { - ManifestFile manifest_file; - manifest_file.manifest_path = test_dir_prefix + paths[i]; - manifest_file.manifest_length = file_size[i]; - manifest_file.partition_spec_id = 0; - manifest_file.content = ManifestFile::Content::kData; - manifest_file.sequence_number = 4 - i; - manifest_file.min_sequence_number = 4 - i; - manifest_file.added_snapshot_id = snapshot_id[i]; - manifest_file.added_files_count = 1; - manifest_file.existing_files_count = 0; - manifest_file.deleted_files_count = 0; - manifest_file.added_rows_count = 1; - manifest_file.existing_rows_count = 0; - manifest_file.deleted_rows_count = 0; - PartitionFieldSummary partition; - partition.contains_null = false; - partition.contains_nan = false; - partition.lower_bound = bounds[i]; - partition.upper_bound = bounds[i]; - manifest_file.partitions.emplace_back(partition); - manifest_files.emplace_back(manifest_file); - } - return manifest_files; - } - - std::vector PrepareTestManifestListPartition() { - std::vector manifest_files; - std::string test_dir_prefix = "iceberg-warehouse/db/v1_partition_test/metadata/"; - std::vector paths = {"eafd2972-f58e-4185-9237-6378f564787e-m1.avro", - "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; - std::vector file_size = {6185, 6113}; - std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; - - std::vector> lower_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; - - std::vector> upper_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; - - for (int i = 0; i < 2; ++i) { - ManifestFile manifest_file; - manifest_file.manifest_path = test_dir_prefix + paths[i]; - manifest_file.manifest_length = file_size[i]; - manifest_file.partition_spec_id = 0; - manifest_file.added_snapshot_id = snapshot_id[i]; - manifest_file.added_files_count = 4 * (1 - i); - manifest_file.existing_files_count = 0; - manifest_file.deleted_files_count = 2 * i; - manifest_file.added_rows_count = 6 * (1 - i); - manifest_file.existing_rows_count = 0; - manifest_file.deleted_rows_count = 6 * i; - - PartitionFieldSummary partition; - partition.contains_null = false; - partition.contains_nan = false; - partition.lower_bound = lower_bounds[i]; - partition.upper_bound = upper_bounds[i]; - manifest_file.partitions.emplace_back(partition); - manifest_files.emplace_back(manifest_file); - } - return manifest_files; - } - - std::vector PrepareTestManifestListComplexType() { - std::vector manifest_files; - std::string test_dir_prefix = "iceberg-warehouse/db/v1_type_test/metadata/"; - std::vector paths = {"aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", - "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; - std::vector file_size = {6498, 6513}; - std::vector snapshot_id = {4134160420377642835, 4134160420377642835}; - - for (int i = 0; i < 2; ++i) { - ManifestFile manifest_file; - manifest_file.manifest_path = test_dir_prefix + paths[i]; - manifest_file.manifest_length = file_size[i]; - manifest_file.partition_spec_id = 0; - manifest_file.added_snapshot_id = snapshot_id[i]; - manifest_file.added_files_count = 1 - i; - manifest_file.existing_files_count = 0; - manifest_file.deleted_files_count = i; - manifest_file.added_rows_count = 2 * (1 - i); - manifest_file.existing_rows_count = 0; - manifest_file.deleted_rows_count = 3 * i; - manifest_files.emplace_back(manifest_file); - } - return manifest_files; - } - - std::vector PrepareTestManifestListPartitionComplex() { - std::vector manifest_files; - std::string test_dir_prefix = - "iceberg-warehouse/db2/v1_complex_partition_test/metadata/"; - std::vector paths = {"5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", - "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; - std::vector file_size = {6402, 6318}; - std::vector snapshot_id = {7522296285847100621, 7522296285847100621}; - - std::vector> lower_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, - {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, - {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - - std::vector> upper_bounds = { - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, - {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, - {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; - - for (int i = 0; i < 2; ++i) { - ManifestFile manifest_file; - manifest_file.manifest_path = test_dir_prefix + paths[i]; - manifest_file.manifest_length = file_size[i]; - manifest_file.partition_spec_id = 0; - manifest_file.added_snapshot_id = snapshot_id[i]; - manifest_file.added_files_count = 0; - manifest_file.existing_files_count = i == 0 ? 3 : 1; - manifest_file.deleted_files_count = 1; - manifest_file.added_rows_count = 0; - manifest_file.existing_rows_count = i == 0 ? 4 : 1; - manifest_file.deleted_rows_count = i == 0 ? 2 : 1; - - PartitionFieldSummary partition; - for (int j = 0; j < 2; ++j) { - partition.contains_null = false; - partition.contains_nan = false; - partition.lower_bound = lower_bounds[2 * i + j]; - partition.upper_bound = upper_bounds[2 * i + j]; - manifest_file.partitions.emplace_back(partition); - } - manifest_files.emplace_back(manifest_file); - } - return manifest_files; - } - std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_; std::shared_ptr file_io_; -}; - -TEST_F(ManifestListReaderTest, BasicTest) { - std::string path = GetResourcePath( - "snap-7412193043800610213-1-2bccd69e-d642-4816-bba0-261cd9bd0d93.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 4); - auto expected_manifest_list = PrepareTestManifestList(); - ASSERT_EQ(read_result.value(), expected_manifest_list); -} - -TEST_F(ManifestListReaderTest, PartitionTest) { - std::string path = GetResourcePath( - "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 2); + void TestManifestListReading(const std::string& resource_name, + const std::vector& expected_manifest_list) { + std::string path = GetResourcePath(resource_name); + auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); + ASSERT_EQ(manifest_reader_result.has_value(), true); + + auto manifest_reader = std::move(manifest_reader_result.value()); + auto read_result = manifest_reader->Files(); + ASSERT_EQ(read_result.has_value(), true); + ASSERT_EQ(read_result.value().size(), expected_manifest_list.size()); + ASSERT_EQ(read_result.value(), expected_manifest_list); + } +}; - auto expected_manifest_list = PrepareTestManifestListPartition(); - ASSERT_EQ(read_result.value(), expected_manifest_list); +TEST_F(ManifestListReaderV1Test, PartitionTest) { + std::vector paths = { + "iceberg-warehouse/db/v1_partition_test/metadata/" + "eafd2972-f58e-4185-9237-6378f564787e-m1.avro", + "iceberg-warehouse/db/v1_partition_test/metadata/" + "eafd2972-f58e-4185-9237-6378f564787e-m0.avro"}; + std::vector file_size = {6185, 6113}; + std::vector snapshot_id = {7532614258660258098, 7532614258660258098}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x30, 0x32, 0x2D, 0x32, 0x32}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}}; + + std::vector expected_manifest_list = { + {.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 4, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 6, + .existing_rows_count = 0, + .deleted_rows_count = 0, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[0], + .upper_bound = upper_bounds[0]}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 2, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 6, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[1], + .upper_bound = upper_bounds[1]}}}}; + + TestManifestListReading( + "snap-7532614258660258098-1-eafd2972-f58e-4185-9237-6378f564787e.avro", + expected_manifest_list); } -TEST_F(ManifestListReaderTest, ComplexTypeTest) { - std::string path = GetResourcePath( - "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 2); - - auto expected_manifest_list = PrepareTestManifestListComplexType(); - ASSERT_EQ(read_result.value(), expected_manifest_list); +TEST_F(ManifestListReaderV1Test, ComplexTypeTest) { + std::vector paths = { + "iceberg-warehouse/db/v1_type_test/metadata/" + "aeffe099-3bac-4011-bc17-5875210d8dc0-m1.avro", + "iceberg-warehouse/db/v1_type_test/metadata/" + "aeffe099-3bac-4011-bc17-5875210d8dc0-m0.avro"}; + std::vector file_size = {6498, 6513}; + std::vector snapshot_id = {4134160420377642835, 4134160420377642835}; + + std::vector expected_manifest_list = { + { + .manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 1, + .existing_files_count = 0, + .deleted_files_count = 0, + .added_rows_count = 2, + .existing_rows_count = 0, + .deleted_rows_count = 0, + }, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 0, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 0, + .deleted_rows_count = 3}}; + + TestManifestListReading( + "snap-4134160420377642835-1-aeffe099-3bac-4011-bc17-5875210d8dc0.avro", + expected_manifest_list); } -TEST_F(ManifestListReaderTest, PartitionComplexTypeTest) { - std::string path = GetResourcePath( - "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro"); - auto manifest_reader_result = ManifestListReader::MakeReader(path, file_io_); - ASSERT_EQ(manifest_reader_result.has_value(), true); - auto manifest_reader = std::move(manifest_reader_result.value()); - auto read_result = manifest_reader->Files(); - ASSERT_EQ(read_result.has_value(), true); - ASSERT_EQ(read_result.value().size(), 2); - - auto expected_manifest_list = PrepareTestManifestListPartitionComplex(); - ASSERT_EQ(read_result.value(), expected_manifest_list); +TEST_F(ManifestListReaderV1Test, PartitionComplexTypeTest) { + std::vector paths = { + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m0.avro", + "iceberg-warehouse/db2/v1_complex_partition_test/metadata/" + "5d690750-8fb4-4cd1-8ae7-85c7b39abe14-m1.avro"}; + std::vector file_size = {6402, 6318}; + std::vector snapshot_id = {7522296285847100621, 7522296285847100621}; + + std::vector> lower_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x32}, + {0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + + std::vector> upper_bounds = { + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x34}, + {0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x32, 0x30, 0x32, 0x32, 0x2D, 0x32, 0x2D, 0x32, 0x33}, + {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}}; + std::vector expected_manifest_list = { + {.manifest_path = paths[0], + .manifest_length = file_size[0], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[0], + .added_files_count = 0, + .existing_files_count = 3, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 4, + .deleted_rows_count = 2, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[0], + .upper_bound = upper_bounds[0]}, + {.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[1], + .upper_bound = upper_bounds[1]}}}, + + {.manifest_path = paths[1], + .manifest_length = file_size[1], + .partition_spec_id = 0, + .added_snapshot_id = snapshot_id[1], + .added_files_count = 0, + .existing_files_count = 1, + .deleted_files_count = 1, + .added_rows_count = 0, + .existing_rows_count = 1, + .deleted_rows_count = 1, + .partitions = {{.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[2], + .upper_bound = upper_bounds[2]}, + {.contains_null = false, + .contains_nan = false, + .lower_bound = lower_bounds[3], + .upper_bound = upper_bounds[3]}}}}; + TestManifestListReading( + "snap-7522296285847100621-1-5d690750-8fb4-4cd1-8ae7-85c7b39abe14.avro", + expected_manifest_list); } } // namespace iceberg