Skip to content

Commit 89efa42

Browse files
committed
test: add ManifestReaderV2Test for non-partitioned manifests
- Add V2NonPartitionedBasicTest to test manifest reader v2 functionality
- Fix bounds values to match actual manifest file data
- Use proper C++23 designated initializers for test data setup
1 parent 8ecee31 commit 89efa42

File tree

2 files changed

+88
-5
lines changed

2 files changed

+88
-5
lines changed

test/manifest_reader_test.cc

Lines changed: 88 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,22 @@
1919

2020
#include "iceberg/manifest_reader.h"
2121

22+
#include <cstddef>
23+
2224
#include <arrow/filesystem/localfs.h>
2325
#include <gtest/gtest.h>
2426

2527
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
2628
#include "iceberg/avro/avro_reader.h"
2729
#include "iceberg/avro/avro_register.h"
28-
#include "iceberg/avro/avro_schema_util_internal.h"
2930
#include "iceberg/manifest_entry.h"
3031
#include "iceberg/schema.h"
3132
#include "temp_file_test_base.h"
3233
#include "test_common.h"
3334

3435
namespace iceberg {
3536

36-
class ManifestReaderTest : public TempFileTestBase {
37+
class ManifestReaderV1Test : public TempFileTestBase {
3738
protected:
3839
static void SetUpTestSuite() { avro::AvroReader::Register(); }
3940

@@ -45,7 +46,7 @@ class ManifestReaderTest : public TempFileTestBase {
4546
avro::RegisterLogicalTypes();
4647
}
4748

48-
std::vector<ManifestEntry> prepare_manifest_entries() {
49+
std::vector<ManifestEntry> prepareV1ManifestEntries() {
4950
std::vector<ManifestEntry> manifest_entries;
5051
std::string test_dir_prefix = "/tmp/db/db/iceberg_test/data/";
5152
std::vector<std::string> paths = {
@@ -102,7 +103,7 @@ class ManifestReaderTest : public TempFileTestBase {
102103
std::shared_ptr<FileIO> file_io_;
103104
};
104105

105-
TEST_F(ManifestReaderTest, BasicTest) {
106+
TEST_F(ManifestReaderV1Test, V1PartitionedBasicTest) {
106107
iceberg::SchemaField partition_field(1000, "order_ts_hour", iceberg::int32(), true);
107108
auto partition_schema =
108109
std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
@@ -115,7 +116,89 @@ TEST_F(ManifestReaderTest, BasicTest) {
115116
auto read_result = manifest_reader->Entries();
116117
ASSERT_EQ(read_result.has_value(), true) << read_result.error().message;
117118

118-
auto expected_entries = prepare_manifest_entries();
119+
auto expected_entries = prepareV1ManifestEntries();
120+
ASSERT_EQ(read_result.value(), expected_entries);
121+
}
122+
123+
class ManifestReaderV2Test : public TempFileTestBase {
124+
protected:
125+
static void SetUpTestSuite() { avro::AvroReader::Register(); }
126+
127+
void SetUp() override {
128+
TempFileTestBase::SetUp();
129+
local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
130+
file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
131+
132+
avro::RegisterLogicalTypes();
133+
}
134+
135+
std::vector<ManifestEntry> prepareV2NonPartitionedManifestEntries() {
136+
std::vector<ManifestEntry> manifest_entries;
137+
std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/";
138+
139+
std::vector<std::string> paths = {
140+
"00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet"};
141+
142+
std::vector<int64_t> file_sizes = {1344};
143+
std::vector<int64_t> record_counts = {4};
144+
145+
// Real bounds data extracted from the manifest
146+
std::vector<std::map<int32_t, std::vector<uint8_t>>> lower_bounds = {
147+
{{1, {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
148+
{2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 'f', 'o', 'u', 'r'}},
149+
{3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '1'}},
150+
{4, {0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xdc, 0x5e, 0x40}}}};
151+
152+
std::vector<std::map<int32_t, std::vector<uint8_t>>> upper_bounds = {
153+
{{1, {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
154+
{2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 't', 'w', 'o'}},
155+
{3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}},
156+
{4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}};
157+
158+
manifest_entries.emplace_back(
159+
ManifestEntry{.status = ManifestStatus::kAdded,
160+
.snapshot_id = 679879563479918846LL,
161+
.sequence_number = std::nullopt,
162+
.file_sequence_number = std::nullopt,
163+
.data_file = std::make_shared<DataFile>(
164+
DataFile{.file_path = test_dir_prefix + paths[0],
165+
.file_format = FileFormatType::kParquet,
166+
.record_count = record_counts[0],
167+
.file_size_in_bytes = file_sizes[0],
168+
.column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}},
169+
.value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}},
170+
.null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}},
171+
.nan_value_counts = {{4, 0}},
172+
.lower_bounds = lower_bounds[0],
173+
.upper_bounds = upper_bounds[0],
174+
.key_metadata = {},
175+
.split_offsets = {4},
176+
.equality_ids = {},
177+
.sort_order_id = 0,
178+
.first_row_id = std::nullopt,
179+
.referenced_data_file = std::nullopt,
180+
.content_offset = std::nullopt,
181+
.content_size_in_bytes = std::nullopt})});
182+
return manifest_entries;
183+
}
184+
185+
std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
186+
std::shared_ptr<FileIO> file_io_;
187+
};
188+
189+
/// Reads the V2 non-partitioned manifest resource and checks that it decodes to
/// exactly the one entry built by prepareV2NonPartitionedManifestEntries().
TEST_F(ManifestReaderV2Test, V2NonPartitionedBasicTest) {
  std::string manifest_path =
      GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro");

  // nullptr is passed as the third argument (presumably "no partition schema"
  // for a non-partitioned table — confirm against ManifestReader::MakeReader).
  auto reader_result = ManifestReader::MakeReader(manifest_path, file_io_, nullptr);
  // The streamed message is only evaluated when the assertion fails, so
  // error() is never touched on a successful result.
  ASSERT_TRUE(reader_result.has_value()) << reader_result.error().message;

  auto reader = std::move(reader_result.value());
  auto entries_result = reader->Entries();
  ASSERT_TRUE(entries_result.has_value()) << entries_result.error().message;
  // Unsigned literal keeps the comparison with size() free of sign-compare
  // warnings inside the assertion template.
  ASSERT_EQ(entries_result.value().size(), 1U);

  auto expected_entries = prepareV2NonPartitionedManifestEntries();
  ASSERT_EQ(entries_result.value(), expected_entries);
}
121204

7.04 KB
Binary file not shown.

0 commit comments

Comments
 (0)