Skip to content

Commit bc1754c

Browse files
committed
modify test
1 parent c6546d1 commit bc1754c

File tree

1 file changed

+84
-125
lines changed

1 file changed

+84
-125
lines changed

test/manifest_reader_test.cc

Lines changed: 84 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
namespace iceberg {
3737

38-
class ManifestReaderV1Test : public TempFileTestBase {
38+
class ManifestReaderTestBase : public TempFileTestBase {
3939
protected:
4040
static void SetUpTestSuite() { avro::AvroReader::Register(); }
4141

@@ -47,7 +47,44 @@ class ManifestReaderV1Test : public TempFileTestBase {
4747
avro::RegisterLogicalTypes();
4848
}
4949

50-
std::vector<ManifestEntry> PrepareV1ManifestEntries() {
50+
// Shared check for reading a manifest from a test resource file.
//
// Resolves `resource_name` with GetResourcePath(), builds a reader with
// ManifestReader::Make(path, file_io_, partition_schema), reads all entries
// with Entries(), and asserts that the result has the same size and the same
// contents as `expected_entries`.
//
// `partition_schema` defaults to nullptr and is forwarded unchanged to
// ManifestReader::Make().
//
// NOTE(review): the ASSERT_* macros used here are fatal only for this helper
// (GoogleTest returns from the current function) - callers that do more work
// after this call should check HasFatalFailure(); confirm if that matters.
void TestManifestReading(const std::string& resource_name,
51+
const std::vector<ManifestEntry>& expected_entries,
52+
std::shared_ptr<Schema> partition_schema = nullptr) {
53+
std::string path = GetResourcePath(resource_name);
54+
auto manifest_reader_result = ManifestReader::Make(path, file_io_, partition_schema);
55+
// NOTE(review): error() is only meaningful when has_value() is false; this
// presumably relies on GoogleTest evaluating the streamed message only when
// the assertion fails - confirm.
ASSERT_EQ(manifest_reader_result.has_value(), true)
56+
<< manifest_reader_result.error().message;
57+
58+
auto manifest_reader = std::move(manifest_reader_result.value());
59+
auto read_result = manifest_reader->Entries();
60+
ASSERT_EQ(read_result.has_value(), true) << read_result.error().message;
61+
// Size is asserted first, before the element-wise comparison below.
ASSERT_EQ(read_result.value().size(), expected_entries.size());
62+
ASSERT_EQ(read_result.value(), expected_entries);
63+
}
64+
65+
// Shared check for reading a manifest described by a ManifestFile.
//
// Builds a reader with ManifestReader::Make(manifest_file, file_io_,
// partition_schema), reads all entries with Entries(), and asserts that the
// result has the same size and the same contents as `expected_entries`.
//
// `partition_schema` defaults to nullptr and is forwarded unchanged to
// ManifestReader::Make().
//
// NOTE(review): the ASSERT_* macros used here are fatal only for this helper
// (GoogleTest returns from the current function) - callers that do more work
// after this call should check HasFatalFailure(); confirm if that matters.
void TestManifestReadingWithManifestFile(
66+
const ManifestFile& manifest_file,
67+
const std::vector<ManifestEntry>& expected_entries,
68+
std::shared_ptr<Schema> partition_schema = nullptr) {
69+
auto manifest_reader_result =
70+
ManifestReader::Make(manifest_file, file_io_, partition_schema);
71+
// NOTE(review): error() is only meaningful when has_value() is false; this
// presumably relies on GoogleTest evaluating the streamed message only when
// the assertion fails - confirm.
ASSERT_EQ(manifest_reader_result.has_value(), true)
72+
<< manifest_reader_result.error().message;
73+
74+
auto manifest_reader = std::move(manifest_reader_result.value());
75+
auto read_result = manifest_reader->Entries();
76+
ASSERT_EQ(read_result.has_value(), true) << read_result.error().message;
77+
// Size is asserted first, before the element-wise comparison below.
ASSERT_EQ(read_result.value().size(), expected_entries.size());
78+
ASSERT_EQ(read_result.value(), expected_entries);
79+
}
80+
81+
std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
82+
std::shared_ptr<FileIO> file_io_;
83+
};
84+
85+
class ManifestReaderV1Test : public ManifestReaderTestBase {
86+
protected:
87+
std::vector<ManifestEntry> PreparePartitionedTestData() {
5188
std::vector<ManifestEntry> manifest_entries;
5289
std::string test_dir_prefix = "/tmp/db/db/iceberg_test/data/";
5390
std::vector<std::string> paths = {
@@ -99,40 +136,22 @@ class ManifestReaderV1Test : public TempFileTestBase {
99136
}
100137
return manifest_entries;
101138
}
102-
103-
std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
104-
std::shared_ptr<FileIO> file_io_;
105139
};
106140

107-
TEST_F(ManifestReaderV1Test, V1PartitionedBasicTest) {
141+
TEST_F(ManifestReaderV1Test, PartitionedTest) {
108142
iceberg::SchemaField partition_field(1000, "order_ts_hour", iceberg::int32(), true);
109143
auto partition_schema =
110144
std::make_shared<Schema>(std::vector<SchemaField>({partition_field}));
111-
std::string path = GetResourcePath("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro");
112-
auto manifest_reader_result = ManifestReader::Make(path, file_io_, partition_schema);
113-
ASSERT_EQ(manifest_reader_result.has_value(), true)
114-
<< manifest_reader_result.error().message;
115-
auto manifest_reader = std::move(manifest_reader_result.value());
116-
auto read_result = manifest_reader->Entries();
117-
ASSERT_EQ(read_result.has_value(), true) << read_result.error().message;
118-
119-
auto expected_entries = PrepareV1ManifestEntries();
120-
ASSERT_EQ(read_result.value(), expected_entries);
145+
auto expected_entries = PreparePartitionedTestData();
146+
TestManifestReading("56357cd7-391f-4df8-aa24-e7e667da8870-m4.avro", expected_entries,
147+
partition_schema);
121148
}
122149

123-
class ManifestReaderV2Test : public TempFileTestBase {
150+
class ManifestReaderV2Test : public ManifestReaderTestBase {
124151
protected:
125-
static void SetUpTestSuite() { avro::AvroReader::Register(); }
126-
127-
void SetUp() override {
128-
TempFileTestBase::SetUp();
129-
local_fs_ = std::make_shared<::arrow::fs::LocalFileSystem>();
130-
file_io_ = std::make_shared<iceberg::arrow::ArrowFileSystemFileIO>(local_fs_);
131-
132-
avro::RegisterLogicalTypes();
133-
}
134-
135-
std::vector<ManifestEntry> PrepareV2NonPartitionedManifestEntries() {
152+
std::vector<ManifestEntry> CreateV2TestData(
153+
std::optional<int64_t> sequence_number = std::nullopt,
154+
std::optional<int32_t> partition_spec_id = std::nullopt) {
136155
std::vector<ManifestEntry> manifest_entries;
137156
std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/";
138157

@@ -154,104 +173,53 @@ class ManifestReaderV2Test : public TempFileTestBase {
154173
{3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}},
155174
{4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}};
156175

176+
DataFile data_file{.file_path = test_dir_prefix + paths[0],
177+
.file_format = FileFormatType::kParquet,
178+
.record_count = record_counts[0],
179+
.file_size_in_bytes = file_sizes[0],
180+
.column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}},
181+
.value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}},
182+
.null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}},
183+
.nan_value_counts = {{4, 0}},
184+
.lower_bounds = lower_bounds[0],
185+
.upper_bounds = upper_bounds[0],
186+
.key_metadata = {},
187+
.split_offsets = {4},
188+
.equality_ids = {},
189+
.sort_order_id = 0,
190+
.first_row_id = std::nullopt,
191+
.referenced_data_file = std::nullopt,
192+
.content_offset = std::nullopt,
193+
.content_size_in_bytes = std::nullopt};
194+
195+
if (partition_spec_id.has_value()) {
196+
data_file.partition_spec_id = partition_spec_id.value();
197+
}
198+
157199
manifest_entries.emplace_back(
158200
ManifestEntry{.status = ManifestStatus::kAdded,
159201
.snapshot_id = 679879563479918846LL,
160-
.sequence_number = std::nullopt,
161-
.file_sequence_number = std::nullopt,
162-
.data_file = std::make_shared<DataFile>(
163-
DataFile{.file_path = test_dir_prefix + paths[0],
164-
.file_format = FileFormatType::kParquet,
165-
.record_count = record_counts[0],
166-
.file_size_in_bytes = file_sizes[0],
167-
.column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}},
168-
.value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}},
169-
.null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}},
170-
.nan_value_counts = {{4, 0}},
171-
.lower_bounds = lower_bounds[0],
172-
.upper_bounds = upper_bounds[0],
173-
.key_metadata = {},
174-
.split_offsets = {4},
175-
.equality_ids = {},
176-
.sort_order_id = 0,
177-
.first_row_id = std::nullopt,
178-
.referenced_data_file = std::nullopt,
179-
.content_offset = std::nullopt,
180-
.content_size_in_bytes = std::nullopt})});
202+
.sequence_number = sequence_number,
203+
.file_sequence_number = sequence_number,
204+
.data_file = std::make_shared<DataFile>(data_file)});
181205
return manifest_entries;
182206
}
183207

184-
std::vector<ManifestEntry> prepareV2ManifestEntryMetadataInheritance() {
185-
std::vector<ManifestEntry> manifest_entries;
186-
std::string test_dir_prefix = "/tmp/db/db/v2_manifest_non_partitioned/data/";
187-
188-
std::vector<std::string> paths = {
189-
"00000-0-b0f98903-6d21-45fd-9e0b-afbd4963e365-0-00001.parquet"};
190-
191-
std::vector<int64_t> file_sizes = {1344};
192-
std::vector<int64_t> record_counts = {4};
193-
194-
std::vector<std::map<int32_t, std::vector<uint8_t>>> lower_bounds = {
195-
{{1, {0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
196-
{2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 'f', 'o', 'u', 'r'}},
197-
{3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '1'}},
198-
{4, {0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xdc, 0x5e, 0x40}}}};
199-
200-
std::vector<std::map<int32_t, std::vector<uint8_t>>> upper_bounds = {
201-
{{1, {0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}},
202-
{2, {'r', 'e', 'c', 'o', 'r', 'd', '_', 't', 'w', 'o'}},
203-
{3, {'d', 'a', 't', 'a', '_', 'c', 'o', 'n', 't', 'e', 'n', 't', '_', '4'}},
204-
{4, {0x14, 0xae, 0x47, 0xe1, 0x7a, 0x8c, 0x7c, 0x40}}}};
205-
206-
manifest_entries.emplace_back(
207-
ManifestEntry{.status = ManifestStatus::kAdded,
208-
.snapshot_id = 679879563479918846LL,
209-
.sequence_number = 15,
210-
.file_sequence_number = 15,
211-
.data_file = std::make_shared<DataFile>(
212-
DataFile{.file_path = test_dir_prefix + paths[0],
213-
.file_format = FileFormatType::kParquet,
214-
.record_count = record_counts[0],
215-
.file_size_in_bytes = file_sizes[0],
216-
.column_sizes = {{1, 56}, {2, 73}, {3, 66}, {4, 67}},
217-
.value_counts = {{1, 4}, {2, 4}, {3, 4}, {4, 4}},
218-
.null_value_counts = {{1, 0}, {2, 0}, {3, 0}, {4, 0}},
219-
.nan_value_counts = {{4, 0}},
220-
.lower_bounds = lower_bounds[0],
221-
.upper_bounds = upper_bounds[0],
222-
.key_metadata = {},
223-
.split_offsets = {4},
224-
.equality_ids = {},
225-
.sort_order_id = 0,
226-
.partition_spec_id = 12, // inherit from manifest
227-
.first_row_id = std::nullopt,
228-
.referenced_data_file = std::nullopt,
229-
.content_offset = std::nullopt,
230-
.content_size_in_bytes = std::nullopt})});
231-
return manifest_entries;
208+
// Expected entries for the non-partitioned V2 manifest test: calls
// CreateV2TestData() with its defaults, i.e. sequence_number and
// partition_spec_id both left as std::nullopt.
std::vector<ManifestEntry> PrepareNonPartitionedTestData() {
209+
return CreateV2TestData();
232210
}
233211

234-
std::shared_ptr<::arrow::fs::LocalFileSystem> local_fs_;
235-
std::shared_ptr<FileIO> file_io_;
212+
// Expected entries for the metadata-inheritance test: calls
// CreateV2TestData() with sequence_number = 15 and partition_spec_id = 12.
// NOTE(review): these presumably mirror the values set on the ManifestFile
// built in MetadataInheritanceTest - keep the two in sync.
std::vector<ManifestEntry> PrepareMetadataInheritanceTestData() {
213+
return CreateV2TestData(15, 12);
214+
}
236215
};
237216

238-
TEST_F(ManifestReaderV2Test, V2NonPartitionedBasicTest) {
239-
std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro");
240-
241-
auto manifest_reader_result = ManifestReader::Make(path, file_io_, nullptr);
242-
ASSERT_EQ(manifest_reader_result.has_value(), true)
243-
<< manifest_reader_result.error().message;
244-
245-
auto manifest_reader = std::move(manifest_reader_result.value());
246-
auto read_result = manifest_reader->Entries();
247-
ASSERT_EQ(read_result.has_value(), true) << read_result.error().message;
248-
ASSERT_EQ(read_result.value().size(), 1);
249-
250-
auto expected_entries = PrepareV2NonPartitionedManifestEntries();
251-
ASSERT_EQ(read_result.value(), expected_entries);
217+
// Reads the V2 manifest resource by path and compares the entries against
// PrepareNonPartitionedTestData(). No partition schema is passed, so
// TestManifestReading() uses its default of nullptr.
TEST_F(ManifestReaderV2Test, NonPartitionedTest) {
218+
auto expected_entries = PrepareNonPartitionedTestData();
219+
TestManifestReading("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro", expected_entries);
252220
}
253221

254-
TEST_F(ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest) {
222+
TEST_F(ManifestReaderV2Test, MetadataInheritanceTest) {
255223
std::string path = GetResourcePath("2ddf1bc9-830b-4015-aced-c060df36f150-m0.avro");
256224
ManifestFile manifest_file{
257225
.manifest_path = path,
@@ -261,17 +229,8 @@ TEST_F(ManifestReaderV2Test, V2ManifestEntryMetadataInheritanceTest) {
261229
.sequence_number = 15,
262230
.added_snapshot_id = 679879563479918846LL,
263231
};
264-
auto manifest_reader_result = ManifestReader::Make(manifest_file, file_io_, nullptr);
265-
ASSERT_EQ(manifest_reader_result.has_value(), true)
266-
<< manifest_reader_result.error().message;
267-
268-
auto manifest_reader = std::move(manifest_reader_result.value());
269-
auto read_result = manifest_reader->Entries();
270-
ASSERT_EQ(read_result.has_value(), true) << read_result.error().message;
271-
ASSERT_EQ(read_result.value().size(), 1);
272-
273-
auto expected_entries = prepareV2ManifestEntryMetadataInheritance();
274-
ASSERT_EQ(read_result.value(), expected_entries);
232+
auto expected_entries = PrepareMetadataInheritanceTestData();
233+
TestManifestReadingWithManifestFile(manifest_file, expected_entries);
275234
}
276235

277236
} // namespace iceberg

0 commit comments

Comments
 (0)