Skip to content

Commit 05e9e40

Browse files
committed
add test
1 parent 46233c4 commit 05e9e40

File tree

2 files changed

+65
-4
lines changed

2 files changed

+65
-4
lines changed

test/avro_test.cc

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,16 @@
2828
#include <avro/Generic.hh>
2929
#include <avro/GenericDatum.hh>
3030
#include <gtest/gtest.h>
31+
#include <iceberg/util/checked_cast.h>
3132

3233
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
34+
#include "iceberg/arrow_array_reader.h"
3335
#include "iceberg/avro/avro_register.h"
36+
#include "iceberg/file_format.h"
3437
#include "iceberg/file_reader.h"
38+
#include "iceberg/manifest_entry.h"
3539
#include "iceberg/schema.h"
40+
#include "iceberg/table_scan.h"
3641
#include "iceberg/type.h"
3742
#include "matchers.h"
3843
#include "temp_file_test_base.h"
@@ -77,7 +82,7 @@ class AvroReaderTest : public TempFileTestBase {
7782
writer.close();
7883
}
7984

80-
void VerifyNextBatch(Reader& reader, std::string_view expected_json) {
85+
void VerifyNextBatch(ArrowArrayReader& reader, std::string_view expected_json) {
8186
// Boilerplate to get Arrow schema
8287
auto schema_result = reader.Schema();
8388
ASSERT_THAT(schema_result, IsOk());
@@ -99,7 +104,7 @@ class AvroReaderTest : public TempFileTestBase {
99104
ASSERT_TRUE(arrow_array->Equals(*expected_array));
100105
}
101106

102-
void VerifyExhausted(Reader& reader) {
107+
void VerifyExhausted(ArrowArrayReader& reader) {
103108
auto data = reader.Next();
104109
ASSERT_THAT(data, IsOk());
105110
ASSERT_FALSE(data.value().has_value());
@@ -161,4 +166,30 @@ TEST_F(AvroReaderTest, ReadWithBatchSize) {
161166
ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader));
162167
}
163168

169+
TEST_F(AvroReaderTest, ReadViaFileScanTask) {
170+
CreateSimpleAvroFile();
171+
172+
auto data_file = std::make_shared<DataFile>();
173+
data_file->file_path = temp_avro_file_;
174+
data_file->file_format = FileFormatType::kAvro;
175+
auto arrow_file_io = internal::checked_cast<arrow::ArrowFileSystemFileIO&>(*file_io_);
176+
data_file->file_size_in_bytes =
177+
arrow_file_io.fs()->GetFileInfo(temp_avro_file_).ValueOrDie().size();
178+
179+
TableScanContext context;
180+
context.projected_schema = std::make_shared<Schema>(std::vector<SchemaField>{
181+
SchemaField::MakeOptional(2, "name", std::make_shared<StringType>()),
182+
SchemaField::MakeRequired(1, "id", std::make_shared<IntType>())});
183+
184+
FileScanTask task(data_file);
185+
186+
auto reader_result = task.ToArrowArrayReader(context, file_io_);
187+
ASSERT_THAT(reader_result, IsOk());
188+
auto reader = std::move(reader_result).value();
189+
190+
ASSERT_NO_FATAL_FAILURE(
191+
VerifyNextBatch(*reader, R"([["Alice", 1], ["Bob", 2], ["Charlie", 3]])"));
192+
ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader));
193+
}
194+
164195
} // namespace iceberg::avro

test/parquet_test.cc

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,13 @@
2828
#include <parquet/metadata.h>
2929

3030
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
31+
#include "iceberg/arrow_array_reader.h"
32+
#include "iceberg/file_format.h"
33+
#include "iceberg/manifest_entry.h"
3134
#include "iceberg/parquet/parquet_reader.h"
3235
#include "iceberg/parquet/parquet_register.h"
3336
#include "iceberg/schema.h"
37+
#include "iceberg/table_scan.h"
3438
#include "iceberg/type.h"
3539
#include "iceberg/util/checked_cast.h"
3640
#include "matchers.h"
@@ -72,7 +76,7 @@ class ParquetReaderTest : public TempFileTestBase {
7276
.ok());
7377
}
7478

75-
void VerifyNextBatch(Reader& reader, std::string_view expected_json) {
79+
void VerifyNextBatch(ArrowArrayReader& reader, std::string_view expected_json) {
7680
// Boilerplate to get Arrow schema
7781
auto schema_result = reader.Schema();
7882
ASSERT_THAT(schema_result, IsOk());
@@ -94,7 +98,7 @@ class ParquetReaderTest : public TempFileTestBase {
9498
ASSERT_TRUE(arrow_array->Equals(*expected_array));
9599
}
96100

97-
void VerifyExhausted(Reader& reader) {
101+
void VerifyExhausted(ArrowArrayReader& reader) {
98102
auto data = reader.Next();
99103
ASSERT_THAT(data, IsOk());
100104
ASSERT_FALSE(data.value().has_value());
@@ -204,4 +208,30 @@ TEST_F(ParquetReaderTest, ReadSplit) {
204208
}
205209
}
206210

211+
TEST_F(ParquetReaderTest, ReadViaFileScanTask) {
212+
CreateSimpleParquetFile();
213+
214+
auto data_file = std::make_shared<DataFile>();
215+
data_file->file_path = temp_parquet_file_;
216+
data_file->file_format = FileFormatType::kParquet;
217+
auto io_internal = internal::checked_cast<arrow::ArrowFileSystemFileIO&>(*file_io_);
218+
data_file->file_size_in_bytes =
219+
io_internal.fs()->GetFileInfo(temp_parquet_file_).ValueOrDie().size();
220+
221+
TableScanContext context;
222+
context.projected_schema = std::make_shared<Schema>(
223+
std::vector<SchemaField>{SchemaField::MakeOptional(2, "name", string()),
224+
SchemaField::MakeRequired(1, "id", int32())});
225+
226+
FileScanTask task(data_file);
227+
228+
auto reader_result = task.ToArrowArrayReader(context, file_io_);
229+
ASSERT_THAT(reader_result, IsOk());
230+
auto reader = std::move(reader_result.value());
231+
232+
ASSERT_NO_FATAL_FAILURE(
233+
VerifyNextBatch(*reader, R"([["Foo", 1], ["Bar", 2], ["Baz", 3]])"));
234+
ASSERT_NO_FATAL_FAILURE(VerifyExhausted(*reader));
235+
}
236+
207237
} // namespace iceberg::parquet

0 commit comments

Comments
 (0)