@@ -124,6 +124,19 @@ static constexpr int LARGE_SIZE = 10000;
124
124
125
125
// Default seed value (0). NOTE(review): presumably fed to the random data
// generators used by these tests; the use sites are not visible here, confirm.
static constexpr uint32_t kDefaultSeed = 0 ;
126
126
127
+ struct ListCase {
128
+ ::arrow::Type::type type_id;
129
+ std::function<std::shared_ptr<::arrow::DataType>(std::shared_ptr<::arrow::Field>)>
130
+ type_factory;
131
+ };
132
+
133
+ static const std::vector<ListCase> kListCases = {
134
+ {::arrow::Type::LIST,
135
+ [](std::shared_ptr<::arrow::Field> field) { return ::arrow::list (field); }},
136
+ {::arrow::Type::LARGE_LIST,
137
+ [](std::shared_ptr<::arrow::Field> field) { return ::arrow::large_list (field); }},
138
+ };
139
+
127
140
std::shared_ptr<const LogicalType> get_logical_type (const DataType& type) {
128
141
switch (type.id ()) {
129
142
case ArrowId::UINT8:
@@ -426,10 +439,13 @@ void CheckConfiguredRoundtrip(
426
439
const std::shared_ptr<::parquet::WriterProperties>& writer_properties =
427
440
::parquet::default_writer_properties (),
428
441
const std::shared_ptr<ArrowWriterProperties>& arrow_writer_properties =
429
- default_arrow_writer_properties()) {
442
+ default_arrow_writer_properties(),
443
+ const ArrowReaderProperties& arrow_reader_properties =
444
+ default_arrow_reader_properties()) {
430
445
std::shared_ptr<Table> actual_table;
431
446
ASSERT_NO_FATAL_FAILURE (DoRoundtrip (input_table, input_table->num_rows (), &actual_table,
432
- writer_properties, arrow_writer_properties));
447
+ writer_properties, arrow_writer_properties,
448
+ arrow_reader_properties));
433
449
if (expected_table) {
434
450
ASSERT_NO_FATAL_FAILURE (::arrow::AssertSchemaEqual (*actual_table->schema (),
435
451
*expected_table->schema (),
@@ -446,14 +462,18 @@ void CheckConfiguredRoundtrip(
446
462
void DoSimpleRoundtrip (const std::shared_ptr<Table>& table, bool use_threads,
447
463
int64_t row_group_size, const std::vector<int >& column_subset,
448
464
std::shared_ptr<Table>* out,
449
- const std::shared_ptr<ArrowWriterProperties>& arrow_properties =
450
- default_arrow_writer_properties ()) {
465
+ const std::shared_ptr<ArrowWriterProperties>&
466
+ arrow_writer_properties = default_arrow_writer_properties(),
467
+ const ArrowReaderProperties& arrow_reader_properties =
468
+ default_arrow_reader_properties()) {
451
469
std::shared_ptr<Buffer> buffer;
452
470
ASSERT_NO_FATAL_FAILURE (
453
- WriteTableToBuffer (table, row_group_size, arrow_properties , &buffer));
471
+ WriteTableToBuffer (table, row_group_size, arrow_writer_properties , &buffer));
454
472
455
- ASSERT_OK_AND_ASSIGN (auto reader, OpenFile (std::make_shared<BufferReader>(buffer),
456
- ::arrow::default_memory_pool ()));
473
+ std::unique_ptr<FileReader> reader;
474
+ FileReaderBuilder builder;
475
+ ASSERT_OK_NO_THROW (builder.Open (std::make_shared<BufferReader>(buffer)));
476
+ ASSERT_OK (builder.properties (arrow_reader_properties)->Build (&reader));
457
477
458
478
reader->set_use_threads (use_threads);
459
479
if (column_subset.size () > 0 ) {
@@ -468,15 +488,15 @@ void DoRoundTripWithBatches(
468
488
const std::shared_ptr<Table>& table, bool use_threads, int64_t row_group_size,
469
489
const std::vector<int >& column_subset, std::shared_ptr<Table>* out,
470
490
const std::shared_ptr<ArrowWriterProperties>& arrow_writer_properties =
471
- default_arrow_writer_properties ()) {
491
+ default_arrow_writer_properties (),
492
+ ArrowReaderProperties arrow_reader_properties = default_arrow_reader_properties()) {
472
493
std::shared_ptr<Buffer> buffer;
473
494
ASSERT_NO_FATAL_FAILURE (
474
495
WriteTableToBuffer (table, row_group_size, arrow_writer_properties, &buffer));
475
496
476
497
std::unique_ptr<FileReader> reader;
477
498
FileReaderBuilder builder;
478
499
ASSERT_OK_NO_THROW (builder.Open (std::make_shared<BufferReader>(buffer)));
479
- ArrowReaderProperties arrow_reader_properties;
480
500
arrow_reader_properties.set_batch_size (row_group_size - 1 );
481
501
ASSERT_OK_NO_THROW (builder.memory_pool (::arrow::default_memory_pool ())
482
502
->properties (arrow_reader_properties)
@@ -497,23 +517,24 @@ void DoRoundTripWithBatches(
497
517
ASSERT_OK_AND_ASSIGN (*out, Table::FromRecordBatchReader (batch_reader.get ()));
498
518
}
499
519
500
- void CheckSimpleRoundtrip (
501
- const std::shared_ptr<Table>& table, int64_t row_group_size,
502
- const std::shared_ptr<ArrowWriterProperties>& arrow_writer_properties =
503
- default_arrow_writer_properties ()) {
520
+ void CheckSimpleRoundtrip (const std::shared_ptr<Table>& table, int64_t row_group_size,
521
+ const std::shared_ptr<ArrowWriterProperties>&
522
+ arrow_writer_properties = default_arrow_writer_properties(),
523
+ const ArrowReaderProperties& arrow_reader_properties =
524
+ default_arrow_reader_properties()) {
504
525
std::shared_ptr<Table> result;
505
- ASSERT_NO_FATAL_FAILURE (DoSimpleRoundtrip (table, false /* use_threads */ ,
506
- row_group_size, {}, &result,
507
- arrow_writer_properties));
526
+ ASSERT_NO_FATAL_FAILURE (
527
+ DoSimpleRoundtrip (table, false /* use_threads */ , row_group_size, {}, &result,
528
+ arrow_writer_properties, arrow_reader_properties ));
508
529
::arrow::AssertSchemaEqual (*table->schema (), *result->schema(),
509
530
/* check_metadata=*/ false);
510
531
ASSERT_OK (result->ValidateFull ());
511
532
512
533
::arrow::AssertTablesEqual (*table, *result, false );
513
534
514
- ASSERT_NO_FATAL_FAILURE (DoRoundTripWithBatches(table, false /* use_threads */ ,
515
- row_group_size, {}, &result,
516
- arrow_writer_properties));
535
+ ASSERT_NO_FATAL_FAILURE (
536
+ DoRoundTripWithBatches (table, false /* use_threads */ , row_group_size, {}, &result,
537
+ arrow_writer_properties, arrow_reader_properties ));
517
538
::arrow::AssertSchemaEqual (*table->schema (), *result->schema(),
518
539
/* check_metadata=*/ false);
519
540
ASSERT_OK (result->ValidateFull ());
@@ -3198,8 +3219,22 @@ TEST(ArrowReadWrite, LargeList) {
3198
3219
[7, 8, 9]])" ;
3199
3220
auto array = ::arrow::ArrayFromJSON (type, json);
3200
3221
auto table = ::arrow::Table::Make (::arrow::schema ({field (" root" , type)}), {array});
3201
- auto props_store_schema = ArrowWriterProperties::Builder ().store_schema ()->build ();
3202
- CheckSimpleRoundtrip (table, 2 , props_store_schema);
3222
+ {
3223
+ // If the schema is stored, the large_list is restored regardless of
3224
+ // the list_type setting
3225
+ for (auto list_type : {::arrow::Type::LIST, ::arrow::Type::LARGE_LIST}) {
3226
+ ArrowReaderProperties reader_props;
3227
+ reader_props.set_list_type (list_type);
3228
+ auto writer_props = ArrowWriterProperties::Builder ().store_schema ()->build ();
3229
+ CheckSimpleRoundtrip (table, 2 , writer_props, reader_props);
3230
+ }
3231
+ }
3232
+ {
3233
+ // If the schema is not stored, large_list is read depending on the list_type setting
3234
+ ArrowReaderProperties reader_props;
3235
+ reader_props.set_list_type (::arrow::Type::LARGE_LIST);
3236
+ CheckSimpleRoundtrip (table, 2 , default_arrow_writer_properties (), reader_props);
3237
+ }
3203
3238
}
3204
3239
3205
3240
TEST (ArrowReadWrite, FixedSizeList) {
@@ -3224,20 +3259,25 @@ TEST(ArrowReadWrite, ListOfStructOfList2) {
3224
3259
using ::arrow::list;
3225
3260
using ::arrow::struct_;
3226
3261
3227
- auto type =
3228
- list (field (" item" ,
3229
- struct_ ({field (" a" , ::arrow::int16 (), /* nullable=*/ false ),
3230
- field (" b" , list (::arrow::int64 ()), /* nullable=*/ false )}),
3231
- /* nullable=*/ false ));
3232
-
3233
- const char * json = R"( [
3234
- [{"a": 123, "b": [1, 2, 3]}],
3235
- null,
3236
- [],
3237
- [{"a": 456, "b": []}, {"a": 789, "b": [null]}, {"a": 876, "b": [4, 5, 6]}]])" ;
3238
- auto array = ::arrow::ArrayFromJSON (type, json);
3239
- auto table = ::arrow::Table::Make (::arrow::schema ({field (" root" , type)}), {array});
3240
- CheckSimpleRoundtrip (table, 2 );
3262
+ for (const auto & list_case : kListCases ) {
3263
+ auto type = list_case.type_factory (
3264
+ field (" item" ,
3265
+ struct_ ({field (" a" , ::arrow::int16 (), /* nullable=*/ false ),
3266
+ field (" b" , list_case.type_factory (field (" item" , ::arrow::int64 ())),
3267
+ /* nullable=*/ false )}),
3268
+ /* nullable=*/ false ));
3269
+
3270
+ const char * json = R"( [
3271
+ [{"a": 123, "b": [1, 2, 3]}],
3272
+ null,
3273
+ [],
3274
+ [{"a": 456, "b": []}, {"a": 789, "b": [null]}, {"a": 876, "b": [4, 5, 6]}]])" ;
3275
+ auto array = ::arrow::ArrayFromJSON (type, json);
3276
+ auto table = ::arrow::Table::Make (::arrow::schema ({field (" root" , type)}), {array});
3277
+ ArrowReaderProperties reader_props;
3278
+ reader_props.set_list_type (list_case.type_id );
3279
+ CheckSimpleRoundtrip (table, 2 , default_arrow_writer_properties (), reader_props);
3280
+ }
3241
3281
}
3242
3282
3243
3283
TEST (ArrowReadWrite, StructOfLists) {
0 commit comments