@@ -123,6 +123,31 @@ class ParquetReaderTest : public ::testing::Test {
123123 array, {.path = temp_parquet_file_, .schema = schema, .io = file_io_}));
124124 }
125125
126+ void CreateSplitParquetFile () {
127+ const std::string kParquetFieldIdKey = " PARQUET:field_id" ;
128+ auto arrow_schema = ::arrow::schema (
129+ {::arrow::field (" id" , ::arrow::int32 (), /* nullable=*/ false ,
130+ ::arrow::KeyValueMetadata::Make ({kParquetFieldIdKey }, {" 1" })),
131+ ::arrow::field(" name" , ::arrow::utf8(), /* nullable=*/ true,
132+ ::arrow::KeyValueMetadata::Make({kParquetFieldIdKey }, {" 2" }))});
133+ auto table = ::arrow::Table::FromRecordBatches (
134+ arrow_schema, {::arrow::RecordBatch::FromStructArray (
135+ ::arrow::json::ArrayFromJSONString (
136+ ::arrow::struct_ (arrow_schema->fields ()),
137+ R"([[1 , " Foo" ], [2 , " Bar" ], [3 , " Baz" ]])")
138+ .ValueOrDie())
139+ .ValueOrDie()})
140+ .ValueOrDie();
141+
142+ auto io = internal::checked_cast<arrow::ArrowFileSystemFileIO&>(*file_io_);
143+ auto outfile = io.fs ()->OpenOutputStream (temp_parquet_file_).ValueOrDie ();
144+
145+ ASSERT_TRUE (::parquet::arrow::WriteTable (*table, ::arrow::default_memory_pool (),
146+ outfile, /* chunk_size=*/ 2 )
147+ .ok ());
148+ ASSERT_TRUE (outfile->Close ().ok ());
149+ }
150+
126151 void VerifyNextBatch (Reader& reader, std::string_view expected_json) {
127152 // Boilerplate to get Arrow schema
128153 auto schema_result = reader.Schema ();
@@ -213,7 +238,7 @@ TEST_F(ParquetReaderTest, ReadWithBatchSize) {
213238}
214239
215240TEST_F (ParquetReaderTest, ReadSplit) {
216- CreateSimpleParquetFile ();
241+ CreateSplitParquetFile ();
217242
218243 // Read split offsets
219244 auto io = internal::checked_cast<arrow::ArrowFileSystemFileIO&>(*file_io_);
0 commit comments