5454import org .apache .parquet .io .ParquetDecodingException ;
5555import org .apache .parquet .io .api .Binary ;
5656import org .apache .parquet .io .api .RecordConsumer ;
57- import org .apache .parquet .schema .GroupType ;
58- import org .apache .parquet .schema .LogicalTypeAnnotation ;
57+ import org .apache .parquet .schema .*;
5958import org .apache .parquet .schema .LogicalTypeAnnotation .TimeUnit ;
60- import org .apache .parquet .schema .MessageType ;
6159import org .apache .parquet .schema .PrimitiveType .PrimitiveTypeName ;
62- import org .apache .parquet .schema .Type ;
63- import org .apache .parquet .schema .Types ;
6460import org .apache .parquet .variant .ImmutableMetadata ;
6561import org .apache .parquet .variant .Variant ;
6662import org .apache .parquet .variant .VariantArrayBuilder ;
@@ -253,23 +249,26 @@ public void testUnshredded() throws Exception {
253249 });
254250 Binary expectedValue = Binary .fromConstantByteBuffer (testValue .getValueBuffer ());
255251 Binary expectedMetadata = Binary .fromConstantByteBuffer (testValue .getMetadataBuffer ());
252+ // Test with value before metadata in the schema: Spark's initial implementation wrote in this
253+ // order. The read schema (set below) requires metadata before value, but the order in the file
254+ // schema shouldn't matter.
256255 Path test = writeDirect (
257256 "message VariantMessage {" + " required group v (VARIANT(1)) {"
258- + " required binary metadata;"
259257 + " required binary value;"
258+ + " required binary metadata;"
260259 + " }"
261260 + "}" ,
262261 rc -> {
263262 rc .startMessage ();
264263 rc .startField ("v" , 0 );
265264
266265 rc .startGroup ();
267- rc .startField ("metadata" , 0 );
268- rc .addBinary (expectedMetadata );
269- rc .endField ("metadata" , 0 );
270- rc .startField ("value" , 1 );
266+ rc .startField ("value" , 0 );
271267 rc .addBinary (expectedValue );
272- rc .endField ("value" , 1 );
268+ rc .endField ("value" , 0 );
269+ rc .startField ("metadata" , 1 );
270+ rc .addBinary (expectedMetadata );
271+ rc .endField ("metadata" , 1 );
273272 rc .endGroup ();
274273
275274 rc .endField ("v" , 0 );
@@ -292,8 +291,17 @@ public void testUnshredded() throws Exception {
292291 "value" ,
293292 expectedValue .toByteBuffer ()));
294293
294+ MessageType readSchema =
295+ MessageTypeParser .parseMessageType ("message VariantMessage {" + " required group v (VARIANT(1)) {"
296+ + " required binary metadata;"
297+ + " required binary value;"
298+ + " }"
299+ + "}" );
300+ Configuration conf = new Configuration ();
301+ AvroReadSupport .setRequestedProjection (conf , avroSchema (readSchema ));
302+
295303 // both should behave the same way
296- assertReaderContains (new AvroParquetReader (new Configuration () , test ), expectedSchema , expectedRecord );
304+ assertReaderContains (new AvroParquetReader (conf , test ), expectedSchema , expectedRecord );
297305 }
298306
299307 /**
0 commit comments