Skip to content

Commit cebcf37

Browse files
committed
Test out of order value and metadata
1 parent cc0b38f commit cebcf37

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

parquet-avro/src/test/java/org/apache/parquet/avro/TestReadVariant.java

Lines changed: 20 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -54,13 +54,9 @@
5454
import org.apache.parquet.io.ParquetDecodingException;
5555
import org.apache.parquet.io.api.Binary;
5656
import org.apache.parquet.io.api.RecordConsumer;
57-
import org.apache.parquet.schema.GroupType;
58-
import org.apache.parquet.schema.LogicalTypeAnnotation;
57+
import org.apache.parquet.schema.*;
5958
import org.apache.parquet.schema.LogicalTypeAnnotation.TimeUnit;
60-
import org.apache.parquet.schema.MessageType;
6159
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
62-
import org.apache.parquet.schema.Type;
63-
import org.apache.parquet.schema.Types;
6460
import org.apache.parquet.variant.ImmutableMetadata;
6561
import org.apache.parquet.variant.Variant;
6662
import org.apache.parquet.variant.VariantArrayBuilder;
@@ -253,23 +249,26 @@ public void testUnshredded() throws Exception {
253249
});
254250
Binary expectedValue = Binary.fromConstantByteBuffer(testValue.getValueBuffer());
255251
Binary expectedMetadata = Binary.fromConstantByteBuffer(testValue.getMetadataBuffer());
252+
// Test with value before metadata in the schema: Spark's initial implementation wrote in this
253+
// order. The read schema (set below) requires metadata before value, but the order in the file
254+
// schema shouldn't matter.
256255
Path test = writeDirect(
257256
"message VariantMessage {" + " required group v (VARIANT(1)) {"
258-
+ " required binary metadata;"
259257
+ " required binary value;"
258+
+ " required binary metadata;"
260259
+ " }"
261260
+ "}",
262261
rc -> {
263262
rc.startMessage();
264263
rc.startField("v", 0);
265264

266265
rc.startGroup();
267-
rc.startField("metadata", 0);
268-
rc.addBinary(expectedMetadata);
269-
rc.endField("metadata", 0);
270-
rc.startField("value", 1);
266+
rc.startField("value", 0);
271267
rc.addBinary(expectedValue);
272-
rc.endField("value", 1);
268+
rc.endField("value", 0);
269+
rc.startField("metadata", 1);
270+
rc.addBinary(expectedMetadata);
271+
rc.endField("metadata", 1);
273272
rc.endGroup();
274273

275274
rc.endField("v", 0);
@@ -292,8 +291,17 @@ public void testUnshredded() throws Exception {
292291
"value",
293292
expectedValue.toByteBuffer()));
294293

294+
MessageType readSchema =
295+
MessageTypeParser.parseMessageType("message VariantMessage {" + " required group v (VARIANT(1)) {"
296+
+ " required binary metadata;"
297+
+ " required binary value;"
298+
+ " }"
299+
+ "}");
300+
Configuration conf = new Configuration();
301+
AvroReadSupport.setRequestedProjection(conf, avroSchema(readSchema));
302+
295303
// both should behave the same way
296-
assertReaderContains(new AvroParquetReader(new Configuration(), test), expectedSchema, expectedRecord);
304+
assertReaderContains(new AvroParquetReader(conf, test), expectedSchema, expectedRecord);
297305
}
298306

299307
/**

0 commit comments

Comments
 (0)