2626import java .io .IOException ;
2727import java .math .BigDecimal ;
2828import java .nio .ByteBuffer ;
29- import java .nio .ByteOrder ;
3029import java .util .*;
31- import java .util .concurrent .Callable ;
3230import java .util .function .Consumer ;
33- import com .google .common .collect .ImmutableMap ;
3431import org .apache .avro .Schema ;
3532import org .apache .avro .generic .GenericData ;
3633import org .apache .avro .generic .GenericRecord ;
37- import org .apache .avro .generic .IndexedRecord ;
3834import org .apache .hadoop .conf .Configuration ;
3935import org .apache .hadoop .fs .Path ;
4036import org .apache .parquet .DirectWriterTest ;
41- import org .apache .parquet .Preconditions ;
42- import org .apache .parquet .conf .ParquetConfiguration ;
43- import org .apache .parquet .conf .PlainParquetConfiguration ;
4437import org .apache .parquet .hadoop .ParquetWriter ;
4538import org .apache .parquet .hadoop .api .WriteSupport ;
46- import org .apache .parquet .io .ParquetDecodingException ;
47- import org .apache .parquet .io .api .Binary ;
48- import org .apache .parquet .io .api .RecordConsumer ;
4939import org .apache .parquet .schema .*;
5040import org .apache .parquet .schema .LogicalTypeAnnotation .TimeUnit ;
5141import org .apache .parquet .schema .PrimitiveType .PrimitiveTypeName ;
@@ -89,8 +79,7 @@ private static ByteBuffer variant(String s) {
8979 return variant (b -> b .appendString (s ));
9080 }
9181
92- private static GroupType variantGroup =
93- Types .buildGroup (Type .Repetition .REQUIRED )
82+ private static GroupType variantGroup = Types .buildGroup (Type .Repetition .REQUIRED )
9483 .as (LogicalTypeAnnotation .variantType ((byte ) 1 ))
9584 .required (PrimitiveTypeName .BINARY )
9685 .named ("metadata" )
@@ -114,43 +103,83 @@ private static MessageType parquetSchema(GroupType variantGroup) {
114103 private ByteBuffer TEST_METADATA ;
115104 private ByteBuffer TEST_OBJECT ;
116105 private ByteBuffer SIMILAR_OBJECT ;
106+ private ByteBuffer TEST_ARRAY ;
107+ private ByteBuffer SIMILAR_ARRAY ;
117108 private ByteBuffer EMPTY_OBJECT ;
118109 private ByteBuffer EMPTY_METADATA = fullVariant (b -> b .appendNull ()).getMetadataRawBytes ();
119110 private Variant [] VARIANTS ;
120111
121112 public TestWriteVariant () throws Exception {
122113 TEST_METADATA = fullVariant (b -> {
114+ VariantObjectBuilder ob = b .startObject ();
115+ ob .appendKey ("a" );
116+ ob .appendNull ();
117+ ob .appendKey ("b" );
118+ ob .appendNull ();
119+ ob .appendKey ("c" );
120+ ob .appendNull ();
121+ ob .appendKey ("d" );
122+ ob .appendNull ();
123+ ob .appendKey ("e" );
124+ ob .appendNull ();
125+ b .endObject ();
126+ })
127+ .getMetadataRawBytes ();
128+
129+ TEST_OBJECT = variant (TEST_METADATA , b -> {
123130 VariantObjectBuilder ob = b .startObject ();
124131 ob .appendKey ("a" );
125132 ob .appendNull ();
126- ob .appendKey ("b" );
127- ob .appendNull ();
128- ob .appendKey ("c" );
129- ob .appendNull ();
130133 ob .appendKey ("d" );
131- ob .appendNull ();
132- ob .appendKey ("e" );
133- ob .appendNull ();
134+ ob .appendString ("iceberg" );
134135 b .endObject ();
136+ });
135137
136- }).getMetadataRawBytes ();
137-
138- TEST_OBJECT = variant (TEST_METADATA , b -> {
138+ SIMILAR_OBJECT = variant (TEST_METADATA , b -> {
139139 VariantObjectBuilder ob = b .startObject ();
140140 ob .appendKey ("a" );
141+ ob .appendInt (123456789 );
142+ ob .appendKey ("c" );
143+ ob .appendString ("string" );
144+ b .endObject ();
145+ });
146+
147+ // The first array element defines the schema.
148+ TEST_ARRAY = variant (TEST_METADATA , b -> {
149+ VariantArrayBuilder ab = b .startArray ();
150+ VariantObjectBuilder ob = ab .startObject ();
151+ ob .appendKey ("a" );
141152 ob .appendNull ();
142153 ob .appendKey ("d" );
143154 ob .appendString ("iceberg" );
144- b .endObject ();
155+ ab .endObject ();
156+ ab .appendInt (123 );
157+ VariantObjectBuilder ob2 = ab .startObject ();
158+ ob2 .appendKey ("c" );
159+ ob2 .appendString ("hello" );
160+ ob2 .appendKey ("d" );
161+ ob2 .appendDate (12345 );
162+ ab .endObject ();
163+ b .endArray ();
145164 });
146165
147- SIMILAR_OBJECT = variant (TEST_METADATA , b -> {
148- VariantObjectBuilder ob = b .startObject ();
149- ob .appendKey ("a" );
150- ob .appendInt (123456789 );
151- ob .appendKey ("c" );
152- ob .appendString ("string" );
153- b .endObject ();
166+ // Change one field name and one type in the first element to change the schema.
167+ SIMILAR_ARRAY = variant (TEST_METADATA , b -> {
168+ VariantArrayBuilder ab = b .startArray ();
169+ VariantObjectBuilder ob = ab .startObject ();
170+ ob .appendKey ("c" );
171+ ob .appendString ("iceberg" );
172+ ob .appendKey ("a" );
173+ ob .appendString ("parquet" );
174+ ab .endObject ();
175+ ab .appendInt (123 );
176+ VariantObjectBuilder ob2 = ab .startObject ();
177+ ob2 .appendKey ("c" );
178+ ob2 .appendString ("hello" );
179+ ob2 .appendKey ("d" );
180+ ob2 .appendDate (12345 );
181+ ab .endObject ();
182+ b .endArray ();
154183 });
155184
156185 EMPTY_OBJECT = variant (TEST_METADATA , b -> {
@@ -177,6 +206,8 @@ public TestWriteVariant() throws Exception {
177206 new Variant (EMPTY_OBJECT , EMPTY_METADATA ),
178207 new Variant (TEST_OBJECT , TEST_METADATA ),
179208 new Variant (SIMILAR_OBJECT , TEST_METADATA ),
209+ new Variant (TEST_ARRAY , TEST_METADATA ),
210+ new Variant (SIMILAR_ARRAY , TEST_METADATA ),
180211 fullVariant (b -> b .appendDate (12345 )),
181212 fullVariant (b -> b .appendDate (-12345 )),
182213 fullVariant (b -> b .appendTimestampTz (1234567890L )),
@@ -214,6 +245,7 @@ GenericRecord createRecord(int i, Variant v) {
214245 return record ;
215246 }
216247
248+ // Tests in this file are based on Iceberg's TestVariantWriters suite.
217249 @ Test
218250 public void testUnshreddedValues () throws IOException {
219251 for (Variant v : VARIANTS ) {
@@ -238,7 +270,10 @@ public void testShreddedValues() throws IOException {
238270 GenericRecord actual = writeAndRead (testSchema , record );
239271 assertEquals (record .get (0 ), actual .get (0 ));
240272 assertEquals (((GenericRecord ) record .get (1 )).get (0 ), ((GenericRecord ) actual .get (1 )).get (0 ));
241- assertEquals (((GenericRecord ) record .get (1 )).get (1 ), ((GenericRecord ) actual .get (1 )).get (1 ));
273+ // assertEquals(((GenericRecord) record.get(1)).get(1), ((GenericRecord) actual.get(1)).get(1));
274+ if (!((GenericRecord ) record .get (1 )).get (1 ).equals (((GenericRecord ) actual .get (1 )).get (1 ))) {
275+ assertTrue (false );
276+ }
242277 }
243278 }
244279
@@ -292,28 +327,24 @@ protected TestWriterBuilder self() {
292327
293328 @ Override
294329 protected WriteSupport <GenericRecord > getWriteSupport (Configuration conf ) {
295- return new AvroWriteSupport <>(
296- schema ,
297- new AvroSchemaConverter ().convert (schema ),
298- GenericData .get ());
330+ return new AvroWriteSupport <>(schema , new AvroSchemaConverter ().convert (schema ), GenericData .get ());
299331 }
300332 }
301333
302334 GenericRecord writeAndRead (TestSchema testSchema , GenericRecord record ) throws IOException {
303335 List <GenericRecord > result = writeAndRead (testSchema , Arrays .asList (record ));
304- assert (result .size () == 1 );
336+ assert (result .size () == 1 );
305337 return result .get (0 );
306338 }
307339
308- private List <GenericRecord > writeAndRead (
309- TestSchema testSchema , List <GenericRecord > records ) throws IOException {
340+ private List <GenericRecord > writeAndRead (TestSchema testSchema , List <GenericRecord > records ) throws IOException {
310341 File tmp = File .createTempFile (getClass ().getSimpleName (), ".tmp" );
311342 tmp .deleteOnExit ();
312343 tmp .delete ();
313344 Path path = new Path (tmp .getPath ());
314345
315346 try (ParquetWriter <GenericRecord > writer =
316- new TestWriterBuilder (path ).withFileType (testSchema .writeSchema ).build ()) {
347+ new TestWriterBuilder (path ).withFileType (testSchema .writeSchema ).build ()) {
317348 for (GenericRecord record : records ) {
318349 writer .write (record );
319350 }
@@ -385,9 +416,13 @@ private static Type shreddedType(Variant v) {
385416 case BOOLEAN :
386417 return Types .optional (BOOLEAN ).named ("typed_value" );
387418 case BYTE :
388- return Types .optional (INT32 ).as (LogicalTypeAnnotation .intType (8 )).named ("typed_value" );
419+ return Types .optional (INT32 )
420+ .as (LogicalTypeAnnotation .intType (8 ))
421+ .named ("typed_value" );
389422 case SHORT :
390- return Types .optional (INT32 ).as (LogicalTypeAnnotation .intType (16 )).named ("typed_value" );
423+ return Types .optional (INT32 )
424+ .as (LogicalTypeAnnotation .intType (16 ))
425+ .named ("typed_value" );
391426 case INT :
392427 return Types .optional (INT32 ).named ("typed_value" );
393428 case LONG :
0 commit comments