diff --git a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java index 6d7ca398a4..5e6ac2b38c 100644 --- a/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java +++ b/parquet-avro/src/main/java/org/apache/parquet/avro/AvroReadSupport.java @@ -18,7 +18,9 @@ */ package org.apache.parquet.avro; +import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -32,6 +34,7 @@ import org.apache.parquet.hadoop.util.ConfigurationUtil; import org.apache.parquet.io.api.RecordMaterializer; import org.apache.parquet.schema.MessageType; +import org.apache.parquet.schema.Type; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -126,12 +129,19 @@ public ReadContext init(Configuration configuration, Map keyValu public ReadContext init( ParquetConfiguration configuration, Map keyValueMetaData, MessageType fileSchema) { MessageType projection = fileSchema; - Map metadata = new LinkedHashMap(); + Map metadata = new LinkedHashMap<>(); String requestedProjectionString = configuration.get(AVRO_REQUESTED_PROJECTION); if (requestedProjectionString != null) { Schema avroRequestedProjection = new Schema.Parser().parse(requestedProjectionString); - projection = new AvroSchemaConverter(configuration).convert(avroRequestedProjection); + List types = new ArrayList<>(); + for (Schema.Field field : avroRequestedProjection.getFields()) { + if (field.schema().getType().equals(Schema.Type.NULL)) { + continue; // Avro nulls are not encoded, unless they are null unions + } + types.add(fileSchema.getType(field.name())); + } + projection = new MessageType(avroRequestedProjection.getFullName(), types); } String avroReadSchema = configuration.get(AVRO_READ_SCHEMA); diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java b/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java index 0c8841b05c..7005fcc44f 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/BaseCommand.java @@ -285,6 +285,7 @@ public SeekableInput openSeekable(String filename) throws IOException { @Override public void setConf(Configuration conf) { + conf.setBoolean(AvroReadSupport.READ_INT96_AS_FIXED, true); this.conf = conf; HadoopFileSystemURLStreamHandler.setDefaultConf(conf); } diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/FileTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/FileTest.java index 6e031112ff..7b5ea817ea 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/FileTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/FileTest.java @@ -40,6 +40,7 @@ public abstract class FileTest { static final String BINARY_FIELD = "binary_field"; static final String FIXED_LEN_BYTE_ARRAY_FIELD = "flba_field"; static final String DATE_FIELD = "date_field"; + static final String INT96_FIELD = "int96_field"; static final String[] COLORS = {"RED", "BLUE", "YELLOW", "GREEN", "WHITE"}; diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ParquetFileTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ParquetFileTest.java index fe499ad338..4e4ad0364b 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ParquetFileTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/commands/ParquetFileTest.java @@ -75,6 +75,8 @@ private static MessageType createSchema() { .required(PrimitiveTypeName.INT32) .as(LogicalTypeAnnotation.dateType()) .named(DATE_FIELD) + .required(PrimitiveTypeName.INT96) + .named(INT96_FIELD) .named("schema"); } @@ -109,7 +111,8 @@ private void createTestParquetFile() throws IOException { .append(DOUBLE_FIELD, 2.0d + i) .append(BINARY_FIELD, Binary.fromString(COLORS[i % COLORS.length])) .append(FIXED_LEN_BYTE_ARRAY_FIELD, Binary.fromConstantByteArray(bytes)) - .append(DATE_FIELD, i)); + .append(DATE_FIELD, i) + .append(INT96_FIELD, Binary.fromConstantByteArray(bytes))); } } }