
Commit dfc025e

GH-3213: Add the configuration for ByteStreamSplit encoding (#3214)
1 parent 2bcd2bd commit dfc025e

File tree

2 files changed (+43, -0)

parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java

Lines changed: 5 additions & 0 deletions
@@ -700,6 +700,11 @@ public SELF withByteStreamSplitEncoding(boolean enableByteStreamSplit) {
       return self();
     }
 
+    public SELF withByteStreamSplitEncoding(String columnPath, boolean enableByteStreamSplit) {
+      encodingPropsBuilder.withByteStreamSplitEncoding(columnPath, enableByteStreamSplit);
+      return self();
+    }
+
     /**
      * Enable or disable dictionary encoding of the specified column for the constructed writer.
      *
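
The new overload scopes the existing writer-wide ByteStreamSplit toggle to a single column path. A minimal usage sketch, following the ExampleParquetWriter pattern used in the test below ("float_field", schema, and path are placeholders, not part of this commit):

    // Sketch: enable BYTE_STREAM_SPLIT for one column only, leaving the
    // writer-wide default untouched for every other column.
    ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
        .withType(schema)
        .withByteStreamSplitEncoding("float_field", true) // new per-column overload
        .build();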

parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java

Lines changed: 38 additions & 0 deletions
@@ -626,6 +626,44 @@ public void testSizeStatisticsAndStatisticsControl() throws Exception {
     }
   }
 
+  @Test
+  public void testByteStreamSplitEncodingControl() throws Exception {
+    MessageType schema = Types.buildMessage()
+        .required(FLOAT)
+        .named("float_field")
+        .required(INT32)
+        .named("int32_field")
+        .named("test_schema");
+
+    File file = temp.newFile();
+    temp.delete();
+
+    Path path = new Path(file.getAbsolutePath());
+    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
+    try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
+        .withType(schema)
+        .withByteStreamSplitEncoding(true)
+        .withByteStreamSplitEncoding("int32_field", true)
+        .build()) {
+      writer.write(factory.newGroup().append("float_field", 0.3f).append("int32_field", 42));
+    }
+
+    try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
+      for (BlockMetaData block : reader.getFooter().getBlocks()) {
+        for (ColumnChunkMetaData column : block.getColumns()) {
+          assertTrue(column.getEncodings().contains(Encoding.BYTE_STREAM_SPLIT));
+        }
+      }
+    }
+
+    try (ParquetReader<Group> reader =
+        ParquetReader.builder(new GroupReadSupport(), path).build()) {
+      Group group = reader.read();
+      assertEquals(0.3f, group.getFloat("float_field", 0), 0.0);
+      assertEquals(42, group.getInteger("int32_field", 0));
+    }
+  }
+
   @Test
   public void testV2WriteAllNullValues() throws Exception {
     testV2WriteAllNullValues(null, null);
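
The test enables the writer-wide toggle plus the per-column override for int32_field, then asserts that every column chunk carries Encoding.BYTE_STREAM_SPLIT and that the written values round-trip. For background, BYTE_STREAM_SPLIT scatters the K bytes of each fixed-width value into K separate streams so that later compression sees more homogeneous byte runs. A minimal illustrative sketch of the transform for floats (an assumption-level illustration, not the library's internal implementation):

    // Illustrative only: split floats into Float.BYTES streams, where stream b
    // holds byte b (little-endian) of every value, as BYTE_STREAM_SPLIT does.
    static byte[] byteStreamSplit(float[] values) {
      byte[] out = new byte[values.length * Float.BYTES];
      for (int i = 0; i < values.length; i++) {
        int bits = Float.floatToIntBits(values[i]);
        for (int b = 0; b < Float.BYTES; b++) {
          // Byte b of value i lands at position i within stream b.
          out[b * values.length + i] = (byte) (bits >>> (8 * b));
        }
      }
      return out;
    }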
