Skip to content

Commit 3fdef44

Browse files
committed
Merge remote-tracking branch 'apache/master' into feature-apache-parquet-2417-geospatial
2 parents f6610e7 + 00b6bab commit 3fdef44

File tree

7 files changed

+31
-35
lines changed

7 files changed

+31
-35
lines changed

parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -708,6 +708,7 @@ public Builder withStatisticsEnabled(String columnPath, boolean enabled) {
708708
}
709709

710710
public Builder withStatisticsEnabled(boolean enabled) {
711+
this.statistics.withDefaultValue(enabled);
711712
this.statisticsEnabled = enabled;
712713
return this;
713714
}

parquet-format-structures/src/main/java/org/apache/parquet/format/LogicalTypes.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,4 +53,5 @@ public static LogicalType DECIMAL(int scale, int precision) {
5353
public static final LogicalType JSON = LogicalType.JSON(new JsonType());
5454
public static final LogicalType BSON = LogicalType.BSON(new BsonType());
5555
public static final LogicalType FLOAT16 = LogicalType.FLOAT16(new Float16Type());
56+
public static final LogicalType UUID = LogicalType.UUID(new UUIDType());
5657
}

parquet-hadoop/pom.xml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,14 @@
236236
<arch>!aarch64</arch>
237237
</os>
238238
</activation>
239+
<repositories>
240+
<repository>
241+
<id>jitpack.io</id>
242+
<url>https://jitpack.io</url>
243+
<name>Jitpack.io repository</name>
244+
<!-- needed for brotli-codec -->
245+
</repository>
246+
</repositories>
239247
<dependencies>
240248
<dependency>
241249
<groupId>com.github.rdblue</groupId>

parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java

Lines changed: 14 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,6 @@
6767
import org.apache.parquet.format.BloomFilterHeader;
6868
import org.apache.parquet.format.BoundaryOrder;
6969
import org.apache.parquet.format.BoundingBox;
70-
import org.apache.parquet.format.BsonType;
7170
import org.apache.parquet.format.ColumnChunk;
7271
import org.apache.parquet.format.ColumnCryptoMetaData;
7372
import org.apache.parquet.format.ColumnIndex;
@@ -77,29 +76,23 @@
7776
import org.apache.parquet.format.ConvertedType;
7877
import org.apache.parquet.format.DataPageHeader;
7978
import org.apache.parquet.format.DataPageHeaderV2;
80-
import org.apache.parquet.format.DateType;
8179
import org.apache.parquet.format.DecimalType;
8280
import org.apache.parquet.format.DictionaryPageHeader;
8381
import org.apache.parquet.format.EdgeInterpolationAlgorithm;
8482
import org.apache.parquet.format.Encoding;
8583
import org.apache.parquet.format.EncryptionWithColumnKey;
86-
import org.apache.parquet.format.EnumType;
8784
import org.apache.parquet.format.FieldRepetitionType;
8885
import org.apache.parquet.format.FileMetaData;
89-
import org.apache.parquet.format.Float16Type;
9086
import org.apache.parquet.format.GeographyType;
9187
import org.apache.parquet.format.GeometryType;
9288
import org.apache.parquet.format.GeospatialStatistics;
9389
import org.apache.parquet.format.IntType;
94-
import org.apache.parquet.format.JsonType;
9590
import org.apache.parquet.format.KeyValue;
96-
import org.apache.parquet.format.ListType;
9791
import org.apache.parquet.format.LogicalType;
98-
import org.apache.parquet.format.MapType;
92+
import org.apache.parquet.format.LogicalTypes;
9993
import org.apache.parquet.format.MicroSeconds;
10094
import org.apache.parquet.format.MilliSeconds;
10195
import org.apache.parquet.format.NanoSeconds;
102-
import org.apache.parquet.format.NullType;
10396
import org.apache.parquet.format.OffsetIndex;
10497
import org.apache.parquet.format.PageEncodingStats;
10598
import org.apache.parquet.format.PageHeader;
@@ -110,13 +103,11 @@
110103
import org.apache.parquet.format.SizeStatistics;
111104
import org.apache.parquet.format.SplitBlockAlgorithm;
112105
import org.apache.parquet.format.Statistics;
113-
import org.apache.parquet.format.StringType;
114106
import org.apache.parquet.format.TimeType;
115107
import org.apache.parquet.format.TimeUnit;
116108
import org.apache.parquet.format.TimestampType;
117109
import org.apache.parquet.format.Type;
118110
import org.apache.parquet.format.TypeDefinedOrder;
119-
import org.apache.parquet.format.UUIDType;
120111
import org.apache.parquet.format.Uncompressed;
121112
import org.apache.parquet.format.XxHash;
122113
import org.apache.parquet.hadoop.metadata.BlockMetaData;
@@ -455,33 +446,32 @@ private static class LogicalTypeConverterVisitor
455446
implements LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<LogicalType> {
456447
@Override
457448
public Optional<LogicalType> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringLogicalType) {
458-
return of(LogicalType.STRING(new StringType()));
449+
return of(LogicalTypes.UTF8);
459450
}
460451

461452
@Override
462453
public Optional<LogicalType> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapLogicalType) {
463-
return of(LogicalType.MAP(new MapType()));
454+
return of(LogicalTypes.MAP);
464455
}
465456

466457
@Override
467458
public Optional<LogicalType> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listLogicalType) {
468-
return of(LogicalType.LIST(new ListType()));
459+
return of(LogicalTypes.LIST);
469460
}
470461

471462
@Override
472463
public Optional<LogicalType> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumLogicalType) {
473-
return of(LogicalType.ENUM(new EnumType()));
464+
return of(LogicalTypes.ENUM);
474465
}
475466

476467
@Override
477468
public Optional<LogicalType> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalLogicalType) {
478-
return of(LogicalType.DECIMAL(
479-
new DecimalType(decimalLogicalType.getScale(), decimalLogicalType.getPrecision())));
469+
return of(LogicalTypes.DECIMAL(decimalLogicalType.getScale(), decimalLogicalType.getPrecision()));
480470
}
481471

482472
@Override
483473
public Optional<LogicalType> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateLogicalType) {
484-
return of(LogicalType.DATE(new DateType()));
474+
return of(LogicalTypes.DATE);
485475
}
486476

487477
@Override
@@ -503,32 +493,32 @@ public Optional<LogicalType> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotatio
503493

504494
@Override
505495
public Optional<LogicalType> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonLogicalType) {
506-
return of(LogicalType.JSON(new JsonType()));
496+
return of(LogicalTypes.JSON);
507497
}
508498

509499
@Override
510500
public Optional<LogicalType> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonLogicalType) {
511-
return of(LogicalType.BSON(new BsonType()));
501+
return of(LogicalTypes.BSON);
512502
}
513503

514504
@Override
515505
public Optional<LogicalType> visit(UUIDLogicalTypeAnnotation uuidLogicalType) {
516-
return of(LogicalType.UUID(new UUIDType()));
506+
return of(LogicalTypes.UUID);
517507
}
518508

519509
@Override
520510
public Optional<LogicalType> visit(LogicalTypeAnnotation.Float16LogicalTypeAnnotation float16LogicalType) {
521-
return of(LogicalType.FLOAT16(new Float16Type()));
511+
return of(LogicalTypes.FLOAT16);
522512
}
523513

524514
@Override
525-
public Optional<LogicalType> visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation intervalLogicalType) {
526-
return of(LogicalType.UNKNOWN(new NullType()));
515+
public Optional<LogicalType> visit(LogicalTypeAnnotation.UnknownLogicalTypeAnnotation unknownLogicalType) {
516+
return of(LogicalTypes.UNKNOWN);
527517
}
528518

529519
@Override
530520
public Optional<LogicalType> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalLogicalType) {
531-
return of(LogicalType.UNKNOWN(new NullType()));
521+
return of(LogicalTypes.UNKNOWN);
532522
}
533523

534524
@Override

parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -543,7 +543,7 @@ private void testParquetFileNumberOfBlocks(
543543
}
544544

545545
@Test
546-
public void testSizeStatisticsControl() throws Exception {
546+
public void testSizeStatisticsAndStatisticsControl() throws Exception {
547547
MessageType schema = Types.buildMessage()
548548
.required(BINARY)
549549
.named("string_field")
@@ -568,6 +568,7 @@ public void testSizeStatisticsControl() throws Exception {
568568
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
569569
.withType(schema)
570570
.withSizeStatisticsEnabled(false)
571+
.withStatisticsEnabled(false) // Disable column statistics globally
571572
.build()) {
572573
writer.write(group);
573574
}
@@ -576,6 +577,7 @@ public void testSizeStatisticsControl() throws Exception {
576577
// Verify size statistics are disabled globally
577578
for (BlockMetaData block : reader.getFooter().getBlocks()) {
578579
for (ColumnChunkMetaData column : block.getColumns()) {
580+
assertTrue(column.getStatistics().isEmpty()); // Make sure there is no column statistics
579581
assertNull(column.getSizeStatistics());
580582
}
581583
}
@@ -589,6 +591,7 @@ public void testSizeStatisticsControl() throws Exception {
589591
.withType(schema)
590592
.withSizeStatisticsEnabled(true) // enable globally
591593
.withSizeStatisticsEnabled("boolean_field", false) // disable for specific column
594+
.withStatisticsEnabled("boolean_field", false) // disable column statistics
592595
.build()) {
593596
writer.write(group);
594597
}
@@ -599,8 +602,10 @@ public void testSizeStatisticsControl() throws Exception {
599602
for (ColumnChunkMetaData column : block.getColumns()) {
600603
if (column.getPath().toDotString().equals("boolean_field")) {
601604
assertNull(column.getSizeStatistics());
605+
assertTrue(column.getStatistics().isEmpty());
602606
} else {
603607
assertTrue(column.getSizeStatistics().isValid());
608+
assertFalse(column.getStatistics().isEmpty());
604609
}
605610
}
606611
}

parquet-protobuf/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
<properties>
3333
<elephant-bird.version>4.4</elephant-bird.version>
3434
<protobuf.version>3.25.6</protobuf.version>
35-
<common-protos.version>2.51.0</common-protos.version> <!-- make sure it's compatible with protobuf.version -->
35+
<common-protos.version>2.54.1</common-protos.version> <!-- make sure it's compatible with protobuf.version -->
3636
<truth-proto-extension.version>1.4.4</truth-proto-extension.version>
3737
</properties>
3838

pom.xml

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -69,15 +69,6 @@
6969
</mailingList>
7070
</mailingLists>
7171

72-
<repositories>
73-
<repository>
74-
<id>jitpack.io</id>
75-
<url>https://jitpack.io</url>
76-
<name>Jitpack.io repository</name>
77-
<!-- needed for brotli-codec -->
78-
</repository>
79-
</repositories>
80-
8172
<properties>
8273
<maven.compiler.source>1.8</maven.compiler.source>
8374
<maven.compiler.target>1.8</maven.compiler.target>

0 commit comments

Comments
 (0)