Skip to content

Commit 81eb57d

Browse files
committed
change the behavior of merge null or invalid statistics
Tested the following cases after the changes: - Merging valid stats with null stats results in invalid stats with null components - Merging null stats with valid stats also results in invalid stats with null components - Merging valid stats with partially null stats (null bounding box) results in the expected behavior where only the bounding box becomes null
1 parent ef8e343 commit 81eb57d

File tree

4 files changed

+75
-13
lines changed

4 files changed

+75
-13
lines changed

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,15 +150,14 @@ public boolean isEmpty() {
150150
* @param other the other BoundingBox whose bounds will be merged into this one
151151
*/
152152
public void merge(BoundingBox other) {
153-
// Skip merging if the bbox is not valid or other is null or empty
154-
// - A null bounding box cannot be merged
155-
// - An empty bounding box (with inverted min/max) has no effect when merged,
156-
// so we can skip the merge operation entirely for efficiency
157153
if (!valid) {
158154
return;
159155
}
160156

161-
if (other == null || other.isEmpty()) {
157+
// If other is null or invalid, mark this as invalid
158+
if (other == null || other.isEmpty() || !other.valid) {
159+
valid = false;
160+
resetBBox();
162161
return;
163162
}
164163

@@ -184,7 +183,6 @@ public void merge(BoundingBox other) {
184183
* If null or empty, the method returns without making any changes.
185184
*/
186185
public void update(Geometry geometry) {
187-
// Skip updating if the bbox is not valid or geometry is null or empty
188186
if (!valid) {
189187
return;
190188
}

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialStatistics.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@ public class GeospatialStatistics {
3737
private static final Logger LOG = LoggerFactory.getLogger(GeospatialStatistics.class);
3838

3939
// Metadata that may impact the statistics calculation
40-
private final BoundingBox boundingBox;
4140
private final EdgeInterpolationAlgorithm edgeAlgorithm;
42-
private final GeospatialTypes geospatialTypes;
41+
private BoundingBox boundingBox;
42+
private GeospatialTypes geospatialTypes;
4343

4444
/**
4545
* Merge the statistics from another GeospatialStatistics object.
@@ -196,11 +196,21 @@ public boolean isValid() {
196196
public void merge(GeospatialStatistics other) {
197197
Preconditions.checkArgument(other != null, "Cannot merge with null GeometryStatistics");
198198

199-
if (boundingBox != null && other.boundingBox != null) {
199+
// If other bounding box is null or invalid, set this as null
200+
if (boundingBox == null || other.boundingBox == null) {
201+
boundingBox = null;
202+
}
203+
204+
if (boundingBox != null) {
200205
boundingBox.merge(other.boundingBox);
201206
}
202207

203-
if (geospatialTypes != null && other.geospatialTypes != null) {
208+
// If other geosptialTypes is null or invalid, set this as null
209+
if (geospatialTypes == null || other.geospatialTypes == null) {
210+
geospatialTypes = null;
211+
}
212+
213+
if (geospatialTypes != null) {
204214
geospatialTypes.merge(other.geospatialTypes);
205215
}
206216
}

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialTypes.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import java.util.HashSet;
2222
import java.util.Set;
2323
import java.util.stream.Collectors;
24-
import org.apache.parquet.Preconditions;
2524
import org.locationtech.jts.geom.Coordinate;
2625
import org.locationtech.jts.geom.Geometry;
2726

@@ -45,6 +44,11 @@ void update(Geometry geometry) {
4544
if (!valid) {
4645
return;
4746
}
47+
48+
if (geometry == null || geometry.isEmpty()) {
49+
return;
50+
}
51+
4852
int code = getGeometryTypeCode(geometry);
4953
if (code != UNKNOWN_TYPE_ID) {
5054
types.add(code);
@@ -55,11 +59,12 @@ void update(Geometry geometry) {
5559
}
5660

5761
public void merge(GeospatialTypes other) {
58-
Preconditions.checkArgument(other != null, "Cannot merge with null GeospatialTypes");
5962
if (!valid) {
6063
return;
6164
}
62-
if (!other.valid) {
65+
66+
// If other is null or invalid, mark this as invalid
67+
if (other == null || !other.valid) {
6368
valid = false;
6469
types.clear();
6570
return;

parquet-column/src/test/java/org/apache/parquet/column/statistics/geometry/TestGeospatialStatistics.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,55 @@ public void testMergeGeospatialStatistics() throws ParseException {
7171
Assert.assertNotNull(statistics1.getGeospatialTypes());
7272
}
7373

74+
@Test
75+
public void testMergeNullGeospatialStatistics() {
76+
// Create a valid stats object
77+
PrimitiveType type = Types.optional(PrimitiveType.PrimitiveTypeName.BINARY)
78+
.as(LogicalTypeAnnotation.geometryType(null))
79+
.named("a");
80+
81+
WKTReader wktReader = new WKTReader();
82+
WKBWriter wkbWriter = new WKBWriter();
83+
84+
GeospatialStatistics.Builder validBuilder = GeospatialStatistics.newBuilder(type);
85+
try {
86+
validBuilder.update(Binary.fromConstantByteArray(wkbWriter.write(wktReader.read("POINT (1 1)"))));
87+
} catch (ParseException e) {
88+
Assert.fail("Failed to parse valid WKT: " + e.getMessage());
89+
}
90+
GeospatialStatistics validStats = validBuilder.build();
91+
Assert.assertTrue(validStats.isValid());
92+
93+
// Create stats with null components
94+
GeospatialStatistics nullStats = new GeospatialStatistics(null, null, null);
95+
Assert.assertFalse(nullStats.isValid());
96+
97+
// Test merging valid with null
98+
GeospatialStatistics validCopy = validStats.copy();
99+
validCopy.merge(nullStats);
100+
Assert.assertFalse(validCopy.isValid());
101+
Assert.assertNull(validCopy.getBoundingBox());
102+
Assert.assertNull(validCopy.getGeospatialTypes());
103+
104+
// Test merging null with valid
105+
nullStats = new GeospatialStatistics(null, null, null);
106+
nullStats.merge(validStats);
107+
Assert.assertFalse(nullStats.isValid());
108+
Assert.assertNull(nullStats.getBoundingBox());
109+
Assert.assertNull(nullStats.getGeospatialTypes());
110+
111+
// Create stats with null bounding box only
112+
GeospatialStatistics nullBboxStats = new GeospatialStatistics(null, new GeospatialTypes(), null);
113+
Assert.assertFalse(nullBboxStats.isValid());
114+
115+
// Test merging valid with null bounding box
116+
validCopy = validStats.copy();
117+
validCopy.merge(nullBboxStats);
118+
Assert.assertFalse(validCopy.isValid());
119+
Assert.assertNull(validCopy.getBoundingBox());
120+
Assert.assertNotNull(validCopy.getGeospatialTypes());
121+
}
122+
74123
@Test
75124
public void testCopyGeospatialStatistics() {
76125
PrimitiveType type = Types.optional(PrimitiveType.PrimitiveTypeName.BINARY)

0 commit comments

Comments
 (0)