Skip to content

Commit 3d421f9

Browse files
committed
Fix geotile_grid aggregation CPU stall on large geo_shapes
Signed-off-by: Harsha Vamsi Kalluri <harshavamsi096@gmail.com>
1 parent e67f291 commit 3d421f9

File tree

7 files changed

+229
-2
lines changed

7 files changed

+229
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6060
- Fix terms lookup subquery fetch limit reading from non-existent index setting instead of cluster `max_clause_count` ([#20823](https://github.com/opensearch-project/OpenSearch/pull/20823))
6161
- Fix array_index_out_of_bounds_exception with wildcard and aggregations ([#20842](https://github.com/opensearch-project/OpenSearch/pull/20842))
6262
- Handle dependencies between analyzers ([#19248](https://github.com/opensearch-project/OpenSearch/pull/19248))
63+
- Fix geotile_grid aggregation CPU stall on large geo_shapes by adding tile limit and cooperative cancellation ([#20929](https://github.com/opensearch-project/OpenSearch/pull/20929))
6364

6465
### Dependencies
6566
- Bump shadow-gradle-plugin from 8.3.9 to 9.3.1 ([#20569](https://github.com/opensearch-project/OpenSearch/pull/20569))

modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/GeoHashGridAggregatorFactory.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.geo.search.aggregations.bucket.geogrid;
3434

3535
import org.opensearch.common.geo.GeoBoundingBox;
36+
import org.opensearch.core.tasks.TaskCancelledException;
3637
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.CellIdSource;
3738
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.GeoShapeCellIdSource;
3839
import org.opensearch.geo.search.aggregations.bucket.geogrid.util.GeoShapeHashUtil;
@@ -175,11 +176,16 @@ static void registerAggregators(ValuesSourceRegistry.Builder builder) {
175176
parent,
176177
cardinality,
177178
metadata) -> {
179+
final SearchContext ctx = aggregationContext;
178180
final GeoShapeCellIdSource cellIdSource = new GeoShapeCellIdSource(
179181
(ValuesSource.GeoShape) valuesSource,
180182
precision,
181183
geoBoundingBox,
182-
GeoShapeHashUtil::encodeShape
184+
(docValue, p) -> GeoShapeHashUtil.encodeShape(docValue, p, () -> {
185+
if (ctx.isCancelled()) {
186+
throw new TaskCancelledException("Cancelling geohash_grid aggregation on geo_shape field");
187+
}
188+
})
183189
);
184190
return new GeoHashGridAggregator(
185191
name,

modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/GeoTileGridAggregatorFactory.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.geo.search.aggregations.bucket.geogrid;
3434

3535
import org.opensearch.common.geo.GeoBoundingBox;
36+
import org.opensearch.core.tasks.TaskCancelledException;
3637
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.CellIdSource;
3738
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.GeoShapeCellIdSource;
3839
import org.opensearch.index.query.QueryShardContext;
@@ -173,11 +174,16 @@ static void registerAggregators(ValuesSourceRegistry.Builder builder) {
173174
parent,
174175
cardinality,
175176
metadata) -> {
177+
final SearchContext ctx = aggregationContext;
176178
GeoShapeCellIdSource cellIdSource = new GeoShapeCellIdSource(
177179
(ValuesSource.GeoShape) valuesSource,
178180
precision,
179181
geoBoundingBox,
180-
GeoTileUtils::encodeShape
182+
(docValue, p) -> GeoTileUtils.encodeShape(docValue, p, () -> {
183+
if (ctx.isCancelled()) {
184+
throw new TaskCancelledException("Cancelling geotile_grid aggregation on geo_shape field");
185+
}
186+
})
181187
);
182188
return new GeoTileGridAggregator(
183189
name,

modules/geo/src/main/java/org/opensearch/geo/search/aggregations/bucket/geogrid/util/GeoShapeHashUtil.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,11 @@
2121
*/
2222
public class GeoShapeHashUtil {
2323

24+
/**
25+
* Interval at which we check for cancellation inside the geohash iteration loop.
26+
*/
27+
private static final int CANCELLATION_CHECK_INTERVAL = 1024;
28+
2429
/**
2530
* The function encodes the shape provided as {@link GeoShapeDocValue} to a {@link List} of {@link Long} values
2631
* (representing the GeoHashes) which are intersecting with the shapes at a given precision.
@@ -30,6 +35,18 @@ public class GeoShapeHashUtil {
3035
* @return {@link List} containing encoded {@link Long} values
3136
*/
3237
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision) {
38+
return encodeShape(geoShapeDocValue, precision, () -> {});
39+
}
40+
41+
/**
42+
* Encodes the shape to a list of intersecting geohash long values, with periodic cancellation checks.
43+
*
44+
* @param geoShapeDocValue {@link GeoShapeDocValue}
45+
* @param precision int
46+
* @param checkCancelled a {@link Runnable} invoked periodically to allow the caller to abort
47+
* @return {@link List} containing encoded {@link Long} values
48+
*/
49+
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision, final Runnable checkCancelled) {
3350
final List<Long> encodedValues = new ArrayList<>();
3451
final GeoShapeDocValue.BoundingRectangle boundingRectangle = geoShapeDocValue.getBoundingRectangle();
3552
long topLeftGeoHash = Geohash.longEncode(boundingRectangle.getMinX(), boundingRectangle.getMaxY(), precision);
@@ -39,7 +56,11 @@ public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, fi
3956
long currentValue = topLeftGeoHash;
4057
long rightMax = topRightGeoHash;
4158
long tempCurrent = currentValue;
59+
int iterationCount = 0;
4260
while (true) {
61+
if (++iterationCount % CANCELLATION_CHECK_INTERVAL == 0) {
62+
checkCancelled.run();
63+
}
4364
// check if this currentValue intersect with shape.
4465
final Rectangle geohashRectangle = Geohash.toBoundingBox(Geohash.stringEncode(tempCurrent));
4566
if (geoShapeDocValue.isIntersectingRectangle(geohashRectangle)) {

modules/geo/src/test/java/org/opensearch/geo/search/aggregations/bucket/geogrid/GeoTileGridAggregatorTests.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,21 @@
3232

3333
package org.opensearch.geo.search.aggregations.bucket.geogrid;
3434

35+
import org.apache.lucene.document.Document;
36+
import org.apache.lucene.document.LatLonShape;
37+
import org.apache.lucene.index.IndexReader;
38+
import org.apache.lucene.search.IndexSearcher;
39+
import org.apache.lucene.search.MatchAllDocsQuery;
40+
import org.apache.lucene.store.Directory;
41+
import org.apache.lucene.tests.index.RandomIndexWriter;
42+
import org.opensearch.common.geo.GeoShapeUtils;
43+
import org.opensearch.geometry.Line;
44+
import org.opensearch.index.mapper.GeoShapeFieldMapper;
45+
import org.opensearch.index.mapper.MappedFieldType;
3546
import org.opensearch.search.aggregations.bucket.GeoTileUtils;
3647

48+
import java.io.IOException;
49+
3750
public class GeoTileGridAggregatorTests extends GeoGridAggregatorTestCase<InternalGeoTileGridBucket> {
3851

3952
@Override
@@ -61,4 +74,73 @@ public void testPrecision() {
6174
builder.precision(precision);
6275
assertEquals(precision, builder.precision());
6376
}
77+
78+
/**
79+
* Test that a LineString spanning large distances doesn't cause excessive tile generation.
80+
* This reproduces bug #20413 where a LineString with a large bounding box can generate
81+
* millions of tiles at high precision, causing CPU to max out and the cluster to stall.
82+
*/
83+
public void testLineStringWithLargeBoundingBoxAtHighPrecision() {
84+
final String fieldName = "location";
85+
// Create a LineString spanning nearly the entire world diagonally.
86+
// The diagonal span ensures a large bounding box in both x and y tile dimensions.
87+
Line line = new Line(new double[] { -170.0, 170.0 }, new double[] { -60.0, 60.0 });
88+
89+
try (Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
90+
Document doc = new Document();
91+
doc.add(LatLonShape.createDocValueField(fieldName, GeoShapeUtils.toLuceneLine(line)));
92+
writer.addDocument(doc);
93+
94+
MappedFieldType fieldType = new GeoShapeFieldMapper.GeoShapeFieldType(fieldName);
95+
96+
// At precision 15 (2^15 = 32768 tiles/axis), this diagonal line's bounding box
97+
// spans ~31000 x-tiles and many y-tiles, well exceeding MAX_TILES_PER_SHAPE (65536)
98+
GeoTileGridAggregationBuilder aggBuilder = new GeoTileGridAggregationBuilder("geotile_grid");
99+
aggBuilder.field(fieldName).precision(15);
100+
101+
try (IndexReader reader = writer.getReader()) {
102+
IndexSearcher searcher = new IndexSearcher(reader);
103+
104+
// With the fix, this should throw an IllegalArgumentException
105+
// instead of hanging indefinitely
106+
Exception exception = expectThrows(
107+
IllegalArgumentException.class,
108+
() -> searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType)
109+
);
110+
assertTrue("Exception should mention tile limit", exception.getMessage().contains("would generate too many tiles"));
111+
}
112+
} catch (IOException e) {
113+
fail("IOException during test: " + e.getMessage());
114+
}
115+
}
116+
117+
/**
118+
* Test that a LineString with moderate size works correctly at reasonable precision.
119+
*/
120+
public void testLineStringWithModerateBoundingBox() throws IOException {
121+
final String fieldName = "location";
122+
// Create a smaller LineString that should work fine
123+
Line line = new Line(new double[] { -1.0, 1.0 }, new double[] { 0.0, 0.0 });
124+
125+
try (Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
126+
Document doc = new Document();
127+
doc.add(LatLonShape.createDocValueField(fieldName, GeoShapeUtils.toLuceneLine(line)));
128+
writer.addDocument(doc);
129+
130+
MappedFieldType fieldType = new GeoShapeFieldMapper.GeoShapeFieldType(fieldName);
131+
132+
// This should work fine even at high precision
133+
GeoTileGridAggregationBuilder aggBuilder = new GeoTileGridAggregationBuilder("geotile_grid");
134+
aggBuilder.field(fieldName).precision(10);
135+
136+
try (IndexReader reader = writer.getReader()) {
137+
IndexSearcher searcher = new IndexSearcher(reader);
138+
GeoTileGrid result = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);
139+
140+
// Should successfully generate buckets
141+
assertNotNull(result);
142+
assertTrue("Expected buckets to be generated", result.getBuckets().size() > 0);
143+
}
144+
}
145+
}
64146
}

server/src/main/java/org/opensearch/search/aggregations/bucket/GeoTileUtils.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,13 @@ private GeoTileUtils() {}
8181
*/
8282
public static final double LATITUDE_MASK = 85.0511287798066;
8383

84+
/**
85+
* Maximum number of tiles that can be generated for a single shape.
86+
* This prevents excessive CPU usage for shapes with large bounding boxes (e.g., long LineStrings).
87+
* The limit is set to allow reasonable precision while preventing cluster stalls.
88+
*/
89+
public static final int MAX_TILES_PER_SHAPE = 65536;
90+
8491
/**
8592
* Since shapes are encoded, their boundaries are to be compared to against the encoded/decoded values of <code>LATITUDE_MASK</code>
8693
*/
@@ -288,6 +295,11 @@ public static Rectangle toBoundingBox(String hash) {
288295
return toBoundingBox(hashAsInts[1], hashAsInts[2], hashAsInts[0]);
289296
}
290297

298+
/**
299+
* Interval at which we check for cancellation inside the tile iteration loop.
300+
*/
301+
private static final int CANCELLATION_CHECK_INTERVAL = 1024;
302+
291303
/**
292304
* The function encodes the shape provided as {@link GeoShapeDocValue} to a {@link List} of {@link Long} values
293305
* (representing the GeoTiles) which are intersecting with the shapes at a given precision.
@@ -297,6 +309,19 @@ public static Rectangle toBoundingBox(String hash) {
297309
* @return {@link List} of {@link Long}
298310
*/
299311
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision) {
312+
return encodeShape(geoShapeDocValue, precision, () -> {});
313+
}
314+
315+
/**
316+
* Encodes the shape to a list of intersecting tile long values, with periodic cancellation checks.
317+
*
318+
* @param geoShapeDocValue {@link GeoShapeDocValue}
319+
* @param precision int
320+
* @param checkCancelled a {@link Runnable} invoked periodically to allow the caller to abort
321+
* (e.g. by throwing a TaskCancelledException)
322+
* @return {@link List} of {@link Long}
323+
*/
324+
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision, final Runnable checkCancelled) {
300325
final GeoShapeDocValue.BoundingRectangle boundingRectangle = geoShapeDocValue.getBoundingRectangle();
301326
// generate all the grid long values that this shape intersects.
302327
final long totalTilesAtPrecision = 1L << checkPrecisionRange(precision);
@@ -306,9 +331,36 @@ public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, fi
306331
// take maxY and for maxYTile we need to take minY.
307332
int minYTile = getYTile(boundingRectangle.getMaxY(), totalTilesAtPrecision);
308333
int maxYTile = getYTile(boundingRectangle.getMinY(), totalTilesAtPrecision);
334+
335+
// Calculate the potential number of tiles to prevent excessive iteration
336+
long xTileRange = (long) maxXTile - minXTile + 1;
337+
long yTileRange = (long) maxYTile - minYTile + 1;
338+
long potentialTiles = xTileRange * yTileRange;
339+
340+
// Hard limit: reject shapes that would generate an unreasonable number of tiles
341+
if (potentialTiles > MAX_TILES_PER_SHAPE) {
342+
throw new IllegalArgumentException(
343+
String.format(
344+
Locale.ROOT,
345+
"Tile aggregation on GeoShape field would generate too many tiles (%d) at precision %d. "
346+
+ "The shape spans %d x-tiles and %d y-tiles. Maximum allowed is %d tiles per shape. "
347+
+ "Consider using a lower precision or a smaller shape.",
348+
potentialTiles,
349+
precision,
350+
xTileRange,
351+
yTileRange,
352+
MAX_TILES_PER_SHAPE
353+
)
354+
);
355+
}
356+
309357
final List<Long> encodedValues = new ArrayList<>();
358+
int iterationCount = 0;
310359
for (int x = minXTile; x <= maxXTile; x++) {
311360
for (int y = minYTile; y <= maxYTile; y++) {
361+
if (++iterationCount % CANCELLATION_CHECK_INTERVAL == 0) {
362+
checkCancelled.run();
363+
}
312364
// Convert the precision, x , y to encoded value.
313365
long encodedValue = longEncodeTiles(precision, x, y);
314366
// Convert encoded value to rectangle

server/src/test/java/org/opensearch/search/aggregations/bucket/geogrid/GeoTileUtilsTests.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,14 @@
3333
package org.opensearch.search.aggregations.bucket.geogrid;
3434

3535
import org.opensearch.common.geo.GeoPoint;
36+
import org.opensearch.common.geo.GeoShapeDocValue;
37+
import org.opensearch.geometry.Line;
3638
import org.opensearch.geometry.Rectangle;
3739
import org.opensearch.search.aggregations.bucket.GeoTileUtils;
3840
import org.opensearch.test.OpenSearchTestCase;
3941

42+
import java.util.List;
43+
4044
import static org.opensearch.search.aggregations.bucket.GeoTileUtils.MAX_ZOOM;
4145
import static org.opensearch.search.aggregations.bucket.GeoTileUtils.checkPrecisionRange;
4246
import static org.opensearch.search.aggregations.bucket.GeoTileUtils.hashToGeoPoint;
@@ -47,6 +51,8 @@
4751
import static org.hamcrest.Matchers.closeTo;
4852
import static org.hamcrest.Matchers.containsString;
4953
import static org.hamcrest.Matchers.equalTo;
54+
import static org.hamcrest.Matchers.greaterThan;
55+
import static org.hamcrest.Matchers.lessThan;
5056

5157
public class GeoTileUtilsTests extends OpenSearchTestCase {
5258

@@ -265,4 +271,57 @@ public void testPointToTile() {
265271
assertThat(GeoTileUtils.getYTile(y, tiles), equalTo(yTile));
266272

267273
}
274+
275+
/**
276+
* Test that encodeShape throws an exception when the shape would generate too many tiles.
277+
* This tests the fix for bug #20413 where a LineString with a large bounding box could
278+
* generate millions of tiles at high precision, causing CPU to max out.
279+
*/
280+
public void testEncodeShapeExceedsTileLimit() {
281+
// Create a LineString spanning most of the world in both longitude and latitude.
282+
// The diagonal span ensures a large bounding box in both x and y tile dimensions.
283+
Line line = new Line(new double[] { -170.0, 170.0 }, new double[] { -60.0, 60.0 });
284+
GeoShapeDocValue docValue = GeoShapeDocValue.createGeometryDocValue(line);
285+
286+
// At precision 15 (2^15 = 32768 tiles/axis), this line's bounding box spans
287+
// ~31000 x-tiles and many y-tiles, well exceeding MAX_TILES_PER_SHAPE (65536)
288+
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> GeoTileUtils.encodeShape(docValue, 15));
289+
assertThat(exception.getMessage(), containsString("would generate too many tiles"));
290+
assertThat(exception.getMessage(), containsString("at precision 15"));
291+
}
292+
293+
/**
294+
* Test that encodeShape works correctly for shapes within the tile limit.
295+
*/
296+
public void testEncodeShapeWithinTileLimit() {
297+
// Create a small LineString
298+
Line line = new Line(new double[] { -1.0, 1.0 }, new double[] { 0.0, 0.0 });
299+
GeoShapeDocValue docValue = GeoShapeDocValue.createGeometryDocValue(line);
300+
301+
// This should work fine even at high precision
302+
List<Long> tiles = GeoTileUtils.encodeShape(docValue, 10);
303+
304+
// Should generate some tiles but not exceed the limit
305+
assertThat(tiles.size(), greaterThan(0));
306+
assertThat(tiles.size(), lessThan(GeoTileUtils.MAX_TILES_PER_SHAPE));
307+
}
308+
309+
/**
310+
* Test that encodeShape respects the cancellation callback.
311+
* This verifies that long-running tile iterations can be cancelled by the search timeout.
312+
*/
313+
public void testEncodeShapeRespectsCancel() {
314+
// Create a moderate LineString that generates enough tiles to trigger the check
315+
Line line = new Line(new double[] { -10.0, 10.0 }, new double[] { -5.0, 5.0 });
316+
GeoShapeDocValue docValue = GeoShapeDocValue.createGeometryDocValue(line);
317+
318+
// Use a cancellation callback that throws after being called
319+
RuntimeException cancelException = new RuntimeException("cancelled");
320+
Runnable alwaysCancel = () -> { throw cancelException; };
321+
322+
// At a moderate precision, iteration count should exceed the check interval (1024)
323+
// and the cancellation should fire
324+
RuntimeException thrown = expectThrows(RuntimeException.class, () -> GeoTileUtils.encodeShape(docValue, 10, alwaysCancel));
325+
assertSame(cancelException, thrown);
326+
}
268327
}

0 commit comments

Comments
 (0)