Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix terms lookup subquery fetch limit reading from non-existent index setting instead of cluster `max_clause_count` ([#20823](https://github.com/opensearch-project/OpenSearch/pull/20823))
- Fix array_index_out_of_bounds_exception with wildcard and aggregations ([#20842](https://github.com/opensearch-project/OpenSearch/pull/20842))
- Handle dependencies between analyzers ([#19248](https://github.com/opensearch-project/OpenSearch/pull/19248))
- Fix geotile_grid aggregation CPU stall on large geo_shapes by adding tile limit and cooperative cancellation ([#20929](https://github.com/opensearch-project/OpenSearch/pull/20929))

### Dependencies
- Bump shadow-gradle-plugin from 8.3.9 to 9.3.1 ([#20569](https://github.com/opensearch-project/OpenSearch/pull/20569))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.geo.search.aggregations.bucket.geogrid;

import org.opensearch.common.geo.GeoBoundingBox;
import org.opensearch.core.tasks.TaskCancelledException;
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.CellIdSource;
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.GeoShapeCellIdSource;
import org.opensearch.geo.search.aggregations.bucket.geogrid.util.GeoShapeHashUtil;
Expand Down Expand Up @@ -175,11 +176,16 @@ static void registerAggregators(ValuesSourceRegistry.Builder builder) {
parent,
cardinality,
metadata) -> {
final SearchContext ctx = aggregationContext;
final GeoShapeCellIdSource cellIdSource = new GeoShapeCellIdSource(
(ValuesSource.GeoShape) valuesSource,
precision,
geoBoundingBox,
GeoShapeHashUtil::encodeShape
(docValue, p) -> GeoShapeHashUtil.encodeShape(docValue, p, () -> {
if (ctx.isCancelled()) {
throw new TaskCancelledException("Cancelling geohash_grid aggregation on geo_shape field");
}
})
);
return new GeoHashGridAggregator(
name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package org.opensearch.geo.search.aggregations.bucket.geogrid;

import org.opensearch.common.geo.GeoBoundingBox;
import org.opensearch.core.tasks.TaskCancelledException;
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.CellIdSource;
import org.opensearch.geo.search.aggregations.bucket.geogrid.cells.GeoShapeCellIdSource;
import org.opensearch.index.query.QueryShardContext;
Expand Down Expand Up @@ -173,11 +174,16 @@ static void registerAggregators(ValuesSourceRegistry.Builder builder) {
parent,
cardinality,
metadata) -> {
final SearchContext ctx = aggregationContext;
GeoShapeCellIdSource cellIdSource = new GeoShapeCellIdSource(
(ValuesSource.GeoShape) valuesSource,
precision,
geoBoundingBox,
GeoTileUtils::encodeShape
(docValue, p) -> GeoTileUtils.encodeShape(docValue, p, () -> {
if (ctx.isCancelled()) {
throw new TaskCancelledException("Cancelling geotile_grid aggregation on geo_shape field");
}
})
);
return new GeoTileGridAggregator(
name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
*/
public class GeoShapeHashUtil {

/**
* Interval at which we check for cancellation inside the geohash iteration loop.
*/
private static final int CANCELLATION_CHECK_INTERVAL = 1024;

/**
* The function encodes the shape provided as {@link GeoShapeDocValue} to a {@link List} of {@link Long} values
* (representing the GeoHashes) which are intersecting with the shapes at a given precision.
Expand All @@ -30,6 +35,18 @@ public class GeoShapeHashUtil {
* @return {@link List} containing encoded {@link Long} values
*/
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision) {
return encodeShape(geoShapeDocValue, precision, () -> {});
}

/**
* Encodes the shape to a list of intersecting geohash long values, with periodic cancellation checks.
*
* @param geoShapeDocValue {@link GeoShapeDocValue}
* @param precision int
* @param checkCancelled a {@link Runnable} invoked periodically to allow the caller to abort
* @return {@link List} containing encoded {@link Long} values
*/
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision, final Runnable checkCancelled) {
final List<Long> encodedValues = new ArrayList<>();
final GeoShapeDocValue.BoundingRectangle boundingRectangle = geoShapeDocValue.getBoundingRectangle();
long topLeftGeoHash = Geohash.longEncode(boundingRectangle.getMinX(), boundingRectangle.getMaxY(), precision);
Expand All @@ -39,7 +56,11 @@ public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, fi
long currentValue = topLeftGeoHash;
long rightMax = topRightGeoHash;
long tempCurrent = currentValue;
int iterationCount = 0;
while (true) {
if (++iterationCount % CANCELLATION_CHECK_INTERVAL == 0) {
checkCancelled.run();
}
// check if this currentValue intersect with shape.
final Rectangle geohashRectangle = Geohash.toBoundingBox(Geohash.stringEncode(tempCurrent));
if (geoShapeDocValue.isIntersectingRectangle(geohashRectangle)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,21 @@

package org.opensearch.geo.search.aggregations.bucket.geogrid;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.LatLonShape;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.opensearch.common.geo.GeoShapeUtils;
import org.opensearch.geometry.Line;
import org.opensearch.index.mapper.GeoShapeFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.search.aggregations.bucket.GeoTileUtils;

import java.io.IOException;

public class GeoTileGridAggregatorTests extends GeoGridAggregatorTestCase<InternalGeoTileGridBucket> {

@Override
Expand Down Expand Up @@ -61,4 +74,73 @@ public void testPrecision() {
builder.precision(precision);
assertEquals(precision, builder.precision());
}

/**
* Test that a LineString spanning large distances doesn't cause excessive tile generation.
* This reproduces bug #20413 where a LineString with a large bounding box can generate
* millions of tiles at high precision, causing CPU to max out and the cluster to stall.
*/
public void testLineStringWithLargeBoundingBoxAtHighPrecision() {
final String fieldName = "location";
// Create a LineString spanning nearly the entire world diagonally.
// The diagonal span ensures a large bounding box in both x and y tile dimensions.
Line line = new Line(new double[] { -170.0, 170.0 }, new double[] { -60.0, 60.0 });

try (Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
Document doc = new Document();
doc.add(LatLonShape.createDocValueField(fieldName, GeoShapeUtils.toLuceneLine(line)));
writer.addDocument(doc);

MappedFieldType fieldType = new GeoShapeFieldMapper.GeoShapeFieldType(fieldName);

// At precision 15 (2^15 = 32768 tiles/axis), this diagonal line's bounding box
// spans ~31000 x-tiles and many y-tiles, well exceeding MAX_TILES_PER_SHAPE (65536)
GeoTileGridAggregationBuilder aggBuilder = new GeoTileGridAggregationBuilder("geotile_grid");
aggBuilder.field(fieldName).precision(15);

try (IndexReader reader = writer.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);

// With the fix, this should throw an IllegalArgumentException
// instead of hanging indefinitely
Exception exception = expectThrows(
IllegalArgumentException.class,
() -> searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType)
);
assertTrue("Exception should mention tile limit", exception.getMessage().contains("would generate too many tiles"));
}
} catch (IOException e) {
fail("IOException during test: " + e.getMessage());
}
}

/**
* Test that a LineString with moderate size works correctly at reasonable precision.
*/
public void testLineStringWithModerateBoundingBox() throws IOException {
final String fieldName = "location";
// Create a smaller LineString that should work fine
Line line = new Line(new double[] { -1.0, 1.0 }, new double[] { 0.0, 0.0 });

try (Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
Document doc = new Document();
doc.add(LatLonShape.createDocValueField(fieldName, GeoShapeUtils.toLuceneLine(line)));
writer.addDocument(doc);

MappedFieldType fieldType = new GeoShapeFieldMapper.GeoShapeFieldType(fieldName);

// This should work fine even at high precision
GeoTileGridAggregationBuilder aggBuilder = new GeoTileGridAggregationBuilder("geotile_grid");
aggBuilder.field(fieldName).precision(10);

try (IndexReader reader = writer.getReader()) {
IndexSearcher searcher = new IndexSearcher(reader);
GeoTileGrid result = searchAndReduce(searcher, new MatchAllDocsQuery(), aggBuilder, fieldType);

// Should successfully generate buckets
assertNotNull(result);
assertTrue("Expected buckets to be generated", result.getBuckets().size() > 0);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ private GeoTileUtils() {}
*/
public static final double LATITUDE_MASK = 85.0511287798066;

/**
* Maximum number of tiles that can be generated for a single shape.
* This prevents excessive CPU usage for shapes with large bounding boxes (e.g., long LineStrings).
* The limit is set to allow reasonable precision while preventing cluster stalls.
*/
public static final int MAX_TILES_PER_SHAPE = 65536;

/**
* Since shapes are encoded, their boundaries are to be compared to against the encoded/decoded values of <code>LATITUDE_MASK</code>
*/
Expand Down Expand Up @@ -288,6 +295,11 @@ public static Rectangle toBoundingBox(String hash) {
return toBoundingBox(hashAsInts[1], hashAsInts[2], hashAsInts[0]);
}

/**
* Interval at which we check for cancellation inside the tile iteration loop.
*/
private static final int CANCELLATION_CHECK_INTERVAL = 1024;

/**
* The function encodes the shape provided as {@link GeoShapeDocValue} to a {@link List} of {@link Long} values
* (representing the GeoTiles) which are intersecting with the shapes at a given precision.
Expand All @@ -297,6 +309,19 @@ public static Rectangle toBoundingBox(String hash) {
* @return {@link List} of {@link Long}
*/
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision) {
return encodeShape(geoShapeDocValue, precision, () -> {});
}

/**
* Encodes the shape to a list of intersecting tile long values, with periodic cancellation checks.
*
* @param geoShapeDocValue {@link GeoShapeDocValue}
* @param precision int
* @param checkCancelled a {@link Runnable} invoked periodically to allow the caller to abort
* (e.g. by throwing a TaskCancelledException)
* @return {@link List} of {@link Long}
*/
public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, final int precision, final Runnable checkCancelled) {
final GeoShapeDocValue.BoundingRectangle boundingRectangle = geoShapeDocValue.getBoundingRectangle();
// generate all the grid long values that this shape intersects.
final long totalTilesAtPrecision = 1L << checkPrecisionRange(precision);
Expand All @@ -306,9 +331,36 @@ public static List<Long> encodeShape(final GeoShapeDocValue geoShapeDocValue, fi
// take maxY and for maxYTile we need to take minY.
int minYTile = getYTile(boundingRectangle.getMaxY(), totalTilesAtPrecision);
int maxYTile = getYTile(boundingRectangle.getMinY(), totalTilesAtPrecision);

// Calculate the potential number of tiles to prevent excessive iteration
long xTileRange = (long) maxXTile - minXTile + 1;
long yTileRange = (long) maxYTile - minYTile + 1;
long potentialTiles = xTileRange * yTileRange;

// Hard limit: reject shapes that would generate an unreasonable number of tiles
if (potentialTiles > MAX_TILES_PER_SHAPE) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"Tile aggregation on GeoShape field would generate too many tiles (%d) at precision %d. "
+ "The shape spans %d x-tiles and %d y-tiles. Maximum allowed is %d tiles per shape. "
+ "Consider using a lower precision or a smaller shape.",
potentialTiles,
precision,
xTileRange,
yTileRange,
MAX_TILES_PER_SHAPE
)
);
}

final List<Long> encodedValues = new ArrayList<>();
int iterationCount = 0;
for (int x = minXTile; x <= maxXTile; x++) {
for (int y = minYTile; y <= maxYTile; y++) {
if (++iterationCount % CANCELLATION_CHECK_INTERVAL == 0) {
checkCancelled.run();
}
// Convert the precision, x , y to encoded value.
long encodedValue = longEncodeTiles(precision, x, y);
// Convert encoded value to rectangle
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,14 @@
package org.opensearch.search.aggregations.bucket.geogrid;

import org.opensearch.common.geo.GeoPoint;
import org.opensearch.common.geo.GeoShapeDocValue;
import org.opensearch.geometry.Line;
import org.opensearch.geometry.Rectangle;
import org.opensearch.search.aggregations.bucket.GeoTileUtils;
import org.opensearch.test.OpenSearchTestCase;

import java.util.List;

import static org.opensearch.search.aggregations.bucket.GeoTileUtils.MAX_ZOOM;
import static org.opensearch.search.aggregations.bucket.GeoTileUtils.checkPrecisionRange;
import static org.opensearch.search.aggregations.bucket.GeoTileUtils.hashToGeoPoint;
Expand All @@ -47,6 +51,8 @@
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.lessThan;

public class GeoTileUtilsTests extends OpenSearchTestCase {

Expand Down Expand Up @@ -265,4 +271,57 @@ public void testPointToTile() {
assertThat(GeoTileUtils.getYTile(y, tiles), equalTo(yTile));

}

/**
* Test that encodeShape throws an exception when the shape would generate too many tiles.
* This tests the fix for bug #20413 where a LineString with a large bounding box could
* generate millions of tiles at high precision, causing CPU to max out.
*/
public void testEncodeShapeExceedsTileLimit() {
// Create a LineString spanning most of the world in both longitude and latitude.
// The diagonal span ensures a large bounding box in both x and y tile dimensions.
Line line = new Line(new double[] { -170.0, 170.0 }, new double[] { -60.0, 60.0 });
GeoShapeDocValue docValue = GeoShapeDocValue.createGeometryDocValue(line);

// At precision 15 (2^15 = 32768 tiles/axis), this line's bounding box spans
// ~31000 x-tiles and many y-tiles, well exceeding MAX_TILES_PER_SHAPE (65536)
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> GeoTileUtils.encodeShape(docValue, 15));
assertThat(exception.getMessage(), containsString("would generate too many tiles"));
assertThat(exception.getMessage(), containsString("at precision 15"));
}

/**
* Test that encodeShape works correctly for shapes within the tile limit.
*/
public void testEncodeShapeWithinTileLimit() {
// Create a small LineString
Line line = new Line(new double[] { -1.0, 1.0 }, new double[] { 0.0, 0.0 });
GeoShapeDocValue docValue = GeoShapeDocValue.createGeometryDocValue(line);

// This should work fine even at high precision
List<Long> tiles = GeoTileUtils.encodeShape(docValue, 10);

// Should generate some tiles but not exceed the limit
assertThat(tiles.size(), greaterThan(0));
assertThat(tiles.size(), lessThan(GeoTileUtils.MAX_TILES_PER_SHAPE));
}

/**
* Test that encodeShape respects the cancellation callback.
* This verifies that long-running tile iterations can be cancelled by the search timeout.
*/
public void testEncodeShapeRespectsCancel() {
// Create a moderate LineString that generates enough tiles to trigger the check
Line line = new Line(new double[] { -10.0, 10.0 }, new double[] { -5.0, 5.0 });
GeoShapeDocValue docValue = GeoShapeDocValue.createGeometryDocValue(line);

// Use a cancellation callback that throws after being called
RuntimeException cancelException = new RuntimeException("cancelled");
Runnable alwaysCancel = () -> { throw cancelException; };

// At a moderate precision, iteration count should exceed the check interval (1024)
// and the cancellation should fire
RuntimeException thrown = expectThrows(RuntimeException.class, () -> GeoTileUtils.encodeShape(docValue, 10, alwaysCancel));
assertSame(cancelException, thrown);
}
}
Loading