Skip to content

Commit fb358aa

Browse files
[8.x] Optimize ST_EXTENT_AGG for geo_shape and cartesian_shape (elastic#119889) (elastic#122276) (elastic#122420)
* Optimize ST_EXTENT_AGG for geo_shape and cartesian_shape (elastic#119889) Support for `ST_EXTENT_AGG` was added in elastic#118829, and then partially optimized in elastic#118829. This optimization worked only for cartesian_shape fields, and worked by extracting the Extent from the doc-values and re-encoding it as a WKB `BBOX` geometry. This does not work for geo_shape, where we need to retain all 6 integers stored in the doc-values, in order to perform the datelline choice only at reduce time during the final phase of the aggregation. Since both geo_shape and cartesian_shape perform the aggregations using integers, and the original Extent values in the doc-values are integers, this PR expands the previous optimization by: * Saving all Extent values into a multi-valued field in an IntBlock for both cartesian_shape and geo_shape * Simplifying the logic around merging intermediate states for all cases (geo/cartesian and grouped and non-grouped aggs) * Widening test cases for testing more combinations of aggregations and types, and fixing a few bugs found * Enhancing cartesian extent to convert from 6 ints to 4 ints at block loading time (for efficiency) * Fixing bugs in both cartesian and geo extents for generating intermediate state with missing groups (flaky tests in serverless) * Moved the int order to always match Rectangle for 4-int and Extent for 6-int cases (improved internal consistency) Since the PR already changed the meaning of the invalid/infinite values of the intermediate state integers, it was already not compatible with the previous cluster versions. We disabled mixed-cluster testing to prevent errors as a result of that. This leaves us the opportunity to make further changes that are mixed-cluster incompatible, hence the decision to perform this consistency update now. * Regenerate generated files
1 parent 1a99a47 commit fb358aa

File tree

53 files changed

+2668
-963
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+2668
-963
lines changed

docs/changelog/119889.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 119889
2+
summary: Optimize ST_EXTENT_AGG for `geo_shape` and `cartesian_shape`
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

libs/geo/src/main/java/org/elasticsearch/geometry/utils/SpatialEnvelopeVisitor.java

Lines changed: 88 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ public interface PointVisitor {
116116
boolean isValid();
117117

118118
Rectangle getResult();
119+
120+
/** To allow for memory optimizations through object reuse, the visitor can be reset to its initial state. */
121+
void reset();
119122
}
120123

121124
/**
@@ -124,18 +127,14 @@ public interface PointVisitor {
124127
*/
125128
public static class CartesianPointVisitor implements PointVisitor {
126129
private double minX = Double.POSITIVE_INFINITY;
127-
private double minY = Double.POSITIVE_INFINITY;
128130
private double maxX = Double.NEGATIVE_INFINITY;
129131
private double maxY = Double.NEGATIVE_INFINITY;
132+
private double minY = Double.POSITIVE_INFINITY;
130133

131134
public double getMinX() {
132135
return minX;
133136
}
134137

135-
public double getMinY() {
136-
return minY;
137-
}
138-
139138
public double getMaxX() {
140139
return maxX;
141140
}
@@ -144,12 +143,16 @@ public double getMaxY() {
144143
return maxY;
145144
}
146145

146+
public double getMinY() {
147+
return minY;
148+
}
149+
147150
@Override
148151
public void visitPoint(double x, double y) {
149152
minX = Math.min(minX, x);
150-
minY = Math.min(minY, y);
151153
maxX = Math.max(maxX, x);
152154
maxY = Math.max(maxY, y);
155+
minY = Math.min(minY, y);
153156
}
154157

155158
@Override
@@ -160,9 +163,9 @@ public void visitRectangle(double minX, double maxX, double maxY, double minY) {
160163
);
161164
}
162165
this.minX = Math.min(this.minX, minX);
163-
this.minY = Math.min(this.minY, minY);
164166
this.maxX = Math.max(this.maxX, maxX);
165167
this.maxY = Math.max(this.maxY, maxY);
168+
this.minY = Math.min(this.minY, minY);
166169
}
167170

168171
@Override
@@ -174,6 +177,14 @@ public boolean isValid() {
174177
public Rectangle getResult() {
175178
return new Rectangle(minX, maxX, maxY, minY);
176179
}
180+
181+
@Override
182+
public void reset() {
183+
minX = Double.POSITIVE_INFINITY;
184+
maxX = Double.NEGATIVE_INFINITY;
185+
maxY = Double.NEGATIVE_INFINITY;
186+
minY = Double.POSITIVE_INFINITY;
187+
}
177188
}
178189

179190
/**
@@ -186,82 +197,117 @@ public Rectangle getResult() {
186197
* </ul>
187198
*/
188199
public static class GeoPointVisitor implements PointVisitor {
189-
protected double minY = Double.POSITIVE_INFINITY;
190-
protected double maxY = Double.NEGATIVE_INFINITY;
191-
protected double minNegX = Double.POSITIVE_INFINITY;
192-
protected double maxNegX = Double.NEGATIVE_INFINITY;
193-
protected double minPosX = Double.POSITIVE_INFINITY;
194-
protected double maxPosX = Double.NEGATIVE_INFINITY;
200+
protected double top = Double.NEGATIVE_INFINITY;
201+
protected double bottom = Double.POSITIVE_INFINITY;
202+
protected double negLeft = Double.POSITIVE_INFINITY;
203+
protected double negRight = Double.NEGATIVE_INFINITY;
204+
protected double posLeft = Double.POSITIVE_INFINITY;
205+
protected double posRight = Double.NEGATIVE_INFINITY;
195206

196207
private final WrapLongitude wrapLongitude;
197208

198209
public GeoPointVisitor(WrapLongitude wrapLongitude) {
199210
this.wrapLongitude = wrapLongitude;
200211
}
201212

213+
public double getTop() {
214+
return top;
215+
}
216+
217+
public double getBottom() {
218+
return bottom;
219+
}
220+
221+
public double getNegLeft() {
222+
return negLeft;
223+
}
224+
225+
public double getNegRight() {
226+
return negRight;
227+
}
228+
229+
public double getPosLeft() {
230+
return posLeft;
231+
}
232+
233+
public double getPosRight() {
234+
return posRight;
235+
}
236+
202237
@Override
203238
public void visitPoint(double x, double y) {
204-
minY = Math.min(minY, y);
205-
maxY = Math.max(maxY, y);
239+
bottom = Math.min(bottom, y);
240+
top = Math.max(top, y);
206241
visitLongitude(x);
207242
}
208243

209244
@Override
210245
public void visitRectangle(double minX, double maxX, double maxY, double minY) {
211-
this.minY = Math.min(this.minY, minY);
212-
this.maxY = Math.max(this.maxY, maxY);
246+
// TODO: Fix bug with rectangle crossing the dateline (see Extent.addRectangle for correct behaviour)
247+
this.bottom = Math.min(this.bottom, minY);
248+
this.top = Math.max(this.top, maxY);
213249
visitLongitude(minX);
214250
visitLongitude(maxX);
215251
}
216252

217253
private void visitLongitude(double x) {
218254
if (x >= 0) {
219-
minPosX = Math.min(minPosX, x);
220-
maxPosX = Math.max(maxPosX, x);
255+
posLeft = Math.min(posLeft, x);
256+
posRight = Math.max(posRight, x);
221257
} else {
222-
minNegX = Math.min(minNegX, x);
223-
maxNegX = Math.max(maxNegX, x);
258+
negLeft = Math.min(negLeft, x);
259+
negRight = Math.max(negRight, x);
224260
}
225261
}
226262

227263
@Override
228264
public boolean isValid() {
229-
return minY != Double.POSITIVE_INFINITY;
265+
return bottom != Double.POSITIVE_INFINITY;
230266
}
231267

232268
@Override
233269
public Rectangle getResult() {
234-
return getResult(minNegX, minPosX, maxNegX, maxPosX, maxY, minY, wrapLongitude);
270+
return getResult(top, bottom, negLeft, negRight, posLeft, posRight, wrapLongitude);
271+
}
272+
273+
@Override
274+
public void reset() {
275+
bottom = Double.POSITIVE_INFINITY;
276+
top = Double.NEGATIVE_INFINITY;
277+
negLeft = Double.POSITIVE_INFINITY;
278+
negRight = Double.NEGATIVE_INFINITY;
279+
posLeft = Double.POSITIVE_INFINITY;
280+
posRight = Double.NEGATIVE_INFINITY;
235281
}
236282

237-
protected static Rectangle getResult(
238-
double minNegX,
239-
double minPosX,
240-
double maxNegX,
241-
double maxPosX,
242-
double maxY,
243-
double minY,
283+
public static Rectangle getResult(
284+
double top,
285+
double bottom,
286+
double negLeft,
287+
double negRight,
288+
double posLeft,
289+
double posRight,
244290
WrapLongitude wrapLongitude
245291
) {
246-
assert Double.isFinite(maxY);
247-
if (Double.isInfinite(minPosX)) {
248-
return new Rectangle(minNegX, maxNegX, maxY, minY);
249-
} else if (Double.isInfinite(minNegX)) {
250-
return new Rectangle(minPosX, maxPosX, maxY, minY);
292+
assert Double.isFinite(top);
293+
if (posRight == Double.NEGATIVE_INFINITY) {
294+
return new Rectangle(negLeft, negRight, top, bottom);
295+
} else if (negLeft == Double.POSITIVE_INFINITY) {
296+
return new Rectangle(posLeft, posRight, top, bottom);
251297
} else {
252298
return switch (wrapLongitude) {
253-
case NO_WRAP -> new Rectangle(minNegX, maxPosX, maxY, minY);
254-
case WRAP -> maybeWrap(minNegX, minPosX, maxNegX, maxPosX, maxY, minY);
299+
case NO_WRAP -> new Rectangle(negLeft, posRight, top, bottom);
300+
case WRAP -> maybeWrap(top, bottom, negLeft, negRight, posLeft, posRight);
255301
};
256302
}
257303
}
258304

259-
private static Rectangle maybeWrap(double minNegX, double minPosX, double maxNegX, double maxPosX, double maxY, double minY) {
260-
double unwrappedWidth = maxPosX - minNegX;
261-
double wrappedWidth = 360 + maxNegX - minPosX;
305+
private static Rectangle maybeWrap(double top, double bottom, double negLeft, double negRight, double posLeft, double posRight) {
306+
double unwrappedWidth = posRight - negLeft;
307+
double wrappedWidth = 360 + negRight - posLeft;
262308
return unwrappedWidth <= wrappedWidth
263-
? new Rectangle(minNegX, maxPosX, maxY, minY)
264-
: new Rectangle(minPosX, maxNegX, maxY, minY);
309+
? new Rectangle(negLeft, posRight, top, bottom)
310+
: new Rectangle(posLeft, negRight, top, bottom);
265311
}
266312
}
267313

modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.index.IndexVersions;
3434
import org.elasticsearch.index.analysis.NamedAnalyzer;
3535
import org.elasticsearch.index.mapper.AbstractShapeGeometryFieldMapper;
36+
import org.elasticsearch.index.mapper.BlockLoader;
3637
import org.elasticsearch.index.mapper.DocumentParserContext;
3738
import org.elasticsearch.index.mapper.DocumentParsingException;
3839
import org.elasticsearch.index.mapper.FieldMapper;
@@ -46,7 +47,6 @@
4647
import org.elasticsearch.legacygeo.builders.ShapeBuilder;
4748
import org.elasticsearch.legacygeo.parsers.ShapeParser;
4849
import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor;
49-
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
5050
import org.elasticsearch.xcontent.XContentBuilder;
5151
import org.elasticsearch.xcontent.XContentParser;
5252
import org.locationtech.spatial4j.shape.Point;
@@ -84,6 +84,7 @@
8484
* "field" : "POLYGON ((100.0 0.0, 101.0 0.0, 101.0 1.0, 100.0 1.0, 100.0 0.0))
8585
*
8686
* @deprecated use the field mapper in the spatial module
87+
* TODO: Remove this class once we no longer need to supported reading 7.x indices that might have this field type
8788
*/
8889
@Deprecated
8990
public class LegacyGeoShapeFieldMapper extends AbstractShapeGeometryFieldMapper<ShapeBuilder<?, ?, ?>> {
@@ -533,14 +534,9 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) {
533534
}
534535

535536
@Override
536-
protected boolean isBoundsExtractionSupported() {
537-
// Extracting bounds for geo shapes is not implemented yet.
538-
return false;
539-
}
540-
541-
@Override
542-
protected CoordinateEncoder coordinateEncoder() {
543-
return CoordinateEncoder.GEO;
537+
public BlockLoader blockLoader(BlockLoaderContext blContext) {
538+
// Legacy geo-shapes do not support doc-values, we can only lead from source in ES|QL
539+
return blockLoaderFromSource(blContext);
544540
}
545541
}
546542

server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,6 @@ protected Object parseSourceValue(Object value) {
180180
};
181181
}
182182

183-
@Override
184-
public BlockLoader blockLoader(BlockLoaderContext blContext) {
185-
// Currently we can only load from source in ESQL
186-
return blockLoaderFromSource(blContext);
187-
}
188-
189183
protected BlockLoader blockLoaderFromSource(BlockLoaderContext blContext) {
190184
ValueFetcher fetcher = valueFetcher(blContext.sourcePaths(name()), nullValue, GeometryFormatterFactory.WKB);
191185
// TODO consider optimization using BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name())

server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java

Lines changed: 21 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,12 @@
1010

1111
import org.apache.lucene.index.BinaryDocValues;
1212
import org.apache.lucene.index.LeafReaderContext;
13-
import org.apache.lucene.util.BytesRef;
1413
import org.elasticsearch.common.Explicit;
1514
import org.elasticsearch.common.geo.Orientation;
16-
import org.elasticsearch.geometry.Rectangle;
17-
import org.elasticsearch.geometry.utils.WellKnownBinary;
18-
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
15+
import org.elasticsearch.lucene.spatial.Extent;
1916
import org.elasticsearch.lucene.spatial.GeometryDocValueReader;
2017

2118
import java.io.IOException;
22-
import java.nio.ByteOrder;
2319
import java.util.Map;
2420
import java.util.function.Function;
2521

@@ -75,29 +71,27 @@ public Orientation orientation() {
7571

7672
@Override
7773
protected Object nullValueAsSource(T nullValue) {
78-
// we don't support null value fors shapes
74+
// we don't support null value for shapes
7975
return nullValue;
8076
}
8177

82-
@Override
83-
public BlockLoader blockLoader(BlockLoaderContext blContext) {
84-
return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported()
85-
? new BoundsBlockLoader(name(), coordinateEncoder())
86-
: blockLoaderFromSource(blContext);
87-
}
88-
89-
protected abstract boolean isBoundsExtractionSupported();
90-
91-
protected abstract CoordinateEncoder coordinateEncoder();
92-
93-
// Visible for testing
94-
static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
78+
protected static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
9579
private final String fieldName;
96-
private final CoordinateEncoder encoder;
9780

98-
BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) {
81+
protected BoundsBlockLoader(String fieldName) {
9982
this.fieldName = fieldName;
100-
this.encoder = encoder;
83+
}
84+
85+
protected void writeExtent(BlockLoader.IntBuilder builder, Extent extent) {
86+
// We store the 6 values as a single multi-valued field, in the same order as the fields in the Extent class
87+
builder.beginPositionEntry();
88+
builder.appendInt(extent.top);
89+
builder.appendInt(extent.bottom);
90+
builder.appendInt(extent.negLeft);
91+
builder.appendInt(extent.negRight);
92+
builder.appendInt(extent.posLeft);
93+
builder.appendInt(extent.posRight);
94+
builder.endPositionEntry();
10195
}
10296

10397
@Override
@@ -107,7 +101,7 @@ public BlockLoader.AllReader reader(LeafReaderContext context) throws IOExceptio
107101
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException {
108102
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
109103
var reader = new GeometryDocValueReader();
110-
try (var builder = factory.bytesRefs(docs.count())) {
104+
try (var builder = factory.ints(docs.count())) {
111105
for (int i = 0; i < docs.count(); i++) {
112106
read(binaryDocValues, docs.get(i), reader, builder);
113107
}
@@ -119,27 +113,17 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs
119113
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
120114
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
121115
var reader = new GeometryDocValueReader();
122-
read(binaryDocValues, docId, reader, (BytesRefBuilder) builder);
116+
read(binaryDocValues, docId, reader, (IntBuilder) builder);
123117
}
124118

125-
private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder)
119+
private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, IntBuilder builder)
126120
throws IOException {
127121
if (binaryDocValues.advanceExact(doc) == false) {
128122
builder.appendNull();
129123
return;
130124
}
131125
reader.reset(binaryDocValues.binaryValue());
132-
var extent = reader.getExtent();
133-
// This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to
134-
// preserve its properties as a WKB shape, only to convert it back to ints when we compute the
135-
// aggregation. An obvious optimization would be to avoid this back-and-forth conversion.
136-
var rectangle = new Rectangle(
137-
encoder.decodeX(extent.minX()),
138-
encoder.decodeX(extent.maxX()),
139-
encoder.decodeY(extent.maxY()),
140-
encoder.decodeY(extent.minY())
141-
);
142-
builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN)));
126+
writeExtent(builder, reader.getExtent());
143127
}
144128

145129
@Override
@@ -151,7 +135,7 @@ public boolean canReuse(int startingDocID) {
151135

152136
@Override
153137
public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) {
154-
return factory.bytesRefs(expectedCount);
138+
return factory.ints(expectedCount);
155139
}
156140
}
157141
}

0 commit comments

Comments
 (0)