Skip to content

Commit ef6372a

Browse files
authored
ESQL: ST_EXTENT_AGG optimize envelope extraction from doc-values for Cartesian_shape (#118802)
When we cartesian shapes to Lucene, we encode additional information in the binary, including the extent, so we can read the extent directly from the binary encoding instead of (re-)computing it per shape, and replace the (possibly very complicated) shape with a rectangle. At the moment, this is only done for Cartesian shapes, since for Geo shapes, we need to take dateline wrapping into account, which means it can't be directly encoded as a rectangle. We will deal with geo shapes in a future PR.
1 parent 3ab612f commit ef6372a

File tree

28 files changed

+872
-231
lines changed

28 files changed

+872
-231
lines changed

docs/changelog/118802.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 118802
2+
summary: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.elasticsearch.legacygeo.builders.ShapeBuilder;
4747
import org.elasticsearch.legacygeo.parsers.ShapeParser;
4848
import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor;
49+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
4950
import org.elasticsearch.xcontent.XContentBuilder;
5051
import org.elasticsearch.xcontent.XContentParser;
5152
import org.locationtech.spatial4j.shape.Point;
@@ -530,6 +531,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) {
530531
protected Function<List<ShapeBuilder<?, ?, ?>>, List<Object>> getFormatter(String format) {
531532
return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry);
532533
}
534+
535+
@Override
536+
protected boolean isBoundsExtractionSupported() {
537+
// Extracting bounds for geo shapes is not implemented yet.
538+
return false;
539+
}
540+
541+
@Override
542+
protected CoordinateEncoder coordinateEncoder() {
543+
return CoordinateEncoder.GEO;
544+
}
533545
}
534546

535547
private final IndexVersion indexCreatedVersion;

server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,18 @@
88
*/
99
package org.elasticsearch.index.mapper;
1010

11+
import org.apache.lucene.index.BinaryDocValues;
12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.util.BytesRef;
1114
import org.elasticsearch.common.Explicit;
1215
import org.elasticsearch.common.geo.Orientation;
16+
import org.elasticsearch.geometry.Rectangle;
17+
import org.elasticsearch.geometry.utils.WellKnownBinary;
18+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
19+
import org.elasticsearch.lucene.spatial.GeometryDocValueReader;
1320

21+
import java.io.IOException;
22+
import java.nio.ByteOrder;
1423
import java.util.Map;
1524
import java.util.function.Function;
1625

@@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) {
6978
// we don't support null value fors shapes
7079
return nullValue;
7180
}
81+
82+
@Override
83+
public BlockLoader blockLoader(BlockLoaderContext blContext) {
84+
return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported()
85+
? new BoundsBlockLoader(name(), coordinateEncoder())
86+
: blockLoaderFromSource(blContext);
87+
}
88+
89+
protected abstract boolean isBoundsExtractionSupported();
90+
91+
protected abstract CoordinateEncoder coordinateEncoder();
92+
93+
// Visible for testing
94+
static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
95+
private final String fieldName;
96+
private final CoordinateEncoder encoder;
97+
98+
BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) {
99+
this.fieldName = fieldName;
100+
this.encoder = encoder;
101+
}
102+
103+
@Override
104+
public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException {
105+
return new BlockLoader.AllReader() {
106+
@Override
107+
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException {
108+
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
109+
var reader = new GeometryDocValueReader();
110+
try (var builder = factory.bytesRefs(docs.count())) {
111+
for (int i = 0; i < docs.count(); i++) {
112+
read(binaryDocValues, docs.get(i), reader, builder);
113+
}
114+
return builder.build();
115+
}
116+
}
117+
118+
@Override
119+
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
120+
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
121+
var reader = new GeometryDocValueReader();
122+
read(binaryDocValues, docId, reader, (BytesRefBuilder) builder);
123+
}
124+
125+
private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder)
126+
throws IOException {
127+
binaryDocValues.advanceExact(doc);
128+
reader.reset(binaryDocValues.binaryValue());
129+
var extent = reader.getExtent();
130+
// This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to
131+
// preserve its properties as a WKB shape, only to convert it back to ints when we compute the
132+
// aggregation. An obvious optimization would be to avoid this back-and-forth conversion.
133+
var rectangle = new Rectangle(
134+
encoder.decodeX(extent.minX()),
135+
encoder.decodeX(extent.maxX()),
136+
encoder.decodeY(extent.maxY()),
137+
encoder.decodeY(extent.minY())
138+
);
139+
builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN)));
140+
}
141+
142+
@Override
143+
public boolean canReuse(int startingDocID) {
144+
return true;
145+
}
146+
};
147+
}
148+
149+
@Override
150+
public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) {
151+
return factory.bytesRefs(expectedCount);
152+
}
153+
}
72154
}
73155

74156
protected Explicit<Boolean> coerce;

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,15 @@ public enum FieldExtractPreference {
677677
* Load the field from doc-values into a BlockLoader supporting doc-values.
678678
*/
679679
DOC_VALUES,
680+
/**
681+
* Loads the field by extracting the extent from the binary encoded representation
682+
*/
683+
EXTRACT_SPATIAL_BOUNDS,
680684
/**
681685
* No preference. Leave the choice of where to load the field from up to the FieldType.
682686
*/
683-
NONE
687+
NONE;
688+
684689
}
685690

686691
/**
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper;
11+
12+
import org.apache.lucene.document.Document;
13+
import org.apache.lucene.index.DirectoryReader;
14+
import org.apache.lucene.index.LeafReaderContext;
15+
import org.apache.lucene.store.Directory;
16+
import org.apache.lucene.tests.index.RandomIndexWriter;
17+
import org.apache.lucene.util.BytesRef;
18+
import org.elasticsearch.common.Strings;
19+
import org.elasticsearch.common.geo.Orientation;
20+
import org.elasticsearch.geo.GeometryTestUtils;
21+
import org.elasticsearch.geo.ShapeTestUtils;
22+
import org.elasticsearch.geometry.Geometry;
23+
import org.elasticsearch.geometry.Rectangle;
24+
import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor;
25+
import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField;
26+
import org.elasticsearch.lucene.spatial.CartesianShapeIndexer;
27+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.hamcrest.RectangleMatcher;
30+
import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher;
31+
32+
import java.io.IOException;
33+
import java.util.Optional;
34+
import java.util.function.Function;
35+
import java.util.function.Supplier;
36+
import java.util.stream.IntStream;
37+
38+
public class AbstractShapeGeometryFieldMapperTests extends ESTestCase {
39+
public void testCartesianBoundsBlockLoader() throws IOException {
40+
testBoundsBlockLoaderAux(
41+
CoordinateEncoder.CARTESIAN,
42+
() -> ShapeTestUtils.randomGeometryWithoutCircle(0, false),
43+
CartesianShapeIndexer::new,
44+
SpatialEnvelopeVisitor::visitCartesian
45+
);
46+
}
47+
48+
// TODO when we turn this optimization on for geo, this test should pass.
49+
public void ignoreTestGeoBoundsBlockLoader() throws IOException {
50+
testBoundsBlockLoaderAux(
51+
CoordinateEncoder.GEO,
52+
() -> GeometryTestUtils.randomGeometryWithoutCircle(0, false),
53+
field -> new GeoShapeIndexer(Orientation.RIGHT, field),
54+
g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP)
55+
);
56+
}
57+
58+
private void testBoundsBlockLoaderAux(
59+
CoordinateEncoder encoder,
60+
Supplier<Geometry> generator,
61+
Function<String, ShapeIndexer> indexerFactory,
62+
Function<Geometry, Optional<Rectangle>> visitor
63+
) throws IOException {
64+
var geometries = IntStream.range(0, 20).mapToObj(i -> generator.get()).toList();
65+
var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder);
66+
try (Directory directory = newDirectory()) {
67+
try (var iw = new RandomIndexWriter(random(), directory)) {
68+
for (Geometry geometry : geometries) {
69+
var shape = new BinaryShapeDocValuesField("field", encoder);
70+
shape.add(indexerFactory.apply("field").indexShape(geometry), geometry);
71+
var doc = new Document();
72+
doc.add(shape);
73+
iw.addDocument(doc);
74+
}
75+
}
76+
var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray();
77+
try (DirectoryReader reader = DirectoryReader.open(directory)) {
78+
LeafReaderContext ctx = reader.leaves().get(0);
79+
TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices));
80+
for (int i = 0; i < indices.length; i++) {
81+
var idx = indices[i];
82+
var geometry = geometries.get(idx);
83+
var geoString = geometry.toString();
84+
var geometryString = geoString.length() > 200 ? geoString.substring(0, 200) + "..." : geoString;
85+
Rectangle r = visitor.apply(geometry).get();
86+
assertThat(
87+
Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometryString),
88+
(BytesRef) block.get(i),
89+
WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder))
90+
);
91+
}
92+
}
93+
}
94+
}
95+
}

test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.elasticsearch.index.fielddata.LeafFieldData;
5252
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
5353
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
54+
import org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference;
5455
import org.elasticsearch.index.query.SearchExecutionContext;
5556
import org.elasticsearch.index.termvectors.TermVectorsService;
5657
import org.elasticsearch.index.translog.Translog;
@@ -87,8 +88,6 @@
8788
import java.util.stream.IntStream;
8889

8990
import static java.util.stream.Collectors.toList;
90-
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
91-
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE;
9291
import static org.elasticsearch.test.MapMatcher.assertMap;
9392
import static org.hamcrest.Matchers.anyOf;
9493
import static org.hamcrest.Matchers.contains;
@@ -1420,7 +1419,7 @@ public BlockReaderSupport(boolean columnAtATimeReader, MapperService mapper, Str
14201419
this(columnAtATimeReader, true, mapper, loaderFieldName);
14211420
}
14221421

1423-
private BlockLoader getBlockLoader(boolean columnReader) {
1422+
private BlockLoader getBlockLoader(FieldExtractPreference fieldExtractPreference) {
14241423
SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null);
14251424
return mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
14261425
@Override
@@ -1434,8 +1433,8 @@ public IndexSettings indexSettings() {
14341433
}
14351434

14361435
@Override
1437-
public MappedFieldType.FieldExtractPreference fieldExtractPreference() {
1438-
return columnReader ? DOC_VALUES : NONE;
1436+
public FieldExtractPreference fieldExtractPreference() {
1437+
return fieldExtractPreference;
14391438
}
14401439

14411440
@Override
@@ -1493,7 +1492,9 @@ protected final void testBlockLoader(
14931492
BlockReaderSupport blockReaderSupport,
14941493
SourceLoader sourceLoader
14951494
) throws IOException {
1496-
BlockLoader loader = blockReaderSupport.getBlockLoader(columnReader);
1495+
// EXTRACT_SPATIAL_BOUNDS is not currently supported in this test path.
1496+
var fieldExtractPreference = columnReader ? FieldExtractPreference.DOC_VALUES : FieldExtractPreference.NONE;
1497+
BlockLoader loader = blockReaderSupport.getBlockLoader(fieldExtractPreference);
14971498
Function<Object, Object> valuesConvert = loadBlockExpected(blockReaderSupport, columnReader);
14981499
if (valuesConvert == null) {
14991500
assertNull(loader);
Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
/*
22
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3-
* or more contributor license agreements. Licensed under the Elastic License
4-
* 2.0; you may not use this file except in compliance with the Elastic License
5-
* 2.0.
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
68
*/
79

8-
package org.elasticsearch.xpack.esql.expression;
10+
package org.elasticsearch.test.hamcrest;
911

10-
import org.elasticsearch.compute.aggregation.spatial.PointType;
1112
import org.elasticsearch.geometry.Rectangle;
13+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
1214
import org.hamcrest.Description;
1315
import org.hamcrest.Matchers;
1416
import org.hamcrest.TypeSafeMatcher;
@@ -19,23 +21,31 @@
1921
*/
2022
public class RectangleMatcher extends TypeSafeMatcher<Rectangle> {
2123
private final Rectangle r;
22-
private final PointType pointType;
24+
private final CoordinateEncoder coordinateEncoder;
2325
private final double error;
2426

25-
public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, PointType pointType) {
26-
return new RectangleMatcher(r, error, pointType);
27+
public static TypeSafeMatcher<Rectangle> closeTo(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
28+
return new RectangleMatcher(r, error, coordinateEncoder);
2729
}
2830

29-
private RectangleMatcher(Rectangle r, double error, PointType pointType) {
31+
private RectangleMatcher(Rectangle r, double error, CoordinateEncoder coordinateEncoder) {
3032
this.r = r;
31-
this.pointType = pointType;
33+
this.coordinateEncoder = coordinateEncoder;
3234
this.error = error;
3335
}
3436

37+
/**
38+
* Casts the rectangle coordinates to floats before comparing. Useful when working with extents which hold the coordinate data as ints.
39+
*/
40+
public static TypeSafeMatcher<Rectangle> closeToFloat(Rectangle r, double v, CoordinateEncoder encoder) {
41+
var normalized = new Rectangle((float) r.getMinX(), (float) r.getMaxX(), (float) r.getMaxY(), (float) r.getMinY());
42+
return closeTo(normalized, v, encoder);
43+
}
44+
3545
@Override
3646
protected boolean matchesSafely(Rectangle other) {
3747
// For geo bounds, longitude of (-180, 180) and (epsilon, -epsilon) are actually very close, since both encompass the entire globe.
38-
boolean wrapAroundWorkAround = pointType == PointType.GEO && r.getMinX() >= r.getMaxX();
48+
boolean wrapAroundWorkAround = coordinateEncoder == CoordinateEncoder.GEO && r.getMinX() >= r.getMaxX();
3949
boolean matchMinX = Matchers.closeTo(r.getMinX(), error).matches(other.getMinX())
4050
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX() - 180, error).matches(other.getMinX()))
4151
|| (wrapAroundWorkAround && Matchers.closeTo(r.getMinX(), error).matches(other.getMinX() - 180));
Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
/*
22
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3-
* or more contributor license agreements. Licensed under the Elastic License
4-
* 2.0; you may not use this file except in compliance with the Elastic License
5-
* 2.0.
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
68
*/
79

8-
package org.elasticsearch.xpack.esql.expression;
10+
package org.elasticsearch.test.hamcrest;
911

1012
import org.apache.lucene.util.BytesRef;
1113
import org.elasticsearch.geometry.Geometry;
@@ -23,6 +25,10 @@ public WellKnownBinaryBytesRefMatcher(Matcher<G> matcher) {
2325
this.matcher = matcher;
2426
}
2527

28+
public static <G extends Geometry> Matcher<BytesRef> encodes(TypeSafeMatcher<G> matcher) {
29+
return new WellKnownBinaryBytesRefMatcher<G>(matcher);
30+
}
31+
2632
@Override
2733
public boolean matchesSafely(BytesRef bytesRef) {
2834
return matcher.matches(fromBytesRef(bytesRef));

0 commit comments

Comments
 (0)