Skip to content

Commit 92ed93d

Browse files
authored
Merge branch 'main' into inference_metadata_fields
2 parents 4b490ee + bb0c34e commit 92ed93d

File tree

30 files changed

+922
-238
lines changed

30 files changed

+922
-238
lines changed

docs/changelog/118802.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 118802
2+
summary: ST_EXTENT_AGG optimize envelope extraction from doc-values for cartesian_shape
3+
area: "ES|QL"
4+
type: enhancement
5+
issues: []

modules/legacy-geo/src/main/java/org/elasticsearch/legacygeo/mapper/LegacyGeoShapeFieldMapper.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import org.elasticsearch.legacygeo.builders.ShapeBuilder;
4747
import org.elasticsearch.legacygeo.parsers.ShapeParser;
4848
import org.elasticsearch.legacygeo.query.LegacyGeoShapeQueryProcessor;
49+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
4950
import org.elasticsearch.xcontent.XContentBuilder;
5051
import org.elasticsearch.xcontent.XContentParser;
5152
import org.locationtech.spatial4j.shape.Point;
@@ -530,6 +531,17 @@ public PrefixTreeStrategy resolvePrefixTreeStrategy(String strategyName) {
530531
protected Function<List<ShapeBuilder<?, ?, ?>>, List<Object>> getFormatter(String format) {
531532
return GeometryFormatterFactory.getFormatter(format, ShapeBuilder::buildGeometry);
532533
}
534+
535+
@Override
536+
protected boolean isBoundsExtractionSupported() {
537+
// Extracting bounds for geo shapes is not implemented yet.
538+
return false;
539+
}
540+
541+
@Override
542+
protected CoordinateEncoder coordinateEncoder() {
543+
return CoordinateEncoder.GEO;
544+
}
533545
}
534546

535547
private final IndexVersion indexCreatedVersion;

server/src/main/java/org/elasticsearch/index/mapper/AbstractShapeGeometryFieldMapper.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,18 @@
88
*/
99
package org.elasticsearch.index.mapper;
1010

11+
import org.apache.lucene.index.BinaryDocValues;
12+
import org.apache.lucene.index.LeafReaderContext;
13+
import org.apache.lucene.util.BytesRef;
1114
import org.elasticsearch.common.Explicit;
1215
import org.elasticsearch.common.geo.Orientation;
16+
import org.elasticsearch.geometry.Rectangle;
17+
import org.elasticsearch.geometry.utils.WellKnownBinary;
18+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
19+
import org.elasticsearch.lucene.spatial.GeometryDocValueReader;
1320

21+
import java.io.IOException;
22+
import java.nio.ByteOrder;
1423
import java.util.Map;
1524
import java.util.function.Function;
1625

@@ -69,6 +78,79 @@ protected Object nullValueAsSource(T nullValue) {
6978
// we don't support null values for shapes
7079
return nullValue;
7180
}
81+
82+
@Override
83+
public BlockLoader blockLoader(BlockLoaderContext blContext) {
84+
return blContext.fieldExtractPreference() == FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS && isBoundsExtractionSupported()
85+
? new BoundsBlockLoader(name(), coordinateEncoder())
86+
: blockLoaderFromSource(blContext);
87+
}
88+
89+
protected abstract boolean isBoundsExtractionSupported();
90+
91+
protected abstract CoordinateEncoder coordinateEncoder();
92+
93+
// Visible for testing
94+
static class BoundsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
95+
private final String fieldName;
96+
private final CoordinateEncoder encoder;
97+
98+
BoundsBlockLoader(String fieldName, CoordinateEncoder encoder) {
99+
this.fieldName = fieldName;
100+
this.encoder = encoder;
101+
}
102+
103+
@Override
104+
public BlockLoader.AllReader reader(LeafReaderContext context) throws IOException {
105+
return new BlockLoader.AllReader() {
106+
@Override
107+
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs) throws IOException {
108+
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
109+
var reader = new GeometryDocValueReader();
110+
try (var builder = factory.bytesRefs(docs.count())) {
111+
for (int i = 0; i < docs.count(); i++) {
112+
read(binaryDocValues, docs.get(i), reader, builder);
113+
}
114+
return builder.build();
115+
}
116+
}
117+
118+
@Override
119+
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
120+
var binaryDocValues = context.reader().getBinaryDocValues(fieldName);
121+
var reader = new GeometryDocValueReader();
122+
read(binaryDocValues, docId, reader, (BytesRefBuilder) builder);
123+
}
124+
125+
private void read(BinaryDocValues binaryDocValues, int doc, GeometryDocValueReader reader, BytesRefBuilder builder)
126+
throws IOException {
127+
binaryDocValues.advanceExact(doc);
128+
reader.reset(binaryDocValues.binaryValue());
129+
var extent = reader.getExtent();
130+
// This is rather silly: an extent is already encoded as ints, but we convert it to Rectangle to
131+
// preserve its properties as a WKB shape, only to convert it back to ints when we compute the
132+
// aggregation. An obvious optimization would be to avoid this back-and-forth conversion.
133+
var rectangle = new Rectangle(
134+
encoder.decodeX(extent.minX()),
135+
encoder.decodeX(extent.maxX()),
136+
encoder.decodeY(extent.maxY()),
137+
encoder.decodeY(extent.minY())
138+
);
139+
builder.appendBytesRef(new BytesRef(WellKnownBinary.toWKB(rectangle, ByteOrder.LITTLE_ENDIAN)));
140+
}
141+
142+
@Override
143+
public boolean canReuse(int startingDocID) {
144+
return true;
145+
}
146+
};
147+
}
148+
149+
@Override
150+
public BlockLoader.Builder builder(BlockLoader.BlockFactory factory, int expectedCount) {
151+
return factory.bytesRefs(expectedCount);
152+
}
153+
}
72154
}
73155

74156
protected Explicit<Boolean> coerce;

server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
import org.apache.lucene.search.suggest.document.RegexCompletionQuery;
1919
import org.apache.lucene.search.suggest.document.SuggestField;
2020
import org.elasticsearch.common.ParsingException;
21+
import org.elasticsearch.common.logging.DeprecationCategory;
2122
import org.elasticsearch.common.unit.Fuzziness;
2223
import org.elasticsearch.common.util.Maps;
2324
import org.elasticsearch.index.IndexVersion;
25+
import org.elasticsearch.index.IndexVersions;
2426
import org.elasticsearch.index.analysis.AnalyzerScope;
2527
import org.elasticsearch.index.analysis.NamedAnalyzer;
2628
import org.elasticsearch.index.query.SearchExecutionContext;
@@ -207,11 +209,29 @@ public CompletionFieldMapper build(MapperBuilderContext context) {
207209

208210
private void checkCompletionContextsLimit() {
209211
if (this.contexts.getValue() != null && this.contexts.getValue().size() > COMPLETION_CONTEXTS_LIMIT) {
210-
throw new IllegalArgumentException(
211-
"Limit of completion field contexts [" + COMPLETION_CONTEXTS_LIMIT + "] has been exceeded"
212-
);
212+
if (indexVersionCreated.onOrAfter(IndexVersions.V_8_0_0)) {
213+
throw new IllegalArgumentException(
214+
"Limit of completion field contexts [" + COMPLETION_CONTEXTS_LIMIT + "] has been exceeded"
215+
);
216+
} else {
217+
deprecationLogger.warn(
218+
DeprecationCategory.MAPPINGS,
219+
"excessive_completion_contexts",
220+
"You have defined more than ["
221+
+ COMPLETION_CONTEXTS_LIMIT
222+
+ "] completion contexts"
223+
+ " in the mapping for field ["
224+
+ leafName()
225+
+ "]. "
226+
+ "The maximum allowed number of completion contexts in a mapping will be limited to "
227+
+ "["
228+
+ COMPLETION_CONTEXTS_LIMIT
229+
+ "] starting in version [8.0]."
230+
);
231+
}
213232
}
214233
}
234+
215235
}
216236

217237
public static final Set<String> ALLOWED_CONTENT_FIELD_NAMES = Set.of(

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,15 @@ public enum FieldExtractPreference {
677677
* Load the field from doc-values into a BlockLoader supporting doc-values.
678678
*/
679679
DOC_VALUES,
680+
/**
681+
* Load the field by extracting the spatial extent (bounding box) from the binary-encoded doc-values representation.
682+
*/
683+
EXTRACT_SPATIAL_BOUNDS,
680684
/**
681685
* No preference. Leave the choice of where to load the field from up to the FieldType.
682686
*/
683-
NONE
687+
NONE;
688+
684689
}
685690

686691
/**
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper;
11+
12+
import org.apache.lucene.document.Document;
13+
import org.apache.lucene.index.DirectoryReader;
14+
import org.apache.lucene.index.LeafReaderContext;
15+
import org.apache.lucene.store.Directory;
16+
import org.apache.lucene.tests.index.RandomIndexWriter;
17+
import org.apache.lucene.util.BytesRef;
18+
import org.elasticsearch.common.Strings;
19+
import org.elasticsearch.common.geo.Orientation;
20+
import org.elasticsearch.geo.GeometryTestUtils;
21+
import org.elasticsearch.geo.ShapeTestUtils;
22+
import org.elasticsearch.geometry.Geometry;
23+
import org.elasticsearch.geometry.Rectangle;
24+
import org.elasticsearch.geometry.utils.SpatialEnvelopeVisitor;
25+
import org.elasticsearch.lucene.spatial.BinaryShapeDocValuesField;
26+
import org.elasticsearch.lucene.spatial.CartesianShapeIndexer;
27+
import org.elasticsearch.lucene.spatial.CoordinateEncoder;
28+
import org.elasticsearch.test.ESTestCase;
29+
import org.elasticsearch.test.hamcrest.RectangleMatcher;
30+
import org.elasticsearch.test.hamcrest.WellKnownBinaryBytesRefMatcher;
31+
32+
import java.io.IOException;
33+
import java.util.Optional;
34+
import java.util.function.Function;
35+
import java.util.function.Supplier;
36+
import java.util.stream.IntStream;
37+
38+
public class AbstractShapeGeometryFieldMapperTests extends ESTestCase {
39+
public void testCartesianBoundsBlockLoader() throws IOException {
40+
testBoundsBlockLoaderAux(
41+
CoordinateEncoder.CARTESIAN,
42+
() -> ShapeTestUtils.randomGeometryWithoutCircle(0, false),
43+
CartesianShapeIndexer::new,
44+
SpatialEnvelopeVisitor::visitCartesian
45+
);
46+
}
47+
48+
// TODO when we turn this optimization on for geo, this test should pass.
49+
public void ignoreTestGeoBoundsBlockLoader() throws IOException {
50+
testBoundsBlockLoaderAux(
51+
CoordinateEncoder.GEO,
52+
() -> GeometryTestUtils.randomGeometryWithoutCircle(0, false),
53+
field -> new GeoShapeIndexer(Orientation.RIGHT, field),
54+
g -> SpatialEnvelopeVisitor.visitGeo(g, SpatialEnvelopeVisitor.WrapLongitude.WRAP)
55+
);
56+
}
57+
58+
private void testBoundsBlockLoaderAux(
59+
CoordinateEncoder encoder,
60+
Supplier<Geometry> generator,
61+
Function<String, ShapeIndexer> indexerFactory,
62+
Function<Geometry, Optional<Rectangle>> visitor
63+
) throws IOException {
64+
var geometries = IntStream.range(0, 20).mapToObj(i -> generator.get()).toList();
65+
var loader = new AbstractShapeGeometryFieldMapper.AbstractShapeGeometryFieldType.BoundsBlockLoader("field", encoder);
66+
try (Directory directory = newDirectory()) {
67+
try (var iw = new RandomIndexWriter(random(), directory)) {
68+
for (Geometry geometry : geometries) {
69+
var shape = new BinaryShapeDocValuesField("field", encoder);
70+
shape.add(indexerFactory.apply("field").indexShape(geometry), geometry);
71+
var doc = new Document();
72+
doc.add(shape);
73+
iw.addDocument(doc);
74+
}
75+
}
76+
var indices = IntStream.range(0, geometries.size() / 2).map(x -> x * 2).toArray();
77+
try (DirectoryReader reader = DirectoryReader.open(directory)) {
78+
LeafReaderContext ctx = reader.leaves().get(0);
79+
TestBlock block = (TestBlock) loader.reader(ctx).read(TestBlock.factory(ctx.reader().numDocs()), TestBlock.docs(indices));
80+
for (int i = 0; i < indices.length; i++) {
81+
var idx = indices[i];
82+
var geometry = geometries.get(idx);
83+
var geoString = geometry.toString();
84+
var geometryString = geoString.length() > 200 ? geoString.substring(0, 200) + "..." : geoString;
85+
Rectangle r = visitor.apply(geometry).get();
86+
assertThat(
87+
Strings.format("geometries[%d] ('%s') wasn't extracted correctly", idx, geometryString),
88+
(BytesRef) block.get(i),
89+
WellKnownBinaryBytesRefMatcher.encodes(RectangleMatcher.closeToFloat(r, 1e-3, encoder))
90+
);
91+
}
92+
}
93+
}
94+
}
95+
}

server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.elasticsearch.common.util.BigArrays;
3535
import org.elasticsearch.core.CheckedConsumer;
3636
import org.elasticsearch.index.IndexSettings;
37+
import org.elasticsearch.index.IndexVersions;
3738
import org.elasticsearch.index.analysis.AnalyzerScope;
3839
import org.elasticsearch.index.analysis.IndexAnalyzers;
3940
import org.elasticsearch.index.analysis.NamedAnalyzer;
@@ -60,6 +61,7 @@
6061
import java.util.Set;
6162
import java.util.function.Function;
6263

64+
import static org.elasticsearch.index.mapper.CompletionFieldMapper.COMPLETION_CONTEXTS_LIMIT;
6365
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
6466
import static org.hamcrest.Matchers.containsInAnyOrder;
6567
import static org.hamcrest.Matchers.containsString;
@@ -757,7 +759,7 @@ public void testLimitOfContextMappings() throws Throwable {
757759
.startObject("suggest")
758760
.field("type", "completion")
759761
.startArray("contexts");
760-
for (int i = 0; i < CompletionFieldMapper.COMPLETION_CONTEXTS_LIMIT + 1; i++) {
762+
for (int i = 0; i < COMPLETION_CONTEXTS_LIMIT + 1; i++) {
761763
mappingBuilder.startObject();
762764
mappingBuilder.field("name", Integer.toString(i));
763765
mappingBuilder.field("type", "category");
@@ -769,7 +771,7 @@ public void testLimitOfContextMappings() throws Throwable {
769771
MapperParsingException e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
770772
b.field("type", "completion");
771773
b.startArray("contexts");
772-
for (int i = 0; i < CompletionFieldMapper.COMPLETION_CONTEXTS_LIMIT + 1; i++) {
774+
for (int i = 0; i < COMPLETION_CONTEXTS_LIMIT + 1; i++) {
773775
b.startObject();
774776
b.field("name", Integer.toString(i));
775777
b.field("type", "category");
@@ -779,8 +781,29 @@ public void testLimitOfContextMappings() throws Throwable {
779781
})));
780782
assertTrue(
781783
e.getMessage(),
782-
e.getMessage()
783-
.contains("Limit of completion field contexts [" + CompletionFieldMapper.COMPLETION_CONTEXTS_LIMIT + "] has been exceeded")
784+
e.getMessage().contains("Limit of completion field contexts [" + COMPLETION_CONTEXTS_LIMIT + "] has been exceeded")
785+
);
786+
787+
// test pre-8 deprecation warnings
788+
createDocumentMapper(IndexVersions.V_7_0_0, fieldMapping(b -> {
789+
b.field("type", "completion");
790+
b.startArray("contexts");
791+
for (int i = 0; i < COMPLETION_CONTEXTS_LIMIT + 1; i++) {
792+
b.startObject();
793+
b.field("name", Integer.toString(i));
794+
b.field("type", "category");
795+
b.endObject();
796+
}
797+
b.endArray();
798+
}));
799+
assertCriticalWarnings(
800+
"You have defined more than ["
801+
+ COMPLETION_CONTEXTS_LIMIT
802+
+ "] completion contexts"
803+
+ " in the mapping for field [field]. The maximum allowed number of completion contexts in a mapping will be limited to "
804+
+ "["
805+
+ COMPLETION_CONTEXTS_LIMIT
806+
+ "] starting in version [8.0]."
784807
);
785808
}
786809

0 commit comments

Comments
 (0)