Skip to content

Commit 2e62dab

Browse files
authored
Improve block loader for source only runtime geo_point fields (#135883)
* Improve block loader for source only runtime geo_point fields * Update docs/changelog/135883.yaml
1 parent 6af4039 commit 2e62dab

File tree

5 files changed

+255
-3
lines changed

5 files changed

+255
-3
lines changed

docs/changelog/135883.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135883
2+
summary: Improve block loader for source only runtime `geo_point` fields
3+
area: Mapping
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,15 @@ public abstract static class Parser<T> {
6666
public abstract void parse(XContentParser parser, CheckedConsumer<T, IOException> consumer, MalformedValueHandler malformedHandler)
6767
throws IOException;
6868

69-
private void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
69+
void fetchFromSource(Object sourceMap, Consumer<T> consumer) {
7070
try (XContentParser parser = wrapObject(sourceMap)) {
7171
parseFromSource(parser, consumer);
7272
} catch (IOException e) {
7373
throw new UncheckedIOException(e);
7474
}
7575
}
7676

77-
private void parseFromSource(XContentParser parser, Consumer<T> consumer) throws IOException {
77+
void parseFromSource(XContentParser parser, Consumer<T> consumer) throws IOException {
7878
parse(parser, v -> consumer.accept(normalizeFromSource(v)), NoopMalformedValueHandler.INSTANCE);
7979
}
8080

server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
569569
}
570570

571571
/** GeoPoint parser implementation */
572-
private static class GeoPointParser extends PointParser<GeoPoint> {
572+
static class GeoPointParser extends PointParser<GeoPoint> {
573573
private final boolean storeMalformedDataForSyntheticSource;
574574

575575
GeoPointParser(

server/src/main/java/org/elasticsearch/index/mapper/GeoPointScriptFieldType.java

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.apache.lucene.index.LeafReaderContext;
1616
import org.apache.lucene.search.MatchNoDocsQuery;
1717
import org.apache.lucene.search.Query;
18+
import org.apache.lucene.util.BytesRef;
1819
import org.elasticsearch.common.geo.GeoPoint;
1920
import org.elasticsearch.common.geo.GeoUtils;
2021
import org.elasticsearch.common.geo.GeometryFormatterFactory;
@@ -35,6 +36,7 @@
3536
import org.elasticsearch.search.runtime.GeoPointScriptFieldDistanceFeatureQuery;
3637
import org.elasticsearch.search.runtime.GeoPointScriptFieldExistsQuery;
3738
import org.elasticsearch.search.runtime.GeoPointScriptFieldGeoShapeQuery;
39+
import org.elasticsearch.xcontent.XContentParser;
3840

3941
import java.io.IOException;
4042
import java.time.ZoneId;
@@ -44,6 +46,8 @@
4446
import java.util.Map;
4547
import java.util.function.Function;
4648

49+
import static org.elasticsearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType.GEO_FORMATTER_FACTORY;
50+
4751
public final class GeoPointScriptFieldType extends AbstractScriptFieldType<GeoPointFieldScript.LeafFactory> implements GeoShapeQueryable {
4852

4953
public static final RuntimeField.Parser PARSER = new RuntimeField.Parser(name -> new Builder<>(name, GeoPointFieldScript.CONTEXT) {
@@ -227,4 +231,90 @@ public StoredFieldsSpec storedFieldsSpec() {
227231
}
228232
};
229233
}
234+
235+
@Override
236+
public BlockLoader blockLoader(BlockLoaderContext blContext) {
237+
FallbackSyntheticSourceBlockLoader fallbackSyntheticSourceBlockLoader = fallbackSyntheticSourceBlockLoader(
238+
blContext,
239+
BlockLoader.BlockFactory::bytesRefs,
240+
this::fallbackSyntheticSourceBlockLoaderReader
241+
);
242+
243+
if (fallbackSyntheticSourceBlockLoader != null) {
244+
return fallbackSyntheticSourceBlockLoader;
245+
}
246+
247+
// the rest is not yet supported, so call super
248+
return super.blockLoader(blContext);
249+
}
250+
251+
private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
252+
// parses Objects into GeoPoints
253+
AbstractGeometryFieldMapper.Parser<GeoPoint> geoPointParser = createGeoPointParser();
254+
255+
// WKB is how what we store geometries as in Lucene, this also matches the format defined in
256+
// AbstractGeometryFieldMapper.AbstractGeometryFieldType.GeometriesFallbackSyntheticSourceReader, which GeoPointFieldMapper uses
257+
var formatter = GEO_FORMATTER_FACTORY.getFormatter(
258+
GeometryFormatterFactory.WKB,
259+
p -> new org.elasticsearch.geometry.Point(p.getLon(), p.getLat())
260+
);
261+
262+
return new FallbackSyntheticSourceBlockLoader.Reader<GeoPoint>() {
263+
@Override
264+
public void convertValue(Object value, List<GeoPoint> accumulator) {
265+
geoPointParser.fetchFromSource(value, gp -> {
266+
if (gp != null) {
267+
accumulator.add(gp);
268+
}
269+
});
270+
}
271+
272+
@Override
273+
public void parse(XContentParser parser, List<GeoPoint> accumulator) throws IOException {
274+
// geo objects can be defined in many ways, and these are more complex compared to basic types like text or number, as a
275+
// result we must use a dedicated parser here
276+
geoPointParser.parseFromSource(parser, gp -> {
277+
if (gp != null) {
278+
accumulator.add(gp);
279+
}
280+
});
281+
}
282+
283+
@Override
284+
public void writeToBlock(List<GeoPoint> points, BlockLoader.Builder blockBuilder) {
285+
var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;
286+
List<Object> gpBytes = formatter.apply(points);
287+
288+
for (var bytes : gpBytes) {
289+
// WKB is a binary storage format for geometric data
290+
if (bytes instanceof byte[] wkb) {
291+
bytesRefBuilder.appendBytesRef(new BytesRef(wkb));
292+
}
293+
}
294+
}
295+
};
296+
}
297+
298+
private AbstractGeometryFieldMapper.Parser<GeoPoint> createGeoPointParser() {
299+
// only needed during indexing, which is not done for runtime fields
300+
boolean ignoredMalformedEnabled = false;
301+
boolean storeMalformedDataForSyntheticSource = false;
302+
303+
// we don't know how we stored things, so we need to enable this otherwise parsing can fail
304+
boolean allowMultipleValues = true;
305+
boolean ignoreZValue = true;
306+
307+
GeoPoint nullValue = null;
308+
309+
return new GeoPointFieldMapper.GeoPointParser(
310+
name(),
311+
(parser) -> GeoUtils.parseGeoPoint(parser, ignoreZValue),
312+
nullValue,
313+
ignoreZValue,
314+
ignoredMalformedEnabled,
315+
allowMultipleValues,
316+
storeMalformedDataForSyntheticSource
317+
);
318+
}
319+
230320
}

server/src/test/java/org/elasticsearch/index/mapper/GeoPointScriptFieldTypeTests.java

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.document.StoredField;
1313
import org.apache.lucene.index.DirectoryReader;
1414
import org.apache.lucene.index.LeafReaderContext;
15+
import org.apache.lucene.index.NoMergePolicy;
1516
import org.apache.lucene.search.Collector;
1617
import org.apache.lucene.search.IndexSearcher;
1718
import org.apache.lucene.search.LeafCollector;
@@ -23,7 +24,10 @@
2324
import org.apache.lucene.tests.index.RandomIndexWriter;
2425
import org.apache.lucene.util.BytesRef;
2526
import org.elasticsearch.common.geo.GeoPoint;
27+
import org.elasticsearch.common.geo.GeoUtils;
28+
import org.elasticsearch.common.geo.GeometryFormatterFactory;
2629
import org.elasticsearch.common.lucene.search.function.ScriptScoreQuery;
30+
import org.elasticsearch.common.settings.Settings;
2731
import org.elasticsearch.geo.GeometryTestUtils;
2832
import org.elasticsearch.index.IndexVersion;
2933
import org.elasticsearch.index.fielddata.GeoPointScriptFieldData;
@@ -37,19 +41,38 @@
3741
import org.elasticsearch.script.ScriptFactory;
3842
import org.elasticsearch.script.ScriptType;
3943
import org.elasticsearch.search.MultiValueMode;
44+
import org.elasticsearch.search.lookup.SearchLookup;
4045
import org.elasticsearch.search.lookup.Source;
4146

4247
import java.io.IOException;
4348
import java.util.ArrayList;
49+
import java.util.Arrays;
4450
import java.util.List;
4551
import java.util.Map;
52+
import java.util.function.Function;
4653

4754
import static java.util.Collections.emptyMap;
55+
import static org.elasticsearch.index.mapper.GeoPointFieldMapper.GeoPointFieldType.GEO_FORMATTER_FACTORY;
4856
import static org.hamcrest.Matchers.containsInAnyOrder;
4957
import static org.hamcrest.Matchers.equalTo;
58+
import static org.hamcrest.Matchers.instanceOf;
59+
import static org.hamcrest.Matchers.nullValue;
5060

5161
public class GeoPointScriptFieldTypeTests extends AbstractNonTextScriptFieldTypeTestCase {
5262

63+
private static final GeoPoint EMPTY_POINT = null;
64+
private static final GeoPoint MALFORMED_POINT = null;
65+
66+
private static final Function<List<GeoPoint>, List<Object>> FORMATTER = GEO_FORMATTER_FACTORY.getFormatter(
67+
GeometryFormatterFactory.WKB,
68+
p -> {
69+
if (p != null) {
70+
return new org.elasticsearch.geometry.Point(p.getLon(), p.getLat());
71+
}
72+
return null;
73+
}
74+
);
75+
5376
@Override
5477
protected ScriptFactory parseFromSource() {
5578
return GeoPointFieldScript.PARSE_FROM_SOURCE;
@@ -225,6 +248,140 @@ public void testTermsQuery() {
225248

226249
}
227250

251+
public void testBlockLoaderSourceOnlyRuntimeFieldWithSyntheticSource() throws IOException {
252+
try (
253+
Directory directory = newDirectory();
254+
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))
255+
) {
256+
// given
257+
iw.addDocuments(
258+
List.of(
259+
createDocumentWithIgnoredSource("""
260+
{"type": "Point", "coordinates": [11.22, -33.44]}
261+
"""),
262+
createDocumentWithIgnoredSource("""
263+
["POINT (23.45 -56.78)"]
264+
"""),
265+
createDocumentWithIgnoredSource("""
266+
{"lat": 22.33, "lon" : -44.55}}
267+
"""),
268+
createDocumentWithIgnoredSource("""
269+
[-33.44, 55.66]
270+
"""),
271+
createDocumentWithIgnoredSource("""
272+
[-44.55, 66.77, 88.99]
273+
"""),
274+
createDocumentWithIgnoredSource("""
275+
["55.66,77.88"]
276+
"""),
277+
createDocumentWithIgnoredSource("""
278+
["drm3btev3e86"]
279+
"""),
280+
// ensure empty points can be parsed
281+
createDocumentWithIgnoredSource("""
282+
[]
283+
"""),
284+
// ensure a malformed value doesn't crash
285+
createDocumentWithIgnoredSource("""
286+
["potato", "tomato"]
287+
""")
288+
)
289+
);
290+
291+
Settings settings = Settings.builder().put("index.mapping.source.mode", "synthetic").build();
292+
GeoPointScriptFieldType fieldType = simpleSourceOnlyMappedFieldType();
293+
294+
// note, the order of coordinates here differs from the documents above. This is expected - x and y coordinates get mapped to
295+
// lon and lat differently depending on the underlying type of the geo point itself. Sometimes they match 1:1, others they swap
296+
List<Object> expectedPoints = FORMATTER.apply(
297+
Arrays.asList(
298+
new GeoPoint(-33.44, 11.22),
299+
new GeoPoint(-56.78, 23.45),
300+
new GeoPoint(22.33, -44.55),
301+
new GeoPoint(55.66, -33.44),
302+
new GeoPoint(66.77, -44.55),
303+
new GeoPoint(55.66, 77.88),
304+
new GeoPoint("drm3btev3e86")
305+
)
306+
);
307+
308+
// add these separately because the formatter doesn't like nulls
309+
expectedPoints.add(EMPTY_POINT);
310+
expectedPoints.add(MALFORMED_POINT);
311+
312+
// expected points converted to BytesRef
313+
List<BytesRef> expected = expectedPoints.stream().map(gp -> {
314+
if (gp instanceof byte[] wkb) {
315+
return new BytesRef(wkb);
316+
}
317+
return null;
318+
}).toList();
319+
320+
try (DirectoryReader reader = iw.getReader()) {
321+
// when
322+
BlockLoader loader = fieldType.blockLoader(blContext(settings, true));
323+
324+
// then
325+
326+
// assert loader is of expected instance type
327+
assertThat(loader, instanceOf(FallbackSyntheticSourceBlockLoader.class));
328+
329+
// ignored source doesn't support column at a time loading:
330+
var columnAtATimeLoader = loader.columnAtATimeReader(reader.leaves().getFirst());
331+
assertThat(columnAtATimeLoader, nullValue());
332+
333+
var rowStrideReader = loader.rowStrideReader(reader.leaves().getFirst());
334+
assertThat(
335+
rowStrideReader.getClass().getName(),
336+
equalTo("org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader$IgnoredSourceRowStrideReader")
337+
);
338+
339+
// assert values
340+
assertThat(blockLoaderReadValuesFromRowStrideReader(settings, reader, fieldType, true), equalTo(expected));
341+
}
342+
}
343+
}
344+
345+
/**
346+
* Returns a source only mapped field type. This is useful, since the available build() function doesn't override isParsedFromSource()
347+
*/
348+
private GeoPointScriptFieldType simpleSourceOnlyMappedFieldType() {
349+
Script script = new Script(ScriptType.INLINE, "test", "", emptyMap());
350+
GeoPointFieldScript.Factory factory = new GeoPointFieldScript.Factory() {
351+
@Override
352+
public GeoPointFieldScript.LeafFactory newFactory(
353+
String fieldName,
354+
Map<String, Object> params,
355+
SearchLookup searchLookup,
356+
OnScriptError onScriptError
357+
) {
358+
return ctx -> new GeoPointFieldScript(fieldName, params, searchLookup, onScriptError, ctx) {
359+
@Override
360+
@SuppressWarnings("unchecked")
361+
public void execute() {
362+
Map<String, Object> source = (Map<String, Object>) this.getParams().get("_source");
363+
for (Object foo : (List<?>) source.get("test")) {
364+
try {
365+
// ignore the Z coordinate because we don't care about it anyway
366+
// this conversion matches GeoPointFieldScript.emitPoint()
367+
GeoPoint gp = GeoUtils.parseGeoPoint(foo, true);
368+
emit(gp.lat(), gp.lon());
369+
} catch (Exception e) {
370+
// skip
371+
}
372+
}
373+
}
374+
};
375+
}
376+
377+
@Override
378+
public boolean isParsedFromSource() {
379+
return true;
380+
}
381+
};
382+
return new GeoPointScriptFieldType("test", factory, script, emptyMap(), OnScriptError.FAIL);
383+
}
384+
228385
@Override
229386
protected Query randomTermsQuery(MappedFieldType ft, SearchExecutionContext ctx) {
230387
return ft.termsQuery(randomList(100, GeometryTestUtils::randomPoint), mockContext());

0 commit comments

Comments
 (0)