Skip to content

Commit 81097fe

Browse files
Kubik42elasticsearchmachine
andauthored
Fixed geo point block loader slowness (elastic#136147) (elastic#136909)
* Fixed geo point block loader slowness * Fixed precision issue * Addressed feedback * Reverted TestBlock changes * [CI] Auto commit changes from spotless * Replaced bytesRefsFromDocValues with bytesRefs * Fixed geo point block loader slowness * Revert "Fixed geo point block loader slowness" This reverts commit cac43c4. * Fixed failing tests after rebasing against main * Update docs/changelog/136147.yaml * Addressed feedback, added an explanation about bytesRefsFromDocValues() * [CI] Auto commit changes from spotless --------- (cherry picked from commit 0dd761b) Co-authored-by: elasticsearchmachine <[email protected]>
1 parent d68b945 commit 81097fe

File tree

5 files changed

+320
-38
lines changed

5 files changed

+320
-38
lines changed

docs/changelog/136147.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 136147
2+
summary: Fixed geo point block loader slowness
3+
area: Mapping
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

Lines changed: 108 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import org.apache.lucene.geo.LatLonGeometry;
1919
import org.apache.lucene.index.DocValuesType;
2020
import org.apache.lucene.index.LeafReaderContext;
21+
import org.apache.lucene.index.SortedNumericDocValues;
2122
import org.apache.lucene.search.IndexOrDocValuesQuery;
2223
import org.apache.lucene.search.Query;
2324
import org.apache.lucene.util.BytesRef;
@@ -34,6 +35,7 @@
3435
import org.elasticsearch.core.CheckedConsumer;
3536
import org.elasticsearch.core.CheckedFunction;
3637
import org.elasticsearch.geometry.Point;
38+
import org.elasticsearch.geometry.utils.WellKnownBinary;
3739
import org.elasticsearch.index.IndexMode;
3840
import org.elasticsearch.index.IndexVersion;
3941
import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -62,6 +64,7 @@
6264

6365
import java.io.IOException;
6466
import java.io.UncheckedIOException;
67+
import java.nio.ByteOrder;
6568
import java.util.Collections;
6669
import java.util.List;
6770
import java.util.Map;
@@ -70,6 +73,7 @@
7073
import java.util.function.Function;
7174

7275
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
76+
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE;
7377

7478
/**
7579
* Field Mapper for geo_point types.
@@ -387,7 +391,7 @@ public static class GeoPointFieldType extends AbstractPointFieldType<GeoPoint> i
387391
private final IndexMode indexMode;
388392
private final boolean isSyntheticSource;
389393

390-
private GeoPointFieldType(
394+
GeoPointFieldType(
391395
String name,
392396
boolean indexed,
393397
boolean stored,
@@ -545,28 +549,119 @@ public TimeSeriesParams.MetricType getMetricType() {
545549

546550
@Override
547551
public BlockLoader blockLoader(BlockLoaderContext blContext) {
548-
if (blContext.fieldExtractPreference() == DOC_VALUES && hasDocValues()) {
549-
return new BlockDocValuesReader.LongsBlockLoader(name());
552+
// load from doc values
553+
if (hasDocValues()) {
554+
if (blContext.fieldExtractPreference() == DOC_VALUES) {
555+
return new BlockDocValuesReader.LongsBlockLoader(name());
556+
} else if (blContext.fieldExtractPreference() == NONE && isSyntheticSource) {
557+
// when the preference is not explicitly set to DOC_VALUES, we expect a BytesRef -> see PlannerUtils.toElementType()
558+
return new BytesRefFromLongsBlockLoader(name());
559+
}
560+
// if we got here, then either synthetic source is not enabled or the preference prohibits us from using doc_values
550561
}
551562

552-
// There are two scenarios possible once we arrive here:
553-
//
554-
// * Stored source - we'll just use blockLoaderFromSource
555-
// * Synthetic source. However, because of the fieldExtractPreference() check above it is still possible that doc_values are
556-
// present here.
557-
// So we have two subcases:
558-
// - doc_values are enabled - _ignored_source field does not exist since we have doc_values. We will use
559-
// blockLoaderFromSource which reads "native" synthetic source.
560-
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader unless it's a multi
561-
// field.
563+
// doc_values are disabled, fallback to ignored_source, except for multi fields since then don't have fallback synthetic source
562564
if (isSyntheticSource && hasDocValues() == false && blContext.parentField(name()) == null) {
563565
return blockLoaderFromFallbackSyntheticSource(blContext);
564566
}
565567

568+
// otherwise, load from _source (synthetic or otherwise) - very slow
566569
return blockLoaderFromSource(blContext);
567570
}
568571
}
569572

573+
/**
574+
* This is a GeoPoint-specific block loader that helps deal with an edge case where doc_values are available, yet
575+
* FieldExtractPreference = NONE. When this happens, the BlockLoader sanity checker (see PlannerUtils.toElementType) expects a BytesRef.
576+
* This implies that we need to load the value from _source. This however is very slow, especially when synthetic source is enabled.
577+
* We're better off reading from doc_values and converting to BytesRef to satisfy the checker. This is what this block loader is for.
578+
*/
579+
static final class BytesRefFromLongsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
580+
581+
private final String fieldName;
582+
583+
BytesRefFromLongsBlockLoader(String fieldName) {
584+
this.fieldName = fieldName;
585+
}
586+
587+
@Override
588+
public Builder builder(BlockFactory factory, int expectedCount) {
589+
return factory.bytesRefs(expectedCount);
590+
}
591+
592+
@Override
593+
public AllReader reader(LeafReaderContext context) throws IOException {
594+
SortedNumericDocValues docValues = context.reader().getSortedNumericDocValues(fieldName);
595+
if (docValues != null) {
596+
return new BytesRefsFromLong(docValues, (geoPointLong) -> {
597+
GeoPoint gp = new GeoPoint().resetFromEncoded(geoPointLong);
598+
byte[] wkb = WellKnownBinary.toWKB(new Point(gp.getX(), gp.getY()), ByteOrder.LITTLE_ENDIAN);
599+
return new BytesRef(wkb);
600+
});
601+
}
602+
return new ConstantNullsReader();
603+
}
604+
}
605+
606+
private static final class BytesRefsFromLong extends BlockDocValuesReader {
607+
608+
private final SortedNumericDocValues numericDocValues;
609+
private final Function<Long, BytesRef> longsToBytesRef;
610+
611+
BytesRefsFromLong(SortedNumericDocValues numericDocValues, Function<Long, BytesRef> longsToBytesRef) {
612+
this.numericDocValues = numericDocValues;
613+
this.longsToBytesRef = longsToBytesRef;
614+
}
615+
616+
@Override
617+
protected int docId() {
618+
return numericDocValues.docID();
619+
}
620+
621+
@Override
622+
public String toString() {
623+
return "BlockDocValuesReader.BytesRefsFromLong";
624+
}
625+
626+
@Override
627+
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered)
628+
throws IOException {
629+
630+
try (BlockLoader.BytesRefBuilder builder = factory.bytesRefs(docs.count() - offset)) {
631+
for (int i = offset; i < docs.count(); i++) {
632+
int doc = docs.get(i);
633+
read(doc, builder);
634+
}
635+
return builder.build();
636+
}
637+
}
638+
639+
@Override
640+
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
641+
read(docId, (BlockLoader.BytesRefBuilder) builder);
642+
}
643+
644+
private void read(int doc, BlockLoader.BytesRefBuilder builder) throws IOException {
645+
// no more values remaining
646+
if (numericDocValues.advanceExact(doc) == false) {
647+
builder.appendNull();
648+
return;
649+
}
650+
int count = numericDocValues.docValueCount();
651+
if (count == 1) {
652+
BytesRef bytesRefValue = longsToBytesRef.apply(numericDocValues.nextValue());
653+
builder.appendBytesRef(bytesRefValue);
654+
return;
655+
}
656+
builder.beginPositionEntry();
657+
for (int v = 0; v < count; v++) {
658+
BytesRef bytesRefValue = longsToBytesRef.apply(numericDocValues.nextValue());
659+
builder.appendBytesRef(bytesRefValue);
660+
}
661+
builder.endPositionEntry();
662+
}
663+
}
664+
570665
/** GeoPoint parser implementation */
571666
private static class GeoPointParser extends PointParser<GeoPoint> {
572667
private final boolean storeMalformedDataForSyntheticSource;

server/src/test/java/org/elasticsearch/index/mapper/GeoPointFieldTypeTests.java

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,28 @@
1010
package org.elasticsearch.index.mapper;
1111

1212
import org.apache.lucene.tests.geo.GeoTestUtil;
13+
import org.elasticsearch.cluster.metadata.IndexMetadata;
1314
import org.elasticsearch.common.geo.GeoPoint;
1415
import org.elasticsearch.common.geo.SimpleFeatureFactory;
16+
import org.elasticsearch.common.settings.Settings;
1517
import org.elasticsearch.geometry.Point;
1618
import org.elasticsearch.geometry.utils.WellKnownBinary;
19+
import org.elasticsearch.index.IndexMode;
20+
import org.elasticsearch.index.IndexSettings;
1721
import org.elasticsearch.index.IndexVersion;
1822
import org.elasticsearch.script.ScriptCompiler;
1923

2024
import java.io.IOException;
2125
import java.nio.ByteOrder;
2226
import java.util.ArrayList;
27+
import java.util.Collections;
2328
import java.util.List;
2429
import java.util.Map;
2530

2631
import static org.hamcrest.Matchers.equalTo;
32+
import static org.hamcrest.Matchers.instanceOf;
33+
import static org.mockito.Mockito.doReturn;
34+
import static org.mockito.Mockito.mock;
2735

2836
public class GeoPointFieldTypeTests extends FieldTypeTestCase {
2937

@@ -147,4 +155,143 @@ public void testFetchVectorTile() throws IOException {
147155
assertThat(sourceValue.size(), equalTo(1));
148156
assertThat(sourceValue.get(0), equalTo(featureFactory.points(geoPoints)));
149157
}
158+
159+
public void testBlockLoaderWhenDocValuesAreEnabledAndThePreferenceIsToUseDocValues() {
160+
// given
161+
GeoPointFieldMapper.GeoPointFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("potato");
162+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.DOC_VALUES);
163+
164+
// when
165+
BlockLoader loader = fieldType.blockLoader(blContextMock);
166+
167+
// then
168+
// verify that we use the correct block value reader
169+
assertThat(loader, instanceOf(BlockDocValuesReader.LongsBlockLoader.class));
170+
}
171+
172+
public void testBlockLoaderWhenDocValuesAreEnabledAndThereIsNoPreference() {
173+
// given
174+
GeoPointFieldMapper.GeoPointFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("potato");
175+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.NONE);
176+
177+
// when
178+
BlockLoader loader = fieldType.blockLoader(blContextMock);
179+
180+
// then
181+
// verify that we use the correct block value reader
182+
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
183+
}
184+
185+
public void testBlockLoaderWhenFieldIsStoredAndThePreferenceIsToUseStoredFields() {
186+
// given
187+
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(true, false, false);
188+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.STORED);
189+
190+
// when
191+
BlockLoader loader = fieldType.blockLoader(blContextMock);
192+
193+
// then
194+
// verify that we use the correct block value reader
195+
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
196+
}
197+
198+
public void testBlockLoaderWhenFieldIsStoredAndThereIsNoPreference() {
199+
// given
200+
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(true, false, false);
201+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.NONE);
202+
203+
// when
204+
BlockLoader loader = fieldType.blockLoader(blContextMock);
205+
206+
// then
207+
// verify that we use the correct block value reader
208+
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
209+
}
210+
211+
public void testBlockLoaderWhenSyntheticSourceIsEnabledAndFieldIsStoredInIgnoredSource() {
212+
// given
213+
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(false, false, true);
214+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(true, MappedFieldType.FieldExtractPreference.NONE);
215+
216+
// when
217+
BlockLoader loader = fieldType.blockLoader(blContextMock);
218+
219+
// then
220+
// verify that we use the correct block value reader
221+
assertThat(loader, instanceOf(FallbackSyntheticSourceBlockLoader.class));
222+
}
223+
224+
public void testBlockLoaderWhenSyntheticSourceAndDocValuesAreEnabled() {
225+
// given
226+
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(false, true, true);
227+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(true, MappedFieldType.FieldExtractPreference.NONE);
228+
229+
// when
230+
BlockLoader loader = fieldType.blockLoader(blContextMock);
231+
232+
// then
233+
// verify that we use the correct block value reader
234+
assertThat(loader, instanceOf(GeoPointFieldMapper.BytesRefFromLongsBlockLoader.class));
235+
}
236+
237+
public void testBlockLoaderFallsBackToSource() {
238+
// given
239+
GeoPointFieldMapper.GeoPointFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("potato");
240+
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(
241+
false,
242+
MappedFieldType.FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS
243+
);
244+
245+
// when
246+
BlockLoader loader = fieldType.blockLoader(blContextMock);
247+
248+
// then
249+
// verify that we use the correct block value reader
250+
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
251+
}
252+
253+
private MappedFieldType.BlockLoaderContext mockBlockLoaderContext(
254+
boolean enableSyntheticSource,
255+
MappedFieldType.FieldExtractPreference fieldExtractPreference
256+
) {
257+
Settings.Builder builder = Settings.builder()
258+
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
259+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
260+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1);
261+
262+
if (enableSyntheticSource) {
263+
builder.put("index.mapping.source.mode", "synthetic");
264+
}
265+
266+
Settings settings = builder.build();
267+
268+
IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(builder).build(), settings);
269+
270+
MappedFieldType.BlockLoaderContext blContextMock = mock(MappedFieldType.BlockLoaderContext.class);
271+
doReturn(fieldExtractPreference).when(blContextMock).fieldExtractPreference();
272+
doReturn(indexSettings).when(blContextMock).indexSettings();
273+
274+
return blContextMock;
275+
}
276+
277+
private GeoPointFieldMapper.GeoPointFieldType createFieldType(
278+
boolean isStored,
279+
boolean hasDocValues,
280+
boolean isSyntheticSourceEnabled
281+
) {
282+
return new GeoPointFieldMapper.GeoPointFieldType(
283+
"potato",
284+
true,
285+
isStored,
286+
hasDocValues,
287+
null,
288+
null,
289+
null,
290+
Collections.emptyMap(),
291+
TimeSeriesParams.MetricType.COUNTER,
292+
IndexMode.LOGSDB,
293+
isSyntheticSourceEnabled
294+
);
295+
}
296+
150297
}

0 commit comments

Comments
 (0)