Skip to content

Commit a3c526e

Browse files
committed
ESQL: Make field fusion generic
Speeds up queries like ``` FROM foo | STATS SUM(LENGTH(field)) ``` by fusing the `LENGTH` into the loading of the `field` if it has doc values. Running a fairly simple test (https://gist.github.com/nik9000/9dac067f8ce29875a4fb0f0359a75091), I'm seeing that query drop from 48ms to 28ms, which is roughly a 40% reduction in run time. More importantly, this makes the mechanism for fusing functions into field loading generic: all you have to do is implement `BlockLoaderExpression` on your expression and return non-null from `tryFuse`.
1 parent 1f29688 commit a3c526e

File tree

19 files changed

+173
-98
lines changed

19 files changed

+173
-98
lines changed

server/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,4 +501,5 @@
501501
exports org.elasticsearch.index.codec.vectors.es93 to org.elasticsearch.test.knn;
502502
exports org.elasticsearch.search.crossproject;
503503
exports org.elasticsearch.index.mapper.blockloader.docvalues;
504+
exports org.elasticsearch.index.mapper.blockloader;
504505
}

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@
5555
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
5656
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
5757
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
58+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
5859
import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromOrdsBlockLoader;
60+
import org.elasticsearch.index.mapper.blockloader.docvalues.Utf8CodePointsFromOrdsBlockLoader;
5961
import org.elasticsearch.index.query.AutomatonQueryWithDescription;
6062
import org.elasticsearch.index.query.SearchExecutionContext;
6163
import org.elasticsearch.index.similarity.SimilarityProvider;
@@ -813,10 +815,19 @@ NamedAnalyzer normalizer() {
813815
@Override
814816
public BlockLoader blockLoader(BlockLoaderContext blContext) {
815817
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
816-
return new BytesRefsFromOrdsBlockLoader(name());
818+
return switch (blContext.blockLoaderFunctionConfig()) {
819+
case null -> new BytesRefsFromOrdsBlockLoader(name());
820+
case BlockLoaderFunctionConfig.Named named -> switch (named.name()) {
821+
case "LENGTH" -> new Utf8CodePointsFromOrdsBlockLoader(named.warnings(), name());
822+
default -> throw new UnsupportedOperationException("unknown fusion config [" + named.name() + "]");
823+
};
824+
default -> throw new UnsupportedOperationException(
825+
"unknown fusion config [" + blContext.blockLoaderFunctionConfig() + "]"
826+
);
827+
};
817828
}
818-
if (isStored()) {
819-
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
829+
if (blContext.blockLoaderFunctionConfig() != null) {
830+
throw new UnsupportedOperationException("function fusing only supported for doc values");
820831
}
821832

822833
// Multi fields don't have fallback synthetic source.

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.index.IndexSettings;
3636
import org.elasticsearch.index.fielddata.FieldDataContext;
3737
import org.elasticsearch.index.fielddata.IndexFieldData;
38+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
3839
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
3940
import org.elasticsearch.index.query.QueryRewriteContext;
4041
import org.elasticsearch.index.query.QueryShardException;
@@ -710,11 +711,4 @@ default BlockLoaderFunctionConfig blockLoaderFunctionConfig() {
710711
}
711712
}
712713

713-
/**
714-
* Marker interface that contains the configuration needed to transform loaded values into blocks.
715-
* Is retrievable from the {@link BlockLoaderContext}. The {@link MappedFieldType} can use this configuration to choose the appropriate
716-
* implementation for transforming loaded values into blocks.
717-
*/
718-
public interface BlockLoaderFunctionConfig {}
719-
720714
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper.blockloader;
11+
12+
import org.elasticsearch.index.mapper.MappedFieldType;
13+
14+
/**
15+
* Configuration needed to transform loaded values into blocks.
16+
* {@link MappedFieldType}s will find me in
17+
* {@link MappedFieldType.BlockLoaderContext#blockLoaderFunctionConfig()} and
18+
* use this configuration to choose the appropriate implementation for
19+
* transforming loaded values into blocks.
20+
*/
21+
public interface BlockLoaderFunctionConfig {
22+
record Named(String name, Warnings warnings) implements BlockLoaderFunctionConfig {}
23+
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.mapper.blockloader.docvalues;
10+
package org.elasticsearch.index.mapper.blockloader;
1111

1212
/**
1313
* Warnings returned when loading values for ESQL. These are returned as HTTP 299 headers like so:
@@ -17,7 +17,7 @@
1717
* < Warning: 299 Elasticsearch-${ver} "Line 1:27: java.lang.IllegalArgumentException: single-value function encountered multi-value"
1818
* }</pre>
1919
*/
20-
interface Warnings {
20+
public interface Warnings {
2121
/**
2222
* Register a warning. ESQL deduplicates and limits the number of warnings returned so it should
2323
* be fine to blast as many warnings into this as you encounter.

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
import org.apache.lucene.util.BytesRef;
1717
import org.apache.lucene.util.RamUsageEstimator;
1818
import org.apache.lucene.util.UnicodeUtil;
19+
import org.elasticsearch.index.mapper.blockloader.Warnings;
1920

2021
import java.io.IOException;
2122
import java.util.Arrays;
2223

23-
import static org.elasticsearch.index.mapper.blockloader.docvalues.Warnings.registerSingleValueWarning;
24+
import static org.elasticsearch.index.mapper.blockloader.Warnings.registerSingleValueWarning;
2425

2526
/**
2627
* A count of utf-8 code points for {@code keyword} style fields that are stored as a lookup table.

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
import org.elasticsearch.index.fielddata.IndexFieldData;
6060
import org.elasticsearch.index.mapper.ArraySourceValueFetcher;
6161
import org.elasticsearch.index.mapper.BlockLoader;
62+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
6263
import org.elasticsearch.index.mapper.BlockSourceReader;
6364
import org.elasticsearch.index.mapper.DocumentParserContext;
6465
import org.elasticsearch.index.mapper.FieldMapper;
@@ -3173,10 +3174,10 @@ public interface SimilarityFunction {
31733174
}
31743175

31753176
/**
3176-
* Configuration for a {@link MappedFieldType.BlockLoaderFunctionConfig} that calculates vector similarity.
3177+
* Configuration for a {@link BlockLoaderFunctionConfig} that calculates vector similarity.
31773178
* Functions that use this config should use SIMILARITY_FUNCTION_NAME as their name.
31783179
*/
3179-
public static class VectorSimilarityFunctionConfig implements MappedFieldType.BlockLoaderFunctionConfig {
3180+
public static class VectorSimilarityFunctionConfig implements BlockLoaderFunctionConfig {
31803181

31813182
private final SimilarityFunction similarityFunction;
31823183
private final float[] vector;
@@ -3185,7 +3186,6 @@ public static class VectorSimilarityFunctionConfig implements MappedFieldType.Bl
31853186
public VectorSimilarityFunctionConfig(SimilarityFunction similarityFunction, float[] vector) {
31863187
this.similarityFunction = similarityFunction;
31873188
this.vector = vector;
3188-
31893189
}
31903190

31913191
/**

server/src/test/java/org/elasticsearch/index/mapper/blockloader/docvalues/MockWarnings.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99

1010
package org.elasticsearch.index.mapper.blockloader.docvalues;
1111

12+
import org.elasticsearch.index.mapper.blockloader.Warnings;
13+
1214
import java.util.ArrayList;
1315
import java.util.List;
1416

x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/FunctionEsField.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,23 @@
88
package org.elasticsearch.xpack.esql.core.type;
99

1010
import org.elasticsearch.common.io.stream.StreamOutput;
11-
import org.elasticsearch.index.mapper.MappedFieldType;
11+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
1212

1313
import java.io.IOException;
1414
import java.util.Map;
1515
import java.util.Objects;
1616

1717
/**
1818
* EsField that represents a function being applied to a field on extraction. It receives a
19-
* {@link org.elasticsearch.index.mapper.MappedFieldType.BlockLoaderFunctionConfig} that will be passed down to the block loading process
19+
* {@link BlockLoaderFunctionConfig} that will be passed down to the block loading process
2020
* to apply the function at data load time.
2121
*/
2222
public class FunctionEsField extends EsField {
2323

2424
// Not serialized as it will be created on the data node
25-
private final transient MappedFieldType.BlockLoaderFunctionConfig functionConfig;
25+
private final transient BlockLoaderFunctionConfig functionConfig;
2626

27-
public FunctionEsField(EsField esField, DataType dataType, MappedFieldType.BlockLoaderFunctionConfig functionConfig) {
27+
public FunctionEsField(EsField esField, DataType dataType, BlockLoaderFunctionConfig functionConfig) {
2828
this(
2929
esField.getName(),
3030
dataType,
@@ -43,7 +43,7 @@ private FunctionEsField(
4343
boolean aggregatable,
4444
boolean isAlias,
4545
TimeSeriesFieldType timeSeriesFieldType,
46-
MappedFieldType.BlockLoaderFunctionConfig functionConfig
46+
BlockLoaderFunctionConfig functionConfig
4747
) {
4848
super(name, esDataType, properties, aggregatable, isAlias, timeSeriesFieldType);
4949
this.functionConfig = functionConfig;
@@ -54,7 +54,7 @@ public void writeTo(StreamOutput out) throws IOException {
5454
throw new UnsupportedOperationException("FunctionEsField is not serializable, should be created on data nodes");
5555
}
5656

57-
public MappedFieldType.BlockLoaderFunctionConfig functionConfig() {
57+
public BlockLoaderFunctionConfig functionConfig() {
5858
return functionConfig;
5959
}
6060

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ShardContext.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.elasticsearch.compute.data.Block;
1212
import org.elasticsearch.core.RefCounted;
1313
import org.elasticsearch.index.mapper.BlockLoader;
14+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
1415
import org.elasticsearch.index.mapper.MappedFieldType;
1516
import org.elasticsearch.index.mapper.SourceLoader;
1617
import org.elasticsearch.search.sort.SortAndFormats;
@@ -58,7 +59,7 @@ BlockLoader blockLoader(
5859
String name,
5960
boolean asUnsupportedSource,
6061
MappedFieldType.FieldExtractPreference fieldExtractPreference,
61-
MappedFieldType.BlockLoaderFunctionConfig blockLoaderFunctionConfig
62+
BlockLoaderFunctionConfig blockLoaderFunctionConfig
6263
);
6364

6465
/**

0 commit comments

Comments
 (0)