Skip to content

Commit 97f96b4

Browse files
authored
ESQL: Make field fusion generic (#137382)
Speeds up queries like ``` FROM foo | STATS SUM(LENGTH(field)) ``` by fusing the `LENGTH` into the loading of the `field` if it has doc values. Running a fairly simple test: https://gist.github.com/nik9000/9dac067f8ce29875a4fb0f0359a75091 I'm seeing that query drop from 48ms to 28ms. So, like, 40% faster. More importantly, this makes the mechanism for fusing functions into field loading generic. All you have to do is implement `BlockLoaderExpression` on your expression and return non-null from `tryFuse`.
1 parent 1f47669 commit 97f96b4

File tree

37 files changed

+1105
-144
lines changed

37 files changed

+1105
-144
lines changed

docs/changelog/137382.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 137382
2+
summary: Make field fusion generic
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

server/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,4 +501,5 @@
501501
exports org.elasticsearch.index.codec.vectors.es93 to org.elasticsearch.test.knn;
502502
exports org.elasticsearch.search.crossproject;
503503
exports org.elasticsearch.index.mapper.blockloader.docvalues;
504+
exports org.elasticsearch.index.mapper.blockloader;
504505
}

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@
5959
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
6060
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
6161
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
62+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
6263
import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromOrdsBlockLoader;
64+
import org.elasticsearch.index.mapper.blockloader.docvalues.Utf8CodePointsFromOrdsBlockLoader;
6365
import org.elasticsearch.index.query.AutomatonQueryWithDescription;
6466
import org.elasticsearch.index.query.SearchExecutionContext;
6567
import org.elasticsearch.index.similarity.SimilarityProvider;
@@ -749,7 +751,17 @@ NamedAnalyzer normalizer() {
749751
@Override
750752
public BlockLoader blockLoader(BlockLoaderContext blContext) {
751753
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
752-
return new BytesRefsFromOrdsBlockLoader(name());
754+
BlockLoaderFunctionConfig cfg = blContext.blockLoaderFunctionConfig();
755+
if (cfg == null) {
756+
return new BytesRefsFromOrdsBlockLoader(name());
757+
}
758+
if (cfg.function() == BlockLoaderFunctionConfig.Function.LENGTH) {
759+
return new Utf8CodePointsFromOrdsBlockLoader(((BlockLoaderFunctionConfig.JustWarnings) cfg).warnings(), name());
760+
}
761+
throw new UnsupportedOperationException("unknown fusion config [" + cfg.function() + "]");
762+
}
763+
if (blContext.blockLoaderFunctionConfig() != null) {
764+
throw new UnsupportedOperationException("function fusing only supported for doc values");
753765
}
754766
if (isStored()) {
755767
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
@@ -773,6 +785,14 @@ public Builder builder(BlockFactory factory, int expectedCount) {
773785
return new BlockSourceReader.BytesRefsBlockLoader(fetcher, sourceBlockLoaderLookup(blContext));
774786
}
775787

788+
@Override
789+
public boolean supportsBlockLoaderConfig(BlockLoaderFunctionConfig config, FieldExtractPreference preference) {
790+
if (hasDocValues() && (preference != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
791+
return config.function() == BlockLoaderFunctionConfig.Function.LENGTH;
792+
}
793+
return false;
794+
}
795+
776796
private FallbackSyntheticSourceBlockLoader.Reader<?> fallbackSyntheticSourceBlockLoaderReader() {
777797
var nullValueBytes = nullValue != null ? new BytesRef(nullValue) : null;
778798
return new FallbackSyntheticSourceBlockLoader.SingleValueReader<BytesRef>(nullValueBytes) {

server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.index.IndexSettings;
3636
import org.elasticsearch.index.fielddata.FieldDataContext;
3737
import org.elasticsearch.index.fielddata.IndexFieldData;
38+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
3839
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
3940
import org.elasticsearch.index.query.QueryRewriteContext;
4041
import org.elasticsearch.index.query.QueryShardException;
@@ -642,6 +643,10 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
642643
return null;
643644
}
644645

646+
public boolean supportsBlockLoaderConfig(BlockLoaderFunctionConfig config, FieldExtractPreference preference) {
647+
return false;
648+
}
649+
645650
public enum FieldExtractPreference {
646651
/**
647652
* Load the field from doc-values into a BlockLoader supporting doc-values.
@@ -710,11 +715,4 @@ default BlockLoaderFunctionConfig blockLoaderFunctionConfig() {
710715
}
711716
}
712717

713-
/**
714-
* Marker interface that contains the configuration needed to transform loaded values into blocks.
715-
* Is retrievable from the {@link BlockLoaderContext}. The {@link MappedFieldType} can use this configuration to choose the appropriate
716-
* implementation for transforming loaded values into blocks.
717-
*/
718-
public interface BlockLoaderFunctionConfig {}
719-
720718
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.mapper.blockloader;
11+
12+
import org.elasticsearch.index.mapper.MappedFieldType;
13+
14+
/**
15+
* Configuration needed to transform loaded values into blocks.
16+
* {@link MappedFieldType}s will find me in
17+
* {@link MappedFieldType.BlockLoaderContext#blockLoaderFunctionConfig()} and
18+
* use this configuration to choose the appropriate implementation for
19+
* transforming loaded values into blocks.
20+
*/
21+
public interface BlockLoaderFunctionConfig {
22+
/**
23+
* Name used in descriptions.
24+
*/
25+
Function function();
26+
27+
record JustWarnings(Function function, Warnings warnings) implements BlockLoaderFunctionConfig {}
28+
29+
enum Function {
30+
LENGTH,
31+
V_COSINE,
32+
V_DOT_PRODUCT,
33+
V_HAMMING,
34+
V_L1NORM,
35+
V_L2NORM,
36+
}
37+
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.index.mapper.blockloader.docvalues;
10+
package org.elasticsearch.index.mapper.blockloader;
1111

1212
/**
1313
* Warnings returned when loading values for ESQL. These are returned as HTTP 299 headers like so:
@@ -17,7 +17,7 @@
1717
* < Warning: 299 Elasticsearch-${ver} "Line 1:27: java.lang.IllegalArgumentException: single-value function encountered multi-value"
1818
* }</pre>
1919
*/
20-
interface Warnings {
20+
public interface Warnings {
2121
/**
2222
* Register a warning. ESQL deduplicates and limits the number of warnings returned so it should
2323
* be fine to blast as many warnings into this as you encounter.

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/DenseVectorBlockLoader.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ protected void processCurrentVector(B builder) throws IOException {
178178

179179
@Override
180180
public String toString() {
181-
return "BlockDocValuesReader.FloatDenseVectorValuesBlockReader";
181+
return "FloatDenseVectorFromDocValues." + processor.name();
182182
}
183183
}
184184

@@ -216,7 +216,7 @@ protected void processCurrentVector(B builder) throws IOException {
216216

217217
@Override
218218
public String toString() {
219-
return "BlockDocValuesReader.FloatDenseVectorNormalizedValuesBlockReader";
219+
return "FloatDenseVectorFromDocValues.Normalized." + processor.name();
220220
}
221221
}
222222

@@ -237,7 +237,7 @@ protected void processCurrentVector(B builder) throws IOException {
237237

238238
@Override
239239
public String toString() {
240-
return "BlockDocValuesReader.ByteDenseVectorValuesBlockReader";
240+
return "ByteDenseVectorFromDocValues." + processor.name();
241241
}
242242
}
243243

@@ -255,7 +255,7 @@ protected void assertDimensions() {
255255

256256
@Override
257257
public String toString() {
258-
return "BlockDocValuesReader.BitDenseVectorValuesBlockReader";
258+
return "BitDenseVectorFromDocValues." + processor.name();
259259
}
260260
}
261261
}

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/DenseVectorBlockLoaderProcessor.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ default void appendNull(B builder) {
4747
builder.appendNull();
4848
}
4949

50+
String name();
51+
5052
/**
5153
* Processor that appends raw float vectors to a FloatBuilder as multi values.
5254
*/
@@ -74,6 +76,11 @@ public void process(byte[] vector, BlockLoader.FloatBuilder builder) {
7476
}
7577
builder.endPositionEntry();
7678
}
79+
80+
@Override
81+
public String name() {
82+
return "Load";
83+
}
7784
}
7885

7986
/**
@@ -102,5 +109,10 @@ public void process(byte[] vector, BlockLoader.DoubleBuilder builder) {
102109
double similarity = config.similarityFunction().calculateSimilarity(vector, config.vectorAsBytes());
103110
builder.appendDouble(similarity);
104111
}
112+
113+
@Override
114+
public String name() {
115+
return config.similarityFunction().function().toString();
116+
}
105117
}
106118
}

server/src/main/java/org/elasticsearch/index/mapper/blockloader/docvalues/Utf8CodePointsFromOrdsBlockLoader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
import org.apache.lucene.util.BytesRef;
1717
import org.apache.lucene.util.RamUsageEstimator;
1818
import org.apache.lucene.util.UnicodeUtil;
19+
import org.elasticsearch.index.mapper.blockloader.Warnings;
1920

2021
import java.io.IOException;
2122
import java.util.Arrays;
2223

23-
import static org.elasticsearch.index.mapper.blockloader.docvalues.Warnings.registerSingleValueWarning;
24+
import static org.elasticsearch.index.mapper.blockloader.Warnings.registerSingleValueWarning;
2425

2526
/**
2627
* A count of utf-8 code points for {@code keyword} style fields that are stored as a lookup table.

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 44 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import org.elasticsearch.index.mapper.SourceLoader;
7878
import org.elasticsearch.index.mapper.SourceValueFetcher;
7979
import org.elasticsearch.index.mapper.ValueFetcher;
80+
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
8081
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorBlockLoader;
8182
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorBlockLoaderProcessor;
8283
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorFromBinaryBlockLoader;
@@ -2915,31 +2916,34 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
29152916
return BlockLoader.CONSTANT_NULLS;
29162917
}
29172918

2918-
BlockLoaderFunctionConfig functionConfig = blContext.blockLoaderFunctionConfig();
2919+
BlockLoaderFunctionConfig cfg = blContext.blockLoaderFunctionConfig();
29192920
if (indexed) {
2920-
if (functionConfig == null) {
2921+
if (cfg == null) {
29212922
return new DenseVectorBlockLoader<>(
29222923
name(),
29232924
dims,
29242925
this,
29252926
new DenseVectorBlockLoaderProcessor.DenseVectorLoaderProcessor()
29262927
);
2927-
} else if (functionConfig instanceof VectorSimilarityFunctionConfig similarityConfig) {
2928-
if (getElementType() == ElementType.BYTE) {
2929-
similarityConfig = similarityConfig.forByteVector();
2930-
}
2931-
return new DenseVectorBlockLoader<>(
2932-
name(),
2933-
dims,
2934-
this,
2935-
new DenseVectorBlockLoaderProcessor.DenseVectorSimilarityProcessor(similarityConfig)
2936-
);
2937-
} else {
2938-
throw new UnsupportedOperationException("Unknown block loader function config: " + functionConfig.getClass());
29392928
}
2929+
return switch (cfg.function()) {
2930+
case V_COSINE, V_DOT_PRODUCT, V_HAMMING, V_L1NORM, V_L2NORM -> {
2931+
VectorSimilarityFunctionConfig similarityConfig = (VectorSimilarityFunctionConfig) cfg;
2932+
if (getElementType() == ElementType.BYTE || getElementType() == ElementType.BIT) {
2933+
similarityConfig = similarityConfig.forByteVector();
2934+
}
2935+
yield new DenseVectorBlockLoader<>(
2936+
name(),
2937+
dims,
2938+
this,
2939+
new DenseVectorBlockLoaderProcessor.DenseVectorSimilarityProcessor(similarityConfig)
2940+
);
2941+
}
2942+
default -> throw new UnsupportedOperationException("Unknown block loader function config: " + cfg.function());
2943+
};
29402944
}
29412945

2942-
if (functionConfig != null) {
2946+
if (cfg != null) {
29432947
throw new IllegalArgumentException(
29442948
"Field ["
29452949
+ name()
@@ -2960,6 +2964,22 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
29602964
);
29612965
}
29622966

2967+
@Override
2968+
public boolean supportsBlockLoaderConfig(BlockLoaderFunctionConfig config, FieldExtractPreference preference) {
2969+
if (dims == null) {
2970+
// No data has been indexed yet
2971+
return true;
2972+
}
2973+
2974+
if (indexed) {
2975+
return switch (config.function()) {
2976+
case V_COSINE, V_DOT_PRODUCT, V_HAMMING, V_L1NORM, V_L2NORM -> true;
2977+
default -> false;
2978+
};
2979+
}
2980+
return false;
2981+
}
2982+
29632983
private SourceValueFetcher sourceValueFetcher(Set<String> sourcePaths, IndexSettings indexSettings) {
29642984
return new SourceValueFetcher(sourcePaths, null, indexSettings.getIgnoredSourceFormat()) {
29652985
@Override
@@ -3434,13 +3454,15 @@ public interface SimilarityFunction {
34343454
float calculateSimilarity(float[] leftVector, float[] rightVector);
34353455

34363456
float calculateSimilarity(byte[] leftVector, byte[] rightVector);
3457+
3458+
BlockLoaderFunctionConfig.Function function();
34373459
}
34383460

34393461
/**
3440-
* Configuration for a {@link MappedFieldType.BlockLoaderFunctionConfig} that calculates vector similarity.
3462+
* Configuration for a {@link BlockLoaderFunctionConfig} that calculates vector similarity.
34413463
* Functions that use this config should use SIMILARITY_FUNCTION_NAME as their name.
34423464
*/
3443-
public static class VectorSimilarityFunctionConfig implements MappedFieldType.BlockLoaderFunctionConfig {
3465+
public static class VectorSimilarityFunctionConfig implements BlockLoaderFunctionConfig {
34443466

34453467
private final SimilarityFunction similarityFunction;
34463468
private final float[] vector;
@@ -3449,7 +3471,6 @@ public static class VectorSimilarityFunctionConfig implements MappedFieldType.Bl
34493471
public VectorSimilarityFunctionConfig(SimilarityFunction similarityFunction, float[] vector) {
34503472
this.similarityFunction = similarityFunction;
34513473
this.vector = vector;
3452-
34533474
}
34543475

34553476
/**
@@ -3463,6 +3484,11 @@ public VectorSimilarityFunctionConfig forByteVector() {
34633484
return this;
34643485
}
34653486

3487+
@Override
3488+
public Function function() {
3489+
return similarityFunction.function();
3490+
}
3491+
34663492
public byte[] vectorAsBytes() {
34673493
assert vectorAsBytes != null : "vectorAsBytes is null, call forByteVector() first";
34683494
return vectorAsBytes;

0 commit comments

Comments
 (0)