Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a3c526e
ESQL: Make field fusion generic
nik9000 Oct 30, 2025
7758ab9
Update docs/changelog/137382.yaml
nik9000 Oct 30, 2025
47c874e
[CI] Auto commit changes from spotless
Oct 30, 2025
6b0fead
More tests
nik9000 Oct 30, 2025
1dd6d96
Tests
nik9000 Oct 30, 2025
ce8e6aa
Merge remote-tracking branch 'nik9000/esql_fuse_length' into esql_fus…
nik9000 Oct 30, 2025
d50a74b
[CI] Auto commit changes from spotless
Oct 30, 2025
8746cfa
Add names back
nik9000 Oct 30, 2025
15eb5e9
Merge branch 'main' into esql_fuse_length
nik9000 Oct 30, 2025
d01184b
Renam
nik9000 Oct 31, 2025
d6897d8
[CI] Auto commit changes from spotless
Oct 31, 2025
e091312
Merge branch 'main' into esql_fuse_length
nik9000 Oct 31, 2025
538b72b
More tests
nik9000 Oct 31, 2025
2fc8fd5
Merge remote-tracking branch 'nik9000/esql_fuse_length' into esql_fus…
nik9000 Oct 31, 2025
e310d4b
[CI] Auto commit changes from spotless
Oct 31, 2025
bb81e43
Merge branch 'main' into esql_fuse_length
nik9000 Nov 1, 2025
bb9bca2
Update x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/…
nik9000 Nov 3, 2025
d70bca9
Javadoc
nik9000 Nov 3, 2025
5fb351c
Merge remote-tracking branch 'nik9000/esql_fuse_length' into esql_fus…
nik9000 Nov 3, 2025
39ae15a
Rename
nik9000 Nov 3, 2025
90005ca
[CI] Auto commit changes from spotless
Nov 3, 2025
b98935c
Merge branch 'main' into esql_fuse_length
nik9000 Nov 3, 2025
c462d07
fix
nik9000 Nov 3, 2025
667e58a
Merge remote-tracking branch 'nik9000/esql_fuse_length' into esql_fus…
nik9000 Nov 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137382.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137382
summary: Make field fusion generic
area: ES|QL
type: enhancement
issues: []
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -501,4 +501,5 @@
exports org.elasticsearch.index.codec.vectors.es93 to org.elasticsearch.test.knn;
exports org.elasticsearch.search.crossproject;
exports org.elasticsearch.index.mapper.blockloader.docvalues;
exports org.elasticsearch.index.mapper.blockloader;
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromOrdsBlockLoader;
import org.elasticsearch.index.mapper.blockloader.docvalues.Utf8CodePointsFromOrdsBlockLoader;
import org.elasticsearch.index.query.AutomatonQueryWithDescription;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.similarity.SimilarityProvider;
Expand Down Expand Up @@ -813,10 +815,19 @@ NamedAnalyzer normalizer() {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
return new BytesRefsFromOrdsBlockLoader(name());
return switch (blContext.blockLoaderFunctionConfig()) {
case null -> new BytesRefsFromOrdsBlockLoader(name());
case BlockLoaderFunctionConfig.Named named -> switch (named.name()) {
case "LENGTH" -> new Utf8CodePointsFromOrdsBlockLoader(named.warnings(), name());
default -> throw new UnsupportedOperationException("unknown fusion config [" + named.name() + "]");
};
default -> throw new UnsupportedOperationException(
"unknown fusion config [" + blContext.blockLoaderFunctionConfig() + "]"
);
};
}
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
if (blContext.blockLoaderFunctionConfig() != null) {
throw new UnsupportedOperationException("function fusing only supported for doc values");
}

// Multi fields don't have fallback synthetic source.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardException;
Expand Down Expand Up @@ -710,11 +711,4 @@ default BlockLoaderFunctionConfig blockLoaderFunctionConfig() {
}
}

/**
* Marker interface that contains the configuration needed to transform loaded values into blocks.
* Is retrievable from the {@link BlockLoaderContext}. The {@link MappedFieldType} can use this configuration to choose the appropriate
* implementation for transforming loaded values into blocks.
*/
public interface BlockLoaderFunctionConfig {}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper.blockloader;

import org.elasticsearch.index.mapper.MappedFieldType;

/**
* Configuration needed to transform loaded values into blocks.
* {@link MappedFieldType}s will find me in
* {@link MappedFieldType.BlockLoaderContext#blockLoaderFunctionConfig()} and
* use this configuration to choose the appropriate implementation for
* transforming loaded values into blocks.
*/
public interface BlockLoaderFunctionConfig {
record Named(String name, Warnings warnings) implements BlockLoaderFunctionConfig {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper.blockloader.docvalues;
package org.elasticsearch.index.mapper.blockloader;

/**
* Warnings returned when loading values for ESQL. These are returned as HTTP 299 headers like so:
Expand All @@ -17,7 +17,7 @@
* < Warning: 299 Elasticsearch-${ver} "Line 1:27: java.lang.IllegalArgumentException: single-value function encountered multi-value"
* }</pre>
*/
interface Warnings {
public interface Warnings {
/**
* Register a warning. ESQL deduplicates and limits the number of warnings returned so it should
* be fine to blast as many warnings into this as you encounter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.index.mapper.blockloader.Warnings;

import java.io.IOException;
import java.util.Arrays;

import static org.elasticsearch.index.mapper.blockloader.docvalues.Warnings.registerSingleValueWarning;
import static org.elasticsearch.index.mapper.blockloader.Warnings.registerSingleValueWarning;

/**
* A count of utf-8 code points for {@code keyword} style fields that are stored as a lookup table.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorBlockLoader;
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorBlockLoaderProcessor;
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorFromBinaryBlockLoader;
Expand Down Expand Up @@ -3173,10 +3174,10 @@ public interface SimilarityFunction {
}

/**
* Configuration for a {@link MappedFieldType.BlockLoaderFunctionConfig} that calculates vector similarity.
* Configuration for a {@link BlockLoaderFunctionConfig} that calculates vector similarity.
* Functions that use this config should use SIMILARITY_FUNCTION_NAME as their name.
*/
public static class VectorSimilarityFunctionConfig implements MappedFieldType.BlockLoaderFunctionConfig {
public static class VectorSimilarityFunctionConfig implements BlockLoaderFunctionConfig {

private final SimilarityFunction similarityFunction;
private final float[] vector;
Expand All @@ -3185,7 +3186,6 @@ public static class VectorSimilarityFunctionConfig implements MappedFieldType.Bl
public VectorSimilarityFunctionConfig(SimilarityFunction similarityFunction, float[] vector) {
this.similarityFunction = similarityFunction;
this.vector = vector;

}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

package org.elasticsearch.index.mapper.blockloader.docvalues;

import org.elasticsearch.index.mapper.blockloader.Warnings;

import java.util.ArrayList;
import java.util.List;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@
package org.elasticsearch.xpack.esql.core.type;

import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;

/**
* EsField that represents a function being applied to a field on extraction. It receives a
* {@link org.elasticsearch.index.mapper.MappedFieldType.BlockLoaderFunctionConfig} that will be passed down to the block loading process
* {@link BlockLoaderFunctionConfig} that will be passed down to the block loading process
* to apply the function at data load time.
*/
public class FunctionEsField extends EsField {

// Not serialized as it will be created on the data node
private final transient MappedFieldType.BlockLoaderFunctionConfig functionConfig;
private final transient BlockLoaderFunctionConfig functionConfig;

public FunctionEsField(EsField esField, DataType dataType, MappedFieldType.BlockLoaderFunctionConfig functionConfig) {
public FunctionEsField(EsField esField, DataType dataType, BlockLoaderFunctionConfig functionConfig) {
this(
esField.getName(),
dataType,
Expand All @@ -43,7 +43,7 @@ private FunctionEsField(
boolean aggregatable,
boolean isAlias,
TimeSeriesFieldType timeSeriesFieldType,
MappedFieldType.BlockLoaderFunctionConfig functionConfig
BlockLoaderFunctionConfig functionConfig
) {
super(name, esDataType, properties, aggregatable, isAlias, timeSeriesFieldType);
this.functionConfig = functionConfig;
Expand All @@ -54,7 +54,7 @@ public void writeTo(StreamOutput out) throws IOException {
throw new UnsupportedOperationException("FunctionEsField is not serializable, should be created on data nodes");
}

public MappedFieldType.BlockLoaderFunctionConfig functionConfig() {
public BlockLoaderFunctionConfig functionConfig() {
return functionConfig;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.search.sort.SortBuilder;

Expand Down Expand Up @@ -58,7 +59,7 @@ BlockLoader blockLoader(
String name,
boolean asUnsupportedSource,
MappedFieldType.FieldExtractPreference fieldExtractPreference,
MappedFieldType.BlockLoaderFunctionConfig blockLoaderFunctionConfig
BlockLoaderFunctionConfig blockLoaderFunctionConfig
);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.indices.CrankyCircuitBreakerService;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.sort.SortAndFormats;
Expand Down Expand Up @@ -467,7 +468,7 @@ public BlockLoader blockLoader(
String name,
boolean asUnsupportedSource,
MappedFieldType.FieldExtractPreference fieldExtractPreference,
MappedFieldType.BlockLoaderFunctionConfig blockLoaderFunctionConfig
BlockLoaderFunctionConfig blockLoaderFunctionConfig
) {
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,50 @@

package org.elasticsearch.xpack.esql.expression.function.blockloader;

import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.stats.SearchStats;

/**
* {@link org.elasticsearch.xpack.esql.core.expression.Expression}s that can be implemented as part of value loading implement this
* interface to provide the {@link MappedFieldType.BlockLoaderFunctionConfig} that will be used to load and
* transform the value of the field.
* {@link Expression} that can be "fused" into value loading. Most of the time
* we load values into {@link Block}s and then run the expressions on them, but
* sometimes it's worth short-circuiting this process and running the expression
* in the tight loop we use for loading:
* <ul>
* <li>
* {@code V_COSINE(vector, [constant_vector])} - vector is ~512 floats
* and V_COSINE is one double.
* </li>
* <li>
* {@code ST_CENTROID(shape)} - shapes can be quite large. Centroids
* are just one point.
* </li>
* <li>
* {@code LENGTH(string)} - strings can be quite long, but string length
* is always an int. For more fun, {@code keyword}s are usually stored
* using a dictionary, and it's <strong>fairly</strong> easy to optimize
* running {@code LENGTH} once per dictionary entry.
* </li>
* <li>
* {@code MV_COUNT(anything)} - counts are always integers.
* </li>
* </ul>
*/
public interface BlockLoaderExpression {
/**
* The field and loading configuration that replaces this expression, effectively
* "fusing" the expression into the load. Or null if the fusion isn't possible.
*/
@Nullable
Fuse tryFuse(SearchStats stats);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's try to find another name - we already have Fuse as a command. ExpressionFieldLoader?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is FusedExpression ok? Or still too indicative?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naming... 😅

I come from staring at FUSE enough that it carries a lot of weight.

For me, this feature involves BlockLoaders. And Expressions that are applied to them. I understand that fuse means getting together those two, but it's not something I would think of immediately without more context.

I'd prefer to be overly explicit here, and call this BlockLoaderExpression or something similar that helps me bridge those two concepts together. But, naming...


/**
* Returns the configuration that will be used to load the value of the field and transform it
* Fused load configuration.
* @param field the field whose load we're fusing into
* @param config the fusion configuration
*/
MappedFieldType.BlockLoaderFunctionConfig getBlockLoaderFunctionConfig();
record Fuse(FieldAttribute field, BlockLoaderFunctionConfig config) {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,26 @@
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.compute.ann.Evaluator;
import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FieldAttribute;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.Example;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.blockloader.BlockLoaderExpression;
import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction;
import org.elasticsearch.xpack.esql.stats.SearchStats;

import java.io.IOException;
import java.util.List;

import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT;
import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString;

public class Length extends UnaryScalarFunction {
public class Length extends UnaryScalarFunction implements BlockLoaderExpression {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Length", Length::new);

@FunctionInfo(
Expand Down Expand Up @@ -90,4 +94,17 @@ protected NodeInfo<? extends Expression> info() {
public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) {
return new LengthEvaluator.Factory(source(), toEvaluator.apply(field()));
}

@Override
public Fuse tryFuse(SearchStats stats) {
if (field instanceof FieldAttribute f) {
if (stats.hasDocValues(f.fieldName()) == false) {
return null;
}
return new Fuse(f, new BlockLoaderFunctionConfig.Named("LENGTH", (exceptionClass, message) -> {
throw new IllegalStateException("NOCOMMIT");
}));
}
return null;
}
}
Loading