Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137382.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137382
summary: Make field fusion generic
area: ES|QL
type: enhancement
issues: []
1 change: 1 addition & 0 deletions server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -501,4 +501,5 @@
exports org.elasticsearch.index.codec.vectors.es93 to org.elasticsearch.test.knn;
exports org.elasticsearch.search.crossproject;
exports org.elasticsearch.index.mapper.blockloader.docvalues;
exports org.elasticsearch.index.mapper.blockloader;
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.StoredFieldSortedBinaryIndexFieldData;
import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.index.mapper.blockloader.docvalues.BytesRefsFromOrdsBlockLoader;
import org.elasticsearch.index.mapper.blockloader.docvalues.Utf8CodePointsFromOrdsBlockLoader;
import org.elasticsearch.index.query.AutomatonQueryWithDescription;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.similarity.SimilarityProvider;
Expand Down Expand Up @@ -817,7 +819,19 @@ NamedAnalyzer normalizer() {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSourceEnabled())) {
return new BytesRefsFromOrdsBlockLoader(name());
return switch (blContext.blockLoaderFunctionConfig()) {
case null -> new BytesRefsFromOrdsBlockLoader(name());
case BlockLoaderFunctionConfig.Named named -> switch (named.name()) {
case "LENGTH" -> new Utf8CodePointsFromOrdsBlockLoader(named.warnings(), name());
default -> throw new UnsupportedOperationException("unknown fusion config [" + named.name() + "]");
};
default -> throw new UnsupportedOperationException(
"unknown fusion config [" + blContext.blockLoaderFunctionConfig() + "]"
);
};
}
if (blContext.blockLoaderFunctionConfig() != null) {
throw new UnsupportedOperationException("function fusing only supported for doc values");
}
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardException;
Expand Down Expand Up @@ -710,11 +711,4 @@ default BlockLoaderFunctionConfig blockLoaderFunctionConfig() {
}
}

/**
* Marker interface that contains the configuration needed to transform loaded values into blocks.
* Is retrievable from the {@link BlockLoaderContext}. The {@link MappedFieldType} can use this configuration to choose the appropriate
* implementation for transforming loaded values into blocks.
*/
public interface BlockLoaderFunctionConfig {}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper.blockloader;

import org.elasticsearch.index.mapper.MappedFieldType;

/**
* Configuration needed to transform loaded values into blocks.
* {@link MappedFieldType}s will find me in
* {@link MappedFieldType.BlockLoaderContext#blockLoaderFunctionConfig()} and
* use this configuration to choose the appropriate implementation for
* transforming loaded values into blocks.
*/
public interface BlockLoaderFunctionConfig {
/**
* Name used in descriptions.
*/
String name();

record Named(String name, Warnings warnings) implements BlockLoaderFunctionConfig {
@Override
public int hashCode() {
return name.hashCode();
}

@Override
public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) {
return false;
}
Named named = (Named) o;
return name.equals(named.name);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper.blockloader.docvalues;
package org.elasticsearch.index.mapper.blockloader;

/**
* Warnings returned when loading values for ESQL. These are returned as HTTP 299 headers like so:
Expand All @@ -17,7 +17,7 @@
* < Warning: 299 Elasticsearch-${ver} "Line 1:27: java.lang.IllegalArgumentException: single-value function encountered multi-value"
* }</pre>
*/
interface Warnings {
public interface Warnings {
/**
* Register a warning. ESQL deduplicates and limits the number of warnings returned so it should
* be fine to blast as many warnings into this as you encounter.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.index.mapper.blockloader.Warnings;

import java.io.IOException;
import java.util.Arrays;

import static org.elasticsearch.index.mapper.blockloader.docvalues.Warnings.registerSingleValueWarning;
import static org.elasticsearch.index.mapper.blockloader.Warnings.registerSingleValueWarning;

/**
* A count of utf-8 code points for {@code keyword} style fields that are stored as a lookup table.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorBlockLoader;
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorBlockLoaderProcessor;
import org.elasticsearch.index.mapper.blockloader.docvalues.DenseVectorFromBinaryBlockLoader;
Expand Down Expand Up @@ -3173,10 +3174,10 @@ public interface SimilarityFunction {
}

/**
* Configuration for a {@link MappedFieldType.BlockLoaderFunctionConfig} that calculates vector similarity.
* Configuration for a {@link BlockLoaderFunctionConfig} that calculates vector similarity.
* Functions that use this config should use SIMILARITY_FUNCTION_NAME as their name.
*/
public static class VectorSimilarityFunctionConfig implements MappedFieldType.BlockLoaderFunctionConfig {
public static class VectorSimilarityFunctionConfig implements BlockLoaderFunctionConfig {

private final SimilarityFunction similarityFunction;
private final float[] vector;
Expand All @@ -3185,7 +3186,6 @@ public static class VectorSimilarityFunctionConfig implements MappedFieldType.Bl
public VectorSimilarityFunctionConfig(SimilarityFunction similarityFunction, float[] vector) {
this.similarityFunction = similarityFunction;
this.vector = vector;

}

/**
Expand All @@ -3199,6 +3199,11 @@ public VectorSimilarityFunctionConfig forByteVector() {
return this;
}

@Override
public String name() {
return similarityFunction.toString();
}

public byte[] vectorAsBytes() {
assert vectorAsBytes != null : "vectorAsBytes is null, call forByteVector() first";
return vectorAsBytes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.mapper.blockloader.docvalues;
package org.elasticsearch.index.mapper.blockloader;

import java.util.ArrayList;
import java.util.List;

// TODO move me once we have more of these loaders
class MockWarnings implements Warnings {
record MockWarning(Class<? extends Exception> exceptionClass, String message) {}
public class MockWarnings implements Warnings {
public record MockWarning(Class<? extends Exception> exceptionClass, String message) {}

private final List<MockWarning> warnings = new ArrayList<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.TestBlock;
import org.elasticsearch.index.mapper.blockloader.MockWarnings;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matcher;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,13 @@ public EsField getExactField() {
return this;
}

/**
* Can this field be pushed <strong>if</strong> it is indexed?
*/
public boolean pushable() {
return true;
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This bothers me. I needed this because without it we'd try to push this:

FROM foo
| WHERE LENGTH(kwd) < 10

to the index. Now, we might be able to do that with a specialized lucene query. But we don't have one of those. Without those change instead what happens is:

  1. LENGTH(kwd) becomes $$kwd$length$hash$.
  2. We identify $$kwd$length$hash$ < 10 as pushable.

This tells us we can't push it. But it's kind of picky. If SearchStats took EsField it could check this easy enough. That might be a good solution to this.


/**
* Returns and {@link Exact} object with all the necessary info about the field:
* <ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@
package org.elasticsearch.xpack.esql.core.type;

import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;

/**
* EsField that represents a function being applied to a field on extraction. It receives a
* {@link org.elasticsearch.index.mapper.MappedFieldType.BlockLoaderFunctionConfig} that will be passed down to the block loading process
* {@link BlockLoaderFunctionConfig} that will be passed down to the block loading process
* to apply the function at data load time.
*/
public class FunctionEsField extends EsField {

// Not serialized as it will be created on the data node
private final transient MappedFieldType.BlockLoaderFunctionConfig functionConfig;
private final transient BlockLoaderFunctionConfig functionConfig;

public FunctionEsField(EsField esField, DataType dataType, MappedFieldType.BlockLoaderFunctionConfig functionConfig) {
public FunctionEsField(EsField esField, DataType dataType, BlockLoaderFunctionConfig functionConfig) {
this(
esField.getName(),
dataType,
Expand All @@ -43,7 +43,7 @@ private FunctionEsField(
boolean aggregatable,
boolean isAlias,
TimeSeriesFieldType timeSeriesFieldType,
MappedFieldType.BlockLoaderFunctionConfig functionConfig
BlockLoaderFunctionConfig functionConfig
) {
super(name, esDataType, properties, aggregatable, isAlias, timeSeriesFieldType);
this.functionConfig = functionConfig;
Expand All @@ -54,7 +54,7 @@ public void writeTo(StreamOutput out) throws IOException {
throw new UnsupportedOperationException("FunctionEsField is not serializable, should be created on data nodes");
}

public MappedFieldType.BlockLoaderFunctionConfig functionConfig() {
public BlockLoaderFunctionConfig functionConfig() {
return functionConfig;
}

Expand All @@ -70,4 +70,10 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(super.hashCode(), functionConfig);
}

@Override
public boolean pushable() {
// These fields are *never* pushable to the lucene index.
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.search.sort.SortBuilder;

Expand Down Expand Up @@ -58,7 +59,7 @@ BlockLoader blockLoader(
String name,
boolean asUnsupportedSource,
MappedFieldType.FieldExtractPreference fieldExtractPreference,
MappedFieldType.BlockLoaderFunctionConfig blockLoaderFunctionConfig
BlockLoaderFunctionConfig blockLoaderFunctionConfig
);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.blockloader.BlockLoaderFunctionConfig;
import org.elasticsearch.indices.CrankyCircuitBreakerService;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.sort.SortAndFormats;
Expand Down Expand Up @@ -467,7 +468,7 @@ public BlockLoader blockLoader(
String name,
boolean asUnsupportedSource,
MappedFieldType.FieldExtractPreference fieldExtractPreference,
MappedFieldType.BlockLoaderFunctionConfig blockLoaderFunctionConfig
BlockLoaderFunctionConfig blockLoaderFunctionConfig
) {
throw new UnsupportedOperationException();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.expression.function;

import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.Warnings;
import org.elasticsearch.xpack.esql.core.tree.Source;

public class BlockLoaderWarnings implements org.elasticsearch.index.mapper.blockloader.Warnings {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess these are warnings that can be created when using BlockLoader function config to load values. Should we add that to the javadoc?

private final DriverContext.WarningsMode warningsMode;
private final Source source;
private Warnings delegate;

public BlockLoaderWarnings(DriverContext.WarningsMode warningsMode, Source source) {
this.warningsMode = warningsMode;
this.source = source;
}

@Override
public void registerException(Class<? extends Exception> exceptionClass, String message) {
if (delegate == null) {
delegate = Warnings.createOnlyWarnings(
warningsMode,
source.source().getLineNumber(),
source.source().getColumnNumber(),
source.text()
);
}
delegate.registerException(exceptionClass, message);
}
}
Loading