diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 736592083a229..363e956f1b211 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -11,6 +11,8 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; @@ -18,6 +20,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; +import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.mapper.BlockLoader.BlockFactory; import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder; import org.elasticsearch.index.mapper.BlockLoader.Builder; @@ -26,6 +29,7 @@ import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder; import org.elasticsearch.index.mapper.BlockLoader.IntBuilder; import org.elasticsearch.index.mapper.BlockLoader.LongBuilder; +import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder; import org.elasticsearch.search.fetch.StoredFieldsSpec; import java.io.IOException; @@ -504,6 +508,87 @@ public String toString() { } } + public static class DenseVectorBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + private final int dimensions; + + public DenseVectorBlockLoader(String fieldName, int dimensions) { + this.fieldName = fieldName; + this.dimensions = dimensions; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.denseVectors(expectedCount, dimensions); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName); + if (floatVectorValues != null) { + return new DenseVectorValuesBlockReader(floatVectorValues, dimensions); + } + return new ConstantNullsReader(); + } + } + + private static class DenseVectorValuesBlockReader extends BlockDocValuesReader { + private final FloatVectorValues floatVectorValues; + private final KnnVectorValues.DocIndexIterator iterator; + private final int dimensions; + + DenseVectorValuesBlockReader(FloatVectorValues floatVectorValues, int dimensions) { + this.floatVectorValues = floatVectorValues; + iterator = floatVectorValues.iterator(); + this.dimensions = dimensions; + } + + @Override + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + // Doubles from doc values ensures that the values are in order + try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) { + for (int i = 0; i < docs.count(); i++) { + int doc = docs.get(i); + if (doc < iterator.docID()) { + throw new IllegalStateException("docs within same block must be in order"); + } + read(doc, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { + read(docId, (BlockLoader.FloatBuilder) builder); + } + + private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException { + if (iterator.advance(doc) == doc) { + builder.beginPositionEntry(); + float[] floats = floatVectorValues.vectorValue(iterator.index()); + assert floats.length == dimensions + : "unexpected dimensions for vector value; expected " + dimensions + " but got " + floats.length; + for (float aFloat : floats) { + builder.appendFloat(aFloat); + } + builder.endPositionEntry(); + } else { + builder.appendNull(); + } + } + + @Override + public int docId() { + return iterator.docID(); + } + + @Override + public String toString() { + return "BlockDocValuesReader.FloatVectorValuesBlockReader"; + } + } + public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader { private final String fieldName; @@ -752,6 +837,94 @@ public String toString() { } } + public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoader { + private final String fieldName; + private final int dims; + private final IndexVersion indexVersion; + + public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) { + this.fieldName = fieldName; + this.dims = dims; + this.indexVersion = indexVersion; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.denseVectors(expectedCount, dims); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); + if (docValues == null) { + return new ConstantNullsReader(); + } + return new DenseVectorFromBinary(docValues, dims, indexVersion); + } + } + + private static class DenseVectorFromBinary extends BlockDocValuesReader { + private final BinaryDocValues docValues; + private final IndexVersion indexVersion; + private final int dimensions; + private final float[] scratch; + + private int docID = -1; + + DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) { + this.docValues = docValues; + this.scratch = new float[dims]; + this.indexVersion = indexVersion; + this.dimensions = dims; + } + + @Override + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { + try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) { + for (int i = 0; i < docs.count(); i++) { + int doc = docs.get(i); + if (doc < docID) { + throw new IllegalStateException("docs within same block must be in order"); + } + read(doc, builder); + } + return builder.build(); + } + } + + @Override + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { + read(docId, (BlockLoader.FloatBuilder) builder); + } + + private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException { + this.docID = doc; + if (false == docValues.advanceExact(doc)) { + builder.appendNull(); + return; + } + BytesRef bytesRef = docValues.binaryValue(); + assert bytesRef.length > 0; + VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch); + + builder.beginPositionEntry(); + for (float value : scratch) { + builder.appendFloat(value); + } + builder.endPositionEntry(); + } + + @Override + public int docId() { + return docID; + } + + @Override + public String toString() { + return "DenseVectorFromBinary.Bytes"; + } + } + public static class BooleansBlockLoader extends DocValuesBlockLoader { private final String fieldName; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index 451da5bfdbaf0..640a629410451 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -373,6 +373,11 @@ interface BlockFactory { */ DoubleBuilder doubles(int expectedCount); + /** + * Build a builder to load dense vectors without any loading constraints. + */ + FloatBuilder denseVectors(int expectedVectorsCount, int dimensions); + /** * Build a builder to load ints as loaded from doc values. * Doc values load ints in sorted order. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java index fd248b47502ea..72d39b7f59ca2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java @@ -303,6 +303,49 @@ public String toString() { } } + /** + * Load {@code float}s from {@code _source}. + */ + public static class DenseVectorBlockLoader extends SourceBlockLoader { + private final int dimensions; + + public DenseVectorBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, int dimensions) { + super(fetcher, lookup); + this.dimensions = dimensions; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return factory.denseVectors(expectedCount, dimensions); + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context, DocIdSetIterator iter) { + return new DenseVectors(fetcher, iter); + } + + @Override + protected String name() { + return "DenseVectors"; + } + } + + private static class DenseVectors extends BlockSourceReader { + DenseVectors(ValueFetcher fetcher, DocIdSetIterator iter) { + super(fetcher, iter); + } + + @Override + protected void append(BlockLoader.Builder builder, Object v) { + ((BlockLoader.FloatBuilder) builder).appendFloat(((Number) v).floatValue()); + } + + @Override + public String toString() { + return "BlockSourceReader.DenseVectors"; + } + } + /** * Load {@code int}s from {@code _source}. */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 95caaa6ccf316..fd38cebb58e3d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -53,6 +53,9 @@ import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.ArraySourceValueFetcher; +import org.elasticsearch.index.mapper.BlockDocValuesReader; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.BlockSourceReader; import org.elasticsearch.index.mapper.DocumentParserContext; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -60,13 +63,16 @@ import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.MappingParser; +import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.SimpleMappedFieldType; import org.elasticsearch.index.mapper.SourceLoader; +import org.elasticsearch.index.mapper.SourceValueFetcher; import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.lookup.Source; import org.elasticsearch.search.vectors.DenseVectorQuery; import org.elasticsearch.search.vectors.ESDiversifyingChildrenByteKnnVectorQuery; import org.elasticsearch.search.vectors.ESDiversifyingChildrenFloatKnnVectorQuery; @@ -87,10 +93,12 @@ import java.time.ZoneId; import java.util.Arrays; import java.util.HexFormat; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Stream; @@ -376,7 +384,8 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { indexed.getValue(), similarity.getValue(), indexOptions.getValue(), - meta.getValue() + meta.getValue(), + context.isSourceSynthetic() ), builderParams(this, context), indexOptions.getValue(), @@ -2138,6 +2147,7 @@ public static final class DenseVectorFieldType extends SimpleMappedFieldType { private final VectorSimilarity similarity; private final IndexVersion indexVersionCreated; private final IndexOptions indexOptions; + private final boolean isSyntheticSource; public DenseVectorFieldType( String name, @@ -2147,7 +2157,8 @@ public DenseVectorFieldType( boolean indexed, VectorSimilarity similarity, IndexOptions indexOptions, - Map meta + Map meta, + boolean isSyntheticSource ) { super(name, indexed, false, indexed == false, TextSearchInfo.NONE, meta); this.elementType = elementType; @@ -2156,6 +2167,7 @@ public DenseVectorFieldType( this.similarity = similarity; this.indexVersionCreated = indexVersionCreated; this.indexOptions = indexOptions; + this.isSyntheticSource = isSyntheticSource; } @Override @@ -2444,6 +2456,44 @@ ElementType getElementType() { public IndexOptions getIndexOptions() { return indexOptions; } + + @Override + public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) { + if (elementType != ElementType.FLOAT) { + // Just float dense vector support for now + return null; + } + + if (indexed) { + return new BlockDocValuesReader.DenseVectorBlockLoader(name(), dims); + } + + if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) { + return new BlockDocValuesReader.DenseVectorFromBinaryBlockLoader(name(), dims, indexVersionCreated); + } + + BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupMatchingAll(); + return new BlockSourceReader.DenseVectorBlockLoader(sourceValueFetcher(blContext.sourcePaths(name())), lookup, dims); + } + + private SourceValueFetcher sourceValueFetcher(Set sourcePaths) { + return new SourceValueFetcher(sourcePaths, null) { + @Override + protected Object parseSourceValue(Object value) { + if (value.equals("")) { + return null; + } + return NumberFieldMapper.NumberType.FLOAT.parse(value, false); + } + + @Override + public List fetchValues(Source source, int doc, List ignoredValues) { + List result = super.fetchValues(source, doc, ignoredValues); + assert result.size() == dims : "Unexpected number of dimensions; got " + result.size() + " but expected " + dims; + return result; + } + }; + } } private final IndexOptions indexOptions; @@ -2498,7 +2548,8 @@ public void parse(DocumentParserContext context) throws IOException { fieldType().indexed, fieldType().similarity, fieldType().indexOptions, - fieldType().meta() + fieldType().meta(), + fieldType().isSyntheticSource ); Mapper update = new DenseVectorFieldMapper( leafName(), diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 5877ce9003ff5..c02cb5436e90d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -128,7 +128,8 @@ private DenseVectorFieldType createFloatFieldType() { indexed, VectorSimilarity.COSINE, indexed ? randomIndexOptionsAll() : null, - Collections.emptyMap() + Collections.emptyMap(), + false ); } @@ -141,7 +142,8 @@ private DenseVectorFieldType createByteFieldType() { true, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); } @@ -214,7 +216,8 @@ public void testCreateNestedKnnQuery() { true, VectorSimilarity.COSINE, randomIndexOptionsAll(), - Collections.emptyMap() + Collections.emptyMap(), + false ); float[] queryVector = new float[dims]; for (int i = 0; i < dims; i++) { @@ -244,7 +247,8 @@ public void testCreateNestedKnnQuery() { true, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); byte[] queryVector = new byte[dims]; float[] floatQueryVector = new float[dims]; @@ -294,7 +298,8 @@ public void testExactKnnQuery() { true, VectorSimilarity.COSINE, randomIndexOptionsAll(), - Collections.emptyMap() + Collections.emptyMap(), + false ); float[] queryVector = new float[dims]; for (int i = 0; i < dims; i++) { @@ -312,7 +317,8 @@ public void testExactKnnQuery() { true, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); byte[] queryVector = new byte[dims]; for (int i = 0; i < dims; i++) { @@ -332,7 +338,8 @@ public void testFloatCreateKnnQuery() { false, VectorSimilarity.COSINE, null, - Collections.emptyMap() + Collections.emptyMap(), + false ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, @@ -357,7 +364,8 @@ public void testFloatCreateKnnQuery() { true, VectorSimilarity.DOT_PRODUCT, randomIndexOptionsAll(), - Collections.emptyMap() + Collections.emptyMap(), + false ); float[] queryVector = new float[BBQ_MIN_DIMS]; for (int i = 0; i < BBQ_MIN_DIMS; i++) { @@ -386,7 +394,8 @@ public void testFloatCreateKnnQuery() { true, VectorSimilarity.COSINE, randomIndexOptionsAll(), - Collections.emptyMap() + Collections.emptyMap(), + false ); e = expectThrows( IllegalArgumentException.class, @@ -414,7 +423,8 @@ public void testCreateKnnQueryMaxDims() { true, VectorSimilarity.COSINE, randomIndexOptionsAll(), - Collections.emptyMap() + Collections.emptyMap(), + false ); float[] queryVector = new float[4096]; for (int i = 0; i < 4096; i++) { @@ -445,7 +455,8 @@ public void testCreateKnnQueryMaxDims() { true, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); byte[] queryVector = new byte[4096]; for (int i = 0; i < 4096; i++) { @@ -475,7 +486,8 @@ public void testByteCreateKnnQuery() { false, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, @@ -500,7 +512,8 @@ public void testByteCreateKnnQuery() { true, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); e = expectThrows( IllegalArgumentException.class, @@ -543,7 +556,8 @@ public void testRescoreOversampleUsedWithoutQuantization() { true, VectorSimilarity.COSINE, randomIndexOptionsNonQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); Query knnQuery = nonQuantizedField.createKnnQuery( @@ -577,7 +591,8 @@ public void testRescoreOversampleModifiesNumCandidates() { true, VectorSimilarity.COSINE, randomIndexOptionsHnswQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); // Total results is k, internal k is multiplied by oversample @@ -598,7 +613,8 @@ public void testRescoreOversampleQueryOverrides() { true, VectorSimilarity.COSINE, randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(randomFloatBetween(1.1f, 9.9f, false))), - Collections.emptyMap() + Collections.emptyMap(), + false ); Query query = fieldType.createKnnQuery( VectorData.fromFloats(new float[] { 1, 4, 10 }), @@ -621,7 +637,8 @@ public void testRescoreOversampleQueryOverrides() { true, VectorSimilarity.COSINE, randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(0)), - Collections.emptyMap() + Collections.emptyMap(), + false ); query = fieldType.createKnnQuery( VectorData.fromFloats(new float[] { 1, 4, 10 }), @@ -655,7 +672,8 @@ public void testFilterSearchThreshold() { true, VectorSimilarity.COSINE, randomIndexOptionsHnswQuantized(), - Collections.emptyMap() + Collections.emptyMap(), + false ); // Test with a filter search threshold diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java index ffc3231e959df..779d7a2a976d9 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java @@ -76,6 +76,39 @@ public DoublesBuilder appendDouble(double value) { return new DoublesBuilder(); } + @Override + public BlockLoader.FloatBuilder denseVectors(int expectedCount, int dimensions) { + class FloatsBuilder extends TestBlock.Builder implements BlockLoader.FloatBuilder { + int numElements = 0; + + @Override + public BlockLoader.FloatBuilder appendFloat(float value) { + add(value); + numElements++; + return this; + } + + @Override + public Builder appendNull() { + throw new IllegalArgumentException("dense vectors should not have null values"); + } + + @Override + public Builder endPositionEntry() { + assert numElements == dimensions : "expected " + dimensions + " dimensions, but got " + numElements; + numElements = 0; + return super.endPositionEntry(); + } + + @Override + public TestBlock build() { + assert numElements == 0 : "endPositionEntry() was not called for the last entry"; + return super.build(); + } + } + return new FloatsBuilder(); + } + @Override public BlockLoader.IntBuilder intsFromDocValues(int expectedCount) { return ints(expectedCount); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java index 4bd722992f524..b9f55f7985e84 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/plugin/EsqlCorePlugin.java @@ -14,4 +14,5 @@ public class EsqlCorePlugin extends Plugin implements ExtensiblePlugin { public static final FeatureFlag AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG = new FeatureFlag("esql_aggregate_metric_double"); + public static final FeatureFlag DENSE_VECTOR_FEATURE_FLAG = new FeatureFlag("esql_dense_vector"); } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index a330ac076d2c4..bde8af3c4e6d1 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -302,7 +302,12 @@ public enum DataType { */ PARTIAL_AGG(builder().esType("partial_agg").unknownSize()), - AGGREGATE_METRIC_DOUBLE(builder().esType("aggregate_metric_double").estimatedSize(Double.BYTES * 3 + Integer.BYTES)); + AGGREGATE_METRIC_DOUBLE(builder().esType("aggregate_metric_double").estimatedSize(Double.BYTES * 3 + Integer.BYTES)), + + /** + * Fields with this type are dense vectors, represented as an array of double values. + */ + DENSE_VECTOR(builder().esType("dense_vector").unknownSize()); /** * Types that are actively being built. These types are not returned @@ -311,7 +316,8 @@ public enum DataType { * check that sending them to a function produces a sane error message. */ public static final Map UNDER_CONSTRUCTION = Map.ofEntries( - Map.entry(AGGREGATE_METRIC_DOUBLE, EsqlCorePlugin.AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG) + Map.entry(AGGREGATE_METRIC_DOUBLE, EsqlCorePlugin.AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG), + Map.entry(DENSE_VECTOR, EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG) ); private final String typeName; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java index e06efb09f5724..657f7b8504c94 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockUtils.java @@ -15,12 +15,14 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.function.Consumer; import static org.elasticsearch.common.lucene.BytesRefs.toBytesRef; +import static org.elasticsearch.compute.data.ElementType.NULL; import static org.elasticsearch.compute.data.ElementType.fromJava; public final class BlockUtils { @@ -222,6 +224,13 @@ public static Block constantBlock(BlockFactory blockFactory, Object val, int siz if (val == null) { return blockFactory.newConstantNullBlock(size); } + if (val instanceof Collection collection) { + if (collection.isEmpty()) { + return constantBlock(blockFactory, NULL, val, size); + } + Object colVal = collection.iterator().next(); + return constantBlock(blockFactory, fromJava(colVal.getClass()), colVal, size); + } return constantBlock(blockFactory, fromJava(val.getClass()), val, size); } @@ -235,6 +244,7 @@ private static Block constantBlock(BlockFactory blockFactory, ElementType type, case DOUBLE -> blockFactory.newConstantDoubleBlockWith((double) val, size); case BOOLEAN -> blockFactory.newConstantBooleanBlockWith((boolean) val, size); case AGGREGATE_METRIC_DOUBLE -> blockFactory.newConstantAggregateMetricDoubleBlock((AggregateMetricDoubleLiteral) val, size); + case FLOAT -> blockFactory.newConstantFloatBlockWith((float) val, size); default -> throw new UnsupportedOperationException("unsupported element type [" + type + "]"); }; } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java index 57819d6d7e041..62da90a72a555 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java @@ -712,6 +712,11 @@ public BlockLoader.DoubleBuilder doubles(int expectedCount) { return factory.newDoubleBlockBuilder(expectedCount); } + @Override + public BlockLoader.FloatBuilder denseVectors(int expectedVectorsCount, int dimensions) { + return factory.newFloatBlockBuilder(expectedVectorsCount * dimensions); + } + @Override public BlockLoader.IntBuilder intsFromDocValues(int expectedCount) { return factory.newIntBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING); diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java index d902f32c16c45..cb6780f8359dc 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java @@ -690,6 +690,7 @@ public void testSuggestedCast() throws IOException { shouldBeSupported.remove(DataType.NULL); shouldBeSupported.remove(DataType.DOC_DATA_TYPE); shouldBeSupported.remove(DataType.TSID_DATA_TYPE); + shouldBeSupported.remove(DataType.DENSE_VECTOR); if (EsqlCorePlugin.AGGREGATE_METRIC_DOUBLE_FEATURE_FLAG.isEnabled() == false) { shouldBeSupported.remove(DataType.AGGREGATE_METRIC_DOUBLE); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java index 3f8478fe713a3..e57874f857de0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvAssert.java @@ -35,6 +35,7 @@ import static org.elasticsearch.common.logging.LoggerMessageFormat.format; import static org.elasticsearch.xpack.esql.CsvTestUtils.ExpectedResults; import static org.elasticsearch.xpack.esql.CsvTestUtils.Type; +import static org.elasticsearch.xpack.esql.CsvTestUtils.Type.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.CsvTestUtils.Type.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.CsvTestUtils.logMetaData; import static org.elasticsearch.xpack.esql.core.util.DateUtils.UTC_DATE_TIME_FORMATTER; @@ -145,6 +146,10 @@ private static void assertMetadata( // Type.asType translates all bytes references into keywords continue; } + if (blockType == Type.FLOAT && expectedType == DENSE_VECTOR) { + // DENSE_VECTOR is internally represented as a float block + continue; + } if (blockType == Type.NULL) { // Null pages don't have any real type information beyond "it's all null, man" continue; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java index 6ea4f553ff1ed..3a6f623c6dec1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestUtils.java @@ -52,6 +52,7 @@ import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Stream; import static org.elasticsearch.common.Strings.delimitedListToStringArray; import static org.elasticsearch.common.logging.LoggerMessageFormat.format; @@ -147,7 +148,11 @@ void append(String stringValue) { for (String value : arrayOfValues) { convertedValues.add(type.convert(value)); } - convertedValues.stream().sorted().forEach(v -> builderWrapper().append().accept(v)); + Stream convertedValuesStream = convertedValues.stream(); + if (type.sortMultiValues()) { + convertedValuesStream = convertedValuesStream.sorted(); + } + convertedValuesStream.forEach(v -> builderWrapper().append().accept(v)); builderWrapper().builder().endPositionEntry(); return; @@ -486,6 +491,7 @@ public enum Type { x -> x == null ? null : stringToAggregateMetricDoubleLiteral(x), AggregateMetricDoubleBlockBuilder.AggregateMetricDoubleLiteral.class ), + DENSE_VECTOR(Float::parseFloat, Float.class, false), UNSUPPORTED(Type::convertUnsupported, Void.class); private static Void convertUnsupported(String s) { @@ -528,25 +534,38 @@ private static Void convertUnsupported(String s) { LOOKUP.put("DATE", DATETIME); LOOKUP.put("DT", DATETIME); LOOKUP.put("V", VERSION); + + LOOKUP.put("DENSE_VECTOR", DENSE_VECTOR); } private final Function converter; private final Class clazz; private final Comparator comparator; + private final boolean sortMultiValues; - @SuppressWarnings("unchecked") Type(Function converter, Class clazz) { + this(converter, clazz, true); + } + + @SuppressWarnings("unchecked") + Type(Function converter, Class clazz, boolean sortMultiValues) { this( converter, Comparable.class.isAssignableFrom(clazz) ? (a, b) -> ((Comparable) a).compareTo(b) : Comparator.comparing(Object::toString), - clazz + clazz, + sortMultiValues ); } Type(Function converter, Comparator comparator, Class clazz) { + this(converter, comparator, clazz, true); + } + + Type(Function converter, Comparator comparator, Class clazz, boolean sortMultiValues) { this.converter = converter; this.comparator = comparator; this.clazz = clazz; + this.sortMultiValues = sortMultiValues; } public static Type asType(String name) { @@ -594,6 +613,10 @@ Class clazz() { public Comparator comparator() { return comparator; } + + public boolean sortMultiValues() { + return sortMultiValues; + } } record ActualResults( diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index efca400724472..92fe597362bb0 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -143,6 +143,7 @@ public class CsvTestsDataLoader { private static final TestDataset SEMANTIC_TEXT = new TestDataset("semantic_text").withInferenceEndpoint(true); private static final TestDataset LOGS = new TestDataset("logs"); private static final TestDataset MV_TEXT = new TestDataset("mv_text"); + private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector"); public static final Map CSV_DATASET_MAP = Map.ofEntries( Map.entry(EMPLOYEES.indexName, EMPLOYEES), @@ -202,7 +203,8 @@ public class CsvTestsDataLoader { Map.entry(BOOKS.indexName, BOOKS), Map.entry(SEMANTIC_TEXT.indexName, SEMANTIC_TEXT), Map.entry(LOGS.indexName, LOGS), - Map.entry(MV_TEXT.indexName, MV_TEXT) + Map.entry(MV_TEXT.indexName, MV_TEXT), + Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR) ); private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json"); @@ -235,6 +237,7 @@ public class CsvTestsDataLoader { CITY_BOUNDARIES_ENRICH, CITY_AIRPORTS_ENRICH ); + public static final String NUMERIC_REGEX = "-?\\d+(\\.\\d+)?"; /** *

@@ -660,7 +663,8 @@ private static void loadCsvData(RestClient client, String indexName, URL resourc private static String quoteIfNecessary(String value) { boolean isQuoted = (value.startsWith("\"") && value.endsWith("\"")) || (value.startsWith("{") && value.endsWith("}")); - return isQuoted ? value : "\"" + value + "\""; + boolean isNumeric = value.matches(NUMERIC_REGEX); + return isQuoted || isNumeric ? value : "\"" + value + "\""; } private static void sendBulkRequest(String indexName, StringBuilder builder, RestClient client, Logger logger, List failures) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 477d6a7651074..dc5608efe67b1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -109,6 +109,7 @@ import java.time.Duration; import java.time.Period; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.EnumSet; import java.util.HashMap; @@ -129,6 +130,7 @@ import static org.elasticsearch.test.ESTestCase.assertEquals; import static org.elasticsearch.test.ESTestCase.between; import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength; +import static org.elasticsearch.test.ESTestCase.randomArray; import static org.elasticsearch.test.ESTestCase.randomBoolean; import static org.elasticsearch.test.ESTestCase.randomByte; import static org.elasticsearch.test.ESTestCase.randomDouble; @@ -837,6 +839,7 @@ public static Literal randomLiteral(DataType type) { throw new UncheckedIOException(e); } } + case DENSE_VECTOR -> Arrays.asList(randomArray(10, 10, i -> new Float[10], ESTestCase::randomFloat)); case UNSUPPORTED, OBJECT, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG -> throw new IllegalArgumentException( "can't make random values for [" + type.typeName() + "]" ); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv new file mode 100644 index 0000000000000..d24c9f8543b53 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/data/dense_vector.csv @@ -0,0 +1,5 @@ +id:l, vector:dense_vector +0, [1.0, 2.0, 3.0] +1, [4.0, 5.0, 6.0] +2, [9.0, 8.0, 7.0] +3, [0.054, 0.032, 0.012] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec new file mode 100644 index 0000000000000..74ef532313055 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/dense_vector.csv-spec @@ -0,0 +1,48 @@ + +retrieveDenseVectorData +required_capability: dense_vector_field_type + +FROM dense_vector +| KEEP id, vector +| SORT id +; + +id:l | vector:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [0.054, 0.032, 0.012] +; + +denseVectorWithEval +required_capability: dense_vector_field_type + +FROM dense_vector +| EVAL v = vector +| KEEP id, v +| SORT id +; + +id:l | v:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [0.054, 0.032, 0.012] +; + +denseVectorWithRenameAndDrop +required_capability: dense_vector_field_type + +FROM dense_vector +| EVAL v = vector +| RENAME v AS new_vector +| DROP vector +| SORT id +; + +id:l | new_vector:dense_vector +0 | [1.0, 2.0, 3.0] +1 | [4.0, 5.0, 6.0] +2 | [9.0, 8.0, 7.0] +3 | [0.054, 0.032, 0.012] +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default-incompatible.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default-incompatible.json index 607ae5c9ab2c8..841aa84811ab6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default-incompatible.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-default-incompatible.json @@ -63,18 +63,7 @@ "type" : "keyword" }, "salary_change": { - "type": "float", - "fields": { - "int": { - "type": "integer" - }, - "long": { - "type": "long" - }, - "keyword": { - "type" : "keyword" - } - } + "type": "float" } } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json new file mode 100644 index 0000000000000..572d9870d09da --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-dense_vector.json @@ -0,0 +1,11 @@ +{ + "properties": { + "id": { + "type": "long" + }, + "vector": { + "type": "dense_vector", + "similarity": "l2_norm" + } + } +} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java new file mode 100644 index 0000000000000..12631fdeaed5b --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/DenseVectorFieldTypeIT.java @@ -0,0 +1,177 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.index.IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING; +import static org.elasticsearch.index.mapper.SourceFieldMapper.Mode.SYNTHETIC; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; + +public class DenseVectorFieldTypeIT extends AbstractEsqlIntegTestCase { + + private static final Set DENSE_VECTOR_INDEX_TYPES = Set.of( + "int8_hnsw", + "hnsw", + "int4_hnsw", + "bbq_hnsw", + "int8_flat", + "int4_flat", + "bbq_flat", + "flat" + ); + + private final String indexType; + private final boolean index; + private final boolean synthetic; + + @ParametersFactory + public static Iterable parameters() throws Exception { + List params = new ArrayList<>(); + // Indexed field types + for (String indexType : DENSE_VECTOR_INDEX_TYPES) { + params.add(new Object[] { indexType, true, false }); + } + // No indexing + params.add(new Object[] { null, false, false }); + // No indexing, synthetic source + params.add(new Object[] { null, false, true }); + return params; + } + + public DenseVectorFieldTypeIT(@Name("indexType") String indexType, @Name("index") boolean index, @Name("synthetic") boolean synthetic) { + this.indexType = indexType; + this.index = index; + this.synthetic = synthetic; + } + + private final Map> indexedVectors = new HashMap<>(); + + public void testRetrieveFieldType() { + var query = """ + FROM test + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "vector")); + assertColumnTypes(resp.columns(), List.of("integer", "dense_vector")); + } + } + + @SuppressWarnings("unchecked") + public void testRetrieveTopNDenseVectorFieldData() { + var query = """ + FROM test + | KEEP id, vector + | SORT id ASC + """; + + try (var resp = run(query)) { + List> valuesList = EsqlTestUtils.getValuesList(resp); + indexedVectors.forEach((id, vector) -> { + var values = valuesList.get(id); + assertEquals(id, values.get(0)); + List vectors = (List) values.get(1); + assertNotNull(vectors); + assertEquals(vector.size(), vectors.size()); + for (int i = 0; i < vector.size(); i++) { + assertEquals(vector.get(i), vectors.get(i), 0F); + } + }); + } + } + + @SuppressWarnings("unchecked") + public void testRetrieveDenseVectorFieldData() { + var query = """ + FROM test + | KEEP id, vector + """; + + try (var resp = run(query)) { + List> valuesList = EsqlTestUtils.getValuesList(resp); + assertEquals(valuesList.size(), indexedVectors.size()); + valuesList.forEach(value -> { + ; + assertEquals(2, value.size()); + Integer id = (Integer) value.get(0); + List vector = (List) value.get(1); + assertNotNull(vector); + List expectedVector = indexedVectors.get(id); + assertNotNull(expectedVector); + for (int i = 0; i < vector.size(); i++) { + assertEquals(expectedVector.get(i), vector.get(i), 0F); + } + }); + } + } + + @Before + public void setup() throws IOException { + var indexName = "test"; + var client = client().admin().indices(); + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("id") + .field("type", "integer") + .endObject() + .startObject("vector") + .field("type", "dense_vector") + .field("index", index); + if (index) { + mapping.field("similarity", "l2_norm"); + } + if (indexType != null) { + mapping.startObject("index_options").field("type", indexType).endObject(); + } + mapping.endObject().endObject().endObject(); + Settings.Builder settingsBuilder = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)); + if (synthetic) { + settingsBuilder.put(INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SYNTHETIC); + } + + var CreateRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping(mapping) + .setSettings(settingsBuilder.build()); + assertAcked(CreateRequest); + + int numDims = randomIntBetween(32, 64) * 2; // min 64, even number + int numDocs = randomIntBetween(10, 100); + IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; + for (int i = 0; i < numDocs; i++) { + List vector = new ArrayList<>(numDims); + for (int j = 0; j < numDims; j++) { + vector.add(randomFloat()); + } + docs[i] = prepareIndex("test").setId("" + i).setSource("id", String.valueOf(i), "vector", vector); + indexedVectors.put(i, vector); + } + + indexRandom(true, docs); + } +} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java index 24b184f573dfd..cd1f155483481 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupJoinTypesIT.java @@ -354,9 +354,14 @@ private String mainPropertyFor(TestConfig config) { } private static String sampleDataTextFor(DataType type) { - var value = sampleDataFor(type); + return sampleDataForValue(sampleDataFor(type)); + } + + private static String sampleDataForValue(Object value) { if (value instanceof String) { return "\"" + value + "\""; + } else if (value instanceof List list) { + return "[" + list.stream().map(LookupJoinTypesIT::sampleDataForValue).collect(Collectors.joining(", ")) + "]"; } return String.valueOf(value); } @@ -375,6 +380,7 @@ private static Object sampleDataFor(DataType type) { case VERSION -> "1.2.19"; case GEO_POINT, CARTESIAN_POINT -> "POINT (1.0 2.0)"; case GEO_SHAPE, CARTESIAN_SHAPE -> "POLYGON ((0.0 0.0, 1.0 0.0, 1.0 1.0, 0.0 1.0, 0.0 0.0))"; + case DENSE_VECTOR -> List.of(0.2672612f, 0.5345224f, 0.8017837f); default -> throw new IllegalArgumentException("Unsupported type: " + type); }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 71914ee57e032..713be7fe756af 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.rest.action.admin.cluster.RestNodesCapabilitiesAction; +import org.elasticsearch.xpack.esql.core.plugin.EsqlCorePlugin; import org.elasticsearch.xpack.esql.plugin.EsqlFeatures; import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; @@ -1117,7 +1118,12 @@ public enum Cap { /** * Allow lookup join on mixed numeric fields, among byte, short, int, long, half_float, scaled_float, float and double. */ - LOOKUP_JOIN_ON_MIXED_NUMERIC_FIELDS; + LOOKUP_JOIN_ON_MIXED_NUMERIC_FIELDS, + + /** + * Dense vector field type support + */ + DENSE_VECTOR_FIELD_TYPE(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java index 7d580b897c2a0..00f297efd0e53 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/PositionToXContent.java @@ -15,6 +15,7 @@ import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.FloatBlock; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.xcontent.ToXContent; @@ -182,6 +183,13 @@ protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Pa } } }; + case DENSE_VECTOR -> new PositionToXContent(block) { + @Override + protected XContentBuilder valueToXContent(XContentBuilder builder, ToXContent.Params params, int valueIndex) + throws IOException { + return builder.value(((FloatBlock) block).getFloat(valueIndex)); + } + }; case DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, SHORT, BYTE, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT, PARTIAL_AGG -> throw new IllegalArgumentException("can't convert values of type [" + columnInfo.type() + "]"); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java index 38c2916d73b72..3a406de60ace7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/ResponseValueUtils.java @@ -16,6 +16,7 @@ import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.FloatBlock; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; @@ -147,6 +148,7 @@ private static Object valueAt(DataType dataType, Block block, int offset, BytesR throw new UncheckedIOException(e); } } + case DENSE_VECTOR -> ((FloatBlock) block).getFloat(offset); case SHORT, BYTE, FLOAT, HALF_FLOAT, SCALED_FLOAT, OBJECT, DATE_PERIOD, TIME_DURATION, DOC_DATA_TYPE, TSID_DATA_TYPE, NULL, PARTIAL_AGG -> throw EsqlIllegalArgumentException.illegalDataType(dataType); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java index 411aff6f52ab3..c8005506be627 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/nulls/Coalesce.java @@ -210,7 +210,7 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { .toEvaluator(toEvaluator, children()); case NULL -> EvalOperator.CONSTANT_NULL_FACTORY; case UNSUPPORTED, SHORT, BYTE, DATE_PERIOD, OBJECT, DOC_DATA_TYPE, SOURCE, TIME_DURATION, FLOAT, HALF_FLOAT, TSID_DATA_TYPE, - SCALED_FLOAT, PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE -> throw new UnsupportedOperationException( + SCALED_FLOAT, PARTIAL_AGG, AGGREGATE_METRIC_DOUBLE, DENSE_VECTOR -> throw new UnsupportedOperationException( dataType() + " can’t be coalesced" ); }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java index 1330213143f8b..6b3e334ac3acc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java @@ -39,6 +39,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.COUNTER_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; import static org.elasticsearch.xpack.esql.core.type.DataType.DOC_DATA_TYPE; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_SHAPE; @@ -79,7 +80,8 @@ public class Join extends BinaryPlan implements PostAnalysisVerificationAware, S DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG, - AGGREGATE_METRIC_DOUBLE }; + AGGREGATE_METRIC_DOUBLE, + DENSE_VECTOR }; private final JoinConfig config; private List lazyOutput; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 49252799c321b..277b65dd00708 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -471,7 +471,7 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte case BOOLEAN, NULL, BYTE, SHORT, INTEGER, LONG, DOUBLE, FLOAT, HALF_FLOAT, DATETIME, DATE_NANOS, DATE_PERIOD, TIME_DURATION, OBJECT, SCALED_FLOAT, UNSIGNED_LONG, DOC_DATA_TYPE, TSID_DATA_TYPE -> TopNEncoder.DEFAULT_SORTABLE; case GEO_POINT, CARTESIAN_POINT, GEO_SHAPE, CARTESIAN_SHAPE, COUNTER_LONG, COUNTER_INTEGER, COUNTER_DOUBLE, SOURCE, - AGGREGATE_METRIC_DOUBLE -> TopNEncoder.DEFAULT_UNSORTABLE; + AGGREGATE_METRIC_DOUBLE, DENSE_VECTOR -> TopNEncoder.DEFAULT_UNSORTABLE; // unsupported fields are encoded as BytesRef, we'll use the same encoder; all values should be null at this point case PARTIAL_AGG, UNSUPPORTED -> TopNEncoder.UNSUPPORTED; }; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index 3593b90c9662f..d217ced9efb00 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -303,6 +303,7 @@ public static ElementType toElementType(DataType dataType, MappedFieldType.Field case GEO_SHAPE, CARTESIAN_SHAPE -> fieldExtractPreference == EXTRACT_SPATIAL_BOUNDS ? ElementType.INT : ElementType.BYTES_REF; case PARTIAL_AGG -> ElementType.COMPOSITE; case AGGREGATE_METRIC_DOUBLE -> ElementType.AGGREGATE_METRIC_DOUBLE; + case DENSE_VECTOR -> ElementType.FLOAT; case SHORT, BYTE, DATE_PERIOD, TIME_DURATION, OBJECT, FLOAT, HALF_FLOAT, SCALED_FLOAT -> throw EsqlIllegalArgumentException .illegalDataType(dataType); }; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java index ccab9d54a96a1..cd0389e757e8c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.compute.data.BooleanBlock; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.FloatBlock; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; @@ -252,6 +253,15 @@ private Page randomPage(List columns) { throw new UncheckedIOException(e); } } + case DENSE_VECTOR -> { + BlockLoader.FloatBuilder floatBuilder = (BlockLoader.FloatBuilder) builder; + int dims = randomIntBetween(32, 64) * 2; // min 64 dims, always even + floatBuilder.beginPositionEntry(); + for (int i = 0; i < dims; i++) { + floatBuilder.appendFloat(randomFloat()); + } + floatBuilder.endPositionEntry(); + } // default -> throw new UnsupportedOperationException("unsupported data type [" + c + "]"); } return builder.build(); @@ -1194,6 +1204,20 @@ static Page valuesToPage(BlockFactory blockFactory, List columns aggBuilder.sum().appendDouble(((Number) value).doubleValue()); aggBuilder.count().appendInt(((Number) value).intValue()); } + case DENSE_VECTOR -> { + FloatBlock.Builder floatBuilder = (FloatBlock.Builder) builder; + List vector = (List) value; + floatBuilder.beginPositionEntry(); + for (Object v : vector) { + switch (v) { + // XContentParser may retrieve Double values - we convert them to Float if needed + case Double d -> floatBuilder.appendFloat(d.floatValue()); + case Float f -> floatBuilder.appendFloat(f); + default -> fail("Unexpected dense_vector value type: " + v.getClass()); + } + } + floatBuilder.endPositionEntry(); + } } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java index 184e8bc68ba69..8d3b214f97a63 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/conditional/CaseTests.java @@ -59,7 +59,12 @@ public class CaseTests extends AbstractScalarFunctionTestCase { DataType.NULL ).collect(Collectors.toList()); if (Build.current().isSnapshot()) { - t.addAll(DataType.UNDER_CONSTRUCTION.keySet().stream().filter(type -> type != DataType.AGGREGATE_METRIC_DOUBLE).toList()); + t.addAll( + DataType.UNDER_CONSTRUCTION.keySet() + .stream() + .filter(type -> type != DataType.AGGREGATE_METRIC_DOUBLE && type != DataType.DENSE_VECTOR) + .toList() + ); } TYPES = unmodifiableList(t); } diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml index b9f65aad9ad4a..8bae45df8ce08 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/40_unsupported_types.yml @@ -26,6 +26,7 @@ setup: format: "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis" dense_vector: type: dense_vector + similarity: l2_norm dims: 3 double_range: type: double_range @@ -115,7 +116,7 @@ unsupported: - method: POST path: /_query parameters: [] - capabilities: [REPORT_ORIGINAL_TYPES] + capabilities: [dense_vector_field_type] reason: "uses original_type" - do: @@ -140,7 +141,7 @@ unsupported: - match: { columns.4.name: date_range } - match: { columns.4.type: unsupported } - match: { columns.5.name: dense_vector } - - match: { columns.5.type: unsupported } + - match: { columns.5.type: dense_vector } - match: { columns.6.name: double_range } - match: { columns.6.type: unsupported } - match: { columns.7.name: float_range } @@ -194,7 +195,9 @@ unsupported: - match: { values.0.2: null } - match: { values.0.3: "2015-01-01T12:10:30.123456789Z" } - match: { values.0.4: null } - - match: { values.0.5: null } + - match: { values.0.5.0: 0.5 } + - match: { values.0.5.1: 10.0 } + - match: { values.0.5.2: 6.0 } - match: { values.0.6: null } - match: { values.0.7: null } - match: { values.0.8: "POINT (10.0 12.0)" } @@ -239,7 +242,7 @@ unsupported: - match: { columns.4.name: date_range } - match: { columns.4.type: unsupported } - match: { columns.5.name: dense_vector } - - match: { columns.5.type: unsupported } + - match: { columns.5.type: dense_vector } - match: { columns.6.name: double_range } - match: { columns.6.type: unsupported } - match: { columns.7.name: float_range } @@ -310,8 +313,8 @@ unsupported with sort: - method: POST path: /_query parameters: [ ] - capabilities: [ aggregate_metric_double_sorting ] - reason: "support for sorting when aggregate_metric_double present" + capabilities: [ dense_vector_field_type ] + reason: "support for sorting when dense_vector_field_type present" - do: allowed_warnings_regex: @@ -332,7 +335,7 @@ unsupported with sort: - match: { columns.4.name: date_range } - match: { columns.4.type: unsupported } - match: { columns.5.name: dense_vector } - - match: { columns.5.type: unsupported } + - match: { columns.5.type: dense_vector } - match: { columns.6.name: double_range } - match: { columns.6.type: unsupported } - match: { columns.7.name: float_range } @@ -386,7 +389,9 @@ unsupported with sort: - match: { values.0.2: null } - match: { values.0.3: "2015-01-01T12:10:30.123456789Z" } - match: { values.0.4: null } - - match: { values.0.5: null } + - match: { values.0.5.0: 0.5 } + - match: { values.0.5.1: 10.0 } + - match: { values.0.5.2: 6.0 } - match: { values.0.6: null } - match: { values.0.7: null } - match: { values.0.8: "POINT (10.0 12.0)" }