Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
a62c943
Add byte vector support
carlosdelest Jul 24, 2025
9317fcd
Add byte vector support - tests
carlosdelest Jul 24, 2025
4c59ba1
First test version, with all index types - fails
carlosdelest Jul 24, 2025
475a285
Knn tests for non-flat, indexed types
carlosdelest Jul 24, 2025
ecc563f
Add CSV tests
carlosdelest Jul 24, 2025
4a40503
Merge remote-tracking branch 'origin/main' into non-issue/esql-dense-…
carlosdelest Jul 24, 2025
84d3c50
Fix tests after merging
carlosdelest Jul 24, 2025
e9afc06
[CI] Auto commit changes from spotless
Jul 24, 2025
0402ad9
Merge remote-tracking branch 'origin/main' into non-issue/esql-dense-…
carlosdelest Aug 11, 2025
a4aca14
Take into account normalization
carlosdelest Aug 12, 2025
08a3c5c
[CI] Auto commit changes from spotless
Aug 12, 2025
763fe63
Take into account normalization for dense vector support
carlosdelest Aug 12, 2025
80b48cf
Fix cherry pick
carlosdelest Aug 12, 2025
40edca3
[CI] Auto commit changes from spotless
Aug 12, 2025
8bd7f79
Remove debugging code
carlosdelest Aug 12, 2025
f9447f7
Merge remote-tracking branch 'carlosdelest/non-issue/esql-dense-vecto…
carlosdelest Aug 12, 2025
7d2625c
Check that we may not have magnitudes at all, or for normalized vectors
carlosdelest Aug 12, 2025
b763bcc
Merge branch 'non-issue/esql-dense-vector-support-normalization' into…
carlosdelest Aug 13, 2025
5bcac49
Fix merge
carlosdelest Aug 13, 2025
57c45b5
Remove cosine similarity code
carlosdelest Aug 13, 2025
9abc1ea
Merge remote-tracking branch 'carlosdelest/non-issue/esql-dense-vecto…
carlosdelest Aug 13, 2025
6977dbe
[CI] Auto commit changes from spotless
Aug 13, 2025
7675c71
Merge branch 'main' into non-issue/esql-dense-vector-byte-element-sup…
carlosdelest Aug 13, 2025
239d350
Better parameterized test
carlosdelest Aug 13, 2025
9385113
Fix test
carlosdelest Aug 13, 2025
67cbb84
Merge remote-tracking branch 'carlosdelest/non-issue/esql-dense-vecto…
carlosdelest Aug 13, 2025
14ba3b0
Fix test
carlosdelest Aug 13, 2025
64ca563
[CI] Auto commit changes from spotless
Aug 13, 2025
ee803a4
Merge branch 'main' into non-issue/esql-dense-vector-byte-element-sup…
carlosdelest Aug 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
Expand All @@ -29,6 +30,7 @@
import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder;
import org.elasticsearch.index.mapper.BlockLoader.IntBuilder;
import org.elasticsearch.index.mapper.BlockLoader.LongBuilder;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.ElementType;
import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder;
import org.elasticsearch.search.fetch.StoredFieldsSpec;

Expand Down Expand Up @@ -511,10 +513,12 @@ public String toString() {
public static class DenseVectorBlockLoader extends DocValuesBlockLoader {
private final String fieldName;
private final int dimensions;
private final ElementType elementType;

public DenseVectorBlockLoader(String fieldName, int dimensions) {
public DenseVectorBlockLoader(String fieldName, int dimensions, ElementType elementType) {
this.fieldName = fieldName;
this.dimensions = dimensions;
this.elementType = elementType;
}

@Override
Expand All @@ -524,22 +528,34 @@ public Builder builder(BlockFactory factory, int expectedCount) {

@Override
public AllReader reader(LeafReaderContext context) throws IOException {
FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName);
if (floatVectorValues != null) {
return new DenseVectorValuesBlockReader(floatVectorValues, dimensions);
switch (elementType) {
case FLOAT -> {
FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName);
if (floatVectorValues != null) {
return new FloatDenseVectorValuesBlockReader(floatVectorValues, dimensions);
}
}
case BYTE -> {
ByteVectorValues byteVectorValues = context.reader().getByteVectorValues(fieldName);
if (byteVectorValues != null) {
return new ByteDenseVectorValuesBlockReader(byteVectorValues, dimensions);
}
}
}

return new ConstantNullsReader();
}
}

private static class DenseVectorValuesBlockReader extends BlockDocValuesReader {
private final FloatVectorValues floatVectorValues;
private final KnnVectorValues.DocIndexIterator iterator;
private final int dimensions;
private abstract static class DenseVectorValuesBlockReader<T extends KnnVectorValues> extends BlockDocValuesReader {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added abstract classes to deal with common code between float and byte vector reading


protected final T vectorValues;
protected final KnnVectorValues.DocIndexIterator iterator;
protected final int dimensions;

DenseVectorValuesBlockReader(FloatVectorValues floatVectorValues, int dimensions) {
this.floatVectorValues = floatVectorValues;
iterator = floatVectorValues.iterator();
DenseVectorValuesBlockReader(T vectorValues, int dimensions) {
this.vectorValues = vectorValues;
iterator = vectorValues.iterator();
this.dimensions = dimensions;
}

Expand All @@ -564,26 +580,58 @@ private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException
builder.appendNull();
} else if (iterator.docID() == doc || iterator.advance(doc) == doc) {
builder.beginPositionEntry();
float[] floats = floatVectorValues.vectorValue(iterator.index());
assert floats.length == dimensions
: "unexpected dimensions for vector value; expected " + dimensions + " but got " + floats.length;
for (float aFloat : floats) {
builder.appendFloat(aFloat);
}
appendDoc(builder);
builder.endPositionEntry();
} else {
builder.appendNull();
}
}

protected abstract void appendDoc(BlockLoader.FloatBuilder builder) throws IOException;

@Override
public int docId() {
return iterator.docID();
}
}

/**
 * Block reader for dense vectors whose values are exposed as {@link FloatVectorValues}.
 * Each component of the current document's vector is appended to the builder as a float.
 */
private static class FloatDenseVectorValuesBlockReader extends DenseVectorValuesBlockReader<FloatVectorValues> {
    FloatDenseVectorValuesBlockReader(FloatVectorValues floatVectorValues, int dimensions) {
        super(floatVectorValues, dimensions);
    }

    /**
     * Appends every component of the vector at the iterator's current index to {@code builder}.
     *
     * @param builder destination for the vector components
     * @throws IOException propagated from reading the vector values
     */
    @Override
    protected void appendDoc(BlockLoader.FloatBuilder builder) throws IOException {
        float[] floats = vectorValues.vectorValue(iterator.index());
        // The stored vector is expected to have exactly the configured number of dimensions.
        assert floats.length == dimensions
            : "unexpected dimensions for vector value; expected " + dimensions + " but got " + floats.length;
        for (float aFloat : floats) {
            builder.appendFloat(aFloat);
        }
    }

    @Override
    public String toString() {
        return "BlockDocValuesReader.FloatDenseVectorValuesBlockReader";
    }
}

/**
 * Block reader for dense vectors whose values are exposed as {@link ByteVectorValues}.
 * Each byte component of the current document's vector is widened to a float and appended
 * to the builder.
 */
private static class ByteDenseVectorValuesBlockReader extends DenseVectorValuesBlockReader<ByteVectorValues> {
    ByteDenseVectorValuesBlockReader(ByteVectorValues byteVectorValues, int dimensions) {
        super(byteVectorValues, dimensions);
    }

    /**
     * Appends every component of the vector at the iterator's current index to {@code builder}.
     *
     * @param builder destination for the vector components
     * @throws IOException propagated from reading the vector values
     */
    @Override
    protected void appendDoc(BlockLoader.FloatBuilder builder) throws IOException {
        byte[] bytes = vectorValues.vectorValue(iterator.index());
        // The stored vector is expected to have exactly the configured number of dimensions.
        assert bytes.length == dimensions
            : "unexpected dimensions for vector value; expected " + dimensions + " but got " + bytes.length;
        for (byte component : bytes) {
            // Implicit byte -> float widening; the builder only accepts floats.
            builder.appendFloat(component);
        }
    }

    @Override
    public String toString() {
        return "BlockDocValuesReader.ByteDenseVectorValuesBlockReader";
    }
}

Expand Down Expand Up @@ -875,11 +923,13 @@ public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoade
private final String fieldName;
private final int dims;
private final IndexVersion indexVersion;
private final ElementType elementType;

public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) {
public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion, ElementType elementType) {
this.fieldName = fieldName;
this.dims = dims;
this.indexVersion = indexVersion;
this.elementType = elementType;
}

@Override
Expand All @@ -893,23 +943,40 @@ public AllReader reader(LeafReaderContext context) throws IOException {
if (docValues == null) {
return new ConstantNullsReader();
}
return new DenseVectorFromBinary(docValues, dims, indexVersion);
switch (elementType) {
case FLOAT:
return new FloatDenseVectorFromBinary(docValues, dims, indexVersion);
case BYTE:
return new ByteDenseVectorFromBinary(docValues, dims, indexVersion);
default:
throw new IllegalArgumentException("Unknown element type [" + elementType + "]");
}
}
}

private static class DenseVectorFromBinary extends BlockDocValuesReader {
private final BinaryDocValues docValues;
private final IndexVersion indexVersion;
private final int dimensions;
private final float[] scratch;
// Abstract base for dense vector readers
private abstract static class AbstractDenseVectorFromBinary<T> extends BlockDocValuesReader {
protected final BinaryDocValues docValues;
protected final IndexVersion indexVersion;
protected final int dimensions;
protected final T scratch;
protected int docID = -1;

private int docID = -1;

DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
AbstractDenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion, T scratch) {
this.docValues = docValues;
this.scratch = new float[dims];
this.indexVersion = indexVersion;
this.dimensions = dims;
this.scratch = scratch;
}

@Override
public int docId() {
return docID;
}

@Override
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
read(docId, (BlockLoader.FloatBuilder) builder);
}

@Override
Expand All @@ -926,36 +993,67 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw
}
}

@Override
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
read(docId, (BlockLoader.FloatBuilder) builder);
}

private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
this.docID = doc;
if (false == docValues.advanceExact(doc)) {
if (docValues.advanceExact(doc) == false) {
builder.appendNull();
return;
}
BytesRef bytesRef = docValues.binaryValue();
assert bytesRef.length > 0;
VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch);
decodeDenseVector(bytesRef, scratch);

builder.beginPositionEntry();
writeScratchToBuilder(scratch, builder);
builder.endPositionEntry();
}

protected abstract void decodeDenseVector(BytesRef bytesRef, T scratch);

protected abstract void writeScratchToBuilder(T scratch, BlockLoader.FloatBuilder builder);
}

private static class FloatDenseVectorFromBinary extends AbstractDenseVectorFromBinary<float[]> {
FloatDenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
super(docValues, dims, indexVersion, new float[dims]);
}

@Override
protected void writeScratchToBuilder(float[] scratch, BlockLoader.FloatBuilder builder) {
for (float value : scratch) {
builder.appendFloat(value);
}
builder.endPositionEntry();
}

@Override
public int docId() {
return docID;
protected void decodeDenseVector(BytesRef bytesRef, float[] scratch) {
VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch);
}

@Override
public String toString() {
return "FloatDenseVectorFromBinary.Bytes";
}
}

/**
 * Reads dense vectors with byte elements from binary doc values. The encoded value is decoded
 * into a reusable {@code byte[]} scratch array of {@code dims} entries, and each byte is then
 * appended to the builder as a float.
 */
private static class ByteDenseVectorFromBinary extends AbstractDenseVectorFromBinary<byte[]> {
    ByteDenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
        super(docValues, dims, indexVersion, new byte[dims]);
    }

    /**
     * Decodes {@code bytesRef} into {@code scratch} using the byte-array decoder.
     *
     * @param bytesRef encoded vector read from doc values
     * @param scratch  destination array for the decoded components
     */
    @Override
    protected void decodeDenseVector(BytesRef bytesRef, byte[] scratch) {
        VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch);
    }

    /**
     * Appends every decoded component to {@code builder}.
     *
     * @param scratch decoded vector components
     * @param builder destination for the components
     */
    @Override
    protected void writeScratchToBuilder(byte[] scratch, BlockLoader.FloatBuilder builder) {
        for (byte value : scratch) {
            // Implicit byte -> float widening; the builder only accepts floats.
            builder.appendFloat(value);
        }
    }

    @Override
    public String toString() {
        return "ByteDenseVectorFromBinary.Bytes";
    }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2805,8 +2805,8 @@ public DenseVectorIndexOptions getIndexOptions() {

@Override
public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
if (elementType != ElementType.FLOAT) {
// Just float dense vector support for now
if (elementType == ElementType.BIT) {
// Just float and byte dense vector support for now
return null;
}

Expand All @@ -2816,11 +2816,11 @@ public BlockLoader blockLoader(MappedFieldType.BlockLoaderContext blContext) {
}

if (indexed) {
return new BlockDocValuesReader.DenseVectorBlockLoader(name(), dims);
return new BlockDocValuesReader.DenseVectorBlockLoader(name(), dims, elementType);
}

if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
return new BlockDocValuesReader.DenseVectorFromBinaryBlockLoader(name(), dims, indexVersionCreated);
return new BlockDocValuesReader.DenseVectorFromBinaryBlockLoader(name(), dims, indexVersionCreated, elementType);
}

BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupMatchingAll();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ public static void decodeDenseVector(IndexVersion indexVersion, BytesRef vectorB
}
}

/**
* Decodes a BytesRef into the provided array of bytes
* @param vectorBR - dense vector encoded in BytesRef
* @param vector - array of bytes where the decoded vector should be stored
*/
public static void decodeDenseVector(IndexVersion indexVersion, BytesRef vectorBR, byte[] vector) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needed a specific method for decoding dense vector of byte values - this is an adaptation of the existing float[] method (expand up to see it)

if (vectorBR == null) {
throw new IllegalArgumentException(DenseVectorScriptDocValues.MISSING_VECTOR_FIELD_MESSAGE);
}
if (indexVersion.onOrAfter(LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION)) {
ByteBuffer fb = ByteBuffer.wrap(vectorBR.bytes, vectorBR.offset, vectorBR.length).order(ByteOrder.LITTLE_ENDIAN);
fb.get(vector);
} else {
ByteBuffer byteBuffer = ByteBuffer.wrap(vectorBR.bytes, vectorBR.offset, vectorBR.length);
for (int dim = 0; dim < vector.length; dim++) {
vector[dim] = byteBuffer.get(dim * vectorBR.offset);
}
}
}

public static float[] getMultiMagnitudes(BytesRef magnitudes) {
assert magnitudes.length % Float.BYTES == 0;
float[] multiMagnitudes = new float[magnitudes.length / Float.BYTES];
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id:l, vector:dense_vector
0, [1.0, 2.0, 3.0]
1, [4.0, 5.0, 6.0]
2, [9.0, 8.0, 7.0]
3, [0.054, 0.032, 0.012]
id:l, float_vector:dense_vector, byte_vector:dense_vector
0, [1.0, 2.0, 3.0], [10, 20, 30]
1, [4.0, 5.0, 6.0], [40, 50, 60]
2, [9.0, 8.0, 7.0], [90, 80, 70]
3, [0.054, 0.032, 0.012], [100, 110, 120]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A copy of the float vector tests, using the specific byte field

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
retrieveByteVectorData
required_capability: dense_vector_field_type_byte_elements

FROM dense_vector
| KEEP id, byte_vector
| SORT id
;

id:l | byte_vector:dense_vector
0 | [10, 20, 30]
1 | [40, 50, 60]
2 | [90, 80, 70]
3 | [100, 110, 120]
;

denseByteVectorWithEval
required_capability: dense_vector_field_type_byte_elements

FROM dense_vector
| EVAL v = byte_vector
| KEEP id, v
| SORT id
;

id:l | v:dense_vector
0 | [10, 20, 30]
1 | [40, 50, 60]
2 | [90, 80, 70]
3 | [100, 110, 120]
;

denseByteVectorWithRenameAndDrop
required_capability: dense_vector_field_type_byte_elements

FROM dense_vector
| EVAL v = byte_vector
| RENAME v AS new_vector
| DROP float_vector, byte_vector
| SORT id
;

id:l | new_vector:dense_vector
0 | [10, 20, 30]
1 | [40, 50, 60]
2 | [90, 80, 70]
3 | [100, 110, 120]
;
Loading