Skip to content

Commit b759161

Browse files
authored
ES|QL dense vector field type support (#126456)
1 parent 83fe2ed commit b759161

File tree

31 files changed

+724
-54
lines changed

31 files changed

+724
-54
lines changed

server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,16 @@
1111

1212
import org.apache.lucene.index.BinaryDocValues;
1313
import org.apache.lucene.index.DocValues;
14+
import org.apache.lucene.index.FloatVectorValues;
15+
import org.apache.lucene.index.KnnVectorValues;
1416
import org.apache.lucene.index.LeafReaderContext;
1517
import org.apache.lucene.index.NumericDocValues;
1618
import org.apache.lucene.index.SortedDocValues;
1719
import org.apache.lucene.index.SortedNumericDocValues;
1820
import org.apache.lucene.index.SortedSetDocValues;
1921
import org.apache.lucene.util.BytesRef;
2022
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
23+
import org.elasticsearch.index.IndexVersion;
2124
import org.elasticsearch.index.mapper.BlockLoader.BlockFactory;
2225
import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder;
2326
import org.elasticsearch.index.mapper.BlockLoader.Builder;
@@ -26,6 +29,7 @@
2629
import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder;
2730
import org.elasticsearch.index.mapper.BlockLoader.IntBuilder;
2831
import org.elasticsearch.index.mapper.BlockLoader.LongBuilder;
32+
import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder;
2933
import org.elasticsearch.search.fetch.StoredFieldsSpec;
3034

3135
import java.io.IOException;
@@ -504,6 +508,87 @@ public String toString() {
504508
}
505509
}
506510

511+
public static class DenseVectorBlockLoader extends DocValuesBlockLoader {
512+
private final String fieldName;
513+
private final int dimensions;
514+
515+
public DenseVectorBlockLoader(String fieldName, int dimensions) {
516+
this.fieldName = fieldName;
517+
this.dimensions = dimensions;
518+
}
519+
520+
@Override
521+
public Builder builder(BlockFactory factory, int expectedCount) {
522+
return factory.denseVectors(expectedCount, dimensions);
523+
}
524+
525+
@Override
526+
public AllReader reader(LeafReaderContext context) throws IOException {
527+
FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName);
528+
if (floatVectorValues != null) {
529+
return new DenseVectorValuesBlockReader(floatVectorValues, dimensions);
530+
}
531+
return new ConstantNullsReader();
532+
}
533+
}
534+
535+
private static class DenseVectorValuesBlockReader extends BlockDocValuesReader {
536+
private final FloatVectorValues floatVectorValues;
537+
private final KnnVectorValues.DocIndexIterator iterator;
538+
private final int dimensions;
539+
540+
DenseVectorValuesBlockReader(FloatVectorValues floatVectorValues, int dimensions) {
541+
this.floatVectorValues = floatVectorValues;
542+
iterator = floatVectorValues.iterator();
543+
this.dimensions = dimensions;
544+
}
545+
546+
@Override
547+
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
548+
// Doubles from doc values ensures that the values are in order
549+
try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) {
550+
for (int i = 0; i < docs.count(); i++) {
551+
int doc = docs.get(i);
552+
if (doc < iterator.docID()) {
553+
throw new IllegalStateException("docs within same block must be in order");
554+
}
555+
read(doc, builder);
556+
}
557+
return builder.build();
558+
}
559+
}
560+
561+
@Override
562+
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
563+
read(docId, (BlockLoader.FloatBuilder) builder);
564+
}
565+
566+
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
567+
if (iterator.advance(doc) == doc) {
568+
builder.beginPositionEntry();
569+
float[] floats = floatVectorValues.vectorValue(iterator.index());
570+
assert floats.length == dimensions
571+
: "unexpected dimensions for vector value; expected " + dimensions + " but got " + floats.length;
572+
for (float aFloat : floats) {
573+
builder.appendFloat(aFloat);
574+
}
575+
builder.endPositionEntry();
576+
} else {
577+
builder.appendNull();
578+
}
579+
}
580+
581+
@Override
582+
public int docId() {
583+
return iterator.docID();
584+
}
585+
586+
@Override
587+
public String toString() {
588+
return "BlockDocValuesReader.FloatVectorValuesBlockReader";
589+
}
590+
}
591+
507592
public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader {
508593
private final String fieldName;
509594

@@ -752,6 +837,94 @@ public String toString() {
752837
}
753838
}
754839

840+
public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoader {
841+
private final String fieldName;
842+
private final int dims;
843+
private final IndexVersion indexVersion;
844+
845+
public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) {
846+
this.fieldName = fieldName;
847+
this.dims = dims;
848+
this.indexVersion = indexVersion;
849+
}
850+
851+
@Override
852+
public Builder builder(BlockFactory factory, int expectedCount) {
853+
return factory.denseVectors(expectedCount, dims);
854+
}
855+
856+
@Override
857+
public AllReader reader(LeafReaderContext context) throws IOException {
858+
BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName);
859+
if (docValues == null) {
860+
return new ConstantNullsReader();
861+
}
862+
return new DenseVectorFromBinary(docValues, dims, indexVersion);
863+
}
864+
}
865+
866+
private static class DenseVectorFromBinary extends BlockDocValuesReader {
867+
private final BinaryDocValues docValues;
868+
private final IndexVersion indexVersion;
869+
private final int dimensions;
870+
private final float[] scratch;
871+
872+
private int docID = -1;
873+
874+
DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) {
875+
this.docValues = docValues;
876+
this.scratch = new float[dims];
877+
this.indexVersion = indexVersion;
878+
this.dimensions = dims;
879+
}
880+
881+
@Override
882+
public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException {
883+
try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) {
884+
for (int i = 0; i < docs.count(); i++) {
885+
int doc = docs.get(i);
886+
if (doc < docID) {
887+
throw new IllegalStateException("docs within same block must be in order");
888+
}
889+
read(doc, builder);
890+
}
891+
return builder.build();
892+
}
893+
}
894+
895+
@Override
896+
public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException {
897+
read(docId, (BlockLoader.FloatBuilder) builder);
898+
}
899+
900+
private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException {
901+
this.docID = doc;
902+
if (false == docValues.advanceExact(doc)) {
903+
builder.appendNull();
904+
return;
905+
}
906+
BytesRef bytesRef = docValues.binaryValue();
907+
assert bytesRef.length > 0;
908+
VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch);
909+
910+
builder.beginPositionEntry();
911+
for (float value : scratch) {
912+
builder.appendFloat(value);
913+
}
914+
builder.endPositionEntry();
915+
}
916+
917+
@Override
918+
public int docId() {
919+
return docID;
920+
}
921+
922+
@Override
923+
public String toString() {
924+
return "DenseVectorFromBinary.Bytes";
925+
}
926+
}
927+
755928
public static class BooleansBlockLoader extends DocValuesBlockLoader {
756929
private final String fieldName;
757930

server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,11 @@ interface BlockFactory {
373373
*/
374374
DoubleBuilder doubles(int expectedCount);
375375

376+
/**
377+
* Build a builder to load dense vectors without any loading constraints.
* The doc values readers for dense vectors append each vector as one
* position entry ({@code beginPositionEntry} / {@code endPositionEntry})
* containing one {@code float} per component.
*
* @param expectedVectorsCount expected number of vectors; callers pass the
*                             number of documents being loaded
* @param dimensions number of {@code float} components per vector
* @return the {@link FloatBuilder} that vector components are appended to
378+
*/
379+
FloatBuilder denseVectors(int expectedVectorsCount, int dimensions);
380+
376381
/**
377382
* Build a builder to load ints as loaded from doc values.
378383
* Doc values load ints in sorted order.

server/src/main/java/org/elasticsearch/index/mapper/BlockSourceReader.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,49 @@ public String toString() {
303303
}
304304
}
305305

306+
/**
307+
* Load {@code float}s from {@code _source}.
308+
*/
309+
public static class DenseVectorBlockLoader extends SourceBlockLoader {
310+
private final int dimensions;
311+
312+
public DenseVectorBlockLoader(ValueFetcher fetcher, LeafIteratorLookup lookup, int dimensions) {
313+
super(fetcher, lookup);
314+
this.dimensions = dimensions;
315+
}
316+
317+
@Override
318+
public Builder builder(BlockFactory factory, int expectedCount) {
319+
return factory.denseVectors(expectedCount, dimensions);
320+
}
321+
322+
@Override
323+
public RowStrideReader rowStrideReader(LeafReaderContext context, DocIdSetIterator iter) {
324+
return new DenseVectors(fetcher, iter);
325+
}
326+
327+
@Override
328+
protected String name() {
329+
return "DenseVectors";
330+
}
331+
}
332+
333+
private static class DenseVectors extends BlockSourceReader {
334+
DenseVectors(ValueFetcher fetcher, DocIdSetIterator iter) {
335+
super(fetcher, iter);
336+
}
337+
338+
@Override
339+
protected void append(BlockLoader.Builder builder, Object v) {
340+
((BlockLoader.FloatBuilder) builder).appendFloat(((Number) v).floatValue());
341+
}
342+
343+
@Override
344+
public String toString() {
345+
return "BlockSourceReader.DenseVectors";
346+
}
347+
}
348+
306349
/**
307350
* Load {@code int}s from {@code _source}.
308351
*/

0 commit comments

Comments
 (0)