|
11 | 11 |
|
12 | 12 | import org.apache.lucene.index.BinaryDocValues; |
13 | 13 | import org.apache.lucene.index.DocValues; |
| 14 | +import org.apache.lucene.index.FloatVectorValues; |
| 15 | +import org.apache.lucene.index.KnnVectorValues; |
14 | 16 | import org.apache.lucene.index.LeafReaderContext; |
15 | 17 | import org.apache.lucene.index.NumericDocValues; |
16 | 18 | import org.apache.lucene.index.SortedDocValues; |
17 | 19 | import org.apache.lucene.index.SortedNumericDocValues; |
18 | 20 | import org.apache.lucene.index.SortedSetDocValues; |
19 | 21 | import org.apache.lucene.util.BytesRef; |
20 | 22 | import org.elasticsearch.common.io.stream.ByteArrayStreamInput; |
| 23 | +import org.elasticsearch.index.IndexVersion; |
21 | 24 | import org.elasticsearch.index.mapper.BlockLoader.BlockFactory; |
22 | 25 | import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder; |
23 | 26 | import org.elasticsearch.index.mapper.BlockLoader.Builder; |
|
26 | 29 | import org.elasticsearch.index.mapper.BlockLoader.DoubleBuilder; |
27 | 30 | import org.elasticsearch.index.mapper.BlockLoader.IntBuilder; |
28 | 31 | import org.elasticsearch.index.mapper.BlockLoader.LongBuilder; |
| 32 | +import org.elasticsearch.index.mapper.vectors.VectorEncoderDecoder; |
29 | 33 | import org.elasticsearch.search.fetch.StoredFieldsSpec; |
30 | 34 |
|
31 | 35 | import java.io.IOException; |
@@ -504,6 +508,87 @@ public String toString() { |
504 | 508 | } |
505 | 509 | } |
506 | 510 |
|
| 511 | + public static class DenseVectorBlockLoader extends DocValuesBlockLoader { |
| 512 | + private final String fieldName; |
| 513 | + private final int dimensions; |
| 514 | + |
| 515 | + public DenseVectorBlockLoader(String fieldName, int dimensions) { |
| 516 | + this.fieldName = fieldName; |
| 517 | + this.dimensions = dimensions; |
| 518 | + } |
| 519 | + |
| 520 | + @Override |
| 521 | + public Builder builder(BlockFactory factory, int expectedCount) { |
| 522 | + return factory.denseVectors(expectedCount, dimensions); |
| 523 | + } |
| 524 | + |
| 525 | + @Override |
| 526 | + public AllReader reader(LeafReaderContext context) throws IOException { |
| 527 | + FloatVectorValues floatVectorValues = context.reader().getFloatVectorValues(fieldName); |
| 528 | + if (floatVectorValues != null) { |
| 529 | + return new DenseVectorValuesBlockReader(floatVectorValues, dimensions); |
| 530 | + } |
| 531 | + return new ConstantNullsReader(); |
| 532 | + } |
| 533 | + } |
| 534 | + |
| 535 | + private static class DenseVectorValuesBlockReader extends BlockDocValuesReader { |
| 536 | + private final FloatVectorValues floatVectorValues; |
| 537 | + private final KnnVectorValues.DocIndexIterator iterator; |
| 538 | + private final int dimensions; |
| 539 | + |
| 540 | + DenseVectorValuesBlockReader(FloatVectorValues floatVectorValues, int dimensions) { |
| 541 | + this.floatVectorValues = floatVectorValues; |
| 542 | + iterator = floatVectorValues.iterator(); |
| 543 | + this.dimensions = dimensions; |
| 544 | + } |
| 545 | + |
| 546 | + @Override |
| 547 | + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { |
| 548 | + // Doubles from doc values ensures that the values are in order |
| 549 | + try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) { |
| 550 | + for (int i = 0; i < docs.count(); i++) { |
| 551 | + int doc = docs.get(i); |
| 552 | + if (doc < iterator.docID()) { |
| 553 | + throw new IllegalStateException("docs within same block must be in order"); |
| 554 | + } |
| 555 | + read(doc, builder); |
| 556 | + } |
| 557 | + return builder.build(); |
| 558 | + } |
| 559 | + } |
| 560 | + |
| 561 | + @Override |
| 562 | + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { |
| 563 | + read(docId, (BlockLoader.FloatBuilder) builder); |
| 564 | + } |
| 565 | + |
| 566 | + private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException { |
| 567 | + if (iterator.advance(doc) == doc) { |
| 568 | + builder.beginPositionEntry(); |
| 569 | + float[] floats = floatVectorValues.vectorValue(iterator.index()); |
| 570 | + assert floats.length == dimensions |
| 571 | + : "unexpected dimensions for vector value; expected " + dimensions + " but got " + floats.length; |
| 572 | + for (float aFloat : floats) { |
| 573 | + builder.appendFloat(aFloat); |
| 574 | + } |
| 575 | + builder.endPositionEntry(); |
| 576 | + } else { |
| 577 | + builder.appendNull(); |
| 578 | + } |
| 579 | + } |
| 580 | + |
| 581 | + @Override |
| 582 | + public int docId() { |
| 583 | + return iterator.docID(); |
| 584 | + } |
| 585 | + |
| 586 | + @Override |
| 587 | + public String toString() { |
| 588 | + return "BlockDocValuesReader.FloatVectorValuesBlockReader"; |
| 589 | + } |
| 590 | + } |
| 591 | + |
507 | 592 | public static class BytesRefsFromOrdsBlockLoader extends DocValuesBlockLoader { |
508 | 593 | private final String fieldName; |
509 | 594 |
|
@@ -752,6 +837,94 @@ public String toString() { |
752 | 837 | } |
753 | 838 | } |
754 | 839 |
|
| 840 | + public static class DenseVectorFromBinaryBlockLoader extends DocValuesBlockLoader { |
| 841 | + private final String fieldName; |
| 842 | + private final int dims; |
| 843 | + private final IndexVersion indexVersion; |
| 844 | + |
| 845 | + public DenseVectorFromBinaryBlockLoader(String fieldName, int dims, IndexVersion indexVersion) { |
| 846 | + this.fieldName = fieldName; |
| 847 | + this.dims = dims; |
| 848 | + this.indexVersion = indexVersion; |
| 849 | + } |
| 850 | + |
| 851 | + @Override |
| 852 | + public Builder builder(BlockFactory factory, int expectedCount) { |
| 853 | + return factory.denseVectors(expectedCount, dims); |
| 854 | + } |
| 855 | + |
| 856 | + @Override |
| 857 | + public AllReader reader(LeafReaderContext context) throws IOException { |
| 858 | + BinaryDocValues docValues = context.reader().getBinaryDocValues(fieldName); |
| 859 | + if (docValues == null) { |
| 860 | + return new ConstantNullsReader(); |
| 861 | + } |
| 862 | + return new DenseVectorFromBinary(docValues, dims, indexVersion); |
| 863 | + } |
| 864 | + } |
| 865 | + |
| 866 | + private static class DenseVectorFromBinary extends BlockDocValuesReader { |
| 867 | + private final BinaryDocValues docValues; |
| 868 | + private final IndexVersion indexVersion; |
| 869 | + private final int dimensions; |
| 870 | + private final float[] scratch; |
| 871 | + |
| 872 | + private int docID = -1; |
| 873 | + |
| 874 | + DenseVectorFromBinary(BinaryDocValues docValues, int dims, IndexVersion indexVersion) { |
| 875 | + this.docValues = docValues; |
| 876 | + this.scratch = new float[dims]; |
| 877 | + this.indexVersion = indexVersion; |
| 878 | + this.dimensions = dims; |
| 879 | + } |
| 880 | + |
| 881 | + @Override |
| 882 | + public BlockLoader.Block read(BlockFactory factory, Docs docs) throws IOException { |
| 883 | + try (BlockLoader.FloatBuilder builder = factory.denseVectors(docs.count(), dimensions)) { |
| 884 | + for (int i = 0; i < docs.count(); i++) { |
| 885 | + int doc = docs.get(i); |
| 886 | + if (doc < docID) { |
| 887 | + throw new IllegalStateException("docs within same block must be in order"); |
| 888 | + } |
| 889 | + read(doc, builder); |
| 890 | + } |
| 891 | + return builder.build(); |
| 892 | + } |
| 893 | + } |
| 894 | + |
| 895 | + @Override |
| 896 | + public void read(int docId, BlockLoader.StoredFields storedFields, Builder builder) throws IOException { |
| 897 | + read(docId, (BlockLoader.FloatBuilder) builder); |
| 898 | + } |
| 899 | + |
| 900 | + private void read(int doc, BlockLoader.FloatBuilder builder) throws IOException { |
| 901 | + this.docID = doc; |
| 902 | + if (false == docValues.advanceExact(doc)) { |
| 903 | + builder.appendNull(); |
| 904 | + return; |
| 905 | + } |
| 906 | + BytesRef bytesRef = docValues.binaryValue(); |
| 907 | + assert bytesRef.length > 0; |
| 908 | + VectorEncoderDecoder.decodeDenseVector(indexVersion, bytesRef, scratch); |
| 909 | + |
| 910 | + builder.beginPositionEntry(); |
| 911 | + for (float value : scratch) { |
| 912 | + builder.appendFloat(value); |
| 913 | + } |
| 914 | + builder.endPositionEntry(); |
| 915 | + } |
| 916 | + |
| 917 | + @Override |
| 918 | + public int docId() { |
| 919 | + return docID; |
| 920 | + } |
| 921 | + |
| 922 | + @Override |
| 923 | + public String toString() { |
| 924 | + return "DenseVectorFromBinary.Bytes"; |
| 925 | + } |
| 926 | + } |
| 927 | + |
755 | 928 | public static class BooleansBlockLoader extends DocValuesBlockLoader { |
756 | 929 | private final String fieldName; |
757 | 930 |
|
|
0 commit comments