|
9 | 9 |
|
10 | 10 | package org.elasticsearch.index.codec.vectors.es93; |
11 | 11 |
|
12 | | -import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; |
| 12 | +import org.apache.lucene.codecs.KnnVectorsFormat; |
| 13 | +import org.apache.lucene.codecs.KnnVectorsReader; |
| 14 | +import org.apache.lucene.codecs.KnnVectorsWriter; |
13 | 15 | import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; |
14 | 16 | import org.apache.lucene.codecs.hnsw.FlatVectorsReader; |
15 | | -import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; |
16 | | -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorScorer; |
17 | 17 | import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsFormat; |
18 | | -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsReader; |
19 | | -import org.apache.lucene.codecs.lucene104.Lucene104ScalarQuantizedVectorsWriter; |
| 18 | +import org.apache.lucene.index.ByteVectorValues; |
| 19 | +import org.apache.lucene.index.FieldInfo; |
| 20 | +import org.apache.lucene.index.FloatVectorValues; |
20 | 21 | import org.apache.lucene.index.SegmentReadState; |
21 | 22 | import org.apache.lucene.index.SegmentWriteState; |
| 23 | +import org.apache.lucene.search.AcceptDocs; |
| 24 | +import org.apache.lucene.search.KnnCollector; |
| 25 | +import org.apache.lucene.util.Bits; |
| 26 | +import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; |
| 27 | +import org.apache.lucene.util.hnsw.RandomVectorScorer; |
22 | 28 |
|
23 | 29 | import java.io.IOException; |
| 30 | +import java.util.Map; |
24 | 31 |
|
25 | 32 | import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; |
26 | 33 |
|
// NOTE: this span is a rendered diff. Lines marked "-" are the removed version, lines marked "+" are the
// added version, and unmarked lines are unchanged context. The comments below describe the added version.
//
// ES93ScalarQuantizedVectorsFormat (added version): a KnnVectorsFormat that holds a FlatVectorsFormat in
// `format`, hands writing straight to it, and wraps its reader so that search calls can be answered.
27 | | -public class ES93ScalarQuantizedVectorsFormat extends FlatVectorsFormat { |
| 34 | +public class ES93ScalarQuantizedVectorsFormat extends KnnVectorsFormat { |
28 | 35 |
|
// Format name passed to the superclass constructor and echoed by toString().
29 | 36 | static final String NAME = "ES93ScalarQuantizedVectorsFormat"; |
30 | 37 |
|
31 | | - static final Lucene104ScalarQuantizedVectorScorer flatVectorScorer = new Lucene104ScalarQuantizedVectorScorer( |
32 | | - FlatVectorScorerUtil.getLucene99FlatVectorsScorer() |
33 | | - ); |
// The inner flat format that every writer/reader call is delegated to; assigned once in the two-argument constructor.
| 38 | + private final FlatVectorsFormat format; |
34 | 39 |
|
35 | | - private final Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding; |
36 | | - private final FlatVectorsFormat rawVectorFormat; |
// No-argument constructor: useBFloat16 = false and SEVEN_BIT encoding.
| 40 | + public ES93ScalarQuantizedVectorsFormat() { |
| 41 | + this(false, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT); |
| 42 | + } |
| 43 | + |
// Convenience constructor: caller chooses useBFloat16, encoding is fixed to SEVEN_BIT.
| 44 | + public ES93ScalarQuantizedVectorsFormat(boolean useBFloat16) { |
| 45 | + this(useBFloat16, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding.SEVEN_BIT); |
| 46 | + } |
37 | 47 |
|
38 | | - public ES93ScalarQuantizedVectorsFormat( |
39 | | - Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding, |
40 | | - boolean useBFloat16, |
41 | | - boolean useDirectIO |
42 | | - ) { |
// Primary constructor: registers NAME with the superclass and builds the inner
// ES93ScalarQuantizedFlatVectorsFormat from the given encoding and useBFloat16 flag.
// NOTE(review): the third argument is hard-coded to false; the removed constructor took a useDirectIO flag,
// so this is presumably that same setting -- confirm against ES93ScalarQuantizedFlatVectorsFormat.
| 48 | + public ES93ScalarQuantizedVectorsFormat(boolean useBFloat16, Lucene104ScalarQuantizedVectorsFormat.ScalarEncoding encoding) { |
43 | 49 | super(NAME); |
44 | | - this.encoding = encoding; |
45 | | - this.rawVectorFormat = new ES93GenericFlatVectorsFormat(useBFloat16, useDirectIO); |
| 50 | + this.format = new ES93ScalarQuantizedFlatVectorsFormat(encoding, useBFloat16, false); |
46 | 51 | } |
47 | 52 |
|
// fieldsWriter (added version): returns the inner format's writer unchanged.
48 | 53 | @Override |
49 | | - public int getMaxDimensions(String fieldName) { |
50 | | - return MAX_DIMS_COUNT; |
| 54 | + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { |
| 55 | + return format.fieldsWriter(state); |
51 | 56 | } |
52 | 57 |
|
// fieldsReader (added version): opens the inner format's flat reader and wraps it in ES93FlatVectorsReader,
// the nested class below that implements the two search(...) overloads on top of it.
53 | 58 | @Override |
54 | | - public String toString() { |
55 | | - return NAME |
56 | | - + "(name=" |
57 | | - + NAME |
58 | | - + ", encoding=" |
59 | | - + encoding |
60 | | - + ", flatVectorScorer=" |
61 | | - + flatVectorScorer |
62 | | - + ", rawVectorFormat=" |
63 | | - + rawVectorFormat |
64 | | - + ")"; |
| 59 | + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { |
| 60 | + return new ES93FlatVectorsReader(format.fieldsReader(state)); |
65 | 61 | } |
66 | 62 |
|
// getMaxDimensions (added version): returns MAX_DIMS_COUNT for every field; fieldName is not consulted.
67 | 63 | @Override |
68 | | - public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { |
69 | | - return new Lucene104ScalarQuantizedVectorsWriter(state, encoding, rawVectorFormat.fieldsWriter(state), flatVectorScorer) { |
70 | | - }; |
| 64 | + public int getMaxDimensions(String fieldName) { |
| 65 | + return MAX_DIMS_COUNT; |
71 | 66 | } |
72 | 67 |
|
// toString (added version): prints NAME twice (as the prefix and as the "name=" value) followed by the inner format.
73 | 68 | @Override |
74 | | - public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { |
75 | | - return new Lucene104ScalarQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), flatVectorScorer); |
| 69 | + public String toString() { |
| 70 | + return NAME + "(name=" + NAME + ", innerFormat=" + format + ")"; |
| 71 | + } |
| 72 | + |
// ES93FlatVectorsReader: a KnnVectorsReader wrapper around a FlatVectorsReader. Integrity checks, vector value
// access, off-heap size reporting and close() are forwarded to the wrapped reader; search(...) is implemented
// here by scoring every accepted ordinal in turn.
| 73 | + public static class ES93FlatVectorsReader extends KnnVectorsReader { |
| 74 | + |
// The wrapped flat reader; this class closes it in close().
| 75 | + private final FlatVectorsReader reader; |
| 76 | + |
| 77 | + public ES93FlatVectorsReader(FlatVectorsReader reader) { |
| 78 | + super(); |
| 79 | + this.reader = reader; |
| 80 | + } |
| 81 | + |
// Forwarded to the wrapped reader.
| 82 | + @Override |
| 83 | + public void checkIntegrity() throws IOException { |
| 84 | + reader.checkIntegrity(); |
| 85 | + } |
| 86 | + |
// Forwarded to the wrapped reader.
| 87 | + @Override |
| 88 | + public FloatVectorValues getFloatVectorValues(String field) throws IOException { |
| 89 | + return reader.getFloatVectorValues(field); |
| 90 | + } |
| 91 | + |
// Forwarded to the wrapped reader.
| 92 | + @Override |
| 93 | + public ByteVectorValues getByteVectorValues(String field) throws IOException { |
| 94 | + return reader.getByteVectorValues(field); |
| 95 | + } |
| 96 | + |
// float[] search: obtains a scorer for the target from the wrapped reader and runs the shared scan below.
| 97 | + @Override |
| 98 | + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { |
| 99 | + collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); |
| 100 | + } |
| 101 | + |
// Shared scan used by both search overloads.
// - Wraps knnCollector so that collected ordinals are translated through scorer::ordToDoc.
// - Asks the scorer to map acceptDocs.bits() to an ordinal-level filter; a null result is treated as
//   "accept every ordinal".
// - Visits ordinals 0 .. scorer.maxOrd()-1, scoring and collecting each accepted one and counting it as visited.
// - The loop never consults earlyTerminated(); the final assert (active only when assertions are enabled)
//   states the expectation that the collector did not request early termination.
| 102 | + private void collectAllMatchingDocs(KnnCollector knnCollector, AcceptDocs acceptDocs, RandomVectorScorer scorer) |
| 103 | + throws IOException { |
| 104 | + OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); |
| 105 | + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); |
| 106 | + for (int i = 0; i < scorer.maxOrd(); i++) { |
| 107 | + if (acceptedOrds == null || acceptedOrds.get(i)) { |
| 108 | + collector.collect(i, scorer.score(i)); |
| 109 | + collector.incVisitedCount(1); |
| 110 | + } |
| 111 | + } |
| 112 | + assert collector.earlyTerminated() == false; |
| 113 | + } |
| 114 | + |
// byte[] search: same as the float[] overload, using the byte-target scorer from the wrapped reader.
| 115 | + @Override |
| 116 | + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { |
| 117 | + collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); |
| 118 | + } |
| 119 | + |
// Forwarded to the wrapped reader.
| 120 | + @Override |
| 121 | + public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) { |
| 122 | + return reader.getOffHeapByteSize(fieldInfo); |
| 123 | + } |
| 124 | + |
// Closes the wrapped reader.
| 125 | + @Override |
| 126 | + public void close() throws IOException { |
| 127 | + reader.close(); |
| 128 | + } |
76 | 129 | } |
77 | 130 | } |
0 commit comments