Skip to content

Commit d9d5d1b

Browse files
authored
Fix ES818BinaryQuantizedVectorsReader to not use directIO during merge (elastic#130114) (elastic#130165)
This commit fixes the BBQ reader to **not** use directIO when merging the original float vectors.
1 parent 6e2bc78 commit d9d5d1b

File tree

3 files changed

+65
-1
lines changed

3 files changed

+65
-1
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
6565

6666
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class);
6767

68-
private final Map<String, FieldEntry> fields = new HashMap<>();
68+
private final Map<String, FieldEntry> fields;
6969
private final IndexInput quantizedVectorData;
7070
private final FlatVectorsReader rawVectorsReader;
7171
private final ES818BinaryFlatVectorsScorer vectorScorer;
@@ -77,6 +77,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
7777
ES818BinaryFlatVectorsScorer vectorsScorer
7878
) throws IOException {
7979
super(vectorsScorer);
80+
this.fields = new HashMap<>();
8081
this.vectorScorer = vectorsScorer;
8182
this.rawVectorsReader = rawVectorsReader;
8283
int versionMeta = -1;
@@ -120,6 +121,24 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
120121
}
121122
}
122123

124+
/**
 * Copy constructor: builds a reader that reuses the state of {@code clone} but reads raw vectors through the
 * supplied {@code rawVectorsReader}. The scorer, the quantized vector data input and the field map are taken
 * from {@code clone} by reference; nothing is re-read or duplicated here.
 *
 * @param clone            the reader whose scorer, quantized data input and field entries are reused
 * @param rawVectorsReader the raw vectors reader the new instance delegates to
 */
private ES818BinaryQuantizedVectorsReader(ES818BinaryQuantizedVectorsReader clone, FlatVectorsReader rawVectorsReader) {
125+
super(clone.vectorScorer);
126+
this.rawVectorsReader = rawVectorsReader;
127+
this.vectorScorer = clone.vectorScorer;
128+
// Same IndexInput reference as the source reader (not a new handle).
this.quantizedVectorData = clone.quantizedVectorData;
129+
// Same map instance as the source reader; the entries are shared, not copied.
this.fields = clone.fields;
130+
}
131+
132+
// For testing: package-private accessor returning the raw vectors reader this instance delegates to.
133+
FlatVectorsReader getRawVectorsReader() {
134+
return rawVectorsReader;
135+
}
136+
137+
/**
 * Returns a new reader that reuses this reader's scorer, quantized data input and field map, but delegates
 * raw-vector access to {@code rawVectorsReader.getMergeInstance()} instead of {@code rawVectorsReader}.
 * Per the accompanying commit message, the intent is that merging the original float vectors does not use
 * directIO. A new instance is created on every call.
 */
@Override
138+
public FlatVectorsReader getMergeInstance() {
139+
return new ES818BinaryQuantizedVectorsReader(this, rawVectorsReader.getMergeInstance());
140+
}
141+
123142
private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
124143
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
125144
FieldInfo info = infos.fieldInfo(fieldNumber);

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ protected MergeReaderWrapper(FlatVectorsReader mainReader, FlatVectorsReader mer
3636
this.mergeReader = mergeReader;
3737
}
3838

39+
// For testing: package-private accessor returning the main (non-merge) reader wrapped by this instance.
40+
FlatVectorsReader getMainReader() {
41+
return mainReader;
42+
}
43+
3944
@Override
4045
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
4146
return mainReader.getRandomVectorScorer(field, target);

server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.apache.lucene.codecs.FilterCodec;
2424
import org.apache.lucene.codecs.KnnVectorsFormat;
2525
import org.apache.lucene.codecs.KnnVectorsReader;
26+
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader;
2627
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2728
import org.apache.lucene.document.Document;
2829
import org.apache.lucene.document.Field;
@@ -35,6 +36,7 @@
3536
import org.apache.lucene.index.IndexWriterConfig;
3637
import org.apache.lucene.index.KnnVectorValues;
3738
import org.apache.lucene.index.LeafReader;
39+
import org.apache.lucene.index.SegmentReader;
3840
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
3941
import org.apache.lucene.index.Term;
4042
import org.apache.lucene.index.VectorSimilarityFunction;
@@ -83,6 +85,7 @@
8385
import static java.lang.String.format;
8486
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
8587
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
88+
import static org.hamcrest.Matchers.instanceOf;
8689
import static org.hamcrest.Matchers.is;
8790
import static org.hamcrest.Matchers.oneOf;
8891

@@ -309,6 +312,43 @@ public void testSimpleOffHeapSizeImpl(Directory dir, IndexWriterConfig config, b
309312
}
310313
}
311314

315+
/**
 * Verifies which raw vectors reader backs the field "field" for search versus merge: the regular reader must
 * unwrap to a {@code DirectIOLucene99FlatVectorsReader}, while its merge instance must unwrap to a
 * {@code Lucene99FlatVectorsReader}.
 */
public void testMergeInstance() throws IOException {
316+
// NOTE(review): presumably skips or aborts the test when direct IO is unavailable; helper is defined elsewhere.
checkDirectIOSupported();
317+
float[] vector = randomVector(10);
318+
VectorSimilarityFunction similarityFunction = randomSimilarity();
319+
KnnFloatVectorField knnField = new KnnFloatVectorField("field", vector, similarityFunction);
320+
try (Directory dir = newFSDirectory()) {
321+
// Compound files are disabled for this writer.
try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setUseCompoundFile(false))) {
322+
Document doc = new Document();
323+
// The field value is replaced with a fresh random vector before the document is indexed.
knnField.setVectorValue(randomVector(10));
324+
doc.add(knnField);
325+
w.addDocument(doc);
326+
w.commit();
327+
328+
try (IndexReader reader = DirectoryReader.open(w)) {
329+
SegmentReader r = (SegmentReader) getOnlyLeafReader(reader);
330+
// Search-time reader: expected to bottom out in the direct IO flat vectors reader.
assertThat(unwrapRawVectorReader("field", r.getVectorReader()), instanceOf(DirectIOLucene99FlatVectorsReader.class));
331+
// Merge-time reader: expected to bottom out in the plain Lucene99 flat vectors reader.
assertThat(
332+
unwrapRawVectorReader("field", r.getVectorReader().getMergeInstance()),
333+
instanceOf(Lucene99FlatVectorsReader.class)
334+
);
335+
}
336+
}
337+
}
338+
}
339+
340+
/**
 * Recursively peels known wrapper readers off {@code knnReader} and returns the first reader that is none of
 * the handled wrapper types.
 *
 * @param fieldName the field whose reader is selected when a per-field reader is encountered
 * @param knnReader the reader to unwrap
 * @return the innermost reader reached after unwrapping; {@code knnReader} itself if it is not a known wrapper
 */
private static KnnVectorsReader unwrapRawVectorReader(String fieldName, KnnVectorsReader knnReader) {
341+
if (knnReader instanceof PerFieldKnnVectorsFormat.FieldsReader perField) {
342+
// Per-field wrapper: descend into the reader registered for fieldName.
return unwrapRawVectorReader(fieldName, perField.getFieldReader(fieldName));
343+
} else if (knnReader instanceof ES818BinaryQuantizedVectorsReader bbqReader) {
344+
// BBQ reader: descend into the raw vectors reader it delegates to.
return unwrapRawVectorReader(fieldName, bbqReader.getRawVectorsReader());
345+
} else if (knnReader instanceof MergeReaderWrapper mergeReaderWrapper) {
346+
// Merge wrapper: descend into its main reader (not the merge reader).
return unwrapRawVectorReader(fieldName, mergeReaderWrapper.getMainReader());
347+
} else {
348+
return knnReader;
349+
}
350+
}
351+
312352
static Directory newMMapDirectory() throws IOException {
313353
Directory dir = new MMapDirectory(createTempDir("ES818BinaryQuantizedVectorsFormatTests"));
314354
if (random().nextBoolean()) {

0 commit comments

Comments
 (0)