Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/130114.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 130114
summary: Fix ES818BinaryQuantizedVectorsReader to not use directIO during merge
area: Vector Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme

private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class);

private final Map<String, FieldEntry> fields = new HashMap<>();
private final Map<String, FieldEntry> fields;
private final IndexInput quantizedVectorData;
private final FlatVectorsReader rawVectorsReader;
private final ES818BinaryFlatVectorsScorer vectorScorer;
Expand All @@ -77,6 +77,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
ES818BinaryFlatVectorsScorer vectorsScorer
) throws IOException {
super(vectorsScorer);
this.fields = new HashMap<>();
this.vectorScorer = vectorsScorer;
this.rawVectorsReader = rawVectorsReader;
int versionMeta = -1;
Expand Down Expand Up @@ -120,6 +121,24 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader impleme
}
}

/**
 * Builds a reader that reuses the state of an existing reader but reads raw vectors through a
 * different {@link FlatVectorsReader}.
 *
 * <p>The scorer, the quantized vector data input and the field map are taken from {@code clone}
 * by reference; nothing is copied.
 * NOTE(review): the {@code IndexInput} is therefore shared between both instances; confirm that
 * closing one of them cannot invalidate the other.
 *
 * @param clone            the reader whose scorer, quantized data and field map are reused
 * @param rawVectorsReader the raw vectors reader the new instance delegates to
 */
private ES818BinaryQuantizedVectorsReader(ES818BinaryQuantizedVectorsReader clone, FlatVectorsReader rawVectorsReader) {
    super(clone.vectorScorer);
    // State shared with the source reader.
    this.fields = clone.fields;
    this.quantizedVectorData = clone.quantizedVectorData;
    this.vectorScorer = clone.vectorScorer;
    // The only piece that differs from the source reader.
    this.rawVectorsReader = rawVectorsReader;
}

/**
 * Exposes the wrapped raw vectors reader. Package-private: intended for tests only.
 *
 * @return the raw vectors reader this instance was constructed with
 */
FlatVectorsReader getRawVectorsReader() {
    return this.rawVectorsReader;
}

/**
 * Returns a new reader for use during merges.
 *
 * <p>The returned instance reuses this reader's scorer, quantized vector data and field map, and
 * delegates raw vector access to the merge instance of the wrapped raw vectors reader.
 *
 * @return a new reader backed by {@code rawVectorsReader.getMergeInstance()}
 */
@Override
public FlatVectorsReader getMergeInstance() {
    FlatVectorsReader rawMergeReader = rawVectorsReader.getMergeInstance();
    return new ES818BinaryQuantizedVectorsReader(this, rawMergeReader);
}

private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException {
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
FieldInfo info = infos.fieldInfo(fieldNumber);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ protected MergeReaderWrapper(FlatVectorsReader mainReader, FlatVectorsReader mer
this.mergeReader = mergeReader;
}

/**
 * Exposes the wrapped main reader. Package-private: intended for tests only.
 *
 * @return the main reader held by this wrapper
 */
FlatVectorsReader getMainReader() {
    return this.mainReader;
}

@Override
public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException {
return mainReader.getRandomVectorScorer(field, target);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
Expand All @@ -35,6 +36,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
Expand Down Expand Up @@ -83,6 +85,7 @@
import static java.lang.String.format;
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;

Expand Down Expand Up @@ -309,6 +312,46 @@ public void testSimpleOffHeapSizeImpl(Directory dir, IndexWriterConfig config, b
}
}

/**
 * Indexes a handful of random vectors into a non-compound, force-merged segment and checks which
 * raw vectors reader backs the segment: the regular reader must unwrap to a
 * {@code DirectIOLucene99FlatVectorsReader}, and its merge instance must unwrap to a
 * {@code Lucene99FlatVectorsReader}.
 *
 * @throws IOException if the directory, writer or reader fails
 */
public void testMergeInstance() throws IOException {
    checkDirectIOSupported();
    final int dims = 10;
    final int numDocs = 10;
    float[] vector = randomVector(dims);
    VectorSimilarityFunction similarityFunction = randomSimilarity();
    KnnFloatVectorField knnField = new KnnFloatVectorField("field", vector, similarityFunction);
    try (
        Directory dir = newFSDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setUseCompoundFile(false))
    ) {
        // The same field instance is reused; only its vector value changes per document.
        for (int docIndex = 0; docIndex < numDocs; docIndex++) {
            knnField.setVectorValue(randomVector(dims));
            Document doc = new Document();
            doc.add(knnField);
            w.addDocument(doc);
        }
        w.commit();
        // Collapse to one segment so getOnlyLeafReader below is valid.
        w.forceMerge(1);

        try (IndexReader reader = DirectoryReader.open(w)) {
            SegmentReader r = (SegmentReader) getOnlyLeafReader(reader);
            KnnVectorsReader vectorReader = r.getVectorReader();
            assertThat(unwrapRawVectorReader("field", vectorReader), instanceOf(DirectIOLucene99FlatVectorsReader.class));
            assertThat(unwrapRawVectorReader("field", vectorReader.getMergeInstance()), instanceOf(Lucene99FlatVectorsReader.class));
        }
    }
}

/**
 * Peels known wrapper readers off {@code knnReader} until a reader of any other type is reached.
 *
 * <p>Wrappers handled: the per-field reader (resolved for {@code fieldName}),
 * {@code ES818BinaryQuantizedVectorsReader} (its raw vectors reader) and
 * {@code MergeReaderWrapper} (its main reader).
 *
 * @param fieldName the field used to resolve a per-field reader
 * @param knnReader the outermost reader to unwrap
 * @return the first reader in the chain that is not one of the known wrappers
 */
private static KnnVectorsReader unwrapRawVectorReader(String fieldName, KnnVectorsReader knnReader) {
    KnnVectorsReader current = knnReader;
    while (true) {
        if (current instanceof PerFieldKnnVectorsFormat.FieldsReader perField) {
            current = perField.getFieldReader(fieldName);
        } else if (current instanceof ES818BinaryQuantizedVectorsReader bbqReader) {
            current = bbqReader.getRawVectorsReader();
        } else if (current instanceof MergeReaderWrapper mergeReaderWrapper) {
            current = mergeReaderWrapper.getMainReader();
        } else {
            // Not a known wrapper: this is the innermost reader.
            return current;
        }
    }
}

static Directory newMMapDirectory() throws IOException {
Directory dir = new MMapDirectory(createTempDir("ES818BinaryQuantizedVectorsFormatTests"));
if (random().nextBoolean()) {
Expand Down
Loading