Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ Improvements

* GITHUB#15425: Refactoring internal HnswGraph.NodesIterator to avoid unneeded copying and sorting (Mike Sokolov)

* GITHUB#15429: Relax the criterion for re-using per-segment HNSW graph information in merge optimizations
from "no deleted documents" to "no deleted vectors". (Kaival Parikh)

Optimizations
---------------------
* GITHUB#15140: Optimize TopScoreDocCollector with TernaryLongHeap for improved performance over Binary-LongHeap. (Ramakrishna Chilaka)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,11 @@
import java.util.List;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.hnsw.HnswGraphProvider;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.internal.hppc.IntIntHashMap;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
Expand Down Expand Up @@ -73,31 +72,26 @@ public IncrementalHnswGraphMerger(
public IncrementalHnswGraphMerger addReader(
KnnVectorsReader reader, MergeState.DocMap docMap, Bits liveDocs) throws IOException {
numReaders++;
if (hasDeletes(liveDocs) || !(reader instanceof HnswGraphProvider)) {
if (!(reader instanceof HnswGraphProvider provider)) {
return this;
}
HnswGraph graph = ((HnswGraphProvider) reader).getGraph(fieldInfo.name);

HnswGraph graph = provider.getGraph(fieldInfo.name);
if (graph == null || graph.size() == 0) {
return this;
}

int candidateVectorCount = 0;
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> {
ByteVectorValues byteVectorValues = reader.getByteVectorValues(fieldInfo.name);
if (byteVectorValues == null) {
return this;
}
candidateVectorCount = byteVectorValues.size();
}
case FLOAT32 -> {
FloatVectorValues vectorValues = reader.getFloatVectorValues(fieldInfo.name);
if (vectorValues == null) {
return this;
}
candidateVectorCount = vectorValues.size();
}
KnnVectorValues values =
switch (fieldInfo.getVectorEncoding()) {
case BYTE -> reader.getByteVectorValues(fieldInfo.name);
case FLOAT32 -> reader.getFloatVectorValues(fieldInfo.name);
};

if (values == null || hasDeletes(values.iterator(), liveDocs)) {
return this;
}

int candidateVectorCount = values.size();
graphReaders.add(new GraphReader(reader, docMap, candidateVectorCount));
return this;
}
Expand Down Expand Up @@ -192,13 +186,13 @@ public OnHeapHnswGraph merge(
return builder.build(maxOrd);
}

private static boolean hasDeletes(Bits liveDocs) {
private static boolean hasDeletes(DocIdSetIterator iterator, Bits liveDocs) throws IOException {
if (liveDocs == null) {
return false;
}

for (int i = 0; i < liveDocs.length(); i++) {
if (!liveDocs.get(i)) {
for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) {
if (!liveDocs.get(doc)) {
return true;
}
}
Expand Down