Skip to content

Commit f93a0ed

Browse files
committed
Sometimes use FixedBitSet when doing HNSW searches (apache#14836)
For smaller graphs, the overhead cost of a SparseFixedBitSet shows up in the performance metrics. This adjusts the bitset creation logic to be more similar to how we utilize [Sparse]FixedBitSet elsewhere. The expectedVisitedNodes is both empirical and intuitive. This could possibly be refined given the number of connections within the graph, but I think this is "good enough" for now.
1 parent 0461da4 commit f93a0ed

File tree

3 files changed

+32
-2
lines changed

3 files changed

+32
-2
lines changed

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ Optimizations
9090

9191
* GITHUB#14774: Use IntArrayList/IntHashSet to replace usages of List/Set of Integer. (Zhang Chao)
9292

93+
* GITHUB#14836: Sometimes use FixedBitSet when doing HNSW searches. This slightly improves HNSW
94+
search performance on smaller graphs. (Ben Trent)
95+
9396
Bug Fixes
9497
---------------------
9598
* GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ private void search(
337337
// Take into account if quantized? E.g. some scorer cost?
338338
int filteredDocCount = 0;
339339
// The approximate number of vectors that would be visited if we did not filter
340-
int unfilteredVisit = (int) (Math.log(graph.size()) * knnCollector.k());
340+
int unfilteredVisit = HnswGraphSearcher.expectedVisitedNodes(knnCollector.k(), graph.size());
341341
if (acceptDocs instanceof BitSet bitSet) {
342342
// Use approximate cardinality as this is good enough, but ensure we don't exceed the graph
343343
// size as that is illogical

lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphSearcher.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2121

2222
import java.io.IOException;
23+
import org.apache.lucene.search.DocIdSetIterator;
2324
import org.apache.lucene.search.KnnCollector;
2425
import org.apache.lucene.search.TopKnnCollector;
2526
import org.apache.lucene.search.knn.KnnSearchStrategy;
@@ -41,6 +42,32 @@ public class HnswGraphSearcher extends AbstractHnswGraphSearcher {
4142

4243
protected BitSet visited;
4344

45+
/**
46+
* HNSW search is roughly logarithmic. This doesn't take maxConn into account, but it is a pretty
47+
* good approximation.
48+
*
49+
* @param k neighbors to find
50+
* @param graphSize size of the graph
51+
* @return expected number of visited nodes
52+
*/
53+
public static int expectedVisitedNodes(int k, int graphSize) {
54+
return (int) (Math.log(graphSize) * k);
55+
}
56+
57+
/**
58+
* Follows similar logic to {@link FixedBitSet#of(DocIdSetIterator, int)} to determine the best
59+
* bit set given the expected number of visited nodes vs total graph size.
60+
*
61+
* @param k neighbors to find
62+
* @param graphSize size of the graph
63+
* @return a bit set appropriate for the expected number of visited nodes
64+
*/
65+
static BitSet createBitSet(int k, int graphSize) {
66+
return expectedVisitedNodes(k, graphSize) < (graphSize >>> 7)
67+
? new SparseFixedBitSet(graphSize)
68+
: new FixedBitSet(graphSize);
69+
}
70+
4471
/**
4572
* Creates a new graph searcher.
4673
*
@@ -117,7 +144,7 @@ public static void search(
117144
innerSearcher =
118145
new HnswGraphSearcher(
119146
new NeighborQueue(knnCollector.k(), true),
120-
new SparseFixedBitSet(getGraphSize(graph)));
147+
createBitSet(knnCollector.k(), getGraphSize(graph)));
121148
}
122149
// Then, check if the search strategy is seeded
123150
final AbstractHnswGraphSearcher graphSearcher;

0 commit comments

Comments
 (0)