Skip to content

Commit 3c118d7

Browse files
authored
Optimize AbstractKnnVectorQuery#createBitSet with intoBitset (#14674)
1 parent 06afbe7 commit 3c118d7

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,8 @@ Optimizations
124124

125125
* GITHUB#14709: Speed up TermQuery by Scorer#nextDocsAndScores. (Guo Feng)
126126

127+
* GITHUB#14674: Optimize AbstractKnnVectorQuery#createBitSet with intoBitset. (Guo Feng)
128+
127129
Bug Fixes
128130
---------------------
129131
* GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when

lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java

Lines changed: 20 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
import org.apache.lucene.util.BitSet;
4343
import org.apache.lucene.util.BitSetIterator;
4444
import org.apache.lucene.util.Bits;
45+
import org.apache.lucene.util.FixedBitSet;
4546

4647
/**
4748
* Uses {@link KnnVectorsReader#search} to perform nearest neighbour search.
@@ -226,15 +227,25 @@ private BitSet createBitSet(DocIdSetIterator iterator, Bits liveDocs, int maxDoc
226227
// If we already have a BitSet and no deletions, reuse the BitSet
227228
return bitSetIterator.getBitSet();
228229
} else {
229-
// Create a new BitSet from matching and live docs
230-
FilteredDocIdSetIterator filterIterator =
231-
new FilteredDocIdSetIterator(iterator) {
232-
@Override
233-
protected boolean match(int doc) {
234-
return liveDocs == null || liveDocs.get(doc);
235-
}
236-
};
237-
return BitSet.of(filterIterator, maxDoc);
230+
int threshold = maxDoc >> 7; // same as BitSet#of
231+
if (iterator.cost() >= threshold) {
232+
// take advantage of Disi#intoBitset and Bits#applyMask
233+
FixedBitSet bitSet = new FixedBitSet(maxDoc);
234+
bitSet.or(iterator);
235+
if (liveDocs != null) {
236+
liveDocs.applyMask(bitSet, 0);
237+
}
238+
return bitSet;
239+
} else {
240+
FilteredDocIdSetIterator filterIterator =
241+
new FilteredDocIdSetIterator(iterator) {
242+
@Override
243+
protected boolean match(int doc) {
244+
return liveDocs == null || liveDocs.get(doc);
245+
}
246+
};
247+
return BitSet.of(filterIterator, maxDoc); // create a sparse bitset
248+
}
238249
}
239250
}
240251

lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import org.apache.lucene.util.BitSet;
3030
import org.apache.lucene.util.BitSetIterator;
3131
import org.apache.lucene.util.Bits;
32+
import org.apache.lucene.util.FixedBitSet;
3233

3334
/**
3435
* Search for all (approximate) vectors above a similarity threshold.
@@ -142,14 +143,27 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
142143
acceptDocs = bitSetIterator.getBitSet();
143144
} else {
144145
// Else collect all matching docs
145-
FilteredDocIdSetIterator filtered =
146-
new FilteredDocIdSetIterator(scorer.iterator()) {
147-
@Override
148-
protected boolean match(int doc) {
149-
return liveDocs == null || liveDocs.get(doc);
150-
}
151-
};
152-
acceptDocs = BitSet.of(filtered, leafReader.maxDoc());
146+
DocIdSetIterator iterator = scorer.iterator();
147+
final int maxDoc = leafReader.maxDoc();
148+
int threshold = maxDoc >> 7; // same as BitSet#of
149+
if (iterator.cost() >= threshold) {
150+
// take advantage of Disi#intoBitset and Bits#applyMask
151+
FixedBitSet bitSet = new FixedBitSet(maxDoc);
152+
bitSet.or(iterator);
153+
if (liveDocs != null) {
154+
liveDocs.applyMask(bitSet, 0);
155+
}
156+
acceptDocs = bitSet;
157+
} else {
158+
FilteredDocIdSetIterator filterIterator =
159+
new FilteredDocIdSetIterator(iterator) {
160+
@Override
161+
protected boolean match(int doc) {
162+
return liveDocs == null || liveDocs.get(doc);
163+
}
164+
};
165+
acceptDocs = BitSet.of(filterIterator, maxDoc); // create a sparse bitset
166+
}
153167
}
154168

155169
int cardinality = acceptDocs.cardinality();

0 commit comments

Comments
 (0)