Skip to content

Commit d8b52ad

Browse files
authored
Knn dont filter match all (#14936)
If a given filter is a MatchAllDocsQuery, we shouldn't bother going through the filtered search path. It adds unnecessary overhead through: (1) building a bitset for filter checking, and (2) checking that bitset to see if each vector matches the filter.
1 parent c4a24de commit d8b52ad

File tree

4 files changed

+55
-13
lines changed

4 files changed

+55
-13
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ Optimizations
210210

211211
* GITHUB#14935: Speed up PostingsEnum#nextPostings when block is encoded as bitset. (Guo Feng)
212212

213+
* GITHUB#14936: Don't do the filtered knn path when the provided filter is a MatchAllDocsQuery. (Ben Trent)
214+
213215
Changes in Runtime Behavior
214216
---------------------
215217
* GITHUB#14823: Decrease TieredMergePolicy's default number of segments per

lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,16 +80,28 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
8080

8181
final Weight filterWeight;
8282
if (filter != null) {
83-
BooleanQuery booleanQuery =
84-
new BooleanQuery.Builder()
85-
.add(filter, BooleanClause.Occur.FILTER)
86-
.add(new FieldExistsQuery(field), BooleanClause.Occur.FILTER)
87-
.build();
88-
Query rewritten = indexSearcher.rewrite(booleanQuery);
89-
if (rewritten.getClass() == MatchNoDocsQuery.class) {
90-
return rewritten;
83+
// rewrite inner filter query first to determine if its a match all
84+
// or match no docs query, so we can skip the knn search
85+
Query rewrittenFilter = filter.rewrite(indexSearcher);
86+
if (rewrittenFilter.getClass() == MatchNoDocsQuery.class) {
87+
// If the filter is a match no docs query, we can also skip it
88+
return rewrittenFilter;
89+
}
90+
if (rewrittenFilter.getClass() != MatchAllDocsQuery.class) {
91+
BooleanQuery booleanQuery =
92+
new BooleanQuery.Builder()
93+
.add(filter, BooleanClause.Occur.FILTER)
94+
.add(new FieldExistsQuery(field), BooleanClause.Occur.FILTER)
95+
.build();
96+
Query rewritten = indexSearcher.rewrite(booleanQuery);
97+
if (rewritten.getClass() == MatchNoDocsQuery.class) {
98+
return rewritten;
99+
}
100+
filterWeight = rewritten.createWeight(indexSearcher, ScoreMode.COMPLETE_NO_SCORES, 1f);
101+
} else {
102+
// If the filter is a match all docs query, we can skip it
103+
filterWeight = null;
91104
}
92-
filterWeight = indexSearcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1f);
93105
} else {
94106
filterWeight = null;
95107
}

lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,20 @@ public void testFilterWithNoVectorMatches() throws IOException {
250250
}
251251
}
252252

253+
public void testMatchAllFilter() throws IOException {
254+
try (Directory indexStore =
255+
getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0});
256+
IndexReader reader = DirectoryReader.open(indexStore)) {
257+
IndexSearcher searcher = newSearcher(reader);
258+
259+
// make sure we don't drop to exact search, even though the filter matches fewer than k docs
260+
Query kvq =
261+
getThrowingKnnVectorQuery("field", new float[] {0, 0}, 10, new MatchAllDocsQuery());
262+
TopDocs topDocs = searcher.search(kvq, 3);
263+
assertEquals(3, topDocs.totalHits.value());
264+
}
265+
}
266+
253267
/** testDimensionMismatch */
254268
public void testDimensionMismatch() throws IOException {
255269
try (Directory indexStore =

lucene/test-framework/src/java/org/apache/lucene/tests/index/BaseKnnVectorsFormatTestCase.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,14 @@
8181
import org.apache.lucene.search.DocIdSetIterator;
8282
import org.apache.lucene.search.IndexSearcher;
8383
import org.apache.lucene.search.KnnFloatVectorQuery;
84-
import org.apache.lucene.search.MatchAllDocsQuery;
84+
import org.apache.lucene.search.Query;
8585
import org.apache.lucene.search.ScoreDoc;
8686
import org.apache.lucene.search.Sort;
8787
import org.apache.lucene.search.SortField;
8888
import org.apache.lucene.search.TopDocs;
8989
import org.apache.lucene.search.TotalHits;
9090
import org.apache.lucene.search.VectorScorer;
91+
import org.apache.lucene.search.knn.KnnCollectorManager;
9192
import org.apache.lucene.store.Directory;
9293
import org.apache.lucene.store.FSDirectory;
9394
import org.apache.lucene.tests.codecs.asserting.AssertingKnnVectorsFormat;
@@ -1980,9 +1981,8 @@ protected void assertRecall(VectorSimilarityFunction similarity, double min, dou
19801981
for (String queryString : testQueries) {
19811982
computeLineEmbedding(queryString, queryEmbedding);
19821983

1983-
// pass match-all "filter" to force full traversal, bypassing graph
1984-
KnnFloatVectorQuery exactQuery =
1985-
new KnnFloatVectorQuery("field", queryEmbedding, 1000, new MatchAllDocsQuery());
1984+
// gather exact results first
1985+
Query exactQuery = buildExactKnnQuery("field", queryEmbedding, 10000);
19861986
assertEquals(numQueries, searcher.count(exactQuery)); // Same for exact search
19871987

19881988
KnnFloatVectorQuery query = new KnnFloatVectorQuery("field", queryEmbedding, efSearch);
@@ -2202,4 +2202,18 @@ public void testMergeOffHeapByteSizeMaps() {
22022202
assertEquals(101L, (long) r.get("d"));
22032203
assertEquals(6L, (long) r.get("e"));
22042204
}
2205+
2206+
private static Query buildExactKnnQuery(String fieldName, float[] queryVector, int totalDocs) {
2207+
return new KnnFloatVectorQuery(fieldName, queryVector, totalDocs) {
2208+
@Override
2209+
protected TopDocs approximateSearch(
2210+
LeafReaderContext context,
2211+
Bits acceptDocs,
2212+
int visitedLimit,
2213+
KnnCollectorManager knnCollectorManager)
2214+
throws IOException {
2215+
return exactSearch(context, DocIdSetIterator.all(totalDocs), null);
2216+
}
2217+
};
2218+
}
22052219
}

0 commit comments

Comments
 (0)