Skip to content

Commit 462e4f9

Browse files
authored
Patch for Lucene bug 14857 (elastic#130254)
Basically, advanceShallow for knn queries can flip back from hitting NO_MORE_DOCS to a valid doc ID again. This can cause search higher level queries to flip back and forth breaking assumptions and causing a CPU core to be locked up in-definitely (until server reboot). Applies the fix provided here, but requires gathering score docs again: apache/lucene#14858 closes: elastic#130239
1 parent a70cc4d commit 462e4f9

File tree

10 files changed

+114
-47
lines changed

10 files changed

+114
-47
lines changed

docs/changelog/130254.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 130254
2+
summary: Patch for Lucene bug 14857
3+
area: Vector Search
4+
type: bug
5+
issues: []

docs/reference/search/profile.asciidoc

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,6 +1268,7 @@ POST my-knn-index/_bulk?refresh=true
12681268
{ "index": { "_id": "3" } }
12691269
{ "my-vector": [15, 11, 23] }
12701270
--------------------------------------------------
1271+
// TEST[skip:response format changes in 130254]
12711272

12721273
With an index setup, we can now profile a kNN search query.
12731274

@@ -1284,7 +1285,7 @@ POST my-knn-index/_search
12841285
}
12851286
}
12861287
--------------------------------------------------
1287-
// TEST[continued]
1288+
// TEST[skip:response format changes in 130254]
12881289

12891290
<1> The `profile` parameter is set to `true`.
12901291

@@ -1302,8 +1303,8 @@ One of the `dfs.knn` sections for a shard looks like the following:
13021303
"vector_operations_count" : 4,
13031304
"query" : [
13041305
{
1305-
"type" : "DocAndScoreQuery",
1306-
"description" : "DocAndScore[100]",
1306+
"type" : "KnnScoreDocQuery",
1307+
"description" : "KnnScoreDocQuery[100]",
13071308
"time_in_nanos" : 444414,
13081309
"breakdown" : {
13091310
"set_min_competitive_score_count" : 0,
@@ -1340,10 +1341,7 @@ One of the `dfs.knn` sections for a shard looks like the following:
13401341
} ]
13411342
}
13421343
--------------------------------------------------
1343-
// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.$_path",\n"node_id": "$body.$_path",\n"shard_id": "$body.$_path",\n"index": "$body.$_path",\n"cluster": "$body.$_path",\n/]
1344-
// TESTRESPONSE[s/}$/}, "aggregations": [], "searches": $body.$_path, "fetch": $body.$_path}]}}/]
1345-
// TESTRESPONSE[s/ (\-)?[0-9]+/ $body.$_path/]
1346-
// TESTRESPONSE[s/"dfs" : \{/"dfs" : {"statistics": $body.$_path,/]
1344+
// TESTRESPONSE[skip:response format changes in 130254]
13471345

13481346
In the `dfs.knn` portion of the response we can see the output
13491347
the of timings for <<query-section, query>>, <<rewrite-section, rewrite>>,

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -211,8 +211,6 @@ dfs knn vector profiling:
211211
num_candidates: 100
212212

213213
- match: { hits.total.value: 1 }
214-
- match: { profile.shards.0.dfs.knn.0.query.0.type: "DocAndScoreQuery" }
215-
- match: { profile.shards.0.dfs.knn.0.query.0.description: "DocAndScore[100]" }
216214
- gt: { profile.shards.0.dfs.knn.0.query.0.time_in_nanos: 0 }
217215
- match: { profile.shards.0.dfs.knn.0.query.0.breakdown.set_min_competitive_score_count: 0 }
218216
- match: { profile.shards.0.dfs.knn.0.query.0.breakdown.set_min_competitive_score: 0 }
@@ -275,8 +273,6 @@ dfs knn vector profiling with vector_operations_count:
275273
num_candidates: 100
276274

277275
- match: { hits.total.value: 1 }
278-
- match: { profile.shards.0.dfs.knn.0.query.0.type: "DocAndScoreQuery" }
279-
- match: { profile.shards.0.dfs.knn.0.query.0.description: "DocAndScore[100]" }
280276
- match: { profile.shards.0.dfs.knn.0.vector_operations_count: 1 }
281277
- gt: { profile.shards.0.dfs.knn.0.query.0.time_in_nanos: 0 }
282278
- match: { profile.shards.0.dfs.knn.0.query.0.breakdown.set_min_competitive_score_count: 0 }

server/src/main/java/org/elasticsearch/search/SearchFeatures.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,16 @@ public Set<NodeFeature> getFeatures() {
2727
);
2828
public static final NodeFeature INT_SORT_FOR_INT_SHORT_BYTE_FIELDS = new NodeFeature("search.sort.int_sort_for_int_short_byte_fields");
2929
static final NodeFeature MULTI_MATCH_CHECKS_POSITIONS = new NodeFeature("search.multi.match.checks.positions");
30+
private static final NodeFeature KNN_QUERY_BUGFIX_130254 = new NodeFeature("search.knn.query.bugfix.130254");
3031

3132
@Override
3233
public Set<NodeFeature> getTestFeatures() {
3334
return Set.of(
3435
RETRIEVER_RESCORER_ENABLED,
3536
COMPLETION_FIELD_SUPPORTS_DUPLICATE_SUGGESTIONS,
3637
INT_SORT_FOR_INT_SHORT_BYTE_FIELDS,
37-
MULTI_MATCH_CHECKS_POSITIONS
38+
MULTI_MATCH_CHECKS_POSITIONS,
39+
KNN_QUERY_BUGFIX_130254
3840
);
3941
}
4042
}

server/src/main/java/org/elasticsearch/search/vectors/ESDiversifyingChildrenByteKnnVectorQuery.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,20 @@
99

1010
package org.elasticsearch.search.vectors;
1111

12+
import org.apache.lucene.search.IndexSearcher;
13+
import org.apache.lucene.search.MatchNoDocsQuery;
1214
import org.apache.lucene.search.Query;
1315
import org.apache.lucene.search.TopDocs;
1416
import org.apache.lucene.search.join.BitSetProducer;
1517
import org.apache.lucene.search.join.DiversifyingChildrenByteKnnVectorQuery;
1618
import org.elasticsearch.search.profile.query.QueryProfiler;
1719

20+
import java.io.IOException;
21+
1822
public class ESDiversifyingChildrenByteKnnVectorQuery extends DiversifyingChildrenByteKnnVectorQuery implements QueryProfilerProvider {
1923
private final Integer kParam;
2024
private long vectorOpsCount;
25+
private final int k;
2126

2227
public ESDiversifyingChildrenByteKnnVectorQuery(
2328
String field,
@@ -29,6 +34,18 @@ public ESDiversifyingChildrenByteKnnVectorQuery(
2934
) {
3035
super(field, query, childFilter, numCands, parentsFilter);
3136
this.kParam = k;
37+
this.k = numCands;
38+
}
39+
40+
@Override
41+
public Query rewrite(IndexSearcher searcher) throws IOException {
42+
Query rewrittenQuery = super.rewrite(searcher);
43+
if (rewrittenQuery instanceof MatchNoDocsQuery) {
44+
// If the rewritten query is a MatchNoDocsQuery, we can return it directly.
45+
return rewrittenQuery;
46+
}
47+
// We don't rewrite this query, so we return it as is.
48+
return KnnScoreDocQuery.fromQuery(rewrittenQuery, kParam == null ? k : kParam, searcher);
3249
}
3350

3451
@Override

server/src/main/java/org/elasticsearch/search/vectors/ESDiversifyingChildrenFloatKnnVectorQuery.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,20 @@
99

1010
package org.elasticsearch.search.vectors;
1111

12+
import org.apache.lucene.search.IndexSearcher;
13+
import org.apache.lucene.search.MatchNoDocsQuery;
1214
import org.apache.lucene.search.Query;
1315
import org.apache.lucene.search.TopDocs;
1416
import org.apache.lucene.search.join.BitSetProducer;
1517
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
1618
import org.elasticsearch.search.profile.query.QueryProfiler;
1719

20+
import java.io.IOException;
21+
1822
public class ESDiversifyingChildrenFloatKnnVectorQuery extends DiversifyingChildrenFloatKnnVectorQuery implements QueryProfilerProvider {
1923
private final Integer kParam;
2024
private long vectorOpsCount;
25+
private final int k;
2126

2227
public ESDiversifyingChildrenFloatKnnVectorQuery(
2328
String field,
@@ -29,6 +34,18 @@ public ESDiversifyingChildrenFloatKnnVectorQuery(
2934
) {
3035
super(field, query, childFilter, numCands, parentsFilter);
3136
this.kParam = k;
37+
this.k = numCands;
38+
}
39+
40+
@Override
41+
public Query rewrite(IndexSearcher searcher) throws IOException {
42+
Query rewrittenQuery = super.rewrite(searcher);
43+
if (rewrittenQuery instanceof MatchNoDocsQuery) {
44+
// If the rewritten query is a MatchNoDocsQuery, we can return it directly.
45+
return rewrittenQuery;
46+
}
47+
// We don't rewrite this query, so we return it as is.
48+
return KnnScoreDocQuery.fromQuery(rewrittenQuery, kParam == null ? k : kParam, searcher);
3249
}
3350

3451
@Override

server/src/main/java/org/elasticsearch/search/vectors/ESKnnByteVectorQuery.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99

1010
package org.elasticsearch.search.vectors;
1111

12+
import org.apache.lucene.search.IndexSearcher;
1213
import org.apache.lucene.search.KnnByteVectorQuery;
14+
import org.apache.lucene.search.MatchNoDocsQuery;
1315
import org.apache.lucene.search.Query;
1416
import org.apache.lucene.search.TopDocs;
1517
import org.elasticsearch.search.profile.query.QueryProfiler;
1618

19+
import java.io.IOException;
20+
1721
public class ESKnnByteVectorQuery extends KnnByteVectorQuery implements QueryProfilerProvider {
1822
private final Integer kParam;
1923
private long vectorOpsCount;
@@ -23,6 +27,17 @@ public ESKnnByteVectorQuery(String field, byte[] target, Integer k, int numCands
2327
this.kParam = k;
2428
}
2529

30+
@Override
31+
public Query rewrite(IndexSearcher searcher) throws IOException {
32+
Query rewrittenQuery = super.rewrite(searcher);
33+
if (rewrittenQuery instanceof MatchNoDocsQuery) {
34+
// If the rewritten query is a MatchNoDocsQuery, we can return it directly.
35+
return rewrittenQuery;
36+
}
37+
// We don't rewrite this query, so we return it as is.
38+
return KnnScoreDocQuery.fromQuery(rewrittenQuery, kParam == null ? k : kParam, searcher);
39+
}
40+
2641
@Override
2742
protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) {
2843
// if k param is set, we get only top k results from each shard

server/src/main/java/org/elasticsearch/search/vectors/ESKnnFloatVectorQuery.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,15 @@
99

1010
package org.elasticsearch.search.vectors;
1111

12+
import org.apache.lucene.search.IndexSearcher;
1213
import org.apache.lucene.search.KnnFloatVectorQuery;
14+
import org.apache.lucene.search.MatchNoDocsQuery;
1315
import org.apache.lucene.search.Query;
1416
import org.apache.lucene.search.TopDocs;
1517
import org.elasticsearch.search.profile.query.QueryProfiler;
1618

19+
import java.io.IOException;
20+
1721
public class ESKnnFloatVectorQuery extends KnnFloatVectorQuery implements QueryProfilerProvider {
1822
private final Integer kParam;
1923
private long vectorOpsCount;
@@ -23,6 +27,17 @@ public ESKnnFloatVectorQuery(String field, float[] target, Integer k, int numCan
2327
this.kParam = k;
2428
}
2529

30+
@Override
31+
public Query rewrite(IndexSearcher searcher) throws IOException {
32+
Query rewrittenQuery = super.rewrite(searcher);
33+
if (rewrittenQuery instanceof MatchNoDocsQuery) {
34+
// If the rewritten query is a MatchNoDocsQuery, we can return it directly.
35+
return rewrittenQuery;
36+
}
37+
// We don't rewrite this query, so we return it as is.
38+
return KnnScoreDocQuery.fromQuery(rewrittenQuery, kParam == null ? k : kParam, searcher);
39+
}
40+
2641
@Override
2742
protected TopDocs mergeLeafResults(TopDocs[] perLeafResults) {
2843
// if k param is set, we get only top k results from each shard

server/src/main/java/org/elasticsearch/search/vectors/KnnScoreDocQuery.java

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.search.ScoreDoc;
2121
import org.apache.lucene.search.ScoreMode;
2222
import org.apache.lucene.search.Scorer;
23+
import org.apache.lucene.search.TopDocs;
2324
import org.apache.lucene.search.Weight;
2425

2526
import java.io.IOException;
@@ -36,6 +37,7 @@
3637
* {@link org.apache.lucene.search.KnnFloatVectorQuery}, which is package-private.
3738
*/
3839
public class KnnScoreDocQuery extends Query {
40+
private final float maxScore;
3941
private final int[] docs;
4042
private final float[] scores;
4143

@@ -47,6 +49,18 @@ public class KnnScoreDocQuery extends Query {
4749
// an object identifying the reader context that was used to build this query
4850
private final Object contextIdentity;
4951

52+
static Query fromQuery(Query query, int k, IndexSearcher searcher) throws IOException {
53+
if (query instanceof MatchNoDocsQuery) {
54+
// If the rewritten query is a MatchNoDocsQuery, we can return it directly.
55+
return query;
56+
}
57+
if (query instanceof KnnScoreDocQuery knnQuery) {
58+
return knnQuery;
59+
}
60+
TopDocs topDocs = searcher.search(query, k);
61+
return new KnnScoreDocQuery(topDocs.scoreDocs, searcher.getIndexReader());
62+
}
63+
5064
/**
5165
* Creates a query.
5266
*
@@ -58,10 +72,13 @@ public class KnnScoreDocQuery extends Query {
5872
Arrays.sort(scoreDocs, Comparator.comparingInt(scoreDoc -> scoreDoc.doc));
5973
this.docs = new int[scoreDocs.length];
6074
this.scores = new float[scoreDocs.length];
75+
float maxScore = Float.NEGATIVE_INFINITY;
6176
for (int i = 0; i < scoreDocs.length; i++) {
6277
docs[i] = scoreDocs[i].doc;
6378
scores[i] = scoreDocs[i].score;
79+
maxScore = Math.max(maxScore, scores[i]);
6480
}
81+
this.maxScore = maxScore;
6582
this.segmentStarts = findSegmentStarts(reader, docs);
6683
this.contextIdentity = reader.getContext().id();
6784
}
@@ -157,36 +174,14 @@ public long cost() {
157174

158175
@Override
159176
public float getMaxScore(int docId) {
160-
// NO_MORE_DOCS indicates the maximum score for all docs in this segment
161-
// Anything less than must be accounted for via the docBase.
162-
if (docId != NO_MORE_DOCS) {
163-
docId += context.docBase;
164-
}
165-
float maxScore = 0;
166-
for (int idx = Math.max(lower, upTo); idx < upper && docs[idx] <= docId; idx++) {
167-
maxScore = Math.max(maxScore, scores[idx] * boost);
168-
}
169-
return maxScore;
177+
return maxScore * boost;
170178
}
171179

172180
@Override
173181
public float score() {
174182
return scores[upTo] * boost;
175183
}
176184

177-
@Override
178-
public int advanceShallow(int docId) {
179-
int start = Math.max(upTo, lower);
180-
int docIdIndex = Arrays.binarySearch(docs, start, upper, docId + context.docBase);
181-
if (docIdIndex < 0) {
182-
docIdIndex = -1 - docIdIndex;
183-
}
184-
if (docIdIndex >= upper) {
185-
return NO_MORE_DOCS;
186-
}
187-
return docs[docIdIndex];
188-
}
189-
190185
@Override
191186
public int docID() {
192187
return currentDocId();
@@ -224,7 +219,7 @@ float[] scores() {
224219

225220
@Override
226221
public String toString(String field) {
227-
return "ScoreAndDocQuery";
222+
return "DocAndScoreQuery[" + docs[0] + ",...][" + scores[0] + ",...]," + maxScore;
228223
}
229224

230225
@Override

x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/rrf/600_rrf_retriever_profile.yml

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,16 @@ setup:
7171

7272
- do:
7373
indices.refresh: {}
74+
- do:
75+
indices.forcemerge:
76+
index: test
77+
max_num_segments: 1
7478

7579
---
7680
"profile standard and knn query":
81+
- requires:
82+
cluster_features: ["search.knn.query.bugfix.130254"]
83+
reason: "Bug fix introduced via PR #130254"
7784

7885
- do:
7986
search:
@@ -114,13 +121,13 @@ setup:
114121
- match: { hits.hits.2._id: "4" }
115122

116123
- not_exists: profile.shards.0.dfs
117-
- match: { profile.shards.0.searches.0.query.0.type: RankDocsQuery }
118-
- length: { profile.shards.0.searches.0.query.0.children: 2 }
119-
- match: { profile.shards.0.searches.0.query.0.children.0.type: TopQuery }
120-
- match: { profile.shards.0.searches.0.query.0.children.1.type: BooleanQuery }
121-
- length: { profile.shards.0.searches.0.query.0.children.1.children: 2 }
122-
- match: { profile.shards.0.searches.0.query.0.children.1.children.0.type: TermQuery }
123-
- match: { profile.shards.0.searches.0.query.0.children.1.children.1.type: DocAndScoreQuery }
124+
- match: { profile.shards.0.searches.0.query.2.type: RankDocsQuery }
125+
- length: { profile.shards.0.searches.0.query.2.children: 2 }
126+
- match: { profile.shards.0.searches.0.query.2.children.0.type: TopQuery }
127+
- match: { profile.shards.0.searches.0.query.2.children.1.type: BooleanQuery }
128+
- length: { profile.shards.0.searches.0.query.2.children.1.children: 2 }
129+
- match: { profile.shards.0.searches.0.query.2.children.1.children.0.type: TermQuery }
130+
- match: { profile.shards.0.searches.0.query.2.children.1.children.1.type: KnnScoreDocQuery }
124131

125132
---
126133
"profile standard and knn dfs retrievers":
@@ -173,8 +180,8 @@ setup:
173180
"using query and dfs knn search":
174181

175182
- requires:
176-
cluster_features: ["gte_v8.16.0"]
177-
reason: "deprecation added in 8.16"
183+
cluster_features: ["search.knn.query.bugfix.130254"]
184+
reason: "Bug fix introduced via PR #130254"
178185
test_runner_features: warnings
179186

180187
- do:
@@ -212,8 +219,8 @@ setup:
212219

213220
- exists: profile.shards.0.dfs
214221
- length: { profile.shards.0.dfs.knn: 1 }
215-
- length: { profile.shards.0.dfs.knn.0.query: 1 }
216-
- match: { profile.shards.0.dfs.knn.0.query.0.type: DocAndScoreQuery }
222+
- length: { profile.shards.0.dfs.knn.0.query: 2 }
223+
- match: { profile.shards.0.dfs.knn.0.query.1.type: KnnScoreDocQuery }
217224

218225
- match: { profile.shards.0.searches.0.query.0.type: ConstantScoreQuery }
219226
- length: { profile.shards.0.searches.0.query.0.children: 1 }

0 commit comments

Comments
 (0)