Skip to content

Commit 2216323

Browse files
authored
[8.18] Backporting fix minscore propagation in text similarity reranker (#129701)
* Fixed merge * Fixed compile issue
1 parent 1f35595 commit 2216323

File tree

7 files changed

+133
-17
lines changed

7 files changed

+133
-17
lines changed

docs/changelog/129223.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129223
2+
summary: Fix text similarity reranker does not propagate min score correctly
3+
area: Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ public void onFailure(Exception e) {
195195
RankDocsRetrieverBuilder rankDocsRetrieverBuilder = new RankDocsRetrieverBuilder(
196196
rankWindowSize,
197197
newRetrievers.stream().map(s -> s.retriever).toList(),
198-
results::get
198+
results::get,
199+
this.minScore
199200
);
200201
rankDocsRetrieverBuilder.retrieverName(retrieverName());
201202
return rankDocsRetrieverBuilder;

server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ public class RankDocsRetrieverBuilder extends RetrieverBuilder {
3333
final List<RetrieverBuilder> sources;
3434
final Supplier<RankDoc[]> rankDocs;
3535

36-
public RankDocsRetrieverBuilder(int rankWindowSize, List<RetrieverBuilder> sources, Supplier<RankDoc[]> rankDocs) {
36+
public RankDocsRetrieverBuilder(int rankWindowSize, List<RetrieverBuilder> sources, Supplier<RankDoc[]> rankDocs, Float minScore) {
3737
this.rankWindowSize = rankWindowSize;
3838
this.rankDocs = rankDocs;
3939
if (sources == null || sources.isEmpty()) {
4040
throw new IllegalArgumentException("sources must not be null or empty");
4141
}
4242
this.sources = sources;
43+
this.minScore = minScore;
4344
}
4445

4546
@Override
@@ -48,7 +49,7 @@ public String getName() {
4849
}
4950

5051
private boolean sourceHasMinScore() {
51-
return minScore != null || sources.stream().anyMatch(x -> x.minScore() != null);
52+
return this.minScore != null || sources.stream().anyMatch(x -> x.minScore() != null);
5253
}
5354

5455
private boolean sourceShouldRewrite(QueryRewriteContext ctx) throws IOException {
@@ -132,7 +133,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
132133
searchSourceBuilder.size(rankWindowSize);
133134
}
134135
if (sourceHasMinScore()) {
135-
searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore());
136+
searchSourceBuilder.minScore(this.minScore == null ? Float.MIN_VALUE : this.minScore);
136137
}
137138
if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) {
138139
searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from()));

server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ private List<QueryBuilder> preFilters(QueryRewriteContext queryRewriteContext) t
9797
}
9898

9999
private RankDocsRetrieverBuilder createRandomRankDocsRetrieverBuilder(QueryRewriteContext queryRewriteContext) throws IOException {
100-
return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier());
100+
return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier(), null);
101101
}
102102

103103
public void testExtractToSearchSourceBuilder() throws IOException {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ public Set<NodeFeature> getTestFeatures() {
5959
SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES,
6060
SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED,
6161
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
62+
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_MINSCORE_FIX,
6263
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
6364
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
6465
SEMANTIC_KNN_FILTER_FIX,

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.xcontent.XContentParser;
2525

2626
import java.io.IOException;
27+
import java.util.ArrayList;
2728
import java.util.List;
2829
import java.util.Objects;
2930

@@ -49,6 +50,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder
4950
"text_similarity_reranker_alias_handling_fix",
5051
true
5152
);
53+
public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix");
5254

5355
public static final ParseField RETRIEVER_FIELD = new ParseField("retriever");
5456
public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id");
@@ -159,23 +161,21 @@ protected TextSimilarityRankRetrieverBuilder clone(
159161
protected RankDoc[] combineInnerRetrieverResults(List<ScoreDoc[]> rankResults, boolean explain) {
160162
assert rankResults.size() == 1;
161163
ScoreDoc[] scoreDocs = rankResults.get(0);
162-
TextSimilarityRankDoc[] textSimilarityRankDocs = new TextSimilarityRankDoc[scoreDocs.length];
164+
List<TextSimilarityRankDoc> filteredDocs = new ArrayList<>();
165+
// Filtering by min_score must be done here, after reranking.
166+
// Applying min_score in the child retriever could prematurely exclude documents that would receive high scores from the reranker.
163167
for (int i = 0; i < scoreDocs.length; i++) {
164168
ScoreDoc scoreDoc = scoreDocs[i];
165169
assert scoreDoc.score >= 0;
166-
if (explain) {
167-
textSimilarityRankDocs[i] = new TextSimilarityRankDoc(
168-
scoreDoc.doc,
169-
scoreDoc.score,
170-
scoreDoc.shardIndex,
171-
inferenceId,
172-
field
173-
);
174-
} else {
175-
textSimilarityRankDocs[i] = new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex);
170+
if (minScore == null || scoreDoc.score >= minScore) {
171+
if (explain) {
172+
filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex, inferenceId, field));
173+
} else {
174+
filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex));
175+
}
176176
}
177177
}
178-
return textSimilarityRankDocs;
178+
return filteredDocs.toArray(new TextSimilarityRankDoc[0]);
179179
}
180180

181181
@Override

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,3 +351,111 @@ setup:
351351
- match: { hits.total.value: 1 }
352352
- length: { hits.hits: 1 }
353353
- match: { hits.hits.0._id: "doc_1" }
354+
355+
---
356+
"Text similarity reranker respects min_score":
357+
358+
- requires:
359+
cluster_features: "text_similarity_reranker_minscore_fix"
360+
reason: test min score functionality
361+
362+
- do:
363+
index:
364+
index: test-index
365+
id: doc_2
366+
body:
367+
text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
368+
topic: [ "science" ]
369+
subtopic: [ "astronomy" ]
370+
inference_text_field: "10"
371+
refresh: true
372+
373+
- do:
374+
search:
375+
index: test-index
376+
body:
377+
track_total_hits: true
378+
fields: [ "text", "topic" ]
379+
retriever:
380+
text_similarity_reranker:
381+
retriever:
382+
standard:
383+
query:
384+
bool:
385+
should:
386+
- constant_score:
387+
filter:
388+
term: { subtopic: "technology" }
389+
boost: 10
390+
- constant_score:
391+
filter:
392+
term: { subtopic: "astronomy" }
393+
boost: 1
394+
rank_window_size: 10
395+
inference_id: my-rerank-model
396+
inference_text: "How often does the moon hide the sun?"
397+
field: inference_text_field
398+
min_score: 10
399+
size: 10
400+
401+
- match: { hits.total.value: 1 }
402+
- length: { hits.hits: 1 }
403+
- match: { hits.hits.0._id: "doc_2" }
404+
405+
---
406+
"Text similarity reranker with min_score zero includes all docs":
407+
408+
- requires:
409+
cluster_features: "text_similarity_reranker_minscore_fix"
410+
reason: test min score functionality
411+
412+
- do:
413+
search:
414+
index: test-index
415+
body:
416+
track_total_hits: true
417+
fields: [ "text", "topic" ]
418+
retriever:
419+
text_similarity_reranker:
420+
retriever:
421+
standard:
422+
query:
423+
match_all: {}
424+
rank_window_size: 10
425+
inference_id: my-rerank-model
426+
inference_text: "How often does the moon hide the sun?"
427+
field: inference_text_field
428+
min_score: 0
429+
size: 10
430+
431+
- match: { hits.total.value: 3 }
432+
- length: { hits.hits: 3 }
433+
434+
---
435+
"Text similarity reranker with high min_score excludes all docs":
436+
437+
- requires:
438+
cluster_features: "text_similarity_reranker_minscore_fix"
439+
reason: test min score functionality
440+
441+
- do:
442+
search:
443+
index: test-index
444+
body:
445+
track_total_hits: true
446+
fields: [ "text", "topic" ]
447+
retriever:
448+
text_similarity_reranker:
449+
retriever:
450+
standard:
451+
query:
452+
match_all: {}
453+
rank_window_size: 10
454+
inference_id: my-rerank-model
455+
inference_text: "How often does the moon hide the sun?"
456+
field: inference_text_field
457+
min_score: 1000
458+
size: 10
459+
460+
- match: { hits.total.value: 0 }
461+
- length: { hits.hits: 0 }

0 commit comments

Comments
 (0)