Skip to content

Commit a604089

Browse files
authored
Fixed backporting (#129368)
1 parent 1bd1bbe commit a604089

File tree

10 files changed

+280
-24
lines changed

10 files changed

+280
-24
lines changed

docs/changelog/129359.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 129359
2+
summary: Add min score linear retriever
3+
area: Search
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ public void onFailure(Exception e) {
195195
RankDocsRetrieverBuilder rankDocsRetrieverBuilder = new RankDocsRetrieverBuilder(
196196
rankWindowSize,
197197
newRetrievers.stream().map(s -> s.retriever).toList(),
198-
results::get
198+
results::get,
199+
this.minScore
199200
);
200201
rankDocsRetrieverBuilder.retrieverName(retrieverName());
201202
return rankDocsRetrieverBuilder;

server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ public class RankDocsRetrieverBuilder extends RetrieverBuilder {
3333
final List<RetrieverBuilder> sources;
3434
final Supplier<RankDoc[]> rankDocs;
3535

36-
public RankDocsRetrieverBuilder(int rankWindowSize, List<RetrieverBuilder> sources, Supplier<RankDoc[]> rankDocs) {
36+
public RankDocsRetrieverBuilder(int rankWindowSize, List<RetrieverBuilder> sources, Supplier<RankDoc[]> rankDocs, Float minScore) {
3737
this.rankWindowSize = rankWindowSize;
3838
this.rankDocs = rankDocs;
3939
if (sources == null || sources.isEmpty()) {
4040
throw new IllegalArgumentException("sources must not be null or empty");
4141
}
4242
this.sources = sources;
43+
this.minScore = minScore;
4344
}
4445

4546
@Override
@@ -48,7 +49,7 @@ public String getName() {
4849
}
4950

5051
private boolean sourceHasMinScore() {
51-
return minScore != null || sources.stream().anyMatch(x -> x.minScore() != null);
52+
return this.minScore != null || sources.stream().anyMatch(x -> x.minScore() != null);
5253
}
5354

5455
private boolean sourceShouldRewrite(QueryRewriteContext ctx) throws IOException {
@@ -132,7 +133,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
132133
searchSourceBuilder.size(rankWindowSize);
133134
}
134135
if (sourceHasMinScore()) {
135-
searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore());
136+
searchSourceBuilder.minScore(this.minScore == null ? Float.MIN_VALUE : this.minScore);
136137
}
137138
if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) {
138139
searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from()));

server/src/test/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilderTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ private List<QueryBuilder> preFilters(QueryRewriteContext queryRewriteContext) t
9797
}
9898

9999
private RankDocsRetrieverBuilder createRandomRankDocsRetrieverBuilder(QueryRewriteContext queryRewriteContext) throws IOException {
100-
return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier());
100+
return new RankDocsRetrieverBuilder(randomIntBetween(1, 100), innerRetrievers(queryRewriteContext), rankDocsSupplier(), null);
101101
}
102102

103103
public void testExtractToSearchSourceBuilder() throws IOException {

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ public Set<NodeFeature> getTestFeatures() {
6161
SemanticInferenceMetadataFieldsMapper.EXPLICIT_NULL_FIXES,
6262
SEMANTIC_KNN_VECTOR_QUERY_REWRITE_INTERCEPTION_SUPPORTED,
6363
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_ALIAS_HANDLING_FIX,
64+
TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_MINSCORE_FIX,
6465
SemanticInferenceMetadataFieldsMapper.INFERENCE_METADATA_FIELDS_ENABLED_BY_DEFAULT,
6566
SEMANTIC_TEXT_HIGHLIGHTER_DEFAULT,
6667
SEMANTIC_KNN_FILTER_FIX,

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rank/textsimilarity/TextSimilarityRankRetrieverBuilder.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.elasticsearch.xcontent.XContentParser;
2525

2626
import java.io.IOException;
27+
import java.util.ArrayList;
2728
import java.util.List;
2829
import java.util.Objects;
2930

@@ -49,6 +50,7 @@ public class TextSimilarityRankRetrieverBuilder extends CompoundRetrieverBuilder
4950
"text_similarity_reranker_alias_handling_fix",
5051
true
5152
);
53+
public static final NodeFeature TEXT_SIMILARITY_RERANKER_MINSCORE_FIX = new NodeFeature("text_similarity_reranker_minscore_fix");
5254

5355
public static final ParseField RETRIEVER_FIELD = new ParseField("retriever");
5456
public static final ParseField INFERENCE_ID_FIELD = new ParseField("inference_id");
@@ -175,23 +177,21 @@ protected TextSimilarityRankRetrieverBuilder clone(
175177
protected RankDoc[] combineInnerRetrieverResults(List<ScoreDoc[]> rankResults, boolean explain) {
176178
assert rankResults.size() == 1;
177179
ScoreDoc[] scoreDocs = rankResults.get(0);
178-
TextSimilarityRankDoc[] textSimilarityRankDocs = new TextSimilarityRankDoc[scoreDocs.length];
180+
List<TextSimilarityRankDoc> filteredDocs = new ArrayList<>();
181+
// Filtering by min_score must be done here, after reranking.
182+
// Applying min_score in the child retriever could prematurely exclude documents that would receive high scores from the reranker.
179183
for (int i = 0; i < scoreDocs.length; i++) {
180184
ScoreDoc scoreDoc = scoreDocs[i];
181185
assert scoreDoc.score >= 0;
182-
if (explain) {
183-
textSimilarityRankDocs[i] = new TextSimilarityRankDoc(
184-
scoreDoc.doc,
185-
scoreDoc.score,
186-
scoreDoc.shardIndex,
187-
inferenceId,
188-
field
189-
);
190-
} else {
191-
textSimilarityRankDocs[i] = new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex);
186+
if (minScore == null || scoreDoc.score >= minScore) {
187+
if (explain) {
188+
filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex, inferenceId, field));
189+
} else {
190+
filteredDocs.add(new TextSimilarityRankDoc(scoreDoc.doc, scoreDoc.score, scoreDoc.shardIndex));
191+
}
192192
}
193193
}
194-
return textSimilarityRankDocs;
194+
return filteredDocs.toArray(new TextSimilarityRankDoc[0]);
195195
}
196196

197197
@Override

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/70_text_similarity_rank_retriever.yml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,111 @@ setup:
381381
- match: { hits.total.value: 1 }
382382
- length: { hits.hits: 1 }
383383
- match: { hits.hits.0._id: "doc_1" }
384+
385+
---
386+
"Text similarity reranker respects min_score":
387+
388+
- requires:
389+
cluster_features: "text_similarity_reranker_minscore_fix"
390+
reason: test min score functionality
391+
392+
- do:
393+
index:
394+
index: test-index
395+
id: doc_2
396+
body:
397+
text: "The phases of the Moon come from the position of the Moon relative to the Earth and Sun."
398+
topic: [ "science" ]
399+
subtopic: [ "astronomy" ]
400+
inference_text_field: "10"
401+
refresh: true
402+
403+
- do:
404+
search:
405+
index: test-index
406+
body:
407+
track_total_hits: true
408+
fields: [ "text", "topic" ]
409+
retriever:
410+
text_similarity_reranker:
411+
retriever:
412+
standard:
413+
query:
414+
bool:
415+
should:
416+
- constant_score:
417+
filter:
418+
term: { subtopic: "technology" }
419+
boost: 10
420+
- constant_score:
421+
filter:
422+
term: { subtopic: "astronomy" }
423+
boost: 1
424+
rank_window_size: 10
425+
inference_id: my-rerank-model
426+
inference_text: "How often does the moon hide the sun?"
427+
field: inference_text_field
428+
min_score: 10
429+
size: 10
430+
431+
- match: { hits.total.value: 1 }
432+
- length: { hits.hits: 1 }
433+
- match: { hits.hits.0._id: "doc_2" }
434+
435+
---
436+
"Text similarity reranker with min_score zero includes all docs":
437+
438+
- requires:
439+
cluster_features: "text_similarity_reranker_minscore_fix"
440+
reason: test min score functionality
441+
442+
- do:
443+
search:
444+
index: test-index
445+
body:
446+
track_total_hits: true
447+
fields: [ "text", "topic" ]
448+
retriever:
449+
text_similarity_reranker:
450+
retriever:
451+
standard:
452+
query:
453+
match_all: {}
454+
rank_window_size: 10
455+
inference_id: my-rerank-model
456+
inference_text: "How often does the moon hide the sun?"
457+
field: inference_text_field
458+
min_score: 0
459+
size: 10
460+
461+
- match: { hits.total.value: 3 }
462+
- length: { hits.hits: 3 }
463+
464+
---
465+
"Text similarity reranker with high min_score excludes all docs":
466+
467+
- requires:
468+
cluster_features: "text_similarity_reranker_minscore_fix"
469+
reason: test min score functionality
470+
471+
- do:
472+
search:
473+
index: test-index
474+
body:
475+
track_total_hits: true
476+
fields: [ "text", "topic" ]
477+
retriever:
478+
text_similarity_reranker:
479+
retriever:
480+
standard:
481+
query:
482+
match_all: {}
483+
rank_window_size: 10
484+
inference_id: my-rerank-model
485+
inference_text: "How often does the moon hide the sun?"
486+
field: inference_text_field
487+
min_score: 1000
488+
size: 10
489+
490+
- match: { hits.total.value: 0 }
491+
- length: { hits.hits: 0 }

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
1717
import static org.elasticsearch.xpack.rank.linear.L2ScoreNormalizer.LINEAR_RETRIEVER_L2_NORM;
18+
import static org.elasticsearch.xpack.rank.linear.LinearRetrieverBuilder.LINEAR_RETRIEVER_MINSCORE_FIX;
1819
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
1920
import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED;
2021

@@ -32,6 +33,11 @@ public Set<NodeFeature> getFeatures() {
3233

3334
@Override
3435
public Set<NodeFeature> getTestFeatures() {
35-
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, LINEAR_RETRIEVER_L2_NORM);
36+
return Set.of(
37+
INNER_RETRIEVERS_FILTER_SUPPORT,
38+
LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX,
39+
LINEAR_RETRIEVER_L2_NORM,
40+
LINEAR_RETRIEVER_MINSCORE_FIX
41+
);
3642
}
3743
}

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/LinearRetrieverBuilder.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.apache.lucene.search.ScoreDoc;
1111
import org.elasticsearch.common.ParsingException;
1212
import org.elasticsearch.common.util.Maps;
13+
import org.elasticsearch.features.NodeFeature;
1314
import org.elasticsearch.index.query.QueryBuilder;
1415
import org.elasticsearch.license.LicenseUtils;
1516
import org.elasticsearch.search.builder.SearchSourceBuilder;
@@ -46,6 +47,7 @@
4647
*/
4748
public final class LinearRetrieverBuilder extends CompoundRetrieverBuilder<LinearRetrieverBuilder> {
4849

50+
public static final NodeFeature LINEAR_RETRIEVER_MINSCORE_FIX = new NodeFeature("linear_retriever_minscore_fix");
4951
public static final String NAME = "linear";
5052

5153
public static final ParseField RETRIEVERS_FIELD = new ParseField("retrievers");
@@ -125,12 +127,35 @@ public LinearRetrieverBuilder(
125127
this.normalizers = normalizers;
126128
}
127129

130+
public LinearRetrieverBuilder(
131+
List<RetrieverSource> innerRetrievers,
132+
int rankWindowSize,
133+
float[] weights,
134+
ScoreNormalizer[] normalizers,
135+
Float minScore,
136+
String retrieverName,
137+
List<QueryBuilder> preFilterQueryBuilders
138+
) {
139+
this(innerRetrievers, rankWindowSize, weights, normalizers);
140+
this.minScore = minScore;
141+
if (minScore != null && minScore < 0) {
142+
throw new IllegalArgumentException("[min_score] must be greater than or equal to 0, was: [" + minScore + "]");
143+
}
144+
this.retrieverName = retrieverName;
145+
this.preFilterQueryBuilders = preFilterQueryBuilders;
146+
}
147+
128148
@Override
129149
protected LinearRetrieverBuilder clone(List<RetrieverSource> newChildRetrievers, List<QueryBuilder> newPreFilterQueryBuilders) {
130-
LinearRetrieverBuilder clone = new LinearRetrieverBuilder(newChildRetrievers, rankWindowSize, weights, normalizers);
131-
clone.preFilterQueryBuilders = newPreFilterQueryBuilders;
132-
clone.retrieverName = retrieverName;
133-
return clone;
150+
return new LinearRetrieverBuilder(
151+
newChildRetrievers,
152+
rankWindowSize,
153+
weights,
154+
normalizers,
155+
minScore,
156+
retrieverName,
157+
newPreFilterQueryBuilders
158+
);
134159
}
135160

136161
@Override
@@ -181,6 +206,10 @@ protected RankDoc[] combineInnerRetrieverResults(List<ScoreDoc[]> rankResults, b
181206
topResults[rank] = sortedResults[rank];
182207
topResults[rank].rank = rank + 1;
183208
}
209+
// Drop results scoring below minScore, if set (the threshold is inclusive)
210+
if (minScore != null) {
211+
topResults = Arrays.stream(topResults).filter(doc -> doc.score >= minScore).toArray(LinearRankDoc[]::new);
212+
}
184213
return topResults;
185214
}
186215

0 commit comments

Comments
 (0)