Skip to content

Commit 35c2132

Browse files
Mikep86 authored and mridula-s109 committed
Fix minmax normalizer handling of single-doc result sets (elastic#128689)
1 parent 902a013 commit 35c2132

File tree

4 files changed

+54
-10
lines changed

4 files changed

+54
-10
lines changed

docs/changelog/128689.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128689
2+
summary: Fix minmax normalizer handling of single-doc result sets
3+
area: Search
4+
type: bug
5+
issues: []

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import java.util.Set;
1414

1515
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
16+
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
1617

1718
public class RankRRFFeatures implements FeatureSpecification {
1819

@@ -25,6 +26,6 @@ public Set<NodeFeature> getFeatures() {
2526

2627
@Override
2728
public Set<NodeFeature> getTestFeatures() {
28-
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT);
29+
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
2930
}
3031
}

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
package org.elasticsearch.xpack.rank.linear;
99

1010
import org.apache.lucene.search.ScoreDoc;
11+
import org.elasticsearch.features.NodeFeature;
1112

1213
public class MinMaxScoreNormalizer extends ScoreNormalizer {
13-
1414
public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();
1515

16+
public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");
17+
1618
public static final String NAME = "minmax";
1719

1820
private static final float EPSILON = 1e-6f;
@@ -54,7 +56,9 @@ public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
5456
for (int i = 0; i < docs.length; i++) {
5557
float score;
5658
if (minEqualsMax) {
57-
score = min;
59+
// This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores
60+
// (i.e. within epsilon). In this case, assign every doc the max normalized score.
61+
score = 1.0f;
5862
} else {
5963
score = (docs[i].score - min) / (max - min);
6064
}

x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,7 +1017,7 @@ setup:
10171017

10181018

10191019
---
1020-
"linear retriever with custom sort and score for nested retrievers":
1020+
"linear retriever with custom sort for nested retrievers":
10211021
- do:
10221022
search:
10231023
index: test
@@ -1036,17 +1036,18 @@ setup:
10361036
{
10371037
term: {
10381038
keyword: {
1039-
value: "one" # this will give doc 1 a normalized score of 10 because min == max
1039+
value: "one"
10401040
}
10411041
}
10421042
},
10431043
{
10441044
term: {
10451045
keyword: {
1046-
value: "two" # this will give doc 2 a normalized score of 10 because min == max
1046+
value: "two"
10471047
}
10481048
}
1049-
} ]
1049+
}
1050+
]
10501051
}
10511052
},
10521053
boost: 10.0
@@ -1145,11 +1146,11 @@ setup:
11451146
size: 2
11461147

11471148
- match: { hits.total.value: 3 }
1148-
- length: {hits.hits: 2}
1149+
- length: { hits.hits: 2 }
11491150
- match: { hits.hits.0._id: "2" }
1150-
- close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } }
1151+
- close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
11511152
- match: { hits.hits.1._id: "1" }
1152-
- match: { hits.hits.1._score: 10 }
1153+
- match: { hits.hits.1._score: 1 }
11531154

11541155
---
11551156
"should throw when rank_window_size is negative":
@@ -1189,3 +1190,36 @@ setup:
11891190
]
11901191
rank_window_size: -10
11911192
- match: { status: 400 }
1193+
1194+
---
1195+
"minmax normalization properly handles a single doc result set":
1196+
- requires:
1197+
cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
1198+
reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"
1199+
1200+
- do:
1201+
search:
1202+
index: test
1203+
body:
1204+
retriever:
1205+
linear:
1206+
retrievers: [
1207+
{
1208+
retriever: {
1209+
standard: {
1210+
query: {
1211+
term: {
1212+
"keyword": {
1213+
"value": "one"
1214+
}
1215+
}
1216+
}
1217+
}
1218+
},
1219+
normalizer: "minmax"
1220+
}
1221+
]
1222+
1223+
- match: { hits.total.value: 1 }
1224+
- match: { hits.hits.0._id: "1" }
1225+
- match: { hits.hits.0._score: 1.0 }

0 commit comments

Comments
 (0)