Skip to content

Commit 19645fd

Browse files
authored
Fix minmax normalizer handling of single-doc result sets (elastic#128689) (elastic#128756)
(cherry picked from commit adda402)

# Conflicts:
#	x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
1 parent cb1e227 commit 19645fd

File tree

4 files changed

+54
-10
lines changed

4 files changed

+54
-10
lines changed

docs/changelog/128689.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128689
2+
summary: Fix minmax normalizer handling of single-doc result sets
3+
area: Search
4+
type: bug
5+
issues: []

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.util.Set;
1515

1616
import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
17+
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
1718
import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED;
1819

1920
/**
@@ -30,6 +31,6 @@ public Set<NodeFeature> getFeatures() {
3031

3132
@Override
3233
public Set<NodeFeature> getTestFeatures() {
33-
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT);
34+
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
3435
}
3536
}

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/MinMaxScoreNormalizer.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88
package org.elasticsearch.xpack.rank.linear;
99

1010
import org.apache.lucene.search.ScoreDoc;
11+
import org.elasticsearch.features.NodeFeature;
1112

1213
public class MinMaxScoreNormalizer extends ScoreNormalizer {
13-
1414
public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();
1515

16+
public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");
17+
1618
public static final String NAME = "minmax";
1719

1820
private static final float EPSILON = 1e-6f;
@@ -54,7 +56,9 @@ public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
5456
for (int i = 0; i < docs.length; i++) {
5557
float score;
5658
if (minEqualsMax) {
57-
score = min;
59+
// This can happen if there is only one doc in the result set or if all docs have nearly equivalent scores
60+
// (i.e. within epsilon). In this case, assign every doc the max normalized score.
61+
score = 1.0f;
5862
} else {
5963
score = (docs[i].score - min) / (max - min);
6064
}

x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ setup:
930930

931931

932932
---
933-
"linear retriever with custom sort and score for nested retrievers":
933+
"linear retriever with custom sort for nested retrievers":
934934
- do:
935935
search:
936936
index: test
@@ -949,17 +949,18 @@ setup:
949949
{
950950
term: {
951951
keyword: {
952-
value: "one" # this will give doc 1 a normalized score of 10 because min == max
952+
value: "one"
953953
}
954954
}
955955
},
956956
{
957957
term: {
958958
keyword: {
959-
value: "two" # this will give doc 2 a normalized score of 10 because min == max
959+
value: "two"
960960
}
961961
}
962-
} ]
962+
}
963+
]
963964
}
964965
},
965966
boost: 10.0
@@ -1058,11 +1059,11 @@ setup:
10581059
size: 2
10591060

10601061
- match: { hits.total.value: 3 }
1061-
- length: {hits.hits: 2}
1062+
- length: { hits.hits: 2 }
10621063
- match: { hits.hits.0._id: "2" }
1063-
- close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } }
1064+
- close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
10641065
- match: { hits.hits.1._id: "1" }
1065-
- match: { hits.hits.1._score: 10 }
1066+
- match: { hits.hits.1._score: 1 }
10661067

10671068
---
10681069
"should throw when rank_window_size is negative":
@@ -1102,3 +1103,36 @@ setup:
11021103
]
11031104
rank_window_size: -10
11041105
- match: { status: 400 }
1106+
1107+
---
1108+
"minmax normalization properly handles a single doc result set":
1109+
- requires:
1110+
cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
1111+
reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"
1112+
1113+
- do:
1114+
search:
1115+
index: test
1116+
body:
1117+
retriever:
1118+
linear:
1119+
retrievers: [
1120+
{
1121+
retriever: {
1122+
standard: {
1123+
query: {
1124+
term: {
1125+
"keyword": {
1126+
"value": "one"
1127+
}
1128+
}
1129+
}
1130+
}
1131+
},
1132+
normalizer: "minmax"
1133+
}
1134+
]
1135+
1136+
- match: { hits.total.value: 1 }
1137+
- match: { hits.hits.0._id: "1" }
1138+
- match: { hits.hits.0._score: 1.0 }

0 commit comments

Comments
 (0)