Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128689.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128689
summary: Fix minmax normalizer handling of single-doc result sets
area: Search
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import java.util.Set;

import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;

public class RankRRFFeatures implements FeatureSpecification {

Expand All @@ -25,6 +26,6 @@ public Set<NodeFeature> getFeatures() {

@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT);
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
package org.elasticsearch.xpack.rank.linear;

import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.features.NodeFeature;

public class MinMaxScoreNormalizer extends ScoreNormalizer {

public static final MinMaxScoreNormalizer INSTANCE = new MinMaxScoreNormalizer();

public static final NodeFeature LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX = new NodeFeature("linear_retriever.minmax_single_doc_fix");

public static final String NAME = "minmax";

private static final float EPSILON = 1e-6f;
Expand Down Expand Up @@ -54,7 +56,9 @@ public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
for (int i = 0; i < docs.length; i++) {
float score;
if (minEqualsMax) {
score = min;
// min and max are considered equal (i.e. within epsilon) when there is only one doc in the result set or when all docs
// have nearly equivalent scores. Dividing by (max - min) is not meaningful in that case, so assign every doc the
// maximum normalized score of 1.0 instead.
score = 1.0f;
} else {
score = (docs[i].score - min) / (max - min);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ setup:


---
"linear retriever with custom sort and score for nested retrievers":
"linear retriever with custom sort for nested retrievers":
- do:
search:
index: test
Expand All @@ -949,17 +949,18 @@ setup:
{
term: {
keyword: {
value: "one" # this will give doc 1 a normalized score of 10 because min == max
value: "one"
}
}
},
{
term: {
keyword: {
value: "two" # this will give doc 2 a normalized score of 10 because min == max
value: "two"
}
}
} ]
}
]
}
},
boost: 10.0
Expand Down Expand Up @@ -1058,11 +1059,11 @@ setup:
size: 2

- match: { hits.total.value: 3 }
- length: {hits.hits: 2}
- length: { hits.hits: 2 }
- match: { hits.hits.0._id: "2" }
- close_to: { hits.hits.0._score: { value: 10.5, error: 0.001 } }
- close_to: { hits.hits.0._score: { value: 1.5, error: 0.001 } }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.1._score: 10 }
- match: { hits.hits.1._score: 1 }

---
"should throw when rank_window_size is negative":
Expand Down Expand Up @@ -1102,3 +1103,36 @@ setup:
]
rank_window_size: -10
- match: { status: 400 }

---
"minmax normalization properly handles a single doc result set":
- requires:
cluster_features: [ "linear_retriever.minmax_single_doc_fix" ]
reason: "Fix bug where minmax normalizer would emit unnormalized score when handling a single doc result set"

- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
term: {
"keyword": {
"value": "one"
}
}
}
}
},
normalizer: "minmax"
}
]

- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.0._score: 1.0 }
Loading