Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/128504.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 128504
summary: Add l2_norm normalization support to linear retriever
area: Relevance
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,40 @@ Returns top documents from a <<search-api-knn,knn search>>, in the context of a
* <<linear-retriever,*Linear Retriever*>>.
Combines the top results from multiple sub-retrievers using a weighted sum of their scores. Allows you to specify different
weights for each retriever, as well as independently normalize the scores from each result set.

[discrete]
[[retrievers-overview-linear-retriever-parameters]]
==== Linear Retriever Parameters

`retrievers`
: (Required, array of objects)
A list of sub-retriever configurations whose result sets will be merged through a weighted sum. Each configuration can specify its own weight and normalization for the retriever it wraps.

Each entry specifies the following parameters:

`retriever`
: (Required, a `retriever` object)
Specifies the retriever for which we will compute the top documents. The retriever will produce `rank_window_size` results, which will later be merged based on the specified `weight` and `normalizer`.

`weight`
: (Optional, float)
The weight by which each score of this retriever’s top docs will be multiplied. Must be greater than or equal to 0. Defaults to 1.0.

`normalizer`
: (Optional, String)
Specifies how we will normalize the retriever’s scores, before applying the specified `weight`. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`.

* `none` : No normalization is applied; the retriever’s scores are used unchanged.
* `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula

```
score = (score - min) / (max - min)
```

* `l2_norm` : An `L2ScoreNormalizer` that normalizes scores by dividing each score by the L2 norm of the score values (the square root of the sum of the squared scores).

See also the hybrid search example for how to independently configure and apply normalizers to retrievers.

* <<rrf-retriever,*RRF Retriever*>>.
Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm.
Allows you to combine multiple result sets with different relevance indicators into a single result set.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.util.Set;

import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
import static org.elasticsearch.xpack.rank.linear.L2ScoreNormalizer.LINEAR_RETRIEVER_L2_NORM;
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED;

Expand All @@ -31,6 +32,6 @@ public Set<NodeFeature> getFeatures() {

@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, LINEAR_RETRIEVER_L2_NORM);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.rank.linear;

import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.features.NodeFeature;

/**
 * A score normalizer that applies L2 normalization to a set of scores.
 * <p>
 * Each score is divided by the L2 norm of the scores, i.e. the square root of the sum of the squared
 * non-NaN scores. Normalization is skipped and the input array is returned unchanged when the input is
 * empty, when every score is NaN, or when the norm is smaller than a small EPSILON (for example when all
 * scores are zero). NaN scores are ignored when computing the norm and remain NaN in the output.
 * </p>
 */
public class L2ScoreNormalizer extends ScoreNormalizer {

    public static final L2ScoreNormalizer INSTANCE = new L2ScoreNormalizer();

    public static final String NAME = "l2_norm";

    /** Norms below this threshold are treated as zero to avoid dividing by a vanishing value. */
    private static final float EPSILON = 1e-6f;

    public static final NodeFeature LINEAR_RETRIEVER_L2_NORM = new NodeFeature("linear_retriever.l2_norm");

    public L2ScoreNormalizer() {}

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Normalizes the given scores by their L2 norm.
     *
     * @param docs the scored documents to normalize
     * @return a new array with normalized scores (same doc and shardIndex values), or {@code docs} itself
     *         when normalization is skipped
     */
    @Override
    public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
        if (docs.length == 0) {
            return docs;
        }
        double sumOfSquares = 0.0;
        boolean atLeastOneValidScore = false;
        for (ScoreDoc doc : docs) {
            if (Float.isNaN(doc.score) == false) {
                atLeastOneValidScore = true;
                // Square in double precision: a float * float product is rounded to float first, which
                // overflows to Infinity for large scores and underflows to 0 for tiny ones.
                double score = doc.score;
                sumOfSquares += score * score;
            }
        }
        if (atLeastOneValidScore == false) {
            // No valid scores to normalize
            return docs;
        }
        double norm = Math.sqrt(sumOfSquares);
        if (norm < EPSILON) {
            // Norm is (effectively) zero: dividing would blow scores up or produce NaN
            return docs;
        }
        ScoreDoc[] scoreDocs = new ScoreDoc[docs.length];
        for (int i = 0; i < docs.length; i++) {
            float score = (float) (docs[i].score / norm);
            scoreDocs[i] = new ScoreDoc(docs[i].doc, score, docs[i].shardIndex);
        }
        return scoreDocs;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ public abstract class ScoreNormalizer {
public static ScoreNormalizer valueOf(String normalizer) {
if (MinMaxScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
return MinMaxScoreNormalizer.INSTANCE;
} else if (L2ScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
return L2ScoreNormalizer.INSTANCE;

} else if (IdentityScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
return IdentityScoreNormalizer.INSTANCE;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.rank.linear;

import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.test.ESTestCase;

/**
 * Unit tests for {@link L2ScoreNormalizer}: a regular vector, degenerate inputs (all zeros, all NaN,
 * a mix of zero and NaN), a single score, and an empty input.
 */
public class L2ScoreNormalizerTests extends ESTestCase {

    /** Builds score docs with doc ids 1..n and shard index 0, then runs them through the normalizer. */
    private static ScoreDoc[] normalize(float... scores) {
        ScoreDoc[] input = new ScoreDoc[scores.length];
        for (int i = 0; i < scores.length; i++) {
            input[i] = new ScoreDoc(i + 1, scores[i], 0);
        }
        return L2ScoreNormalizer.INSTANCE.normalizeScores(input);
    }

    // (3, 4) has an L2 norm of 5, so the normalized scores are 0.6 and 0.8
    public void testNormalizeTypicalVector() {
        ScoreDoc[] result = normalize(3.0f, 4.0f);
        assertEquals(0.6f, result[0].score, 1e-5);
        assertEquals(0.8f, result[1].score, 1e-5);
    }

    // A zero norm must not trigger a division: the zero scores come back as they were
    public void testAllZeros() {
        ScoreDoc[] result = normalize(0.0f, 0.0f);
        assertEquals(0.0f, result[0].score, 0.0f);
        assertEquals(0.0f, result[1].score, 0.0f);
    }

    // With no valid score at all, the NaN values are passed through
    public void testAllNaN() {
        ScoreDoc[] result = normalize(Float.NaN, Float.NaN);
        assertTrue(Float.isNaN(result[0].score));
        assertTrue(Float.isNaN(result[1].score));
    }

    // The only valid score is zero, so the norm is zero and both scores are left as they were
    public void testMixedZeroAndNaN() {
        ScoreDoc[] result = normalize(0.0f, Float.NaN);
        assertEquals(0.0f, result[0].score, 0.0f);
        assertTrue(Float.isNaN(result[1].score));
    }

    // A single non-zero score is its own norm and therefore normalizes to 1
    public void testSingleElement() {
        ScoreDoc[] result = normalize(42.0f);
        assertEquals(1.0f, result[0].score, 1e-5);
    }

    // An empty input yields an empty output
    public void testEmptyArray() {
        ScoreDoc[] result = normalize();
        assertEquals(0, result.length);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,99 @@ setup:
- match: { hits.hits.3._id: "3" }
- close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } }

---
# Checks l2_norm normalization combined with per-retriever weights.
# Retriever 1: constant scores 3.0 and 4.0 -> L2 norm 5 -> 0.6 and 0.8 -> weight 10.0 -> 6.0 and 8.0.
# Retriever 2: constant scores 6.0 and 8.0 -> L2 norm 10 -> 0.6 and 0.8 -> weight 2.0 -> 1.2 and 1.6.
# NOTE(review): the index setup is outside this hunk; the assertions presume docs "1".."4" carry the
# keywords "one".."four" respectively - confirm against the setup section.
"should normalize initial scores with l2_norm":
- requires:
cluster_features: [ "linear_retriever.l2_norm" ]
reason: "Requires l2_norm normalization support in linear retriever"
- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
bool: {
should: [
{ constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 3.0 } },
{ constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 4.0 } }
]
}
}
}
},
weight: 10.0,
normalizer: "l2_norm"
},
{
retriever: {
standard: {
query: {
bool: {
should: [
{ constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 6.0 } },
{ constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 8.0 } }
]
}
}
}
},
weight: 2.0,
normalizer: "l2_norm"
}
]

# Expected order is by weighted, normalized score: 8.0, 6.0, 1.6, 1.2
- match: { hits.total.value: 4 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0._score: 8.0 }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.1._score: 6.0 }
- match: { hits.hits.2._id: "4" }
- close_to: { hits.hits.2._score: { value: 1.6, error: 0.001 } }
- match: { hits.hits.3._id: "3" }
- close_to: { hits.hits.3._score: { value: 1.2, error: 0.001 } }

---
# Checks the degenerate case where every score is 0.0: the L2 norm is zero, so the normalizer skips
# the division and the scores stay 0.0 after the 1.0 weight is applied.
"should handle all zero scores in normalization":
- requires:
cluster_features: [ "linear_retriever.l2_norm" ]
reason: "Requires l2_norm normalization support in linear retriever"
- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
bool: {
should: [
{ constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 0.0 } },
{ constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 0.0 } },
{ constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 0.0 } },
{ constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 0.0 } }
]
}
}
}
},
weight: 1.0,
normalizer: "l2_norm"
}
]
# All four hits are returned; only their scores are asserted here, not their ids
- match: { hits.total.value: 4 }
- close_to: { hits.hits.0._score: { value: 0.0, error: 0.0001 } }
- close_to: { hits.hits.1._score: { value: 0.0, error: 0.0001 } }
- close_to: { hits.hits.2._score: { value: 0.0, error: 0.0001 } }
- close_to: { hits.hits.3._score: { value: 0.0, error: 0.0001 } }

---
"should throw on unknown normalizer":
- do:
Expand Down