5 changes: 5 additions & 0 deletions docs/changelog/128504.yaml
@@ -0,0 +1,5 @@
pr: 128504
summary: Add l2_norm normalization support to linear retriever
area: Relevance
type: enhancement
issues: []
@@ -26,6 +26,40 @@ Returns top documents from a <<search-api-knn,knn search>>, in the context of a
* <<linear-retriever,*Linear Retriever*>>.
Combines the top results from multiple sub-retrievers using a weighted sum of their scores. Allows you to specify a different
weight for each retriever, as well as to independently normalize the scores from each result set.

[discrete]
[[linear-retriever-parameters]]
==== Linear Retriever Parameters

`retrievers`
: (Required, array of objects)
A list of sub-retriever configurations to take into account, whose result sets are merged through a weighted sum. Each configuration can specify a different weight and normalization for its retriever.

Each entry specifies the following parameters:

`retriever`
: (Required, a `retriever` object)
Specifies the retriever to compute the top documents for. The retriever produces `rank_window_size` results, which are later merged based on the specified `weight` and `normalizer`.

`weight`
: (Optional, float)
The weight that each score from this retriever's top docs is multiplied by. Must be greater than or equal to 0. Defaults to 1.0.

`normalizer`
: (Optional, String)
Specifies how the retriever's scores are normalized before the specified `weight` is applied. Available values are `minmax`, `l2_norm`, and `none`. Defaults to `none`.

* `none` : No normalization is applied; the original scores are used as-is.
* `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula:

```
score = (score - min) / (max - min)
```

* `l2_norm` : An `L2ScoreNormalizer` that normalizes scores using the L2 norm of the score values.
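
Based on the `L2ScoreNormalizer` implementation added in this change, `l2_norm` normalization is equivalent to:

```
norm  = sqrt(score_1^2 + score_2^2 + ... + score_n^2)
score = score / norm
```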

See also the hybrid search example for how to independently configure and apply normalizers to retrievers.
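
For illustration only, a minimal request combining two sub-retrievers with `l2_norm` normalization might look like the following sketch (the index name, field names, and queries are placeholders, not part of this change):

```
GET /my-index/_search
{
  "retriever": {
    "linear": {
      "retrievers": [
        {
          "retriever": {
            "standard": {
              "query": { "match": { "text_field": "sample query" } }
            }
          },
          "weight": 2.0,
          "normalizer": "l2_norm"
        },
        {
          "retriever": {
            "knn": {
              "field": "vector_field",
              "query_vector": [1.0, 2.0, 3.0],
              "k": 10,
              "num_candidates": 50
            }
          },
          "weight": 1.0,
          "normalizer": "l2_norm"
        }
      ],
      "rank_window_size": 10
    }
  }
}
```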

* <<rrf-retriever,*RRF Retriever*>>.
Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm.
Allows you to combine multiple result sets with different relevance indicators into a single result set.
@@ -14,6 +14,7 @@
import java.util.Set;

import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
import static org.elasticsearch.xpack.rank.linear.L2ScoreNormalizer.LINEAR_RETRIEVER_L2_NORM;
import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED;

@@ -31,6 +32,6 @@ public Set<NodeFeature> getFeatures() {

@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, LINEAR_RETRIEVER_L2_NORM);
}
}
@@ -0,0 +1,66 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.rank.linear;

import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.features.NodeFeature;

/**
* A score normalizer that applies L2 normalization to a set of scores.
* <p>
* Each score is divided by the L2 norm of the scores if the norm is greater than a small EPSILON.
* If all scores are zero or NaN, normalization is skipped and the original scores are returned.
* </p>
*/
public class L2ScoreNormalizer extends ScoreNormalizer {

public static final L2ScoreNormalizer INSTANCE = new L2ScoreNormalizer();

public static final String NAME = "l2_norm";

private static final float EPSILON = 1e-6f;

public static final NodeFeature LINEAR_RETRIEVER_L2_NORM = new NodeFeature("linear_retriever.l2_norm");

public L2ScoreNormalizer() {}

@Override
public String getName() {
return NAME;
}

@Override
public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
if (docs.length == 0) {
return docs;
}
double sumOfSquares = 0.0;
boolean atLeastOneValidScore = false;
for (ScoreDoc doc : docs) {
if (Float.isNaN(doc.score) == false) {
atLeastOneValidScore = true;
sumOfSquares += doc.score * doc.score;
}
}
if (atLeastOneValidScore == false) {
// No valid scores to normalize
return docs;
}
double norm = Math.sqrt(sumOfSquares);
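// A (near) zero norm means all scores are effectively zero; skip normalization to avoid division by zero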
if (norm < EPSILON) {
return docs;
}
ScoreDoc[] scoreDocs = new ScoreDoc[docs.length];
for (int i = 0; i < docs.length; i++) {
float score = (float) (docs[i].score / norm);
scoreDocs[i] = new ScoreDoc(docs[i].doc, score, docs[i].shardIndex);
}
return scoreDocs;
}
}
@@ -17,6 +17,9 @@ public abstract class ScoreNormalizer {
public static ScoreNormalizer valueOf(String normalizer) {
if (MinMaxScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
return MinMaxScoreNormalizer.INSTANCE;
} else if (L2ScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
return L2ScoreNormalizer.INSTANCE;

} else if (IdentityScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
return IdentityScoreNormalizer.INSTANCE;

@@ -0,0 +1,54 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.rank.linear;

import org.apache.lucene.search.ScoreDoc;
import org.elasticsearch.test.ESTestCase;

public class L2ScoreNormalizerTests extends ESTestCase {

public void testNormalizeTypicalVector() {
ScoreDoc[] docs = { new ScoreDoc(1, 3.0f, 0), new ScoreDoc(2, 4.0f, 0) };
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
assertEquals(0.6f, normalized[0].score, 1e-5);
assertEquals(0.8f, normalized[1].score, 1e-5);
}

public void testAllZeros() {
ScoreDoc[] docs = { new ScoreDoc(1, 0.0f, 0), new ScoreDoc(2, 0.0f, 0) };
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
assertEquals(0.0f, normalized[0].score, 0.0f);
assertEquals(0.0f, normalized[1].score, 0.0f);
}

public void testAllNaN() {
ScoreDoc[] docs = { new ScoreDoc(1, Float.NaN, 0), new ScoreDoc(2, Float.NaN, 0) };
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
assertTrue(Float.isNaN(normalized[0].score));
assertTrue(Float.isNaN(normalized[1].score));
}

public void testMixedZeroAndNaN() {
ScoreDoc[] docs = { new ScoreDoc(1, 0.0f, 0), new ScoreDoc(2, Float.NaN, 0) };
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
assertEquals(0.0f, normalized[0].score, 0.0f);
assertTrue(Float.isNaN(normalized[1].score));
}

public void testSingleElement() {
ScoreDoc[] docs = { new ScoreDoc(1, 42.0f, 0) };
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
assertEquals(1.0f, normalized[0].score, 1e-5);
}

public void testEmptyArray() {
ScoreDoc[] docs = {};
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
assertEquals(0, normalized.length);
}
}
@@ -265,6 +265,99 @@ setup:
- match: { hits.hits.3._id: "3" }
- close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } }

---
"should normalize initial scores with l2_norm":
- requires:
cluster_features: [ "linear_retriever.l2_norm" ]
reason: "Requires l2_norm normalization support in linear retriever"
- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
bool: {
should: [
{ constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 3.0 } },
{ constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 4.0 } }
]
}
}
}
},
weight: 10.0,
normalizer: "l2_norm"
},
{
retriever: {
standard: {
query: {
bool: {
should: [
{ constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 6.0 } },
{ constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 8.0 } }
]
}
}
}
},
weight: 2.0,
normalizer: "l2_norm"
}
]
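# Expected: first sub-retriever scores [3.0, 4.0] have L2 norm 5.0 -> [0.6, 0.8], then weight 10.0 -> [6.0, 8.0]
# Expected: second sub-retriever scores [6.0, 8.0] have L2 norm 10.0 -> [0.6, 0.8], then weight 2.0 -> [1.2, 1.6]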

- match: { hits.total.value: 4 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.0._score: 8.0 }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.1._score: 6.0 }
- match: { hits.hits.2._id: "4" }
- close_to: { hits.hits.2._score: { value: 1.6, error: 0.001 } }
- match: { hits.hits.3._id: "3" }
- match: { hits.hits.3._score: 1.2 }

---
"should handle all zero scores in normalization":
- requires:
cluster_features: [ "linear_retriever.l2_norm" ]
reason: "Requires l2_norm normalization support in linear retriever"
- do:
search:
index: test
body:
retriever:
linear:
retrievers: [
{
retriever: {
standard: {
query: {
bool: {
should: [
{ constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 0.0 } },
{ constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 0.0 } },
{ constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 0.0 } },
{ constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 0.0 } }
]
}
}
}
},
weight: 1.0,
normalizer: "l2_norm"
}
]
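# All raw scores are 0.0, so the L2 norm is below EPSILON and normalization is skipped; final scores remain 0.0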
- match: { hits.total.value: 4 }
- close_to: { hits.hits.0._score: { value: 0.0, error: 0.0001 } }
- close_to: { hits.hits.1._score: { value: 0.0, error: 0.0001 } }
- close_to: { hits.hits.2._score: { value: 0.0, error: 0.0001 } }
- close_to: { hits.hits.3._score: { value: 0.0, error: 0.0001 } }

---
"should throw on unknown normalizer":
- do: