Skip to content

Commit 550d288

Browse files
mridula-s109 and Copilot
authored and committed
Add l2_norm normalization support to linear retriever (elastic#128504)
* New l2 normalizer added
* L2 score normaliser is registered
* test case added to the yaml
* Documentation added
* Resolved checkstyle issues
* Update docs/changelog/128504.yaml
* Update docs/reference/elasticsearch/rest-apis/retrievers.md
  Co-authored-by: Copilot <[email protected]>
* Score 0 test case added to check for corner cases
* Edited the markdown doc description
* Pruned the comment
* Renamed the variable
* Added comment to the class
* Unit tests added
* Spotless and checkstyle fixed
* Fixed build failure
* Fixed the forbidden test

---------

Co-authored-by: Copilot <[email protected]>
1 parent bc76585 commit 550d288

File tree

6 files changed

+214
-1
lines changed

6 files changed

+214
-1
lines changed

docs/changelog/128504.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128504
2+
summary: Add l2_norm normalization support to linear retriever
3+
area: Relevance
4+
type: enhancement
5+
issues: []

docs/reference/elasticsearch/rest-apis/retrievers.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ Each entry specifies the following parameters:
276276
`normalizer`
277277
: (Optional, String)
278278

279-
Specifies how we will normalize the retriever’s scores, before applying the specified `weight`. Available values are: `minmax`, and `none`. Defaults to `none`.
279+
Specifies how the retriever’s scores are normalized before the specified `weight` is applied. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`.
280280

281281
* `none`
282282
* `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula
@@ -285,6 +285,7 @@ Each entry specifies the following parameters:
285285
score = (score - min) / (max - min)
286286
```
287287

288+
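* `l2_norm` : An `L2ScoreNormalizer` that divides each score by the L2 (Euclidean) norm of all score values, i.e. `score = score / sqrt(sum(score_i^2))`. If the norm is zero or very close to zero (for example, when all scores are `0`), the scores are left unchanged.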
288289

289290
See also [this hybrid search example](docs-content://solutions/search/retrievers-examples.md#retrievers-examples-linear-retriever) using a linear retriever on how to independently configure and apply normalizers to retrievers.
290291

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
2+
/*
3+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
4+
* or more contributor license agreements. Licensed under the Elastic License
5+
* 2.0; you may not use this file except in compliance with the Elastic License
6+
* 2.0.
7+
*/
8+
9+
package org.elasticsearch.xpack.rank.linear;
10+
11+
import org.apache.lucene.search.ScoreDoc;
12+
13+
/**
14+
* A score normalizer that applies L2 normalization to a set of scores.
15+
* <p>
16+
* This normalizer scales the scores so that the L2 norm of the score vector is 1,
17+
* if possible. If all scores are zero or NaN, normalization is skipped and the original scores are returned.
18+
* </p>
19+
*/
20+
public class L2ScoreNormalizer extends ScoreNormalizer {
21+
22+
public static final L2ScoreNormalizer INSTANCE = new L2ScoreNormalizer();
23+
24+
public static final String NAME = "l2_norm";
25+
26+
private static final float EPSILON = 1e-6f;
27+
28+
public L2ScoreNormalizer() {}
29+
30+
@Override
31+
public String getName() {
32+
return NAME;
33+
}
34+
35+
@Override
36+
public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
37+
if (docs.length == 0) {
38+
return docs;
39+
}
40+
double sumOfSquares = 0.0;
41+
boolean atLeastOneValidScore = false;
42+
for (ScoreDoc doc : docs) {
43+
if (Float.isNaN(doc.score) == false) {
44+
atLeastOneValidScore = true;
45+
sumOfSquares += doc.score * doc.score;
46+
}
47+
}
48+
if (atLeastOneValidScore == false) {
49+
// No valid scores to normalize
50+
return docs;
51+
}
52+
double norm = Math.sqrt(sumOfSquares);
53+
if (norm < EPSILON) {
54+
return docs;
55+
}
56+
ScoreDoc[] scoreDocs = new ScoreDoc[docs.length];
57+
for (int i = 0; i < docs.length; i++) {
58+
float score = (float) (docs[i].score / norm);
59+
scoreDocs[i] = new ScoreDoc(docs[i].doc, score, docs[i].shardIndex);
60+
}
61+
return scoreDocs;
62+
}
63+
}

x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ public abstract class ScoreNormalizer {
1717
public static ScoreNormalizer valueOf(String normalizer) {
1818
if (MinMaxScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
1919
return MinMaxScoreNormalizer.INSTANCE;
20+
} else if (L2ScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
21+
return L2ScoreNormalizer.INSTANCE;
22+
2023
} else if (IdentityScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
2124
return IdentityScoreNormalizer.INSTANCE;
2225

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.rank.linear;
9+
10+
import org.apache.lucene.search.ScoreDoc;
11+
import org.elasticsearch.test.ESTestCase;
12+
13+
public class L2ScoreNormalizerTests extends ESTestCase {
14+
15+
public void testNormalizeTypicalVector() {
16+
ScoreDoc[] docs = { new ScoreDoc(1, 3.0f, 0), new ScoreDoc(2, 4.0f, 0) };
17+
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
18+
assertEquals(0.6f, normalized[0].score, 1e-5);
19+
assertEquals(0.8f, normalized[1].score, 1e-5);
20+
}
21+
22+
public void testAllZeros() {
23+
ScoreDoc[] docs = { new ScoreDoc(1, 0.0f, 0), new ScoreDoc(2, 0.0f, 0) };
24+
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
25+
assertEquals(0.0f, normalized[0].score, 0.0f);
26+
assertEquals(0.0f, normalized[1].score, 0.0f);
27+
}
28+
29+
public void testAllNaN() {
30+
ScoreDoc[] docs = { new ScoreDoc(1, Float.NaN, 0), new ScoreDoc(2, Float.NaN, 0) };
31+
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
32+
assertTrue(Float.isNaN(normalized[0].score));
33+
assertTrue(Float.isNaN(normalized[1].score));
34+
}
35+
36+
public void testMixedZeroAndNaN() {
37+
ScoreDoc[] docs = { new ScoreDoc(1, 0.0f, 0), new ScoreDoc(2, Float.NaN, 0) };
38+
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
39+
assertEquals(0.0f, normalized[0].score, 0.0f);
40+
assertTrue(Float.isNaN(normalized[1].score));
41+
}
42+
43+
public void testSingleElement() {
44+
ScoreDoc[] docs = { new ScoreDoc(1, 42.0f, 0) };
45+
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
46+
assertEquals(1.0f, normalized[0].score, 1e-5);
47+
}
48+
49+
public void testEmptyArray() {
50+
ScoreDoc[] docs = {};
51+
ScoreDoc[] normalized = L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
52+
assertEquals(0, normalized.length);
53+
}
54+
}

x-pack/plugin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,93 @@ setup:
265265
- match: { hits.hits.3._id: "3" }
266266
- close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } }
267267

268+
---
"should normalize initial scores with l2_norm":
  # Two sub-retrievers, each normalized independently with l2_norm before its weight is applied:
  #   retriever 1: raw scores (3, 4) -> norm 5  -> (0.6, 0.8) * weight 10 = (6.0, 8.0)
  #   retriever 2: raw scores (6, 8) -> norm 10 -> (0.6, 0.8) * weight 2  = (1.2, 1.6)
  - do:
      search:
        index: test
        body:
          retriever:
            linear:
              retrievers: [
                {
                  retriever: {
                    standard: {
                      query: {
                        bool: {
                          should: [
                            { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 3.0 } },
                            { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 4.0 } }
                          ]
                        }
                      }
                    }
                  },
                  weight: 10.0,
                  normalizer: "l2_norm"
                },
                {
                  retriever: {
                    standard: {
                      query: {
                        bool: {
                          should: [
                            { constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 6.0 } },
                            { constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 8.0 } }
                          ]
                        }
                      }
                    }
                  },
                  weight: 2.0,
                  normalizer: "l2_norm"
                }
              ]

  # Scores are floating-point results of a division, so compare them with a tolerance
  # rather than by exact equality.
  - match: { hits.total.value: 4 }
  - match: { hits.hits.0._id: "2" }
  - close_to: { hits.hits.0._score: { value: 8.0, error: 0.001 } }
  - match: { hits.hits.1._id: "1" }
  - close_to: { hits.hits.1._score: { value: 6.0, error: 0.001 } }
  - match: { hits.hits.2._id: "4" }
  - close_to: { hits.hits.2._score: { value: 1.6, error: 0.001 } }
  - match: { hits.hits.3._id: "3" }
  - close_to: { hits.hits.3._score: { value: 1.2, error: 0.001 } }
320+
321+
---
"should handle all zero scores in normalization":
  # Every clause has boost 0.0, so all raw scores are zero; the test expects the
  # l2_norm normalizer to leave them at zero.
  - do:
      search:
        index: test
        body:
          retriever:
            linear:
              retrievers:
                - retriever:
                    standard:
                      query:
                        bool:
                          should:
                            - constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 0.0 }
                            - constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 0.0 }
                            - constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 0.0 }
                            - constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 0.0 }
                  weight: 1.0
                  normalizer: "l2_norm"
  - match: { hits.total.value: 4 }
  - close_to: { hits.hits.0._score: { value: 0.0, error: 0.0001 } }
  - close_to: { hits.hits.1._score: { value: 0.0, error: 0.0001 } }
  - close_to: { hits.hits.2._score: { value: 0.0, error: 0.0001 } }
  - close_to: { hits.hits.3._score: { value: 0.0, error: 0.0001 } }
268355
---
269356
"should throw on unknown normalizer":
270357
- do:

0 commit comments

Comments
 (0)