elastic · mridula-s109 · Jun 11, 2025 · Jun 2, 2025 · Jun 3, 2025 · Jun 5, 2025
diff --git a/docs/changelog/128504.yaml b/docs/changelog/128504.yaml
@@ -0,0 +1,5 @@
+pr: 128504
+summary: Add l2_norm normalization support to linear retriever
+area: Relevance
+type: enhancement
+issues: []
diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc
@@ -26,6 +26,40 @@ Returns top documents from a <<search-api-knn,knn search>>, in the context of a
 * <<linear-retriever,*Linear Retriever*>>.
 Combines the top results from multiple sub-retrievers using a weighted sum of their scores. Allows to specify different
 weights for each retriever, as well as independently normalize the scores from each result set.
+
+[discrete]
+[[retrievers-overview-linear-retriever-parameters]]
+==== Linear Retriever Parameters
+
+`retrievers`::
+(Required, array of objects)
+A list of sub-retriever configurations whose result sets are merged through a weighted sum. Each configuration can have a different weight and normalizer, depending on the specified retriever.
+
+Each entry specifies the following parameters:
+
+`retriever`::
+(Required, a `retriever` object)
+Specifies the retriever for which we will compute the top documents. The retriever will produce `rank_window_size` results, which will later be merged based on the specified `weight` and `normalizer`.
+
+`weight`::
+(Optional, float)
+The weight by which each score of this retriever’s top docs will be multiplied. Must be greater than or equal to 0. Defaults to 1.0.
+
+`normalizer`::
+(Optional, String)
+Specifies how the retriever’s scores are normalized before the specified `weight` is applied. Available values are: `minmax`, `l2_norm`, and `none`. Defaults to `none`.
+
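+* `none`: Applies no normalization; the scores are used as returned by the retriever.
+* `minmax`: A `MinMaxScoreNormalizer` that normalizes scores based on the following formula:
++
+[source,text]
+----
+score = (score - min) / (max - min)
+----
+
+* `l2_norm`: An `L2ScoreNormalizer` that divides each score by the L2 norm of the retriever’s scores:
++
+[source,text]
+----
+score = score / sqrt(sum of squared scores)
+----
++
+If the L2 norm is close to zero (for example, when all scores are 0), the scores are left unchanged.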
+
+See also the hybrid search example for how to independently configure and apply normalizers to retrievers.
+
 * <<rrf-retriever,*RRF Retriever*>>.
 Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm.
 Allows you to combine multiple result sets with different relevance indicators into a single result set.

diff --git a/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java b/x-pack/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/RankRRFFeatures.java
@@ -14,6 +14,7 @@
 import java.util.Set;
 
 import static org.elasticsearch.search.retriever.CompoundRetrieverBuilder.INNER_RETRIEVERS_FILTER_SUPPORT;
+import static org.elasticsearch.xpack.rank.linear.L2ScoreNormalizer.LINEAR_RETRIEVER_L2_NORM;
 import static org.elasticsearch.xpack.rank.linear.MinMaxScoreNormalizer.LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX;
 import static org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilder.RRF_RETRIEVER_COMPOSITION_SUPPORTED;
 
@@ -31,6 +32,6 @@ public Set<NodeFeature> getFeatures() {
 
     @Override
     public Set<NodeFeature> getTestFeatures() {
-        return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX);
+        // LINEAR_RETRIEVER_L2_NORM ("linear_retriever.l2_norm") is the feature the l2_norm YAML REST tests require.
+        return Set.of(INNER_RETRIEVERS_FILTER_SUPPORT, LINEAR_RETRIEVER_MINMAX_SINGLE_DOC_FIX, LINEAR_RETRIEVER_L2_NORM);
     }
 }
diff --git a/.../plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/L2ScoreNormalizer.java b/.../plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/L2ScoreNormalizer.java
@@ -0,0 +1,66 @@
+
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.rank.linear;
+
+import org.apache.lucene.search.ScoreDoc;
+import org.elasticsearch.features.NodeFeature;
+
+/**
+ * A score normalizer that applies L2 normalization to a set of scores.
+ * <p>
+ * Each score is divided by the L2 norm of the scores, i.e. the square root of the sum of the squared
+ * non-NaN scores. NaN scores do not contribute to the norm and remain NaN in the result.
+ * </p>
+ * <p>
+ * Normalization is skipped, and the input array is returned as is, when the input is empty, when every
+ * score is NaN, or when the norm is smaller than a small EPSILON (for example when all scores are zero).
+ * </p>
+ */
+public class L2ScoreNormalizer extends ScoreNormalizer {
+
+    public static final L2ScoreNormalizer INSTANCE = new L2ScoreNormalizer();
+
+    /** Name under which this normalizer is looked up, see {@code ScoreNormalizer#valueOf}. */
+    public static final String NAME = "l2_norm";
+
+    /** Norms below this threshold are treated as zero so that scores are never divided by a vanishing value. */
+    private static final float EPSILON = 1e-6f;
+
+    /** Node feature used to signal that {@code l2_norm} normalization is available. */
+    public static final NodeFeature LINEAR_RETRIEVER_L2_NORM = new NodeFeature("linear_retriever.l2_norm");
+
+    public L2ScoreNormalizer() {}
+
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    /**
+     * Normalizes the scores of {@code docs} by their L2 norm.
+     *
+     * @param docs the scored documents to normalize; the array and its elements are not modified
+     * @return a new array of {@link ScoreDoc} with the same doc ids and shard indices and the normalized scores,
+     *         or {@code docs} itself when normalization is skipped
+     */
+    @Override
+    public ScoreDoc[] normalizeScores(ScoreDoc[] docs) {
+        if (docs.length == 0) {
+            return docs;
+        }
+        double sumOfSquares = 0.0;
+        boolean atLeastOneValidScore = false;
+        for (ScoreDoc doc : docs) {
+            if (Float.isNaN(doc.score) == false) {
+                atLeastOneValidScore = true;
+                // Widen before multiplying: float * float is evaluated in float, which overflows to Infinity
+                // for large scores (and rounds to float precision) before the value reaches the double sum.
+                sumOfSquares += (double) doc.score * doc.score;
+            }
+        }
+        if (atLeastOneValidScore == false) {
+            // No valid scores to normalize
+            return docs;
+        }
+        double norm = Math.sqrt(sumOfSquares);
+        if (norm < EPSILON) {
+            // A (near) zero norm means all valid scores are (near) zero; leave them untouched
+            return docs;
+        }
+        ScoreDoc[] scoreDocs = new ScoreDoc[docs.length];
+        for (int i = 0; i < docs.length; i++) {
+            // NaN inputs stay NaN here because NaN / norm is NaN
+            float score = (float) (docs[i].score / norm);
+            scoreDocs[i] = new ScoreDoc(docs[i].doc, score, docs[i].shardIndex);
+        }
+        return scoreDocs;
+    }
+}
diff --git a/...ck/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java b/...ck/plugin/rank-rrf/src/main/java/org/elasticsearch/xpack/rank/linear/ScoreNormalizer.java
@@ -17,6 +17,9 @@ public abstract class ScoreNormalizer {
     public static ScoreNormalizer valueOf(String normalizer) {
         if (MinMaxScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
             return MinMaxScoreNormalizer.INSTANCE;
+        } else if (L2ScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
+            return L2ScoreNormalizer.INSTANCE;
+
         } else if (IdentityScoreNormalizer.NAME.equalsIgnoreCase(normalizer)) {
             return IdentityScoreNormalizer.INSTANCE;
 

diff --git a/...in/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/L2ScoreNormalizerTests.java b/...in/rank-rrf/src/test/java/org/elasticsearch/xpack/rank/linear/L2ScoreNormalizerTests.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.xpack.rank.linear;
+
+import org.apache.lucene.search.ScoreDoc;
+import org.elasticsearch.test.ESTestCase;
+
+public class L2ScoreNormalizerTests extends ESTestCase {
+
+    /** Builds a {@link ScoreDoc} on shard 0 with the given doc id and score. */
+    private static ScoreDoc doc(int id, float score) {
+        return new ScoreDoc(id, score, 0);
+    }
+
+    /** Runs the shared {@link L2ScoreNormalizer} instance over the given docs. */
+    private static ScoreDoc[] normalize(ScoreDoc... docs) {
+        return L2ScoreNormalizer.INSTANCE.normalizeScores(docs);
+    }
+
+    /** Scores (3, 4) have an L2 norm of 5 and therefore normalize to (0.6, 0.8). */
+    public void testNormalizeTypicalVector() {
+        ScoreDoc[] result = normalize(doc(1, 3.0f), doc(2, 4.0f));
+        assertEquals(0.6f, result[0].score, 1e-5);
+        assertEquals(0.8f, result[1].score, 1e-5);
+    }
+
+    /** All-zero scores have a zero norm and must come back as zeros. */
+    public void testAllZeros() {
+        ScoreDoc[] result = normalize(doc(1, 0.0f), doc(2, 0.0f));
+        assertEquals(0.0f, result[0].score, 0.0f);
+        assertEquals(0.0f, result[1].score, 0.0f);
+    }
+
+    /** With only NaN scores there is nothing to normalize; the scores stay NaN. */
+    public void testAllNaN() {
+        ScoreDoc[] result = normalize(doc(1, Float.NaN), doc(2, Float.NaN));
+        assertTrue(Float.isNaN(result[0].score));
+        assertTrue(Float.isNaN(result[1].score));
+    }
+
+    /** A zero score next to a NaN score: the zero stays zero and the NaN stays NaN. */
+    public void testMixedZeroAndNaN() {
+        ScoreDoc[] result = normalize(doc(1, 0.0f), doc(2, Float.NaN));
+        assertEquals(0.0f, result[0].score, 0.0f);
+        assertTrue(Float.isNaN(result[1].score));
+    }
+
+    /** A single non-zero score is its own norm and normalizes to 1. */
+    public void testSingleElement() {
+        ScoreDoc[] result = normalize(doc(1, 42.0f));
+        assertEquals(1.0f, result[0].score, 1e-5);
+    }
+
+    /** An empty input yields an empty output. */
+    public void testEmptyArray() {
+        ScoreDoc[] result = normalize();
+        assertEquals(0, result.length);
+    }
+}
diff --git a/...gin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml b/...gin/rank-rrf/src/yamlRestTest/resources/rest-api-spec/test/linear/10_linear_retriever.yml
@@ -265,6 +265,99 @@ setup:
   - match: { hits.hits.3._id: "3" }
   - close_to: { hits.hits.3._score: { value: 0.0, error: 0.001 } }
 
+---
+"should normalize initial scores with l2_norm":
+  - requires:
+      cluster_features: [ "linear_retriever.l2_norm" ]
+      reason: "Requires l2_norm normalization support in linear retriever"
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            linear:
+              retrievers: [
+                {
+                  retriever: {
+                    standard: {
+                      query: {
+                        bool: {
+                          should: [
+                            { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 3.0 } },
+                            { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 4.0 } }
+                          ]
+                        }
+                      }
+                    }
+                  },
+                  weight: 10.0,
+                  normalizer: "l2_norm"
+                },
+                {
+                  retriever: {
+                    standard: {
+                      query: {
+                        bool: {
+                          should: [
+                            { constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 6.0 } },
+                            { constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 8.0 } }
+                          ]
+                        }
+                      }
+                    }
+                  },
+                  weight: 2.0,
+                  normalizer: "l2_norm"
+                }
+              ]
+
+  - match: { hits.total.value: 4 }
+  - match: { hits.hits.0._id: "2" }
+  - match: { hits.hits.0._score: 8.0 }  # expected: boost 4.0 / L2 norm sqrt(3^2 + 4^2) = 0.8, times weight 10.0
+  - match: { hits.hits.1._id: "1" }
+  - match: { hits.hits.1._score: 6.0 }  # expected: boost 3.0 / L2 norm 5.0 = 0.6, times weight 10.0
+  - match: { hits.hits.2._id: "4" }
+  - close_to: { hits.hits.2._score: { value: 1.6, error: 0.001 } }  # expected: boost 8.0 / L2 norm sqrt(6^2 + 8^2) = 0.8, times weight 2.0
+  - match: { hits.hits.3._id: "3" }
+  - close_to: { hits.hits.3._score: { value: 1.2, error: 0.001 } }  # expected: boost 6.0 / L2 norm 10.0 = 0.6, times weight 2.0
+
+---
+"should handle all zero scores in normalization":
+  - requires:
+      cluster_features: [ "linear_retriever.l2_norm" ]
+      reason: "Requires l2_norm normalization support in linear retriever"
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            linear:
+              retrievers: [
+                {
+                  retriever: {
+                    standard: {
+                      query: {
+                        bool: {
+                          should: [
+                            { constant_score: { filter: { term: { keyword: { value: "one" } } }, boost: 0.0 } },
+                            { constant_score: { filter: { term: { keyword: { value: "two" } } }, boost: 0.0 } },
+                            { constant_score: { filter: { term: { keyword: { value: "three" } } }, boost: 0.0 } },
+                            { constant_score: { filter: { term: { keyword: { value: "four" } } }, boost: 0.0 } }
+                          ]
+                        }
+                      }
+                    }
+                  },
+                  weight: 1.0,
+                  normalizer: "l2_norm"
+                }
+              ]
+  - match: { hits.total.value: 4 }
+  - close_to: { hits.hits.0._score: { value: 0.0, error: 0.0001 } }  # every clause has boost 0.0; with all-zero scores the L2 norm is 0 and l2_norm leaves the scores unchanged
+  - close_to: { hits.hits.1._score: { value: 0.0, error: 0.0001 } }
+  - close_to: { hits.hits.2._score: { value: 0.0, error: 0.0001 } }
+  - close_to: { hits.hits.3._score: { value: 0.0, error: 0.0001 } }
+
 ---
 "should throw on unknown normalizer":
   - do: