elastic · mridula-s109 · Mar 6, 2025 · Mar 6, 2025 · Mar 12, 2025 · Mar 13, 2025
diff --git a/docs/changelog/124182.yaml b/docs/changelog/124182.yaml
@@ -0,0 +1,5 @@
+pr: 124182
+summary: Add `min_score` support to the linear retriever
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/reference/elasticsearch/rest-apis/retrievers.md b/docs/reference/elasticsearch/rest-apis/retrievers.md
@@ -269,11 +269,11 @@ Each entry specifies the following parameters:
 
 * `weight`:: (Optional, float)
 
-    The weight that each score of this retriever’s top docs will be multiplied with. Must be greater or equal to 0. Defaults to 1.0.
+    The weight that each score of this retriever's top docs will be multiplied with. Must be greater or equal to 0. Defaults to 1.0.
 
 * `normalizer`:: (Optional, String)
 
-    Specifies how we will normalize the retriever’s scores, before applying the specified `weight`. Available values are: `minmax`, and `none`. Defaults to `none`.
+    Specifies how we will normalize the retriever's scores, before applying the specified `weight`. Available values are: `minmax`, and `none`. Defaults to `none`.
 
     * `none`
     * `minmax` : A `MinMaxScoreNormalizer` that normalizes scores based on the following formula
@@ -288,14 +288,78 @@ See also [this hybrid search example](docs-content://solutions/search/retrievers
 `rank_window_size`
 :   (Optional, integer)
 
-    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
+    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request's [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
+
+
+`min_score`
+:   (Optional, float)
+
+    Minimum score threshold for documents to be included in the final result set. Documents with scores below this threshold will be filtered out. Must be greater than or equal to 0. Defaults to 0.
 
 
 `filter`
 :   (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md))
 
-    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications.
+    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever's specifications.
+
+
+### Example: Hybrid search with min_score [linear-retriever-example]
+
+This example demonstrates how to use the Linear retriever to combine a standard retriever with a kNN retriever, applying weights, normalization, and a minimum score threshold:
+
+```console
+GET /restaurants/_search
+{
+  "retriever": {
+    "linear": { <1>
+      "retrievers": [ <2>
+        {
+          "retriever": { <3>
+            "standard": {
+              "query": {
+                "multi_match": {
+                  "query": "Italian cuisine",
+                  "fields": [
+                    "description",
+                    "cuisine"
+                  ]
+                }
+              }
+            }
+          },
+          "weight": 2.0, <4>
+          "normalizer": "minmax" <5>
+        },
+        {
+          "retriever": { <6>
+            "knn": {
+              "field": "vector",
+              "query_vector": [10, 22, 77],
+              "k": 10,
+              "num_candidates": 10
+            }
+          },
+          "weight": 1.0, <7>
+          "normalizer": "minmax" <8>
+        }
+      ],
+      "rank_window_size": 50, <9>
+      "min_score": 1.5 <10>
+    }
+  }
+}
+```
 
+1. Defines a retriever tree with a Linear retriever.
+2. The sub-retrievers array.
+3. The first sub-retriever is a `standard` retriever.
+4. The weight applied to the scores from the standard retriever (2.0).
+5. The normalization method applied to the standard retriever's scores.
+6. The second sub-retriever is a `knn` retriever.
+7. The weight applied to the scores from the kNN retriever (1.0).
+8. The normalization method applied to the kNN retriever's scores.
+9. The rank window size for the Linear retriever.
+10. The minimum score threshold - documents with a combined score below 1.5 will be filtered out from the final result set.
 
 
 ## RRF Retriever [rrf-retriever]
@@ -320,13 +384,13 @@ An [RRF](/reference/elasticsearch/rest-apis/reciprocal-rank-fusion.md) retriever
 `rank_window_size`
 :   (Optional, integer)
 
-    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request’s [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
+    This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request's [size](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-search#search-size-param). `rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter.
 
 
 `filter`
 :   (Optional, [query object or list of query objects](/reference/query-languages/querydsl.md))
 
-    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications.
+    Applies the specified [boolean query filter](/reference/query-languages/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever's specifications.
 
 
 
@@ -435,12 +499,12 @@ For compound retrievers like `rrf`, the `window_size` parameter defines the tota
 
 When using the `rescorer`, an error is returned if the following conditions are not met:
 
-* The minimum configured rescore’s `window_size` is:
+* The minimum configured rescore's `window_size` is:
 
     * Greater than or equal to the `size` of the parent retriever for nested `rescorer` setups.
     * Greater than or equal to the `size` of the search request when used as the primary retriever in the tree.
 
-* And the maximum rescore’s `window_size` is:
+* And the maximum rescore's `window_size` is:
 
     * Smaller than or equal to the `size` or `rank_window_size` of the child retriever.
 
@@ -564,7 +628,7 @@ To use `text_similarity_reranker` you must first set up an inference endpoint fo
 
 You have the following options:
 
-* Use the the built-in [Elastic Rerank](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) cross-encoder model via the inference API’s {{es}} service.
+* Use the built-in [Elastic Rerank](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) cross-encoder model via the inference API's {{es}} service.
 * Use the [Cohere Rerank inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) with the `rerank` task type.
 * Use the [Google Vertex AI inference endpoint](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put) with the `rerank` task type.
 * Upload a model to {{es}} with [Eland](eland://reference/machine-learning.md#ml-nlp-pytorch) using the `text_similarity` NLP task type.

diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -185,6 +185,7 @@ static TransportVersion def(int id) {
     public static final TransportVersion ESQL_THREAD_NAME_IN_DRIVER_PROFILE = def(9_027_0_00);
     public static final TransportVersion INFERENCE_CONTEXT = def(9_028_0_00);
     public static final TransportVersion ML_INFERENCE_DEEPSEEK = def(9_029_00_0);
+    public static final TransportVersion RANK_DOCS_MIN_SCORE = def(9_030_0_00);
 
     /*
      * STOP! READ THIS FIRST! No, really,

diff --git a/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/RankDocsQueryBuilder.java
@@ -29,26 +29,49 @@ public class RankDocsQueryBuilder extends AbstractQueryBuilder<RankDocsQueryBuil
 
     public static final String NAME = "rank_docs_query";
 
+    /**
+     * Default minimum score threshold for documents to be included in results.
+     * Using Float.MIN_VALUE as the default ensures that by default no documents
+     * are filtered out based on score, as virtually all scores will be above this threshold.
+     *
+     * This threshold is separate from the special handling of scores that are exactly 0:
+     * - The minScore parameter determines which documents are included in results based on their score
+     * - Documents with a score of exactly 0 will always be assigned Float.MIN_VALUE internally
+     *   to differentiate them from filtered matches, regardless of the minScore value
+     *
+     * Setting minScore to a higher value (e.g., 0.5f) would filter out documents with scores below that threshold,
+     * which can be useful to remove documents that only match filters but have no relevance score contribution.
+     */
+    public static final float DEFAULT_MIN_SCORE = Float.MIN_VALUE;
+
     private final RankDoc[] rankDocs;
     private final QueryBuilder[] queryBuilders;
     private final boolean onlyRankDocs;
+    private final float minScore;
 
     public RankDocsQueryBuilder(RankDoc[] rankDocs, QueryBuilder[] queryBuilders, boolean onlyRankDocs) {
+        this(rankDocs, queryBuilders, onlyRankDocs, DEFAULT_MIN_SCORE);
+    }
+
+    public RankDocsQueryBuilder(RankDoc[] rankDocs, QueryBuilder[] queryBuilders, boolean onlyRankDocs, float minScore) {
         this.rankDocs = rankDocs;
         this.queryBuilders = queryBuilders;
         this.onlyRankDocs = onlyRankDocs;
+        this.minScore = minScore;
     }
 
     public RankDocsQueryBuilder(StreamInput in) throws IOException {
         super(in);
         this.rankDocs = in.readArray(c -> c.readNamedWriteable(RankDoc.class), RankDoc[]::new);
+        QueryBuilder[] queryBuilders = null;
+        boolean onlyRankDocs = false;
         if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
-            this.queryBuilders = in.readOptionalArray(c -> c.readNamedWriteable(QueryBuilder.class), QueryBuilder[]::new);
-            this.onlyRankDocs = in.readBoolean();
-        } else {
-            this.queryBuilders = null;
-            this.onlyRankDocs = false;
+            queryBuilders = in.readOptionalArray(c -> c.readNamedWriteable(QueryBuilder.class), QueryBuilder[]::new);
+            onlyRankDocs = in.readBoolean();
         }
+        this.queryBuilders = queryBuilders;
+        this.onlyRankDocs = onlyRankDocs;
+        this.minScore = in.getTransportVersion().onOrAfter(TransportVersions.RANK_DOCS_MIN_SCORE) ? in.readFloat() : DEFAULT_MIN_SCORE;
     }
 
     @Override
@@ -70,7 +93,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
                 changed |= newQueryBuilders[i] != queryBuilders[i];
             }
             if (changed) {
-                RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs);
+                RankDocsQueryBuilder clone = new RankDocsQueryBuilder(rankDocs, newQueryBuilders, onlyRankDocs, minScore);
                 clone.queryName(queryName());
                 return clone;
             }
@@ -88,6 +111,9 @@ protected void doWriteTo(StreamOutput out) throws IOException {
         if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0)) {
             out.writeOptionalArray(StreamOutput::writeNamedWriteable, queryBuilders);
             out.writeBoolean(onlyRankDocs);
+            if (out.getTransportVersion().onOrAfter(TransportVersions.RANK_DOCS_MIN_SCORE)) {
+                out.writeFloat(minScore);
+            }
         }
     }
 
@@ -115,7 +141,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
             queries = new Query[0];
             queryNames = Strings.EMPTY_ARRAY;
         }
-        return new RankDocsQuery(reader, shardRankDocs, queries, queryNames, onlyRankDocs);
+        return new RankDocsQuery(reader, shardRankDocs, queries, queryNames, onlyRankDocs, minScore);
     }
 
     @Override
@@ -135,12 +161,13 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
     protected boolean doEquals(RankDocsQueryBuilder other) {
         return Arrays.equals(rankDocs, other.rankDocs)
             && Arrays.equals(queryBuilders, other.queryBuilders)
-            && onlyRankDocs == other.onlyRankDocs;
+            && onlyRankDocs == other.onlyRankDocs
+            && Float.compare(minScore, other.minScore) == 0; // same float equality as doHashCode (NaN-safe)
     }
 
     @Override
     protected int doHashCode() {
-        return Objects.hash(Arrays.hashCode(rankDocs), Arrays.hashCode(queryBuilders), onlyRankDocs);
+        return Objects.hash(Arrays.hashCode(rankDocs), Arrays.hashCode(queryBuilders), onlyRankDocs, minScore);
     }
 
     @Override

diff --git a/server/src/main/java/org/elasticsearch/search/retriever/KnnRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/KnnRetrieverBuilder.java
@@ -34,6 +34,7 @@
 import java.util.function.Supplier;
 
 import static org.elasticsearch.common.Strings.format;
+import static org.elasticsearch.index.query.RankDocsQueryBuilder.DEFAULT_MIN_SCORE;
 import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
 import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
 
@@ -201,7 +202,7 @@ public RetrieverBuilder rewrite(QueryRewriteContext ctx) throws IOException {
     public QueryBuilder topDocsQuery() {
         assert queryVector != null : "query vector must be materialized at this point";
         assert rankDocs != null : "rankDocs should have been materialized by now";
-        var rankDocsQuery = new RankDocsQueryBuilder(rankDocs, null, true);
+        var rankDocsQuery = new RankDocsQueryBuilder(rankDocs, null, true, DEFAULT_MIN_SCORE);
         if (preFilterQueryBuilders.isEmpty()) {
             return rankDocsQuery.queryName(retrieverName);
         }
@@ -217,7 +218,8 @@ public QueryBuilder explainQuery() {
         var rankDocsQuery = new RankDocsQueryBuilder(
             rankDocs,
             new QueryBuilder[] { new ExactKnnQueryBuilder(VectorData.fromFloats(queryVector.get()), field, similarity) },
-            true
+            false,
+            DEFAULT_MIN_SCORE
         );
         if (preFilterQueryBuilders.isEmpty()) {
             return rankDocsQuery.queryName(retrieverName);

diff --git a/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/RankDocsRetrieverBuilder.java
@@ -23,6 +23,8 @@
 import java.util.Objects;
 import java.util.function.Supplier;
 
+import static org.elasticsearch.index.query.RankDocsQueryBuilder.DEFAULT_MIN_SCORE;
+
 /**
  * An {@link RetrieverBuilder} that is used to retrieve documents based on the rank of the documents.
  */
@@ -93,7 +95,8 @@ public QueryBuilder explainQuery() {
         var explainQuery = new RankDocsQueryBuilder(
             rankDocs.get(),
             sources.stream().map(RetrieverBuilder::explainQuery).toArray(QueryBuilder[]::new),
-            true
+            true,
+            DEFAULT_MIN_SCORE
         );
         explainQuery.queryName(retrieverName());
         return explainQuery;
@@ -113,17 +116,19 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
                 rankQuery = new RankDocsQueryBuilder(
                     rankDocResults,
                     sources.stream().map(RetrieverBuilder::topDocsQuery).toArray(QueryBuilder[]::new),
-                    false
+                    false,
+                    DEFAULT_MIN_SCORE
                 );
             } else {
                 rankQuery = new RankDocsQueryBuilder(
                     rankDocResults,
                     sources.stream().map(RetrieverBuilder::explainQuery).toArray(QueryBuilder[]::new),
-                    false
+                    false,
+                    DEFAULT_MIN_SCORE
                 );
             }
         } else {
-            rankQuery = new RankDocsQueryBuilder(rankDocResults, null, false);
+            rankQuery = new RankDocsQueryBuilder(rankDocResults, null, false, DEFAULT_MIN_SCORE);
         }
         rankQuery.queryName(retrieverName());
         // ignore prefilters of this level, they were already propagated to children
@@ -132,7 +137,7 @@ public void extractToSearchSourceBuilder(SearchSourceBuilder searchSourceBuilder
             searchSourceBuilder.size(rankWindowSize);
         }
         if (sourceHasMinScore()) {
-            searchSourceBuilder.minScore(this.minScore() == null ? Float.MIN_VALUE : this.minScore());
+            searchSourceBuilder.minScore(this.minScore() == null ? DEFAULT_MIN_SCORE : this.minScore());
         }
         if (searchSourceBuilder.size() + searchSourceBuilder.from() > rankDocResults.length) {
             searchSourceBuilder.size(Math.max(0, rankDocResults.length - searchSourceBuilder.from()));