elastic · elasticsearchmachine · Dec 19, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/docs/changelog/118585.yaml b/docs/changelog/118585.yaml
@@ -0,0 +1,7 @@
+pr: 118585
+summary: Add a generic `rescorer` retriever based on the search request's rescore
+  functionality
+area: Ranking
+type: feature
+issues:
+ - 118327
diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc
@@ -22,6 +22,9 @@ A <<standard-retriever, retriever>> that replaces the functionality of a traditi
 `knn`::
 A <<knn-retriever, retriever>> that replaces the functionality of a <<search-api-knn, knn search>>.
 
+`rescorer`::
+A <<rescorer-retriever, retriever>> that replaces the functionality of the <<rescore, query rescorer>>.
+
 `rrf`::
 A <<rrf-retriever, retriever>> that produces top documents from <<rrf, reciprocal rank fusion (RRF)>>.
 
@@ -371,6 +374,122 @@ GET movies/_search
 ----
 // TEST[skip:uses ELSER]
 
+[[rescorer-retriever]]
+==== Rescorer Retriever
+
+The `rescorer` retriever re-scores only the results produced by its child retriever.
+For the `standard` and `knn` retrievers, the `window_size` parameter specifies the number of documents examined per shard.
+
+For compound retrievers like `rrf`, the `window_size` parameter defines the total number of documents examined globally.
+
+When using the `rescorer`, an error is returned if the following conditions are not met:
+
+* The minimum configured rescore's `window_size` is:
+** Greater than or equal to the `size` of the parent retriever for nested `rescorer` setups.
+** Greater than or equal to the `size` of the search request when used as the primary retriever in the tree.
+
+* The maximum configured rescore's `window_size` is:
+** Smaller than or equal to the `size` or `rank_window_size` of the child retriever.
+
+[discrete]
+[[rescorer-retriever-parameters]]
+===== Parameters
+
+`rescore`::
+(Required. <<rescore, A rescorer definition or an array of rescorer definitions>>)
++
+Defines the <<rescore, rescorers>> applied sequentially to the top documents returned by the child retriever.
+
+`retriever`::
+(Required. <<retriever, retriever>>)
++
+Specifies the child retriever responsible for generating the initial set of top documents to be re-ranked.
+
+`filter`::
+(Optional. <<query-dsl, query object or list of query objects>>)
++
+Applies a <<query-dsl-bool-query, boolean query filter>> to the retriever, ensuring that all documents match the filter criteria without affecting their scores.
+
+[discrete]
+[[rescorer-retriever-example]]
+==== Example
+
+The `rescorer` retriever can be placed at any level within the retriever tree.
+The following example demonstrates a `rescorer` applied to the results produced by an `rrf` retriever:
+
+[source,console]
+----
+GET movies/_search
+{
+  "size": 10, <1>
+  "retriever": {
+    "rescorer": { <2>
+      "rescore": {
+        "window_size": 50, <4>
+        "query": { <3>
+          "rescore_query": {
+            "script_score": {
+              "script": {
+                "source": "cosineSimilarity(params.queryVector, 'product-vector_final_stage') + 1.0",
+                "params": {
+                  "queryVector": [-0.5, 90.0, -10, 14.8, -156.0]
+                }
+              }
+            }
+          }
+        }
+      },
+      "retriever": { <5>
+        "rrf": {
+          "rank_window_size": 100, <6>
+          "retrievers": [
+            {
+              "standard": {
+                "query": {
+                  "sparse_vector": {
+                    "field": "plot_embedding",
+                    "inference_id": "my-elser-model",
+                    "query": "films that explore psychological depths"
+                  }
+                }
+              }
+            },
+            {
+              "standard": {
+                "query": {
+                  "multi_match": {
+                    "query": "crime",
+                    "fields": [
+                      "plot",
+                      "title"
+                    ]
+                  }
+                }
+              }
+            },
+            {
+              "knn": {
+                "field": "vector",
+                "query_vector": [10, 22, 77],
+                "k": 10,
+                "num_candidates": 10
+              }
+            }
+          ]
+        }
+      }
+    }
+  }
+}
+----
+// TEST[skip:uses ELSER]
+<1> Specifies the number of top documents to return in the final response.
+<2> A `rescorer` retriever applied as the final step.
+<3> The definition of the `query` rescorer.
+<4> Defines the number of documents to rescore from the child retriever.
+<5> Specifies the child retriever definition.
+<6> Defines the number of documents returned by the `rrf` retriever, which limits the available documents to rescore.
+
 [[text-similarity-reranker-retriever]]
 ==== Text Similarity Re-ranker Retriever
 
@@ -772,4 +891,4 @@ When a retriever is specified as part of a search, the following elements are no
 * <<search-after, `search_after`>>
 * <<request-body-search-terminate-after, `terminate_after`>>
 * <<search-sort-param, `sort`>>
-* <<rescore, `rescore`>>
+* <<rescore, `rescore`>> (use a <<rescorer-retriever, rescorer retriever>> instead)
diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle
@@ -258,4 +258,5 @@ tasks.named("yamlRestTestV7CompatTransform").configure({ task ->
   task.skipTest("search.vectors/41_knn_search_bbq_hnsw/Test knn search", "Scoring has changed in latest versions")
   task.skipTest("search.vectors/42_knn_search_bbq_flat/Test knn search", "Scoring has changed in latest versions")
   task.skipTest("synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set", "Can't work until auto-expand replicas is 0-1 for synonyms index")
+  task.skipTest("search/90_search_after/_shard_doc sort", "restriction has been lifted in latest versions")
 })
diff --git a/...src/yamlRestTest/resources/rest-api-spec/test/search.retrievers/30_rescorer_retriever.yml b/...src/yamlRestTest/resources/rest-api-spec/test/search.retrievers/30_rescorer_retriever.yml
@@ -0,0 +1,225 @@
+setup:
+  - requires:
+      cluster_features: [ "search.retriever.rescorer.enabled" ]
+      reason: "Support for rescorer retriever"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            properties:
+              available:
+                type: boolean
+              features:
+                type: rank_features
+
+  - do:
+      bulk:
+        refresh: true
+        index: test
+        body:
+          - '{"index": {"_id": 1 }}'
+          - '{"features": { "first_stage": 1, "second_stage": 10}, "available": true, "group": 1}'
+          - '{"index": {"_id": 2 }}'
+          - '{"features": { "first_stage": 2, "second_stage": 9}, "available": false, "group": 1}'
+          - '{"index": {"_id": 3 }}'
+          - '{"features": { "first_stage": 3, "second_stage": 8}, "available": false, "group": 3}'
+          - '{"index": {"_id": 4 }}'
+          - '{"features": { "first_stage": 4, "second_stage": 7}, "available": true, "group": 1}'
+          - '{"index": {"_id": 5 }}'
+          - '{"features": { "first_stage": 5, "second_stage": 6}, "available": true, "group": 3}'
+          - '{"index": {"_id": 6 }}'
+          - '{"features": { "first_stage": 6, "second_stage": 5}, "available": false, "group": 2}'
+          - '{"index": {"_id": 7 }}'
+          - '{"features": { "first_stage": 7, "second_stage": 4}, "available": true, "group": 3}'
+          - '{"index": {"_id": 8 }}'
+          - '{"features": { "first_stage": 8, "second_stage": 3}, "available": true, "group": 1}'
+          - '{"index": {"_id": 9 }}'
+          - '{"features": { "first_stage": 9, "second_stage": 2}, "available": true, "group": 2}'
+          - '{"index": {"_id": 10 }}'
+          - '{"features": { "first_stage": 10, "second_stage": 1}, "available": false, "group": 1}'
+
+---
+"Rescorer retriever basic":
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            rescorer:
+              rescore:
+                window_size: 10
+                query:
+                  rescore_query:
+                    rank_feature:
+                      field: "features.second_stage"
+                      linear: { }
+                  query_weight: 0
+              retriever:
+                standard:
+                  query:
+                    rank_feature:
+                      field: "features.first_stage"
+                      linear: { }
+          size: 2
+
+  - match: { hits.total.value: 10 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.0._score: 10.0 }
+  - match: { hits.hits.1._id: "2" }
+  - match: { hits.hits.1._score: 9.0 }
+
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            rescorer:
+              rescore:
+                window_size: 3
+                query:
+                  rescore_query:
+                    rank_feature:
+                      field: "features.second_stage"
+                      linear: {}
+                  query_weight: 0
+              retriever:
+                standard:
+                  query:
+                    rank_feature:
+                      field: "features.first_stage"
+                      linear: {}
+          size: 2
+
+  - match: {hits.total.value: 10}
+  - match: {hits.hits.0._id: "8"}
+  - match: { hits.hits.0._score: 3.0 }
+  - match: {hits.hits.1._id: "9"}
+  - match: { hits.hits.1._score: 2.0 }
+
+---
+"Rescorer retriever with pre-filters":
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            rescorer:
+              filter:
+                match:
+                  available: true
+              rescore:
+                window_size: 10
+                query:
+                  rescore_query:
+                    rank_feature:
+                      field: "features.second_stage"
+                      linear: { }
+                  query_weight: 0
+              retriever:
+                standard:
+                  query:
+                    rank_feature:
+                      field: "features.first_stage"
+                      linear: { }
+          size: 2
+
+  - match: { hits.total.value: 6 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.0._score: 10.0 }
+  - match: { hits.hits.1._id: "4" }
+  - match: { hits.hits.1._score: 7.0 }
+
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            rescorer:
+              rescore:
+                window_size: 4
+                query:
+                  rescore_query:
+                    rank_feature:
+                      field: "features.second_stage"
+                      linear: { }
+                  query_weight: 0
+              retriever:
+                standard:
+                  filter:
+                    match:
+                      available: true
+                  query:
+                    rank_feature:
+                      field: "features.first_stage"
+                      linear: { }
+          size: 2
+
+  - match: { hits.total.value: 6 }
+  - match: { hits.hits.0._id: "5" }
+  - match: { hits.hits.0._score: 6.0 }
+  - match: { hits.hits.1._id: "7" }
+  - match: { hits.hits.1._score: 4.0 }
+
+---
+"Rescorer retriever and collapsing":
+  - do:
+      search:
+        index: test
+        body:
+          retriever:
+            rescorer:
+              rescore:
+                window_size: 10
+                query:
+                  rescore_query:
+                    rank_feature:
+                      field: "features.second_stage"
+                      linear: { }
+                  query_weight: 0
+              retriever:
+                standard:
+                  query:
+                    rank_feature:
+                      field: "features.first_stage"
+                      linear: { }
+          collapse:
+            field: group
+          size: 3
+
+  - match: { hits.total.value: 10 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.0._score: 10.0 }
+  - match: { hits.hits.1._id: "3" }
+  - match: { hits.hits.1._score: 8.0 }
+  - match: { hits.hits.2._id: "6" }
+  - match: { hits.hits.2._score: 5.0 }
+
+---
+"Rescorer retriever and invalid window size":
+  - do:
+      catch: "/\\[rescorer\\] requires \\[window_size: 5\\] be greater than or equal to \\[size: 10\\]/"
+      search:
+        index: test
+        body:
+          retriever:
+            rescorer:
+              rescore:
+                window_size: 5
+                query:
+                  rescore_query:
+                    rank_feature:
+                      field: "features.second_stage"
+                      linear: { }
+                  query_weight: 0
+              retriever:
+                standard:
+                  query:
+                    rank_feature:
+                      field: "features.first_stage"
+                      linear: { }
+          size: 10