diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md index f3a61ab0ccf49..fab0a05e356f2 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md @@ -113,7 +113,9 @@ First, let’s examine how to combine two different types of queries: a `kNN` qu While these queries may produce scores in different ranges, we can use Reciprocal Rank Fusion (`rrf`) to combine the results and generate a merged final result list. To implement this in the retriever framework, we start with the top-level element: our `rrf` retriever. -This retriever operates on top of two other retrievers: a `knn` retriever and a `standard` retriever. Our query structure would look like this: +This retriever operates on top of two other retrievers: a `knn` retriever and a `standard` retriever. +We can specify weights to adjust the influence of each retriever on the final ranking. 
+In this example, we're giving the `standard` retriever twice the influence of the `knn` retriever: ```console GET /retrievers_example/_search @@ -122,26 +124,32 @@ GET /retrievers_example/_search "rrf": { "retrievers": [ { - "standard": { - "query": { - "query_string": { - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" + "retriever": { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } } } - } + }, + "weight": 2.0 }, { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "weight": 1.0 } ], "rank_window_size": 10, diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/rrf-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/rrf-retriever.md index 36c7ce4cad453..c82b11ffedd2d 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/rrf-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/rrf-retriever.md @@ -6,7 +6,7 @@ applies_to: # RRF retriever [rrf-retriever] -An [RRF](/reference/elasticsearch/rest-apis/reciprocal-rank-fusion.md) retriever returns top documents based on the RRF formula, equally weighting two or more child retrievers. +An [RRF](/reference/elasticsearch/rest-apis/reciprocal-rank-fusion.md) retriever returns top documents based on the RRF formula, combining two or more child retrievers. Reciprocal rank fusion (RRF) is a method for combining multiple result sets with different relevance indicators into a single result set. @@ -32,7 +32,8 @@ Combining `query` and `retrievers` is not supported. : (Optional, array of retriever objects) A list of child retrievers to specify which sets of returned top documents will have the RRF formula applied to them. 
- Each child retriever carries an equal weight as part of the RRF formula. Two or more child retrievers are required. + Two or more child retrievers are required. + Each retriever can optionally include a weight to adjust its influence on the final ranking. `rank_constant` : (Optional, integer) @@ -53,6 +54,34 @@ Combining `query` and `retrievers` is not supported. Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to all of the specified sub-retrievers, according to each retriever’s specifications. +Each entry in the `retrievers` array can be specified in two ways: + +**Without custom weight** (uses default weight of `1.0`): +```json +{ "standard": { "query": {...} } } +``` + +**With custom weight** {applies_to}`stack: ga 9.2`: +```json +{ "retriever": { "standard": { "query": {...} } }, "weight": 2.0 } +``` + +When you need to specify a custom weight, wrap your retriever in an object with `retriever` and `weight` fields. {applies_to}`stack: ga 9.2` + +The wrapped form supports these parameters: + +`retriever` +: (Optional, a retriever object) + + Specifies a child retriever. Any valid retriever type can be used (e.g., `standard`, `knn`, `text_similarity_reranker`, etc.). + +`weight` {applies_to}`stack: ga 9.2` +: (Optional, float) + + The weight that each score of this retriever's top docs will be multiplied by in the RRF formula. Higher values increase this retriever's influence on the final ranking. Must be non-negative. + + When `weight` is not specified for a retriever, it defaults to `1.0`. If no weights are specified, all retrievers are equally weighted against each other. + ## Example: Hybrid search [rrf-retriever-example-hybrid] A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: @@ -182,6 +211,75 @@ GET /restaurants/_search 5. The rank constant for the RRF retriever. 6. The rank window size for the RRF retriever. 
+## Example: Weighted hybrid search [rrf-retriever-example-weighted] + +{applies_to}`stack: ga 9.2` + +This example demonstrates how to use weights to adjust the influence of different retrievers in the RRF ranking. +In this case, we're giving the `standard` retriever more importance (weight 2.0) compared to the `knn` retriever (weight 1.0): + +```console +GET /restaurants/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "retriever": { <1> + "standard": { + "query": { + "multi_match": { + "query": "Austria", + "fields": ["city", "region"] + } + } + } + }, + "weight": 2.0 <2> + }, + { + "retriever": { <3> + "knn": { + "field": "vector", + "query_vector": [10, 22, 77], + "k": 10, + "num_candidates": 10 + } + }, + "weight": 1.0 <4> + } + ], + "rank_constant": 60, + "rank_window_size": 50 + } + } +} +``` +% TEST[continued] + +1. The first retriever in weighted format. +2. This retriever has a weight of 2.0, giving it twice the influence of the kNN retriever. +3. The second retriever in weighted format. +4. This retriever has a weight of 1.0 (default weight). + +::::{note} +You can mix weighted and non-weighted formats in the same query. +The direct format (without explicit `retriever` wrapper) uses the default weight of `1.0`: + +```json +{ + "rrf": { + "retrievers": [ + { "standard": { "query": {...} } }, + { "retriever": { "knn": {...} }, "weight": 2.0 } + ] + } +} +``` + +In this example, the `standard` retriever uses weight `1.0` (default), while the `knn` retriever uses weight `2.0`. +:::: + ## Example: Hybrid search with sparse vectors [rrf-retriever-example-hybrid-sparse] A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF: