From 1ac4b4643e48426f842b6c034e91c0624447b34e Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Wed, 16 Oct 2024 09:16:43 +0300 Subject: [PATCH] Removing tech-preview header and updating documentation for retrievers and RRF (#114810) --- docs/reference/search/retriever.asciidoc | 15 +-- docs/reference/search/rrf.asciidoc | 14 +-- .../retrievers-overview.asciidoc | 105 +++++++----------- .../retriever/CompoundRetrieverBuilder.java | 3 + 4 files changed, 49 insertions(+), 88 deletions(-) diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 54836ac33762d..9306d83c79136 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -1,8 +1,6 @@ [[retriever]] === Retriever -preview::["This functionality is in technical preview and may be changed or removed in a future release. The syntax will likely change before GA. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the <> that also return top documents such as <> and @@ -75,7 +73,7 @@ Collapses the top documents by a specified key into a single top document per ke ===== Restrictions When a retriever tree contains a compound retriever (a retriever with two or more child -retrievers) *only* the query element is allowed. +retrievers) the <> parameter is not supported. [discrete] [[standard-retriever-example]] @@ -245,12 +243,6 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-window-size] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-filter] -===== Restrictions - -An RRF retriever is a compound retriever. Child retrievers may not use -elements that are restricted by having a compound retriever as part of -the retriever tree. 
- [discrete] [[rrf-retriever-example-hybrid]] ==== Example: Hybrid search @@ -413,10 +405,6 @@ Applies the specified <> to the chil If the child retriever already specifies any filters, then this top-level filter is applied in conjuction with the filter defined in the child retriever. -===== Restrictions - -A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. - [discrete] [[text-similarity-reranker-retriever-example-cohere]] ==== Example: Cohere Rerank @@ -555,4 +543,3 @@ at the top-level and instead are only allowed as elements of specific retrievers * <> * <> * <> -* <> diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index 2a676e5fba336..edd3b67e3de04 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -1,8 +1,6 @@ [[rrf]] === Reciprocal rank fusion -preview::["This functionality is in technical preview and may be changed or removed in a future release. The syntax will likely change before GA. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf[Reciprocal rank fusion (RRF)] is a method for combining multiple result sets with different relevance indicators into a single result set. RRF requires no tuning, and the different relevance indicators do not have to be related to each other to achieve high-quality results. @@ -95,19 +93,21 @@ The `rrf` retriever supports: * <> * <> +* <> +* <> +* <> +* <> The `rrf` retriever does not currently support: * <> -* <> * <> * <> -* <> -* <> -* <> -* <> Using unsupported features as part of a search with an `rrf` retriever results in an exception. 
++ +IMPORTANT: It is best to avoid providing a <> as part of the request, as +RRF creates one internally that is shared by all sub-retrievers to ensure consistent results. [[rrf-using-multiple-standard-retrievers]] ==== Reciprocal rank fusion using multiple standard retrievers diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index c0fe7471946f3..9df4026fc6445 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -1,9 +1,7 @@ [[retrievers-overview]] === Retrievers -preview::[] - -A retriever is an abstraction that was added to the Search API in *8.14.0*. +A retriever is an abstraction that was added to the Search API in *8.14.0* and was made generally available in *8.16.0*. This abstraction enables the configuration of multi-stage retrieval pipelines within a single `_search` call. This simplifies your search application logic, because you no longer need to configure complex searches via multiple {es} calls or implement additional client-side logic to combine results from different queries. @@ -32,8 +30,7 @@ with different relevance indicators into a single result set. An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. + -Sub retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. -See the <> for detailed examples and information on how to use the RRF retriever. + * <>. Used for <>. Requires first creating a `rerank` task using the <>. @@ -72,82 +69,56 @@ When using compound retrievers, only the query element is allowed, which enforce [[retrievers-overview-example]] ==== Example -The following example demonstrates how using retrievers simplify the composability of queries for RRF ranking. 
+The following example demonstrates the powerful queries that we can now compose, and how retrievers simplify this process. We can use any combination of retrievers we want, propagating the +results of a nested retriever to its parent. In this scenario, we'll make use of all 4 (currently) available retrievers, i.e. `standard`, `knn`, `text_similarity_reranker` and `rrf`. +We'll first combine the results of a `semantic` query using the `standard` retriever, and that of a `knn` search on a dense vector field, using `rrf` to get the top 100 results. +Finally, we'll rerank the top-50 results of `rrf` using the `text_similarity_reranker`. [source,js] ---- GET example-index/_search { "retriever": { - "rrf": { - "retrievers": [ - { - "standard": { - "query": { - "sparse_vector": { - "field": "vector.tokens", - "inference_id": "my-elser-endpoint", - "query": "What blue shoes are on sale?" + "text_similarity_reranker": { + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "semantic": { + "field": "inference_field", + "query": "state of the art vector database" + } + } + } + }, + { + "knn": { + "query_vector": [ + 0.54, + ..., + 0.245 + ], + "field": "embedding", + "k": 10, + "num_candidates": 15 } } - } - }, - { - "standard": { - "query": { - "match": { - "text": "blue shoes sale" - } - } - } - } - ] - } - } -} ----- -//NOTCONSOLE - -This example demonstrates how you can combine different retrieval strategies into a single `retriever` pipeline. - -Compare to `RRF` with `sub_searches` approach: - -.*Expand* for example -[%collapsible] -============== - -[source,js] ----- -GET example-index/_search -{ - "sub_searches":[ - { - "query":{ - "match":{ - "text":"blue shoes sale" - } - } - }, - { - "query":{ - "sparse_vector": { - "field": "vector.tokens", - "inference_id": "my-elser-endoint", - "query": "What blue shoes are on sale?" 
- } + ], + "rank_window_size": 100, + "rank_constant": 10 } - } - ], - "rank":{ - "rrf":{ - "rank_window_size":50, - "rank_constant":20 + }, + "rank_window_size": 50, + "field": "description", + "inference_text": "what's the best way to create complex pipelines and retrieve documents?", + "inference_id": "my-awesome-rerank-model" } } } ---- //NOTCONSOLE -============== [discrete] [[retrievers-overview-glossary]] diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 22bef026523e9..e994c55e43452 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -194,6 +194,9 @@ public ActionRequestValidationException validate( validationException ); } + for (RetrieverSource innerRetriever : innerRetrievers) { + validationException = innerRetriever.retriever().validate(source, validationException, allowPartialSearchResults); + } return validationException; }