From 4e7f594fcd019c31fd38e74931c9c502bd1c3dcc Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 15 Oct 2024 13:32:38 +0300 Subject: [PATCH 1/6] Removing tech-preview header and documentation for retrievers and RRF --- docs/reference/search/retriever.asciidoc | 14 +------------- docs/reference/search/rrf.asciidoc | 7 ------- .../search-your-data/retrievers-overview.asciidoc | 7 ++----- .../search/retriever/CompoundRetrieverBuilder.java | 3 +++ 4 files changed, 6 insertions(+), 25 deletions(-) diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 54836ac33762d..097af27491ff7 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -1,8 +1,6 @@ [[retriever]] === Retriever -preview::["This functionality is in technical preview and may be changed or removed in a future release. The syntax will likely change before GA. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the <> that also return top documents such as <> and @@ -75,7 +73,7 @@ Collapses the top documents by a specified key into a single top document per ke ===== Restrictions When a retriever tree contains a compound retriever (a retriever with two or more child -retrievers) *only* the query element is allowed. +retrievers) the <> parameter is not supported. [discrete] [[standard-retriever-example]] @@ -245,12 +243,6 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-window-size] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-filter] -===== Restrictions - -An RRF retriever is a compound retriever. Child retrievers may not use -elements that are restricted by having a compound retriever as part of -the retriever tree. 
- [discrete] [[rrf-retriever-example-hybrid]] ==== Example: Hybrid search @@ -413,10 +405,6 @@ Applies the specified <> to the chil If the child retriever already specifies any filters, then this top-level filter is applied in conjuction with the filter defined in the child retriever. -===== Restrictions - -A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. - [discrete] [[text-similarity-reranker-retriever-example-cohere]] ==== Example: Cohere Rerank diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index 2a676e5fba336..2eb618661e7d8 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -1,8 +1,6 @@ [[rrf]] === Reciprocal rank fusion -preview::["This functionality is in technical preview and may be changed or removed in a future release. The syntax will likely change before GA. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf[Reciprocal rank fusion (RRF)] is a method for combining multiple result sets with different relevance indicators into a single result set. RRF requires no tuning, and the different relevance indicators do not have to be related to each other to achieve high-quality results. @@ -99,13 +97,8 @@ The `rrf` retriever supports: The `rrf` retriever does not currently support: * <> -* <> * <> * <> -* <> -* <> -* <> -* <> Using unsupported features as part of a search with an `rrf` retriever results in an exception. 
diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index c0fe7471946f3..f3759231e2bc4 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -1,9 +1,7 @@ [[retrievers-overview]] === Retrievers -preview::[] - -A retriever is an abstraction that was added to the Search API in *8.14.0*. +A retriever is an abstraction that was added to the Search API in *8.14.0* and has been made GA in *8.16.0*. This abstraction enables the configuration of multi-stage retrieval pipelines within a single `_search` call. This simplifies your search application logic, because you no longer need to configure complex searches via multiple {es} calls or implement additional client-side logic to combine results from different queries. @@ -32,8 +30,7 @@ with different relevance indicators into a single result set. An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. + -Sub retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. -See the <> for detailed examples and information on how to use the RRF retriever. + * <>. Used for <>. Requires first creating a `rerank` task using the <>. 
diff --git a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java index 22bef026523e9..e994c55e43452 100644 --- a/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/retriever/CompoundRetrieverBuilder.java @@ -194,6 +194,9 @@ public ActionRequestValidationException validate( validationException ); } + for (RetrieverSource innerRetriever : innerRetrievers) { + validationException = innerRetriever.retriever().validate(source, validationException, allowPartialSearchResults); + } return validationException; } From bb8f7ddcbbafc7a489da266e0ae13ee01c0d52f0 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 15 Oct 2024 13:48:46 +0300 Subject: [PATCH 2/6] updating docs --- docs/reference/search/rrf.asciidoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index 2eb618661e7d8..edd3b67e3de04 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -93,6 +93,10 @@ The `rrf` retriever supports: * <> * <> +* <> +* <> +* <> +* <> The `rrf` retriever does not currently support: @@ -101,6 +105,9 @@ The `rrf` retriever does not currently support: * <> Using unsupported features as part of a search with an `rrf` retriever results in an exception. ++ +IMPORTANT: It is best to avoid providing a <> as part of the request, as +RRF creates one internally that is shared by all sub-retrievers to ensure consistent results. 
[[rrf-using-multiple-standard-retrievers]] ==== Reciprocal rank fusion using multiple standard retrievers From 5bf05116cad833ed1bc760be4784fed19ec3cc20 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 15 Oct 2024 14:07:43 +0300 Subject: [PATCH 3/6] Update docs/reference/search/search-your-data/retrievers-overview.asciidoc Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../search/search-your-data/retrievers-overview.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index f3759231e2bc4..d092a05813392 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -1,7 +1,7 @@ [[retrievers-overview]] === Retrievers -A retriever is an abstraction that was added to the Search API in *8.14.0* and has been made GA in *8.16.0*. +A retriever is an abstraction that was added to the Search API in *8.14.0* and was made GA in *8.16.0*. This abstraction enables the configuration of multi-stage retrieval pipelines within a single `_search` call. This simplifies your search application logic, because you no longer need to configure complex searches via multiple {es} calls or implement additional client-side logic to combine results from different queries. 
From 89b8bc8945e6168489f8267e8cc0f64285a7bc01 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 15 Oct 2024 21:28:41 +0300 Subject: [PATCH 4/6] Update docs/reference/search/search-your-data/retrievers-overview.asciidoc Co-authored-by: Benjamin Trent --- .../search/search-your-data/retrievers-overview.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index d092a05813392..3cfccc0c6d81b 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -1,7 +1,7 @@ [[retrievers-overview]] === Retrievers -A retriever is an abstraction that was added to the Search API in *8.14.0* and was made GA in *8.16.0*. +A retriever is an abstraction that was added to the Search API in *8.14.0* and was made generally available in *8.16.0*. This abstraction enables the configuration of multi-stage retrieval pipelines within a single `_search` call. This simplifies your search application logic, because you no longer need to configure complex searches via multiple {es} calls or implement additional client-side logic to combine results from different queries. 
From 58dd18c8ba6dff761f1ec6f0515e5e84f7e3be91 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 15 Oct 2024 22:06:28 +0300 Subject: [PATCH 5/6] adding rrf with semantic_reranker example --- .../retrievers-overview.asciidoc | 98 +++++++------------ 1 file changed, 36 insertions(+), 62 deletions(-) diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index f3759231e2bc4..d8db062748e09 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -69,82 +69,56 @@ When using compound retrievers, only the query element is allowed, which enforce [[retrievers-overview-example]] ==== Example -The following example demonstrates how using retrievers simplify the composability of queries for RRF ranking. +The following example demonstrates the powerful queries that we can now compose, and how retrievers simplify this process. We can use any combination of retrievers we want, propagating the +results of a nested retriever to its parent. In this scenario, we'll make use of all 4 (currently) available retrievers, i.e. `standard`, `knn`, `text_similarity_reranker` and `rrf`. +We'll first combine the results of a `semantic` query using the `standard` retriever, and that of a `knn` search on a dense vector field, using `rrf` to get the top 100 results. +Finally, we'll rerank the top 50 results of `rrf` using the `text_similarity_reranker`. [source,js] ---- GET example-index/_search { "retriever": { - "rrf": { - "retrievers": [ - { - "standard": { - "query": { - "sparse_vector": { - "field": "vector.tokens", - "inference_id": "my-elser-endpoint", - "query": "What blue shoes are on sale?" 
+ "text_similarity_reranker": { + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "semantic": { + "field": "inference_field", + "query": "state of the art vector database" + } + } + } + }, + { + "knn": { + "query_vector": [ + 0.54, + ..., + 0.245 + ], + "field": "embedding", + "k": 10, + "num_candidates": 15 } } - } - }, - { - "standard": { - "query": { - "match": { - "text": "blue shoes sale" - } - } - } - } - ] - } - } -} ----- -//NOTCONSOLE - -This example demonstrates how you can combine different retrieval strategies into a single `retriever` pipeline. - -Compare to `RRF` with `sub_searches` approach: - -.*Expand* for example -[%collapsible] -============== - -[source,js] ----- -GET example-index/_search -{ - "sub_searches":[ - { - "query":{ - "match":{ - "text":"blue shoes sale" - } - } - }, - { - "query":{ - "sparse_vector": { - "field": "vector.tokens", - "inference_id": "my-elser-endoint", - "query": "What blue shoes are on sale?" - } + ], + "rank_window_size": 100, + "rank_constant": 10 } - } - ], - "rank":{ - "rrf":{ - "rank_window_size":50, - "rank_constant":20 + }, + "rank_window_size": 50, + "field": "description", + "inference_text": "what's the best way to create complex pipelines and retrieve documents?", + "inference_id": "my-awesome-rerank-model" } } } ---- //NOTCONSOLE -============== [discrete] [[retrievers-overview-glossary]] From 6e8843a49b7b1ee6613e4267d22125b566fbfd9e Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Tue, 15 Oct 2024 22:17:46 +0300 Subject: [PATCH 6/6] removing min_score as an unsupported option --- docs/reference/search/retriever.asciidoc | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 097af27491ff7..9306d83c79136 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -543,4 +543,3 @@ at the top-level and instead are only allowed as elements of 
specific retrievers * <> * <> * <> -* <>