From 39b570fc7e3f65c7d339cc8e05a5df16445002b0 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Fri, 10 Oct 2025 15:05:31 -0400 Subject: [PATCH 1/5] Add docs for chunk_rescorer --- .../rest-apis/retrievers/retrievers-examples.md | 17 +++++++++++++---- .../text-similarity-reranker-retriever.md | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md index b9dcc24a841ed..f3a61ab0ccf49 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/retrievers-examples.md @@ -440,7 +440,7 @@ GET /retrievers_example/_search "query": "artificial intelligence" } } -} +} ``` This returns the following response based on the final rrf score for each result. @@ -497,7 +497,7 @@ GET /retrievers_example/_search "fields": ["text", "text_semantic"] } } -} +} ``` ::::{note} @@ -570,7 +570,7 @@ GET /retrievers_example/_search "normalizer": "minmax" } } -} +} ``` This returns the following response based on the normalized score for each result: @@ -1503,6 +1503,7 @@ PUT _inference/rerank/my-rerank-model ``` Let’s start by reranking the results of the `rrf` retriever in our previous example. +We'll also apply a `chunk_rescorer` to ensure that we only consider the best scoring chunks when sending information to the reranker. ```console GET retrievers_example/_search @@ -1541,7 +1542,15 @@ GET retrievers_example/_search }, "field": "text", "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" 
+ "inference_text": "What are the state of the art applications of AI in information retrieval?", + "chunk_rescorer": { + "size": 1, + "chunking_settings": { + "strategy": "sentence", + "max_chunk_size": 300, + "sentence_overlap": 0 + } + } } }, "_source": false diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md index 9abb236a45d1e..fa8f919ceb049 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md @@ -86,6 +86,22 @@ score = ln(score), if score < 0 Applies the specified [boolean query filter](/reference/query-languages/query-dsl/query-dsl-bool-query.md) to the child `retriever`. If the child retriever already specifies any filters, then this top-level filter is applied in conjuction with the filter defined in the child retriever. +`chunk_rescorer` {applies_to}`stack: beta 9.2` +: (Optional, `object`) + + When specified, chunks and scores documents based on configured chunking settings, and only sends the best scoring chunks to the reranking model as input. This helps improve relevance when reranking long documents that would otherwise be truncated by the reranking model's token limit. + + Parameters for `chunk_rescorer`: + + `size` + : (Optional, `int`) + + The number of chunks to pass to the reranker for consideration. Defaults to `1`. + + `chunking_settings` + : (Optional, `object`) + + Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunk size for the Elastic Reranker. Note that if chunking settings are specified that chunk content into larger chunks than the reranker's token limit, it may result in truncation and negatively impact relevance. 
## Example: Elastic Rerank [text-similarity-reranker-retriever-example-elastic-rerank] From 932fb05bc98d38ba93bab0a3a83ba6e54d778cb3 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Fri, 10 Oct 2025 15:20:50 -0400 Subject: [PATCH 2/5] Updates to docs based on preview --- .../rest-apis/retrievers/text-similarity-reranker-retriever.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md index fa8f919ceb049..f1933a62018d4 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md @@ -101,7 +101,7 @@ score = ln(score), if score < 0 `chunking_settings` : (Optional, `object`) - Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunk size for the Elastic Reranker. Note that if chunking settings are specified that chunk content into larger chunks than the reranker's token limit, it may result in truncation and negatively impact relevance. + Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunking settings for the Elastic Reranker. Refer to the Inference API for valid values for `chunking_settings`. Warning: if chunking settings are specified that chunk content into larger chunks than the reranker's token limit, it may result in truncation and negatively impact relevance. 
## Example: Elastic Rerank [text-similarity-reranker-retriever-example-elastic-rerank] From d5b7b3d6ca09066f6b60d451106c2a6a4c10c1ba Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 13 Oct 2025 10:55:58 -0400 Subject: [PATCH 3/5] Update docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md Co-authored-by: Liam Thompson --- .../rest-apis/retrievers/text-similarity-reranker-retriever.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md index f1933a62018d4..dfacc77d53665 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md @@ -89,7 +89,7 @@ score = ln(score), if score < 0 `chunk_rescorer` {applies_to}`stack: beta 9.2` : (Optional, `object`) - When specified, chunks and scores documents based on configured chunking settings, and only sends the best scoring chunks to the reranking model as input. This helps improve relevance when reranking long documents that would otherwise be truncated by the reranking model's token limit. + Chunks and scores documents based on configured chunking settings, and only sends the best scoring chunks to the reranking model as input. This helps improve relevance when reranking long documents that would otherwise be truncated by the reranking model's token limit. 
Parameters for `chunk_rescorer`: From 1274c5e3a61f6606a9ab0102728e05875165fde5 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 13 Oct 2025 10:56:23 -0400 Subject: [PATCH 4/5] Update docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md Co-authored-by: Liam Thompson --- .../retrievers/text-similarity-reranker-retriever.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md index dfacc77d53665..39ae1c492b8aa 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md @@ -101,7 +101,10 @@ score = ln(score), if score < 0 `chunking_settings` : (Optional, `object`) - Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunking settings for the Elastic Reranker. Refer to the Inference API for valid values for `chunking_settings`. Warning: if chunking settings are specified that chunk content into larger chunks than the reranker's token limit, it may result in truncation and negatively impact relevance. + Settings for chunking text into smaller passages for scoring and reranking. Defaults to the optimal chunking settings for [Elastic Rerank](docs-content:///explore-analyze/machine-learning/nlp/ml-nlp-rerank.md). Refer to the [Inference API documentation](https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put#operation-inference-put-body-application-json-chunking_settings) for valid values for `chunking_settings`. + :::{warning} + If you configure chunks larger than the reranker's token limit, the chunks may be truncated. This can degrade relevance significantly. 
+ ::: ## Example: Elastic Rerank [text-similarity-reranker-retriever-example-elastic-rerank] From a5d117e2e6e3bd13897b509d9ca5dd7247e8d273 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Mon, 13 Oct 2025 10:56:52 -0400 Subject: [PATCH 5/5] Update docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md Co-authored-by: Liam Thompson --- .../rest-apis/retrievers/text-similarity-reranker-retriever.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md index 39ae1c492b8aa..1ffa71608e849 100644 --- a/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md +++ b/docs/reference/elasticsearch/rest-apis/retrievers/text-similarity-reranker-retriever.md @@ -96,7 +96,7 @@ score = ln(score), if score < 0 `size` : (Optional, `int`) - The number of chunks to pass to the reranker for consideration. Defaults to `1`. + The number of chunks to pass to the reranker. Defaults to `1`. `chunking_settings` : (Optional, `object`)