From 2f9cd8d7a9d26d326ce46bc7823f9f0cb798d573 Mon Sep 17 00:00:00 2001 From: kosabogi Date: Fri, 11 Oct 2024 10:59:30 +0200 Subject: [PATCH 01/20] Creates a new page for the hybrid search tutorial --- .../search-your-data/semantic-search.asciidoc | 1 + .../semantic-text-hybrid-search | 197 ++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 docs/reference/search/search-your-data/semantic-text-hybrid-search diff --git a/docs/reference/search/search-your-data/semantic-search.asciidoc b/docs/reference/search/search-your-data/semantic-search.asciidoc index 62e41b3eef3de..0ef8591e42b5d 100644 --- a/docs/reference/search/search-your-data/semantic-search.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search.asciidoc @@ -104,6 +104,7 @@ IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer include::semantic-search-semantic-text.asciidoc[] +include::semantic-text-hybrid-search[] include::semantic-search-inference.asciidoc[] include::semantic-search-elser.asciidoc[] include::cohere-es.asciidoc[] diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search new file mode 100644 index 0000000000000..de237645f5c4a --- /dev/null +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -0,0 +1,197 @@ +[[semantic-text-hybrid-search]] +=== Tutorial: hybrid search with `semantic_text` +++++ +Hybrid search with `semantic_text` +++++ + +This tutorial demonstrates how to perform **hybrid search**, combining semantic search with traditional full-text search. + +In hybrid search, semantic search retrieves results based on the meaning of the text, while full-text search focuses on exact word matches. By combining both methods, hybrid search delivers more relevant results, particularly in cases where relying on a single approach may not be sufficient. 
+ +The recommended way to use hybrid search in the {stack} is to follow the `semantic_text` workflow. This tutorial uses the <<infer-service-elser,`elser` service>> for demonstration, but you can use any service and its supported models offered by the {infer-cap} API. + +[discrete] +[[semantic-text-hybrid-infer-endpoint]] +==== Create the {infer} endpoint + +Create an inference endpoint by using the <<put-inference-api>>: + +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/my-elser-endpoint <1> +{ + "service": "elser", <2> + "service_settings": { + "num_allocations": 1, + "num_threads": 1 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `sparse_embedding` in the path as the `elser` service will +be used and ELSER creates sparse vectors. The `inference_id` is +`my-elser-endpoint`. +<2> The `elser` service is used in this example. + +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +==== + +[discrete] +[[hybrid-search-create-index-mapping]] +==== Create an index mapping for hybrid search + +The destination index (where the search will be performed) should contain both the embeddings for semantic search and the original text field for full-text search. This structure enables the combination of semantic search and full-text search, allowing the search engine to consider both the +meaning and the exact words in the query. 
+ +[source,console] +------------------------------------------------------------ +PUT semantic-hybrid-embeddings +{ + "mappings": { + "properties": { + "semantic_text": { <1> + "type": "semantic_text", + "inference_id": "my-elser-endpoint" <2> + }, + "content": { <3> + "type": "text", + "copy_to": "semantic_text" <4> + } + } + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The name of the field to contain the generated embeddings for semantic search. +<2> The `inference_id` is the inference endpoint that generates the embeddings based on the input text. +<3> The name of the field to contain the original text for lexical search. +<4> The `copy_to` field copies the contents of the `content` field into the `semantic_text` field, enabling hybrid search functionality. + +[NOTE] +==== +If you want to run a search on indices that were populated by web crawlers or connectors, you have to +<<indices-put-mapping,update the index mappings>> for these indices to +include the `semantic_text` field. Once the mapping is updated, you'll need to run a full web crawl or a full connector sync. This ensures that all existing +documents are reprocessed and updated with the new semantic embeddings, enabling hybrid search on the updated data. +==== + +[discrete] +[[semantic-text-hybrid-load-data]] +==== Load data + +In this step, you load the data that you later use to create embeddings from. + +Use the `msmarco-passagetest2019-top1000` data set, which is a subset of the MS MARCO Passage Ranking data set. It consists of 200 queries, each accompanied by a list of relevant text passages. All unique passages, along with their IDs, have been extracted from that data set and compiled into a https://github.com/elastic/stack-docs/blob/main/docs/en/stack/ml/nlp/data/msmarco-passagetest2019-unique.tsv[tsv file]. + +Download the file and upload it to your cluster using the {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer] in the {ml-app} UI. 
After your data is analyzed, click **Override settings**. Under **Edit field names**, assign `id` to the first column and `content` to the second. Click **Apply**, then **Import**. Name the index `test-data`, and click **Import**. After the upload is complete, you will see an index named `test-data` with 182,469 documents. + +[discrete] +[[hybrid-search-reindex-data]] +==== Reindex the data for hybrid search + +Reindex the data from the `test-data` index into the `semantic-hybrid-embeddings` index to enable both lexical and semantic search. +The data in the `content` field of the source index is copied into the `content` field of the destination index. +The `copy_to` functionality ensures that the content is duplicated into the `semantic_text` field, where it will be processed by the inference endpoint to generate embeddings. + +[NOTE] +==== +This step uses the reindex API to simulate data ingestion. If you are working with data that has already been indexed, +rather than using the `test-data` set, reindexing is still required to ensure that the data is processed by the {infer} endpoint +and the necessary embeddings are generated. +==== + +[source,console] +------------------------------------------------------------ +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 10 <1> + }, + "dest": { + "index": "semantic-hybrid-embeddings" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing size to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +The call returns a task ID to monitor the progress: + +[source,console] +------------------------------------------------------------ +GET _tasks/<task_id> +------------------------------------------------------------ +// TEST[skip:TBD] + +Reindexing large datasets can take a long time. 
You can test this workflow using only a subset of the dataset. + +To cancel the reindexing process and generate embeddings for the subset that was reindexed: + +[source,console] +------------------------------------------------------------ +POST _tasks/<task_id>/_cancel +------------------------------------------------------------ +// TEST[skip:TBD] + +[discrete] +[[hybrid-search-perform-search]] +==== Perform hybrid search + +After reindexing the data into the `semantic-hybrid-embeddings` index, you can perform hybrid search, which combines both semantic and lexical search. You can perform hybrid search using <>. RRF is a technique that merges the rankings from both semantic and lexical queries, giving more weight to results that rank high in either search. This ensures that the final results are balanced and relevant. + +[source,console] +------------------------------------------------------------ +GET semantic-hybrid-embeddings/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { <1> + "query": { + "match": { + "content": "How to avoid muscle soreness while running?" <2> + } + } + } + }, + { + "standard": { <3> + "query": { + "semantic": { + "field": "semantic_text", <4> + "query": "How to avoid muscle soreness while running?" + } + } + } + } + ] + } + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The first `standard` retriever represents the traditional lexical search. +<2> Lexical search is performed on the `content` field using the specified phrase. +<3> The second `standard` retriever refers to the semantic search. +<4> The `semantic_text` field is used to perform the semantic search. + + +After performing the hybrid search, the query will return the top 10 documents that match both semantic and lexical search criteria. The results include detailed information about each document: + +- The `index` field shows the name of the index the document belongs to. 
+ +- The `id` field is a unique identifier for the document. + +- The `score` field represents the relevance score of the document based on the search query. + +- The `semantic_text` field contains the processed embeddings generated by the inference endpoint, which reflect the meaning of the text. + +- The `content` field holds the original text, allowing for full-text (lexical) search. From 9d6368e3225ae8f4e3587d9841ddf1f84cd399fd Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:49:19 +0200 Subject: [PATCH 02/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index de237645f5c4a..72b41816354dc 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -4,7 +4,7 @@ Hybrid search with `semantic_text` ++++ -This tutorial demonstrates how to perform **hybrid search**, combining semantic search with traditional full-text search. +This tutorial demonstrates how to perform hybrid search, combining semantic search with traditional full-text search. In hybrid search, semantic search retrieves results based on the meaning of the text, while full-text search focuses on exact word matches. By combining both methods, hybrid search delivers more relevant results, particularly in cases where relying on a single approach may not be sufficient. 
From 09065f4dc953e0e18b8416f651a1aee8b99cef07 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:50:21 +0200 Subject: [PATCH 03/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 72b41816354dc..b543999adeaf6 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -22,7 +22,11 @@ PUT _inference/sparse_embedding/my-elser-endpoint <1> { "service": "elser", <2> "service_settings": { - "num_allocations": 1, + "adaptive_allocations": { <3> + "enabled": true, + "min_number_of_allocations": 3, + "max_number_of_allocations": 10 + }, "num_threads": 1 } } @@ -32,6 +36,8 @@ PUT _inference/sparse_embedding/my-elser-endpoint <1> be used and ELSER creates sparse vectors. The `inference_id` is `my-elser-endpoint`. <2> The `elser` service is used in this example. +<3> This setting enables and configures adaptive allocations. +Adaptive allocations make it possible for ELSER to automatically scale up or down resources based on the current load on the process. 
[NOTE] ==== From ad429c2a6278c05fc44b30937e4d83ba5bd5beb6 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:51:09 +0200 Subject: [PATCH 04/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index b543999adeaf6..94b3d9b53d27a 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -50,8 +50,7 @@ You can check the download progress in the {ml-app} UI. [[hybrid-search-create-index-mapping]] ==== Create an index mapping for hybrid search -The destination index (where the search will be performed) should contain both the embeddings for semantic search and the original text field for full-text search. This structure enables the combination of semantic search and full-text search, allowing the search engine to consider both the -meaning and the exact words in the query. +The destination index will contain both the embeddings for semantic search and the original text field for full-text search. This structure enables the combination of semantic search and full-text search. 
[source,console] ------------------------------------------------------------ From 287cfc563c2491b33def404f61b0fbef7a30f1ae Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:51:37 +0200 Subject: [PATCH 05/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 94b3d9b53d27a..33e17dd4e50f6 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -72,7 +72,7 @@ PUT semantic-hybrid-embeddings ------------------------------------------------------------ // TEST[skip:TBD] <1> The name of the field to contain the generated embeddings for semantic search. -<2> The `inference_id` is the inference endpoint that generates the embeddings based on the input text. +<2> The identifier of the inference endpoint that generates the embeddings based on the input text. <3> The name of the field to contain the original text for lexical search. <4> The `copy_to` field copies the contents of the `content` field into the `semantic_text` field, enabling hybrid search functionality. 
From ecea4958a6b170400f985d959c333daf23d8f827 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:52:36 +0200 Subject: [PATCH 06/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 33e17dd4e50f6..2ea0496fb45c4 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -74,7 +74,7 @@ PUT semantic-hybrid-embeddings <1> The name of the field to contain the generated embeddings for semantic search. <2> The identifier of the inference endpoint that generates the embeddings based on the input text. <3> The name of the field to contain the original text for lexical search. -<4> The `copy_to` field copies the contents of the `content` field into the `semantic_text` field, enabling hybrid search functionality. +<4> The textual data stored in the `content` field will be copied to `semantic_text` and processed by the {infer} endpoint. 
[NOTE] ==== From 551657e3708a8f23c7eecd378e4fc4b08a74f015 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:53:22 +0200 Subject: [PATCH 07/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 2ea0496fb45c4..d740d1ef225d0 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -98,7 +98,7 @@ Download the file and upload it to your cluster using the {kibana-ref}/connect-t [[hybrid-search-reindex-data]] ==== Reindex the data for hybrid search -Reindex the data from the `test-data` index into the `semantic-hybrid-embeddings` index to enable both lexical and semantic search. +Reindex the data from the `test-data` index into the `semantic-embeddings` index. The data in the `content` field of the source index is copied into the `content` field of the destination index. The `copy_to` functionality ensures that the content is duplicated into the `semantic_text` field, where it will be processed by the inference endpoint to generate embeddings. 
From 493723007cc1b8664ec432eeb41f83a9f102cf0d Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:54:00 +0200 Subject: [PATCH 08/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index d740d1ef225d0..ea730aac9b0b9 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -100,7 +100,7 @@ Download the file and upload it to your cluster using the {kibana-ref}/connect-t Reindex the data from the `test-data` index into the `semantic-embeddings` index. The data in the `content` field of the source index is copied into the `content` field of the destination index. -The `copy_to` functionality ensures that the content is duplicated into the `semantic_text` field, where it will be processed by the inference endpoint to generate embeddings. +The `copy_to` parameter set in the index mapping creation ensures that the content is copied into the `semantic_text` field. The data is processed by the {infer} endpoint at ingest time to generate embeddings. 
[NOTE] ==== From 6907b998eb060fd17c6e3ce3d0d00feed3d10747 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:54:15 +0200 Subject: [PATCH 09/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index ea730aac9b0b9..07debe2e37952 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -118,7 +118,7 @@ POST _reindex?wait_for_completion=false "size": 10 <1> }, "dest": { - "index": "semantic-hybrid-embeddings" + "index": "semantic-embeddings" } } ------------------------------------------------------------ From ebeb008cef5b4ba88fb78b4d6687237f50e96fcf Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:54:59 +0200 Subject: [PATCH 10/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 07debe2e37952..1695e53eef8d6 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -149,7 +149,7 @@ POST _tasks//_cancel [[hybrid-search-perform-search]] 
==== Perform hybrid search -After reindexing the data into the `semantic-hybrid-embeddings` index, you can perform hybrid search, which combines both semantic and lexical search. You can perform hybrid search using <>. RRF is a technique that merges the rankings from both semantic and lexical queries, giving more weight to results that rank high in either search. This ensures that the final results are balanced and relevant. +After reindexing the data into the `semantic-embeddings` index, you can perform hybrid search by using <<rrf,reciprocal rank fusion (RRF)>>. RRF is a technique that merges the rankings from both semantic and lexical queries, giving more weight to results that rank high in either search. This ensures that the final results are balanced and relevant. [source,console] ------------------------------------------------------------ From b9db8525579c983115b75a8d0777f7f16cab0a0d Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 08:55:11 +0200 Subject: [PATCH 11/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 1695e53eef8d6..7911d04d2b8c9 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -153,7 +153,7 @@ After reindexing the data into the `semantic-embeddings` index, you can perform [source,console] ------------------------------------------------------------ -GET semantic-hybrid-embeddings/_search +GET semantic-embeddings/_search { "retriever": { "rrf": { From 205ab876cc8583cad597df893fd9cf80b117560e Mon Sep 
17 00:00:00 2001 From: kosabogi Date: Mon, 14 Oct 2024 11:19:37 +0200 Subject: [PATCH 12/20] Adds search response example --- .../semantic-text-hybrid-search | 77 ++++++++++++++++--- 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 7911d04d2b8c9..668f36b93c688 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -54,7 +54,7 @@ The destination index will contain both the embeddings for semantic search and t [source,console] ------------------------------------------------------------ -PUT semantic-hybrid-embeddings +PUT semantic-embeddings { "mappings": { "properties": { @@ -191,12 +191,69 @@ GET semantic-embeddings/_search After performing the hybrid search, the query will return the top 10 documents that match both semantic and lexical search criteria. The results include detailed information about each document: -- The `index` field shows the name of the index the document belongs to. - -- The `id` field is a unique identifier for the document. - -- The `score` field represents the relevance score of the document based on the search query. - -- The `semantic_text` field contains the processed embeddings generated by the inference endpoint, which reflect the meaning of the text. - -- The `content` field holds the original text, allowing for full-text (lexical) search. 
+[source,console] +------------------------------------------------------------ +{ + "took": 107, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 473, + "relation": "eq" + }, + "max_score": null, + "hits": [ + { + "_index": "semantic-embeddings", <1> + "_id": "wv65epIBEMBRnhfTsOFM", <2> + "_score": 0.032786883, <3> + "_rank": 1, + "_source": { + "semantic_text": { <4> + "inference": { + "inference_id": "my-elser-endpoint", + "model_settings": { + "task_type": "sparse_embedding" + }, + "chunks": [ + { + "text": "What so many out there do not realize is the importance of what you do after you work out. You may have done the majority of the work, but how you treat your body in the minutes and hours after you exercise has a direct effect on muscle soreness, muscle strength and growth, and staying hydrated. Cool Down. After your last exercise, your workout is not over. The first thing you need to do is cool down. Even if running was all that you did, you still should do light cardio for a few minutes. This brings your heart rate down at a slow and steady pace, which helps you avoid feeling sick after a workout.", + "embeddings": { + "exercise": 1.571044, + "after": 1.3603843, + "sick": 1.3281639, + "cool": 1.3227621, + "muscle": 1.2645415, + "sore": 1.2561599, + "cooling": 1.2335974, + "running": 1.1750668, + "hours": 1.1104802, + "out": 1.0991782, + "##io": 1.0794281, + "last": 1.0474665, + (...) + } + } + ] + } + }, + "id": 8408852, + "content": "What so many out there do not realize is the importance of (...)" <5> + } + } + ] + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The name of the index the document belongs to. +<2> The unique identifier of the document. +<3> The relevance score of the document based on the search query. +<4> The processed embeddings generated by the inference endpoint. 
+<5> The original text, allowing for full-text search. From eaad4bb283ed842a036401781c876a49862a6c9d Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:38:34 +0200 Subject: [PATCH 13/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 668f36b93c688..250569c2a2176 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -191,7 +191,7 @@ GET semantic-embeddings/_search After performing the hybrid search, the query will return the top 10 documents that match both semantic and lexical search criteria. 
The results include detailed information about each document: -[source,console] +[source,console-result] ------------------------------------------------------------ { "took": 107, From 4a2e268a085bc1f1004d698dca586a36a5a068a9 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:38:56 +0200 Subject: [PATCH 14/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 250569c2a2176..22f313c9b89e2 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -251,7 +251,7 @@ After performing the hybrid search, the query will return the top 10 documents t } } ------------------------------------------------------------ -// TEST[skip:TBD] +// NOTCONSOLE <1> The name of the index the document belongs to. <2> The unique identifier of the document. <3> The relevance score of the document based on the search query. 
From 09eb168b6971062ef829121792ee7a25d041d8b6 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:39:29 +0200 Subject: [PATCH 15/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 22f313c9b89e2..49f8b6460a4ad 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -210,7 +210,7 @@ After performing the hybrid search, the query will return the top 10 documents t "max_score": null, "hits": [ { - "_index": "semantic-embeddings", <1> + "_index": "semantic-embeddings", "_id": "wv65epIBEMBRnhfTsOFM", <2> "_score": 0.032786883, <3> "_rank": 1, From 772a0027b9b9b12cfbb1daded7be8f9e81f5b380 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:39:40 +0200 Subject: [PATCH 16/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 49f8b6460a4ad..16f369c8934fb 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -211,7 +211,7 @@ After 
performing the hybrid search, the query will return the top 10 documents t "hits": [ { "_index": "semantic-embeddings", - "_id": "wv65epIBEMBRnhfTsOFM", <2> + "_id": "wv65epIBEMBRnhfTsOFM", "_score": 0.032786883, <3> "_rank": 1, "_source": { From 5d00fe8583b792219bb783e40ba7b478c411576b Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:39:48 +0200 Subject: [PATCH 17/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 16f369c8934fb..9cd5b2c0997a5 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -212,7 +212,7 @@ After performing the hybrid search, the query will return the top 10 documents t { "_index": "semantic-embeddings", "_id": "wv65epIBEMBRnhfTsOFM", - "_score": 0.032786883, <3> + "_score": 0.032786883, "_rank": 1, "_source": { "semantic_text": { <4> From 3cb65bba4585e4f754ac584454b3342282def016 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:39:58 +0200 Subject: [PATCH 18/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search 
b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 9cd5b2c0997a5..6ef4ebda9d5b7 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -215,7 +215,7 @@ After performing the hybrid search, the query will return the top 10 documents t "_score": 0.032786883, "_rank": 1, "_source": { - "semantic_text": { <4> + "semantic_text": { "inference": { "inference_id": "my-elser-endpoint", "model_settings": { From a10b97fbdf007c2922d3fbb2a8ebc90a6da9b2c6 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:40:08 +0200 Subject: [PATCH 19/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 6ef4ebda9d5b7..602f359f34684 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -244,7 +244,7 @@ After performing the hybrid search, the query will return the top 10 documents t } }, "id": 8408852, - "content": "What so many out there do not realize is the importance of (...)" <5> + "content": "What so many out there do not realize is the importance of (...)" } } ] From 3d2a867ff578cafd957424596dd3d3b8dea3e2e9 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Mon, 14 Oct 2024 12:40:23 +0200 Subject: [PATCH 20/20] Update docs/reference/search/search-your-data/semantic-text-hybrid-search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Co-authored-by: István Zoltán Szabó --- .../search/search-your-data/semantic-text-hybrid-search | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/reference/search/search-your-data/semantic-text-hybrid-search b/docs/reference/search/search-your-data/semantic-text-hybrid-search index 602f359f34684..c56b283434df5 100644 --- a/docs/reference/search/search-your-data/semantic-text-hybrid-search +++ b/docs/reference/search/search-your-data/semantic-text-hybrid-search @@ -252,8 +252,3 @@ After performing the hybrid search, the query will return the top 10 documents t } ------------------------------------------------------------ // NOTCONSOLE -<1> The name of the index the document belongs to. -<2> The unique identifier of the document. -<3> The relevance score of the document based on the search query. -<4> The processed embeddings generated by the inference endpoint. -<5> The original text, allowing for full-text search.