diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 6d3a1a36ad407..54836ac33762d 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -81,7 +81,43 @@ retrievers) *only* the query element is allowed. [[standard-retriever-example]] ==== Example -[source,js] +//// +[source,console] +---- +PUT /restaurants +{ + "mappings": { + "properties": { + "region": { "type": "keyword" }, + "year": { "type": "keyword" }, + "vector": { + "type": "dense_vector", + "dims": 3 + } + } + } +} + +POST /restaurants/_bulk?refresh +{"index":{}} +{"region": "Austria", "year": "2019", "vector": [10, 22, 77]} +{"index":{}} +{"region": "France", "year": "2019", "vector": [10, 22, 78]} +{"index":{}} +{"region": "Austria", "year": "2020", "vector": [10, 22, 79]} +{"index":{}} +{"region": "France", "year": "2020", "vector": [10, 22, 80]} +---- +// TESTSETUP + +[source,console] +-------------------------------------------------- +DELETE /restaurants +-------------------------------------------------- +// TEARDOWN +//// + +[source,console] ---- GET /restaurants/_search { @@ -109,9 +145,8 @@ GET /restaurants/_search } } ---- -// NOTCONSOLE <1> Opens the `retriever` object. -<2> The `standard` retriever is used for definining traditional {es} queries. +<2> The `standard` retriever is used for defining traditional {es} queries. <3> The entry point for defining the search query. <4> The `bool` object allows for combining multiple query clauses logically. <5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score. 
@@ -171,9 +206,9 @@ The parameters `query_vector` and `query_vector_builder` cannot be used together [[knn-retriever-example]] ==== Example -[source,js] +[source,console] ---- -GET my-embeddings/_search +GET /restaurants/_search { "retriever": { "knn": { <1> @@ -185,8 +220,7 @@ GET my-embeddings/_search } } ---- -// NOTCONSOLE - +// TEST[continued] <1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity. <2> Specifies the field name that contains the vectors. <3> The query vector against which document vectors are compared in the `knn` search. @@ -223,7 +257,7 @@ the retriever tree. A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: -[source,js] +[source,console] ---- GET /restaurants/_search { @@ -234,7 +268,7 @@ GET /restaurants/_search "standard": { <3> "query": { "multi_match": { - "query": "San Francisco", + "query": "Austria", "fields": [ "city", "region" @@ -258,7 +292,7 @@ GET /restaurants/_search } } ---- -// NOTCONSOLE +// TEST[continued] <1> Defines a retriever tree with an RRF retriever. <2> The sub-retriever array. <3> The first sub-retriever is a `standard` retriever. @@ -272,7 +306,7 @@ GET /restaurants/_search A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF: -[source,js] +[source,console] ---- GET movies/_search { @@ -316,7 +350,7 @@ GET movies/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ELSER] [[text-similarity-reranker-retriever]] ==== Text Similarity Re-ranker Retriever @@ -390,7 +424,7 @@ A text similarity re-ranker retriever is a compound retriever. Child retrievers This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminates the need to generate and store embeddings for all indexed documents. This requires a <<infer-service-cohere,Cohere Rerank inference endpoint>> using the `rerank` task type. 
-[source,js] +[source,console] ---- GET /index/_search { @@ -414,7 +448,7 @@ GET /index/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] [discrete] [[text-similarity-reranker-retriever-example-eland]] @@ -452,7 +486,7 @@ eland_import_hub_model \ + . Create an inference endpoint for the `rerank` task + -[source,js] +[source,console] ---- PUT _inference/rerank/my-msmarco-minilm-model { @@ -464,11 +498,11 @@ PUT _inference/rerank/my-msmarco-minilm-model } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] + . Define a `text_similarity_rerank` retriever. + -[source,js] +[source,console] ---- POST movies/_search { @@ -490,7 +524,7 @@ POST movies/_search } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] + This retriever uses a standard `match` query to search the `movies` index for films tagged with the genre "drama". It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}.