From 6e8c09cb03462efbca738769a17ac914deae70a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Mon, 21 Oct 2024 15:42:34 +0200
Subject: [PATCH 1/6] [DOCS] Adds inference default endpoints to docs.

---
 .../inference/inference-apis.asciidoc | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc
index b291b464be498..2bca105e0b67d 100644
--- a/docs/reference/inference/inference-apis.asciidoc
+++ b/docs/reference/inference/inference-apis.asciidoc
@@ -34,6 +34,24 @@ Elastic –, then create an {infer} endpoint by the <>.
 Now use <> to perform
 <> on your data.
 
+
+[discrete]
+[[default-endpoints]]
+=== Default {infer} endpoints
+
+Your {es} deployment contains some preconfigured {infer} endpoints that make it easier for you to use them when defining `semantic_text` fields or {infer} processors.
+The following list contains the default {infer} endpoints listed by `inference_id`:
+
+* `.elser-2`: uses the {ml-docs}/ml-nlp-elser.html[ELSER] built-in trained model for `sparse_embedding` tasks (recommended for English language texts)
+* `.multi-e5-small`: uses the {ml-docs}/ml-nlp-e5.html[E5] built-in trained model for `text_embedding` tasks (recommended for non-English language texts)
+
+Use the `inference_id` of the endpoint in a <> field definition or when creating an <>.
+The API call will automatically download and deploy the model, which might take a couple of minutes.
+Default {infer} endpoints have {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[adaptive allocations] enabled.
+For these models, the minimum number of allocations is `0` and the maximum is `8`.
+If there is no {infer} activity that uses the endpoint, the number of allocations will scale down to `0` automatically after 15 minutes.
+
+
 include::delete-inference.asciidoc[]
 include::get-inference.asciidoc[]
 include::post-inference.asciidoc[]

From ed386cf8115454e2ce2543ceb6b44f30fa650905 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Mon, 21 Oct 2024 15:58:48 +0200
Subject: [PATCH 2/6] [DOCS] Documents that the ELSER endpoint is deprecated.

---
 .../inference/service-elasticsearch.asciidoc | 48 ++++++++++++++-----
 .../inference/service-elser.asciidoc         |  1 +
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc
index efa0c78b8356f..beafbd58d9b9f 100644
--- a/docs/reference/inference/service-elasticsearch.asciidoc
+++ b/docs/reference/inference/service-elasticsearch.asciidoc
@@ -1,12 +1,9 @@
 [[infer-service-elasticsearch]]
 === Elasticsearch {infer} service
 
-Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch`
-service.
+Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` service.
 
-NOTE: If you use the E5 model through the `elasticsearch` service, the API
-request will automatically download and deploy the model if it isn't downloaded
-yet.
+NOTE: If you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet.
 
 
 [discrete]
@@ -71,8 +68,8 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-min-number]
 `model_id`:::
 (Required, string)
 The name of the model to use for the {infer} task.
-It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5) or a text embedding model already
-{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
+It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5), a text embedding model already
+{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland], or the `deployment_id` of an existing trained model deployment.
 
 `num_allocations`:::
 (Required, integer)
@@ -98,15 +95,44 @@ Returns the document instead of only the index. Defaults to `true`.
 =====
 
 
+[discrete]
+[[inference-example-elasticsearch-elser]]
+==== ELSER via the `elasticsearch` service
+
+The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type.
+
+The API request below will automatically download the ELSER model if it isn't already downloaded and then deploy the model.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/sparse_embedding/my-elser-model
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "adaptive_allocations": { <1>
+      "enabled": true,
+      "min_number_of_allocations": 3,
+      "max_number_of_allocations": 10
+    },
+    "num_threads": 1,
+    "model_id": ".elser_model_2" <2>
+  }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> Adaptive allocations will be enabled with a minimum of 3 and a maximum of 10 allocations.
+<2> The `model_id` must be the ID of one of the built-in ELSER models.
+Valid values are `.elser_model_2` and `.elser_model_2_linux-x86_64`.
+For further details, refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation].
+
+
 [discrete]
 [[inference-example-elasticsearch]]
 ==== E5 via the `elasticsearch` service
 
-The following example shows how to create an {infer} endpoint called
-`my-e5-model` to perform a `text_embedding` task type.
+The following example shows how to create an {infer} endpoint called `my-e5-model` to perform a `text_embedding` task type.
 
-The API request below will automatically download the E5 model if it isn't
-already downloaded and then deploy the model.
+The API request below will automatically download the E5 model if it isn't already downloaded and then deploy the model.
 
 [source,console]
 ------------------------------------------------------------
diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc
index 6afc2a2e3ef65..ac3127e04b538 100644
--- a/docs/reference/inference/service-elser.asciidoc
+++ b/docs/reference/inference/service-elser.asciidoc
@@ -2,6 +2,7 @@
 === ELSER {infer} service
 
 Creates an {infer} endpoint to perform an {infer} task with the `elser` service.
+You can also deploy ELSER by using the <>.
 
 NOTE: The API request will automatically download and deploy the ELSER model if it isn't
 already downloaded.

From 1204ed9df0d74907e4f2cd42f4fe6b11b60436bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Mon, 21 Oct 2024 16:40:45 +0200
Subject: [PATCH 3/6] [DOCS] Fixes typo.

---
 docs/reference/inference/service-elser.asciidoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc
index ac3127e04b538..521fab0375584 100644
--- a/docs/reference/inference/service-elser.asciidoc
+++ b/docs/reference/inference/service-elser.asciidoc
@@ -129,7 +129,7 @@ If using the Python client, you can set the `timeout` parameter to a higher valu
 
 [discrete]
 [[inference-example-elser-adaptive-allocation]]
-==== Setting adaptive allocation for the ELSER service
+==== Setting adaptive allocations for the ELSER service
 
 NOTE: For more information on how to optimize your ELSER endpoints, refer to {ml-docs}/ml-nlp-elser.html#elser-recommendations[the ELSER recommendations] section in the model documentation.
 To learn more about model autoscaling, refer to the {ml-docs}/ml-nlp-auto-scale.html[trained model autoscaling] page.

From 8846dac638a964ef408514ca760e7ece655f116b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 24 Oct 2024 09:44:22 +0200
Subject: [PATCH 4/6] Apply suggestions from code review

Co-authored-by: David Kyle
---
 docs/reference/inference/inference-apis.asciidoc        | 4 ++--
 docs/reference/inference/service-elasticsearch.asciidoc | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc
index 2bca105e0b67d..0c36fa114cabb 100644
--- a/docs/reference/inference/inference-apis.asciidoc
+++ b/docs/reference/inference/inference-apis.asciidoc
@@ -42,8 +42,8 @@ Now use <> to perform
 Your {es} deployment contains some preconfigured {infer} endpoints that make it easier for you to use them when defining `semantic_text` fields or {infer} processors.
 The following list contains the default {infer} endpoints listed by `inference_id`:
 
-* `.elser-2`: uses the {ml-docs}/ml-nlp-elser.html[ELSER] built-in trained model for `sparse_embedding` tasks (recommended for English language texts)
-* `.multi-e5-small`: uses the {ml-docs}/ml-nlp-e5.html[E5] built-in trained model for `text_embedding` tasks (recommended for non-English language texts)
+* `.elser-2-elasticsearch`: uses the {ml-docs}/ml-nlp-elser.html[ELSER] built-in trained model for `sparse_embedding` tasks (recommended for English language texts)
+* `.multilingual-e5-small-elasticsearch`: uses the {ml-docs}/ml-nlp-e5.html[E5] built-in trained model for `text_embedding` tasks (recommended for non-English language texts)
 
 Use the `inference_id` of the endpoint in a <> field definition or when creating an <>.
 The API call will automatically download and deploy the model, which might take a couple of minutes.
diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc
index beafbd58d9b9f..53a89bd395ce6 100644
--- a/docs/reference/inference/service-elasticsearch.asciidoc
+++ b/docs/reference/inference/service-elasticsearch.asciidoc
@@ -69,7 +69,7 @@
 (Required, string)
 The name of the model to use for the {infer} task.
 It can be the ID of either a built-in model (for example, `.multilingual-e5-small` for E5), a text embedding model already
-{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland], or the `deployment_id` of an existing trained model deployment.
+{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland].
 
 `num_allocations`:::
 (Required, integer)

From 16fde4e47ef9480538a62903b1766d936662f42d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 24 Oct 2024 09:59:11 +0200
Subject: [PATCH 5/6] [DOCS] Addresses feedback.

---
 .../inference/service-elasticsearch.asciidoc | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc
index 53a89bd395ce6..370bf60409a87 100644
--- a/docs/reference/inference/service-elasticsearch.asciidoc
+++ b/docs/reference/inference/service-elasticsearch.asciidoc
@@ -53,6 +53,11 @@ These settings are specific to the `elasticsearch` service.
 (Optional, object)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation]
 
+`deployment_id`:::
+(Optional, string)
+The `deployment_id` of an existing trained model deployment.
+When `deployment_id` is used, the `model_id` is optional.
+
 `enabled`::::
 (Optional, Boolean)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled]
@@ -211,3 +216,46 @@ PUT _inference/text_embedding/my-e5-model
 }
 ------------------------------------------------------------
 // TEST[skip:TBD]
+
+
+[discrete]
+[[inference-example-existing-deployment]]
+==== Using an existing model deployment with the `elasticsearch` service
+
+The following example shows how to use an existing model deployment when creating an {infer} endpoint.
+
+[source,console]
+------------------------------------------------------------
+PUT _inference/sparse_embedding/use_existing_deployment
+{
+  "service": "elasticsearch",
+  "service_settings": {
+    "deployment_id": ".elser_model_2" <1>
+  }
+}
+------------------------------------------------------------
+// TEST[skip:TBD]
+<1> The `deployment_id` of the existing model deployment.
+
+The API response contains the properties of `model_id`, and the threads and allocations settings:
+
+[source,console-result]
+------------------------------------------------------------
+{
+  "inference_id": "use_existing_deployment",
+  "task_type": "sparse_embedding",
+  "service": "elasticsearch",
+  "service_settings": {
+    "num_allocations": 2,
+    "num_threads": 1,
+    "model_id": ".elser_model_2",
+    "deployment_id": ".elser_model_2"
+  },
+  "chunking_settings": {
+    "strategy": "sentence",
+    "max_chunk_size": 250,
+    "sentence_overlap": 1
+  }
+}
+------------------------------------------------------------
+// NOTCONSOLE
\ No newline at end of file

From d0a607f35483ce9ad8e6f8e4b6fcd9f39831549f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?=
Date: Thu, 24 Oct 2024 13:17:40 +0200
Subject: [PATCH 6/6] Apply suggestions from code review

Co-authored-by: David Kyle
---
 docs/reference/inference/inference-apis.asciidoc        | 2 +-
 docs/reference/inference/service-elasticsearch.asciidoc | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc
index 0c36fa114cabb..ddcff1abc7dce 100644
--- a/docs/reference/inference/inference-apis.asciidoc
+++ b/docs/reference/inference/inference-apis.asciidoc
@@ -48,7 +48,7 @@ The following list contains the default {infer} endpoints listed by `inference_i
 Use the `inference_id` of the endpoint in a <> field definition or when creating an <>.
 The API call will automatically download and deploy the model, which might take a couple of minutes.
 Default {infer} endpoints have {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[adaptive allocations] enabled.
-For these models, the minimum number of allocations is `0` and the maximum is `8`.
+For these models, the minimum number of allocations is `0`.
 If there is no {infer} activity that uses the endpoint, the number of allocations will scale down to `0` automatically after 15 minutes.
 
 
diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc
index 370bf60409a87..259779a12134d 100644
--- a/docs/reference/inference/service-elasticsearch.asciidoc
+++ b/docs/reference/inference/service-elasticsearch.asciidoc
@@ -116,7 +116,7 @@ PUT _inference/sparse_embedding/my-elser-model
   "service_settings": {
     "adaptive_allocations": { <1>
       "enabled": true,
-      "min_number_of_allocations": 3,
+      "min_number_of_allocations": 1,
       "max_number_of_allocations": 10
     },
     "num_threads": 1,
@@ -125,7 +125,7 @@ PUT _inference/sparse_embedding/my-elser-model
 }
 ------------------------------------------------------------
 // TEST[skip:TBD]
-<1> Adaptive allocations will be enabled with a minimum of 3 and a maximum of 10 allocations.
+<1> Adaptive allocations will be enabled with a minimum of 1 and a maximum of 10 allocations.
 <2> The `model_id` must be the ID of one of the built-in ELSER models.
 Valid values are `.elser_model_2` and `.elser_model_2_linux-x86_64`.
 For further details, refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation].
@@ -237,7 +237,7 @@ PUT _inference/sparse_embedding/use_existing_deployment
 // TEST[skip:TBD]
 <1> The `deployment_id` of the existing model deployment.
 
-The API response contains the properties of `model_id`, and the threads and allocations settings:
+The API response contains the `model_id`, and the threads and allocations settings from the model deployment:
 
 [source,console-result]
 ------------------------------------------------------------
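
As a minimal sketch of how the default endpoints documented above can be used, a `semantic_text` field can reference the default ELSER endpoint through its `inference_id`. The index and field names below are illustrative placeholders, not part of the documented API:

[source,console]
------------------------------------------------------------
PUT my-index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "semantic_text",
        "inference_id": ".elser-2-elasticsearch" <1>
      }
    }
  }
}
------------------------------------------------------------
// TEST[skip:TBD]
<1> The `inference_id` of the default ELSER endpoint. No `PUT _inference` call is needed beforehand; the first use triggers the automatic model download and deployment described above.

Once the model is deployed, the same endpoint can also be exercised directly through the {infer} API; the input text here is an arbitrary example:

[source,console]
------------------------------------------------------------
POST _inference/sparse_embedding/.elser-2-elasticsearch
{
  "input": "What is Elastic?"
}
------------------------------------------------------------
// TEST[skip:TBD]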