|
| 1 | +[[infer-service-jinaai]] |
| 2 | +=== JinaAI {infer} service |
| 3 | + |
| 4 | +Creates an {infer} endpoint to perform an {infer} task with the `jinaai` service. |
| 5 | + |
| 6 | + |
| 7 | +[discrete] |
| 8 | +[[infer-service-jinaai-api-request]] |
| 9 | +==== {api-request-title} |
| 10 | + |
| 11 | +`PUT /_inference/<task_type>/<inference_id>` |
| 12 | + |
| 13 | +[discrete] |
| 14 | +[[infer-service-jinaai-api-path-params]] |
| 15 | +==== {api-path-parms-title} |
| 16 | + |
| 17 | +`<inference_id>`:: |
| 18 | +(Required, string) |
| 19 | +include::inference-shared.asciidoc[tag=inference-id] |
| 20 | + |
| 21 | +`<task_type>`:: |
| 22 | +(Required, string) |
| 23 | +include::inference-shared.asciidoc[tag=task-type] |
| 24 | ++ |
| 25 | +-- |
| 26 | +Available task types: |
| 27 | + |
| 28 | +* `text_embedding`, |
| 29 | +* `rerank`. |
| 30 | +-- |
| 31 | + |
| 32 | +[discrete] |
| 33 | +[[infer-service-jinaai-api-request-body]] |
| 34 | +==== {api-request-body-title} |
| 35 | + |
| 36 | +`chunking_settings`:: |
| 37 | +(Optional, object) |
| 38 | +include::inference-shared.asciidoc[tag=chunking-settings] |
| 39 | + |
| 40 | +`max_chunk_size`::: |
| 41 | +(Optional, integer) |
| 42 | +include::inference-shared.asciidoc[tag=chunking-settings-max-chunking-size] |
| 43 | + |
| 44 | +`overlap`::: |
| 45 | +(Optional, integer) |
| 46 | +include::inference-shared.asciidoc[tag=chunking-settings-overlap] |
| 47 | + |
| 48 | +`sentence_overlap`::: |
| 49 | +(Optional, integer) |
| 50 | +include::inference-shared.asciidoc[tag=chunking-settings-sentence-overlap] |
| 51 | + |
| 52 | +`strategy`::: |
| 53 | +(Optional, string) |
| 54 | +include::inference-shared.asciidoc[tag=chunking-settings-strategy] |
| 55 | + |
| 56 | +`service`:: |
| 57 | +(Required, string) |
| 58 | +The type of service supported for the specified task type. In this case, |
| 59 | +`jinaai`. |
| 60 | + |
| 61 | +`service_settings`:: |
| 62 | +(Required, object) |
| 63 | +include::inference-shared.asciidoc[tag=service-settings] |
| 64 | ++ |
| 65 | +-- |
| 66 | +These settings are specific to the `jinaai` service. |
| 67 | +-- |
| 68 | + |
| 69 | +`api_key`::: |
| 70 | +(Required, string) |
| 71 | +A valid API key for your JinaAI account. |
| 72 | +You can find it at https://jina.ai/embeddings/. |
| 73 | ++ |
| 74 | +-- |
| 75 | +include::inference-shared.asciidoc[tag=api-key-admonition] |
| 76 | +-- |
| 77 | + |
| 78 | +`rate_limit`::: |
| 79 | +(Optional, object) |
| 80 | +The default rate limit for the `jinaai` service is 2000 requests per minute for all task types. |
| 81 | +You can modify this using the `requests_per_minute` setting in your service settings: |
| 82 | ++ |
| 83 | +-- |
| 84 | +include::inference-shared.asciidoc[tag=request-per-minute-example] |
| 85 | + |
| 86 | +More information about JinaAI's rate limits can be found at https://jina.ai/contact-sales/#rate-limit. |
| 87 | +-- |
| 88 | ++ |
| 89 | +.`service_settings` for the `rerank` task type |
| 90 | +[%collapsible%closed] |
| 91 | +===== |
| 92 | +`model_id`:: |
| 93 | +(Required, string) |
| 94 | +The name of the model to use for the {infer} task. |
| 95 | +To review the available `rerank` compatible models, refer to https://jina.ai/reranker. |
| 96 | +===== |
| 97 | ++ |
| 98 | +.`service_settings` for the `text_embedding` task type |
| 99 | +[%collapsible%closed] |
| 100 | +===== |
| 101 | +`model_id`::: |
| 102 | +(Optional, string) |
| 103 | +The name of the model to use for the {infer} task. |
| 104 | +To review the available `text_embedding` models, refer to |
| 105 | +https://jina.ai/embeddings/. |
| 106 | + |
| 107 | +`similarity`::: |
| 108 | +(Optional, string) |
| 109 | +Similarity measure. One of `cosine`, `dot_product`, `l2_norm`. |
| 110 | +The default depends on the `embedding_type`: `dot_product` for `float`, `cosine` for `int8`/`byte`. |
| 111 | +===== |
| 112 | + |
| 113 | + |
| 114 | + |
| 115 | +`task_settings`:: |
| 116 | +(Optional, object) |
| 117 | +include::inference-shared.asciidoc[tag=task-settings] |
| 118 | ++ |
| 119 | +.`task_settings` for the `rerank` task type |
| 120 | +[%collapsible%closed] |
| 121 | +===== |
| 122 | +`return_documents`:: |
| 123 | +(Optional, boolean) |
| 124 | +Specifies whether to return the document text within the results. |
| 125 | + |
| 126 | +`top_n`:: |
| 127 | +(Optional, integer) |
| 128 | +The number of most relevant documents to return. Defaults to the number of documents. |
| 129 | +If this {infer} endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. |
| 130 | +===== |
| 131 | ++ |
| 132 | +.`task_settings` for the `text_embedding` task type |
| 133 | +[%collapsible%closed] |
| 134 | +===== |
| 135 | +`task`::: |
| 136 | +(Optional, string) |
| 137 | +Specifies the task passed to the model. |
| 138 | +Valid values are: |
| 139 | +* `classification`: use it for embeddings passed through a text classifier. |
| 140 | +* `clustering`: use it for the embeddings run through a clustering algorithm. |
| 141 | +* `ingest`: use it for storing document embeddings in a vector database. |
| 142 | +* `search`: use it for storing embeddings of search queries run against a vector database to find relevant documents. |
| 143 | +===== |
| 144 | + |
| 145 | + |
| 146 | +[discrete] |
| 147 | +[[inference-example-jinaai]] |
| 148 | +==== JinaAI service examples |
| 149 | + |
| 150 | +The following examples demonstrate how to create {infer} endpoints for `text_embedding` and `rerank` tasks using the JinaAI service and use them in search requests. |
| 151 | + |
| 152 | +First, we create the `embeddings` service: |
| 153 | + |
| 154 | +[source,console] |
| 155 | +------------------------------------------------------------ |
| 156 | +PUT _inference/text_embedding/jinaai-embeddings |
| 157 | +{ |
| 158 | + "service": "jinaai", |
| 159 | + "service_settings": { |
| 160 | + "model_id": "jina-embeddings-v3", |
| 161 | + "api_key": "<api_key>" |
| 162 | + } |
| 163 | +} |
| 164 | +------------------------------------------------------------ |
| 165 | +// TEST[skip:uses ML] |
| 166 | + |
| 167 | +Then, we create the `rerank` service: |
| 168 | +[source,console] |
| 169 | +------------------------------------------------------------ |
| 170 | +PUT _inference/rerank/jinaai-rerank |
| 171 | +{ |
| 172 | + "service": "jinaai", |
| 173 | + "service_settings": { |
| 174 | + "api_key": "<api_key>", |
| 175 | + "model_id": "jina-reranker-v2-base-multilingual" |
| 176 | + }, |
| 177 | + "task_settings": { |
| 178 | + "top_n": 10, |
| 179 | + "return_documents": true |
| 180 | + } |
| 181 | +} |
| 182 | +------------------------------------------------------------ |
| 183 | +// TEST[skip:uses ML] |
| 184 | + |
| 185 | +Now we can create an index that will use the `jinaai-embeddings` service to index the documents. |
| 186 | + |
| 187 | +[source,console] |
| 188 | +------------------------------------------------------------ |
| 189 | +PUT jinaai-index |
| 190 | +{ |
| 191 | + "mappings": { |
| 192 | + "properties": { |
| 193 | + "content": { |
| 194 | + "type": "semantic_text", |
| 195 | + "inference_id": "jinaai-embeddings" |
| 196 | + } |
| 197 | + } |
| 198 | + } |
| 199 | +} |
| 200 | +------------------------------------------------------------ |
| 201 | +// TEST[skip:uses ML] |
| 202 | + |
| 203 | +[source,console] |
| 204 | +------------------------------------------------------------ |
| 205 | +PUT jinaai-index/_bulk |
| 206 | +{ "index" : { "_index" : "jinaai-index", "_id" : "1" } } |
| 207 | +{"content": "Sarah Johnson is a talented marine biologist working at the Oceanographic Institute. Her groundbreaking research on coral reef ecosystems has garnered international attention and numerous accolades."} |
| 208 | +{ "index" : { "_index" : "jinaai-index", "_id" : "2" } } |
| 209 | +{"content": "She spends months at a time diving in remote locations, meticulously documenting the intricate relationships between various marine species. "} |
| 210 | +{ "index" : { "_index" : "jinaai-index", "_id" : "3" } } |
| 211 | +{"content": "Her dedication to preserving these delicate underwater environments has inspired a new generation of conservationists."} |
| 212 | +------------------------------------------------------------ |
| 213 | +// TEST[skip:uses ML] |
| 214 | + |
| 215 | +Now, with the index created, we can search with and without the reranker service. |
| 216 | + |
| 217 | +[source,console] |
| 218 | +------------------------------------------------------------ |
| 219 | +GET jinaai-index/_search |
| 220 | +{ |
| 221 | + "query": { |
| 222 | + "semantic": { |
| 223 | + "field": "content", |
| 224 | + "query": "who inspired taking care of the sea?" |
| 225 | + } |
| 226 | + } |
| 227 | +} |
| 228 | +------------------------------------------------------------ |
| 229 | +// TEST[skip:uses ML] |
| 230 | + |
| 231 | +[source,console] |
| 232 | +------------------------------------------------------------ |
| 233 | +POST jinaai-index/_search |
| 234 | +{ |
| 235 | + "retriever": { |
| 236 | + "text_similarity_reranker": { |
| 237 | + "retriever": { |
| 238 | + "standard": { |
| 239 | + "query": { |
| 240 | + "semantic": { |
| 241 | + "field": "content", |
| 242 | + "query": "who inspired taking care of the sea?" |
| 243 | + } |
| 244 | + } |
| 245 | + } |
| 246 | + }, |
| 247 | + "field": "content", |
| 248 | +"rank_window_size": 10, |
| 249 | + "inference_id": "jinaai-rerank", |
| 250 | + "inference_text": "who inspired taking care of the sea?" |
| 251 | + } |
| 252 | + } |
| 253 | +} |
| 254 | +------------------------------------------------------------ |
| 255 | +// TEST[skip:uses ML] |
0 commit comments