|
| 1 | +[[infer-service-jinaai]] |
| 2 | +=== JinaAI {infer} service |
| 3 | + |
| 4 | +Creates an {infer} endpoint to perform an {infer} task with the `jinaai` service. |
| 5 | + |
| 6 | + |
| 7 | +[discrete] |
| 8 | +[[infer-service-jinaai-api-request]] |
| 9 | +==== {api-request-title} |
| 10 | + |
| 11 | +`PUT /_inference/<task_type>/<inference_id>` |
| 12 | + |
| 13 | +[discrete] |
| 14 | +[[infer-service-jinaai-api-path-params]] |
| 15 | +==== {api-path-parms-title} |
| 16 | + |
| 17 | +`<inference_id>`:: |
| 18 | +(Required, string) |
| 19 | +include::inference-shared.asciidoc[tag=inference-id] |
| 20 | + |
| 21 | +`<task_type>`:: |
| 22 | +(Required, string) |
| 23 | +include::inference-shared.asciidoc[tag=task-type] |
| 24 | ++ |
| 25 | +-- |
| 26 | +Available task types: |
| 27 | + |
| 28 | +* `text_embedding`, |
| 29 | +* `rerank`. |
| 30 | +-- |
| 31 | + |
| 32 | +[discrete] |
| 33 | +[[infer-service-jinaai-api-request-body]] |
| 34 | +==== {api-request-body-title} |
| 35 | + |
| 36 | +`chunking_settings`:: |
| 37 | +(Optional, object) |
| 38 | +include::inference-shared.asciidoc[tag=chunking-settings] |
| 39 | + |
| 40 | +`max_chunk_size`::: |
| 41 | +(Optional, integer) |
| 42 | +include::inference-shared.asciidoc[tag=chunking-settings-max-chunking-size] |
| 43 | + |
| 44 | +`overlap`::: |
| 45 | +(Optional, integer) |
| 46 | +include::inference-shared.asciidoc[tag=chunking-settings-overlap] |
| 47 | + |
| 48 | +`sentence_overlap`::: |
| 49 | +(Optional, integer) |
| 50 | +include::inference-shared.asciidoc[tag=chunking-settings-sentence-overlap] |
| 51 | + |
| 52 | +`strategy`::: |
| 53 | +(Optional, string) |
| 54 | +include::inference-shared.asciidoc[tag=chunking-settings-strategy] |
| 55 | + |
| 56 | +`service`:: |
| 57 | +(Required, string) |
| 58 | +The type of service supported for the specified task type. In this case, |
| 59 | +`jinaai`. |
| 60 | + |
| 61 | +`service_settings`:: |
| 62 | +(Required, object) |
| 63 | +include::inference-shared.asciidoc[tag=service-settings] |
| 64 | ++ |
| 65 | +-- |
| 66 | +These settings are specific to the `jinaai` service. |
| 67 | +-- |
| 68 | + |
| 69 | +`api_key`::: |
| 70 | +(Required, string) |
| 71 | +A valid API key of your JinaAI account. |
| 72 | +You can find it at: |
| 73 | +https://jina.ai/embeddings/. |
| 74 | ++ |
| 75 | +-- |
| 76 | +include::inference-shared.asciidoc[tag=api-key-admonition] |
| 77 | +-- |
| 78 | + |
| 79 | +`rate_limit`::: |
| 80 | +(Optional, object) |
| 81 | +By default, the `jinaai` service sets the number of requests allowed per minute to `2000`. |
| 82 | +This value is the same for all task types. |
| 83 | +To modify this, set the `requests_per_minute` setting of this object in your service settings: |
| 84 | ++ |
| 85 | +-- |
| 86 | +include::inference-shared.asciidoc[tag=request-per-minute-example] |
| 87 | + |
| 88 | +More information about JinaAI's rate limits can be found at https://jina.ai/contact-sales/#rate-limit. |
| 89 | +-- |
| 90 | ++ |
| 91 | +.`service_settings` for the `rerank` task type |
| 92 | +[%collapsible%closed] |
| 93 | +===== |
| 94 | +`model_id`:: |
| 95 | +(Optional, string) |
| 96 | +The name of the model to use for the {infer} task. |
| 97 | +To review the available `rerank` models, refer to |
| 98 | +https://jina.ai/reranker. |
| 99 | +===== |
| 100 | ++ |
| 101 | +.`service_settings` for the `text_embedding` task type |
| 102 | +[%collapsible%closed] |
| 103 | +===== |
| 104 | +`model_id`::: |
| 105 | +(Optional, string) |
| 106 | +The name of the model to use for the {infer} task. |
| 107 | +To review the available `text_embedding` models, refer to |
| 108 | +https://jina.ai/embeddings/. |
| 109 | +
|
| 110 | +`similarity`::: |
| 111 | +(Optional, string) |
| 112 | +Similarity measure. One of `cosine`, `dot_product`, `l2_norm`. |
| 113 | +Defaults based on the `embedding_type` (`float` -> `dot_product`, `int8/byte` -> `cosine`). |
| 114 | +===== |
| 115 | + |
| 116 | + |
| 117 | + |
| 118 | +`task_settings`:: |
| 119 | +(Optional, object) |
| 120 | +include::inference-shared.asciidoc[tag=task-settings] |
| 121 | ++ |
| 122 | +.`task_settings` for the `rerank` task type |
| 123 | +[%collapsible%closed] |
| 124 | +===== |
| 125 | +`return_documents`:: |
| 126 | +(Optional, boolean) |
| 127 | +Specify whether to return doc text within the results. |
| 128 | +
|
| 129 | +`top_n`:: |
| 130 | +(Optional, integer) |
| 131 | +The number of most relevant documents to return. Defaults to the number of documents. |
| 132 | +If this {infer} endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. |
| 133 | +===== |
| 134 | ++ |
| 135 | +.`task_settings` for the `text_embedding` task type |
| 136 | +[%collapsible%closed] |
| 137 | +===== |
| 138 | +`task`::: |
| 139 | +(Optional, string) |
| 140 | +Specifies the task passed to the model. |
| 141 | +Valid values are: |
| 142 | +* `classification`: use it for embeddings passed through a text classifier. |
| 143 | +* `clustering`: use it for the embeddings run through a clustering algorithm. |
| 144 | +* `ingest`: use it for storing document embeddings in a vector database. |
| 145 | +* `search`: use it for storing embeddings of search queries run against a vector database to find relevant documents. |
| 146 | +===== |
| 147 | + |
| 148 | + |
| 149 | +[discrete] |
| 150 | +[[inference-example-jinaai]] |
| 151 | +==== JinaAI service examples |
| 152 | + |
| 153 | +The following example shows how to create {infer} endpoints for the `text_embedding` and `rerank` task types and how to use them in a search application. |
| 154 | + |
| 155 | +First, we create the `embeddings` service: |
| 156 | + |
| 157 | +[source,console] |
| 158 | +------------------------------------------------------------ |
| 159 | +PUT _inference/text_embedding/jinaai-embeddings |
| 160 | +{ |
| 161 | + "service": "jinaai", |
| 162 | + "service_settings": { |
| 163 | + "model_id": "jina-embeddings-v3", |
| 164 | + "api_key": "<api_key>" |
| 165 | + }, |
| 166 | + "task_settings": {} |
| 167 | +} |
| 168 | +------------------------------------------------------------ |
| 169 | + |
| 170 | +Then, we create the `rerank` service: |
| 171 | +[source,console] |
| 172 | +------------------------------------------------------------ |
| 173 | +PUT _inference/rerank/jinaai-rerank |
| 174 | +{ |
| 175 | + "service": "jinaai", |
| 176 | + "service_settings": { |
| 177 | + "api_key": "<API-KEY>", |
| 178 | + "model_id": "jina-reranker-v2-base-multilingual" |
| 179 | + }, |
| 180 | + "task_settings": { |
| 181 | + "top_n": 10, |
| 182 | + "return_documents": true |
| 183 | + } |
| 184 | +} |
| 185 | +------------------------------------------------------------ |
| 186 | + |
| 187 | +Now we can create an index that will use the `jinaai-embeddings` service to index the documents. |
| 188 | + |
| 189 | +[source,console] |
| 190 | +------------------------------------------------------------ |
| 191 | +PUT jinaai-index |
| 192 | +{ |
| 193 | + "mappings": { |
| 194 | + "properties": { |
| 195 | + "content": { |
| 196 | + "type": "semantic_text", |
| 197 | + "inference_id": "jinaai-embeddings" |
| 198 | + } |
| 199 | + } |
| 200 | + } |
| 201 | +} |
| 202 | +------------------------------------------------------------ |
| 203 | + |
| 204 | +[source,console] |
| 205 | +------------------------------------------------------------ |
| 206 | +PUT jinaai-index/_bulk |
| 207 | +{ "index" : { "_index" : "jinaai-index", "_id" : "1" } } |
| 208 | +{"content": "Sarah Johnson is a talented marine biologist working at the Oceanographic Institute. Her groundbreaking research on coral reef ecosystems has garnered international attention and numerous accolades."} |
| 209 | +{ "index" : { "_index" : "jinaai-index", "_id" : "2" } } |
| 210 | +{"content": "She spends months at a time diving in remote locations, meticulously documenting the intricate relationships between various marine species. "} |
| 211 | +{ "index" : { "_index" : "jinaai-index", "_id" : "3" } } |
| 212 | +{"content": "Her dedication to preserving these delicate underwater environments has inspired a new generation of conservationists."} |
| 213 | +------------------------------------------------------------ |
| 214 | + |
| 215 | +Now, with the index created, we can search with and without the reranker service. |
| 216 | + |
| 217 | +[source,console] |
| 218 | +------------------------------------------------------------ |
| 219 | +GET jinaai-index/_search |
| 220 | +{ |
| 221 | + "query": { |
| 222 | + "semantic": { |
| 223 | + "field": "content", |
| 224 | + "query": "who inspired taking care of the sea?" |
| 225 | + } |
| 226 | + } |
| 227 | +} |
| 228 | +------------------------------------------------------------ |
| 229 | + |
| 230 | +[source,console] |
| 231 | +------------------------------------------------------------ |
| 232 | +POST jinaai-index/_search |
| 233 | +{ |
| 234 | + "retriever": { |
| 235 | + "text_similarity_reranker": { |
| 236 | + "retriever": { |
| 237 | + "standard": { |
| 238 | + "query": { |
| 239 | + "semantic": { |
| 240 | + "field": "content", |
| 241 | + "query": "who inspired taking care of the sea?" |
| 242 | + } |
| 243 | + } |
| 244 | + } |
| 245 | + }, |
| 246 | + "field": "content", |
| 247 | + "rank_window_size": 100, |
| 248 | + "inference_id": "jinaai-rerank", |
| 249 | + "inference_text": "who inspired taking care of the sea?" |
| 250 | + } |
| 251 | + } |
| 252 | +} |
| 253 | +------------------------------------------------------------ |
0 commit comments