Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions docs/reference.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -2479,7 +2479,6 @@ A partial reduction is performed every time the coordinating node has received a
** *`ignore_unavailable` (Optional, boolean)*: Whether specified concrete indices should be ignored when unavailable (missing or closed)
** *`lenient` (Optional, boolean)*: Specify whether format-based query failures (such as providing text to a numeric field) should be ignored
** *`max_concurrent_shard_requests` (Optional, number)*: The maximum number of shard requests per node that this search executes concurrently. Use this value to limit the impact of the search on the cluster.
** *`min_compatible_shard_node` (Optional, string)*
** *`preference` (Optional, string)*: Specify the node or shard the operation should be performed on (default: random)
** *`request_cache` (Optional, boolean)*: Specify if request cache should be used for this request or not, defaults to true
** *`routing` (Optional, string)*: A list of specific routing values
Expand Down Expand Up @@ -2622,9 +2621,6 @@ It supports a list of values, such as `open,hidden`.
local cluster state. If `false` the list of selected nodes are computed
from the cluster state of the master node. In both cases the coordinating
node will send requests for further information to each selected node.
** *`master_timeout` (Optional, string | -1 | 0)*: The period to wait for a connection to the master node.
If the master node is not available before the timeout expires, the request fails and returns an error.
To indicate that the request should never time out, you can set it to `-1`.

[discrete]
==== allocation
Expand Down Expand Up @@ -8317,6 +8313,17 @@ These settings are specific to the `cohere` service.
These settings are specific to the task type you specified.
** *`timeout` (Optional, string | -1 | 0)*: Specifies the amount of time to wait for the inference endpoint to be created.

[discrete]
==== put_custom
Configure a custom inference endpoint

https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom[Endpoint documentation]
[source,ts]
----
client.inference.putCustom()
----


[discrete]
==== put_elasticsearch
Create an Elasticsearch inference endpoint.
Expand Down
33 changes: 33 additions & 0 deletions src/api/api/inference.ts
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,39 @@ export default class Inference {
return await this.transport.request({ path, method, querystring, body, meta }, options)
}

/**
 * Configure a custom inference endpoint.
 *
 * Sends `PUT /_inference/{task_type}/{custom_inference_id}` through the client transport.
 * `task_type` and `custom_inference_id` are consumed as path parts, `body` is forwarded as the
 * request body, and every other key of `params` is sent as a query-string parameter.
 *
 * NOTE(review): the request type is still `T.TODO`, so body fields given at the top level of
 * `params` cannot be told apart from query parameters; pass the payload under `body`.
 *
 * @param params - Request parameters; must contain `task_type` and `custom_inference_id`.
 * @param options - Per-request transport options.
 * @returns The value resolved by `transport.request` for this call.
 * @throws TypeError if `task_type` or `custom_inference_id` is missing.
 * @see {@link https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom | Elasticsearch API documentation}
 */
async putCustom (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithOutMeta): Promise<T.TODO>
async putCustom (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptionsWithMeta): Promise<TransportResult<T.TODO, unknown>>
async putCustom (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise<T.TODO>
async putCustom (this: That, params?: T.TODO | TB.TODO, options?: TransportRequestOptions): Promise<any> {
  const acceptedPath: string[] = ['task_type', 'custom_inference_id']
  const querystring: Record<string, any> = {}

  params = params ?? {}

  // Fail early with a readable message instead of the opaque
  // "cannot read properties of undefined" raised while building the path.
  for (const name of acceptedPath) {
    if (params[name] == null) {
      throw new TypeError(`Missing required parameter: ${name}`)
    }
  }

  // Forward the caller-supplied payload; previously it was discarded and the
  // PUT request was always sent without a body.
  const body = params.body

  for (const key in params) {
    if (acceptedPath.includes(key)) {
      continue
    } else if (key !== 'body') {
      querystring[key] = params[key]
    }
  }

  const method = 'PUT'
  const path = `/_inference/${encodeURIComponent(params.task_type.toString())}/${encodeURIComponent(params.custom_inference_id.toString())}`
  const meta: TransportRequestMetadata = {
    name: 'inference.put_custom',
    pathParts: {
      task_type: params.task_type,
      custom_inference_id: params.custom_inference_id
    }
  }
  return await this.transport.request({ path, method, querystring, body, meta }, options)
}

/**
* Create an Elasticsearch inference endpoint. Create an inference endpoint to perform an inference task with the `elasticsearch` service. > info > Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the endpoints using the API if you want to customize the settings. If you use the ELSER or the E5 model through the `elasticsearch` service, the API request will automatically download and deploy the model if it isn't downloaded yet. > info > You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value. After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`. Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
* @see {@link https://www.elastic.co/guide/en/elasticsearch/reference/8.18/infer-service-elasticsearch.html | Elasticsearch API documentation}
Expand Down
7 changes: 3 additions & 4 deletions src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3489,9 +3489,10 @@ export interface AggregationsFiltersAggregation extends AggregationsBucketAggreg
}

/**
 * Explicitly declared fields of `AggregationsFiltersBucket`, which intersects this
 * interface with an index signature for additional properties.
 * Inherits everything from `AggregationsMultiBucketBase`.
 */
export interface AggregationsFiltersBucketKeys extends AggregationsMultiBucketBase {
/** Optional string key of the bucket (presumably the filter name for keyed filters; confirm against the API specification). */
key?: string
}
export type AggregationsFiltersBucket = AggregationsFiltersBucketKeys
& { [property: string]: AggregationsAggregate | long }
& { [property: string]: AggregationsAggregate | string | long }

export interface AggregationsFormatMetricAggregationBase extends AggregationsMetricAggregationBase {
format?: string
Expand Down Expand Up @@ -4657,7 +4658,7 @@ export interface AnalysisEdgeNGramTokenizer extends AnalysisTokenizerBase {
custom_token_chars?: string
max_gram?: integer
min_gram?: integer
token_chars?: string | AnalysisTokenChar[]
token_chars?: AnalysisTokenChar[]
}

export interface AnalysisElisionTokenFilter extends AnalysisTokenFilterBase {
Expand Down Expand Up @@ -6933,7 +6934,6 @@ export interface AsyncSearchSubmitRequest extends RequestBase {
ignore_unavailable?: boolean
lenient?: boolean
max_concurrent_shard_requests?: long
min_compatible_shard_node?: VersionString
preference?: string
request_cache?: boolean
routing?: Routing
Expand Down Expand Up @@ -7122,7 +7122,6 @@ export interface CatAliasesRequest extends CatCatRequestBase {
s?: Names
expand_wildcards?: ExpandWildcards
local?: boolean
master_timeout?: Duration
}

export type CatAliasesResponse = CatAliasesAliasesRecord[]
Expand Down
7 changes: 3 additions & 4 deletions src/api/typesWithBodyKey.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3566,9 +3566,10 @@ export interface AggregationsFiltersAggregation extends AggregationsBucketAggreg
}

/**
 * Explicitly declared fields of `AggregationsFiltersBucket`, which intersects this
 * interface with an index signature for additional properties.
 * Inherits everything from `AggregationsMultiBucketBase`.
 */
export interface AggregationsFiltersBucketKeys extends AggregationsMultiBucketBase {
/** Optional string key of the bucket (presumably the filter name for keyed filters; confirm against the API specification). */
key?: string
}
export type AggregationsFiltersBucket = AggregationsFiltersBucketKeys
& { [property: string]: AggregationsAggregate | long }
& { [property: string]: AggregationsAggregate | string | long }

export interface AggregationsFormatMetricAggregationBase extends AggregationsMetricAggregationBase {
format?: string
Expand Down Expand Up @@ -4734,7 +4735,7 @@ export interface AnalysisEdgeNGramTokenizer extends AnalysisTokenizerBase {
custom_token_chars?: string
max_gram?: integer
min_gram?: integer
token_chars?: string | AnalysisTokenChar[]
token_chars?: AnalysisTokenChar[]
}

export interface AnalysisElisionTokenFilter extends AnalysisTokenFilterBase {
Expand Down Expand Up @@ -7010,7 +7011,6 @@ export interface AsyncSearchSubmitRequest extends RequestBase {
ignore_unavailable?: boolean
lenient?: boolean
max_concurrent_shard_requests?: long
min_compatible_shard_node?: VersionString
preference?: string
request_cache?: boolean
routing?: Routing
Expand Down Expand Up @@ -7203,7 +7203,6 @@ export interface CatAliasesRequest extends CatCatRequestBase {
s?: Names
expand_wildcards?: ExpandWildcards
local?: boolean
master_timeout?: Duration
}

export type CatAliasesResponse = CatAliasesAliasesRecord[]
Expand Down
Loading