Skip to content

Commit 6df8970

Browse files
committed
First draft of Alibaba Cloud inference API
1 parent b55d29e commit 6df8970

File tree

4 files changed

+156
-0
lines changed

4 files changed

+156
-0
lines changed

specification/_doc_ids/table.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,7 @@ inference-api-delete,https://www.elastic.co/docs/api/doc/elasticsearch/operation
317317
inference-api-get,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-get
318318
inference-api-post,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-inference
319319
inference-api-put,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put
320+
inference-api-put-alibabacloud,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html
320321
inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-eis.html
321322
inference-api-put-openai,https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-openai.html
322323
inference-api-put-voyageai,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-voyageai
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"inference.put_alibabacloud": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-alibabacloud-ai-search.html",
5+
"description": "Configure an AlibabaCloud AI Search inference endpoint"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/{task_type}/{alibabacloud_inference_id}",
17+
"methods": ["PUT"],
18+
"parts": {
19+
"task_type": {
20+
"type": "string",
21+
"description": "The task type"
22+
},
23+
"alibabacloud_inference_id": {
24+
"type": "string",
25+
"description": "The inference Id"
26+
}
27+
}
28+
}
29+
]
30+
},
31+
"body": {
32+
"description": "The inference endpoint's task and service settings"
33+
}
34+
}
35+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { InferenceChunkingSettings } from '@inference/_types/Services'
21+
import { RequestBase } from '@_types/Base'
22+
import { Id } from '@_types/common'
23+
24+
/**
 * Create an AlibabaCloud AI Search inference endpoint.
 *
 * Create an inference endpoint to perform an inference task with the `alibabacloud-ai-search` service.
 *
 * When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.
 * After creating the endpoint, wait for the model deployment to complete before using it.
 * To verify the deployment status, use the get trained model statistics API.
 * Look for `"state": "fully_allocated"` in the response and ensure that the `"allocation_count"` matches the `"target_allocation_count"`.
 * Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.
 * @rest_spec_name inference.put_alibabacloud
 * @availability stack since=8.16.0 stability=stable visibility=public
 * @availability serverless stability=stable visibility=public
 * @cluster_privileges manage_inference
 * @doc_id inference-api-put-alibabacloud
 */
export interface Request extends RequestBase {
  urls: [
    {
      path: '/_inference/{task_type}/{alibabacloud_inference_id}'
      methods: ['PUT']
    }
  ]
  path_parts: {
    /**
     * The type of the inference task that the model will perform.
     */
    task_type: AlibabaCloudTaskType
    /**
     * The unique identifier of the inference endpoint.
     */
    alibabacloud_inference_id: Id
  }
  body: {
    /**
     * The chunking configuration object.
     * @ext_doc_id inference-chunking
     */
    chunking_settings?: InferenceChunkingSettings
    /**
     * The type of service supported for the specified task type. In this case, `alibabacloud-ai-search`.
     */
    service: ServiceType
    /**
     * Settings used to install the inference model. These settings are specific to the `alibabacloud-ai-search` service.
     */
    service_settings: AlibabaCloudServiceSettings
    /**
     * Settings to configure the inference task.
     * These settings are specific to the task type you specified.
     */
    task_settings?: AlibabaCloudTaskSettings
  }
}
78+
79+
/**
 * Task types that can be used as the `task_type` path part when creating an
 * `alibabacloud-ai-search` inference endpoint.
 */
export enum AlibabaCloudTaskType {
  completion,
  rerank,
  sparse_embedding,
  text_embedding
}
85+
86+
/**
 * Service identifier accepted in the request body's `service` field.
 * The only member is `alibabacloud-ai-search`.
 */
export enum ServiceType {
  'alibabacloud-ai-search'
}
89+
90+
/**
 * Type of the request body's `service_settings` field.
 * Placeholder: no properties are defined yet.
 */
export class AlibabaCloudServiceSettings {
  // TBD
}
93+
94+
/**
 * Type of the request body's optional `task_settings` field.
 * Placeholder: no properties are defined yet.
 */
export class AlibabaCloudTaskSettings {
  // TBD
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* Licensed to Elasticsearch B.V. under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch B.V. licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
import { InferenceEndpointInfo } from '@inference/_types/Services'
21+
22+
/**
 * Response definition whose body is an `InferenceEndpointInfo`.
 */
export class Response {
  /** The response body. */
  body: InferenceEndpointInfo
}

0 commit comments

Comments
 (0)