Skip to content

Commit 687063f

Browse files
Refactoring inference endpoints
1 parent 85cbe3c commit 687063f

21 files changed

+542
-80
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"inference.chat_completion_unified": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html",
5+
"description": "Perform chat completion inference"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["text/event-stream"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/chat_completion/{inference_id}/_unified",
17+
"methods": ["POST"],
18+
"parts": {
19+
"inference_id": {
20+
"type": "string",
21+
"description": "The inference Id"
22+
}
23+
}
24+
}
25+
]
26+
},
27+
"body": {
28+
"description": "The inference payload"
29+
}
30+
}
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"inference.completion": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
5+
"description": "Perform completion inference"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/completion/{inference_id}",
17+
"methods": ["POST"],
18+
"parts": {
19+
"inference_id": {
20+
"type": "string",
21+
"description": "The inference Id"
22+
}
23+
}
24+
}
25+
]
26+
},
27+
"body": {
28+
"description": "The inference payload"
29+
}
30+
}
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"inference.rerank": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
5+
"description": "Perform reranking inference"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/rerank/{inference_id}",
17+
"methods": ["POST"],
18+
"parts": {
19+
"inference_id": {
20+
"type": "string",
21+
"description": "The inference Id"
22+
}
23+
}
24+
}
25+
]
26+
},
27+
"body": {
28+
"description": "The inference payload"
29+
}
30+
}
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"inference.sparse_embedding": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
5+
"description": "Perform sparse embedding inference"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/sparse_embedding/{inference_id}",
17+
"methods": ["POST"],
18+
"parts": {
19+
"inference_id": {
20+
"type": "string",
21+
"description": "The inference Id"
22+
}
23+
}
24+
}
25+
]
26+
},
27+
"body": {
28+
"description": "The inference payload"
29+
}
30+
}
31+
}

specification/_json_spec/inference.stream_inference.json renamed to specification/_json_spec/inference.stream_completion.json

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"inference.stream_inference": {
2+
"inference.stream_completion": {
33
"documentation": {
44
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
55
"description": "Perform streaming inference"
@@ -12,24 +12,10 @@
1212
},
1313
"url": {
1414
"paths": [
15-
{
16-
"path": "/_inference/{inference_id}/_stream",
17-
"methods": ["POST"],
18-
"parts": {
19-
"inference_id": {
20-
"type": "string",
21-
"description": "The inference Id"
22-
}
23-
}
24-
},
2515
{
2616
"path": "/_inference/completion/{inference_id}/_stream",
2717
"methods": ["POST"],
2818
"parts": {
29-
"task_type": {
30-
"type": "string",
31-
"description": "The task type"
32-
},
3319
"inference_id": {
3420
"type": "string",
3521
"description": "The inference Id"
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
{
2+
"inference.text_embedding": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
5+
"description": "Perform text embedding inference"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/text_embedding/{inference_id}",
17+
"methods": ["POST"],
18+
"parts": {
19+
"inference_id": {
20+
"type": "string",
21+
"description": "The inference Id"
22+
}
23+
}
24+
}
25+
]
26+
},
27+
"body": {
28+
"description": "The inference payload"
29+
}
30+
}
31+
}

specification/_json_spec/inference.unified_inference.json

Lines changed: 0 additions & 45 deletions
This file was deleted.

specification/inference/_types/Results.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ export class SparseEmbeddingResult {
3737
embedding: SparseVector
3838
}
3939

40+
/**
41+
* The response format for the sparse embedding request.
42+
*/
43+
export class SparseEmbeddingInferenceResult {
44+
// TODO: should this be optional if we ever support multiple encoding types, so that it can become a variant?
45+
sparse_embedding: Array<SparseEmbeddingResult>
46+
}
47+
4048
/**
4149
* Text Embedding results containing bytes are represented as Dense
4250
* Vectors of bytes.
@@ -57,13 +65,29 @@ export class TextEmbeddingResult {
5765
embedding: DenseVector
5866
}
5967

68+
/**
69+
* TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants
70+
* @variants container
71+
*/
72+
export class TextEmbeddingInferenceResult {
73+
text_embedding_bytes?: Array<TextEmbeddingByteResult>
74+
text_embedding?: Array<TextEmbeddingResult>
75+
}
76+
6077
/**
6178
* The completion result object
6279
*/
6380
export class CompletionResult {
6481
result: string
6582
}
6683

84+
/**
85+
* The response format for the completion request.
86+
*/
87+
export class CompletionInferenceResult {
88+
completion: Array<CompletionResult>
89+
}
90+
6791
/**
6892
* The rerank result object representing a single ranked document
6993
* id: the original index of the document in the request
@@ -76,6 +100,13 @@ export class RankedDocument {
76100
text?: string
77101
}
78102

103+
/**
104+
* Defines the response for a rerank request.
105+
*/
106+
export class RerankedInferenceResult {
107+
rerank: Array<RankedDocument>
108+
}
109+
79110
/**
80111
* InferenceResult is an aggregation of mutually exclusive variants
81112
* @variants container

specification/inference/unified_inference/UnifiedRequest.ts renamed to specification/inference/chat_completion_unified/UnifiedRequest.ts

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
* under the License.
1818
*/
1919

20-
import { TaskType } from '@inference/_types/TaskType'
2120
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
2221
import { RequestBase } from '@_types/Base'
2322
import { Id } from '@_types/common'
@@ -33,19 +32,11 @@ import { Duration } from '@_types/Time'
3332
export interface Request extends RequestBase {
3433
urls: [
3534
{
36-
path: '/_inference/{inference_id}/_unified'
37-
methods: ['POST']
38-
},
39-
{
40-
path: '/_inference/{task_type}/{inference_id}/_unified'
35+
path: '/_inference/chat_completion/{inference_id}/_unified'
4136
methods: ['POST']
4237
}
4338
]
4439
path_parts: {
45-
/**
46-
* The task type
47-
*/
48-
task_type?: TaskType
4940
/**
5041
* The inference Id
5142
*/

0 commit comments

Comments
 (0)