Skip to content

Commit 90f9fd2

Browse files
Addressing feedback and removing response
1 parent b195968 commit 90f9fd2

File tree

4 files changed

+79
-179
lines changed

4 files changed

+79
-179
lines changed

specification/_types/Binary.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ export type MapboxVectorTiles = ArrayBuffer
2222

2323
// ES|QL columns
2424
export type EsqlColumns = ArrayBuffer
25+
26+
// Streaming endpoints response
27+
export type StreamResult = ArrayBuffer

specification/inference/_types/Results.ts

Lines changed: 0 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -88,120 +88,6 @@ export class InferenceResult {
8888
rerank?: Array<RankedDocument>
8989
}
9090

91-
/**
92-
* The function the model wants to call.
93-
*/
94-
export class ResultFunctionCall {
95-
/**
96-
* The arguments to call the function with in that the model generated in JSON format.
97-
*/
98-
arguments?: string
99-
/**
100-
* The name of the function to call.
101-
*/
102-
name?: string
103-
}
104-
105-
/**
106-
* The tool call made by the model.
107-
*/
108-
export class ResultToolCall {
109-
index: number
110-
/**
111-
* The identifier of the tool call.
112-
*/
113-
id?: string
114-
/**
115-
* The function the model wants to call.
116-
*/
117-
function?: ResultFunctionCall
118-
/**
119-
* The type of the tool.
120-
*/
121-
type?: string
122-
}
123-
124-
export class CompletionDelta {
125-
/**
126-
* The contents of the chunked message.
127-
*/
128-
content?: string
129-
/**
130-
* The refusal message.
131-
*/
132-
refusal?: string
133-
/**
134-
* The role of the author of the message.
135-
*/
136-
role?: string
137-
/**
138-
* The tool calls made by the model.
139-
*/
140-
tool_calls?: Array<ResultToolCall>
141-
}
142-
143-
/**
144-
* Represent a completion choice returned from a model.
145-
*/
146-
export class CompletionChoice {
147-
/**
148-
* The delta generated by the model.
149-
*/
150-
delta: CompletionDelta
151-
/**
152-
* The reason the model stopped generating tokens.
153-
*/
154-
finish_reason?: string
155-
/**
156-
* The index of the choice in the array of choices field.
157-
*/
158-
index: number
159-
}
160-
161-
/**
162-
* The token usage statistics for the entire request.
163-
*/
164-
export class Usage {
165-
/**
166-
* The number of tokens in the generated completion.
167-
*/
168-
completion_tokens: number
169-
/**
170-
* The number of tokens in the prompt.
171-
*/
172-
prompt_tokens: number
173-
/**
174-
* The sum of completion_tokens and prompt_tokens.
175-
*/
176-
total_tokens: number
177-
}
178-
179-
/**
180-
 * Represents the result format for a completion request using the Unified Inference API.
181-
*/
182-
export class UnifiedInferenceResult {
183-
/**
184-
* A unique identifier for the chat completion
185-
*/
186-
id: string
187-
/**
188-
* A list of completion choices.
189-
*/
190-
choices: Array<CompletionChoice>
191-
/**
192-
* The model that generated the completion.
193-
*/
194-
model: string
195-
/**
196-
* The object type.
197-
*/
198-
object: string
199-
/**
200-
* The token usage statistics for the entire request.
201-
*/
202-
usage?: Usage
203-
}
204-
20591
/**
20692
* Acknowledged response. For dry_run, contains the list of pipelines which reference the inference endpoint
20793
*/

specification/inference/unified_inference/UnifiedRequest.ts

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,74 @@ import { TaskType } from '@inference/_types/TaskType'
2121
import { UserDefinedValue } from '@spec_utils/UserDefinedValue'
2222
import { RequestBase } from '@_types/Base'
2323
import { Id } from '@_types/common'
24+
import { float, long } from '@_types/Numeric'
2425
import { Duration } from '@_types/Time'
2526

27+
/**
28+
* Perform inference on the service using the Unified Schema
29+
* @rest_spec_name inference.unified_inference
30+
* @availability stack since=8.18.0 stability=stable visibility=public
31+
* @availability serverless stability=stable visibility=public
32+
*/
33+
export interface Request extends RequestBase {
34+
path_parts: {
35+
/**
36+
* The task type
37+
*/
38+
task_type?: TaskType
39+
/**
40+
* The inference Id
41+
*/
42+
inference_id: Id
43+
}
44+
query_parameters: {
45+
/**
46+
* Specifies the amount of time to wait for the inference request to complete.
47+
* @server_default 30s
48+
*/
49+
timeout?: Duration
50+
}
51+
body: {
52+
/**
53+
* A list of objects representing the conversation.
54+
*/
55+
messages: Array<Message>
56+
/**
57+
* The ID of the model to use.
58+
*/
59+
model?: string
60+
/**
61+
* The upper bound limit for the number of tokens that can be generated for a completion request.
62+
*/
63+
max_completion_tokens?: long
64+
/**
65+
* A sequence of strings to control when the model should stop generating additional tokens.
66+
*/
67+
stop?: Array<string>
68+
/**
69+
* The sampling temperature to use.
70+
*/
71+
temperature?: float
72+
/**
73+
* Controls which tool is called by the model.
74+
*/
75+
tool_choice?: CompletionToolType
76+
/**
77+
* A list of tools that the model can call.
78+
*/
79+
tools?: Array<CompletionTool>
80+
/**
81+
* Nucleus sampling, an alternative to sampling with temperature.
82+
*/
83+
top_p?: float
84+
}
85+
}
86+
87+
/**
88+
* @codegen_names string, object
89+
*/
90+
export type CompletionToolType = string | CompletionToolChoice
91+
2692
/**
2793
* An object style representation of a single portion of a conversation.
2894
*/
@@ -58,7 +124,7 @@ export interface ToolCall {
58124
/**
59125
* The identifier of the tool call.
60126
*/
61-
id: string
127+
id: Id
62128
/**
63129
* The function that the model called.
64130
*/
@@ -69,22 +135,27 @@ export interface ToolCall {
69135
type: string
70136
}
71137

138+
/**
139+
* @codegen_names string, object
140+
*/
141+
export type MessageContent = string | Array<ContentObject>
142+
72143
/**
73144
* An object representing part of the conversation.
74145
*/
75146
export interface Message {
76147
/**
77148
* The content of the message.
78149
*/
79-
content: string | Array<ContentObject>
150+
content?: MessageContent
80151
/**
81152
* The role of the message author.
82153
*/
83154
role: string
84155
/**
85156
* The tool call that this message is responding to.
86157
*/
87-
tool_call_id?: string
158+
tool_call_id?: Id
88159
/**
89160
* The tool calls generated by the model.
90161
*/
@@ -152,63 +223,3 @@ export interface CompletionTool {
152223
*/
153224
function: CompletionToolFunction
154225
}
155-
156-
/**
157-
* Perform inference on the service using the Unified Schema
158-
* @rest_spec_name inference.unified_inference
159-
* @availability stack since=8.18.0 stability=stable visibility=public
160-
* @availability serverless stability=stable visibility=public
161-
*/
162-
export interface Request extends RequestBase {
163-
path_parts: {
164-
/**
165-
* The task type
166-
*/
167-
task_type?: TaskType
168-
/**
169-
* The inference Id
170-
*/
171-
inference_id: Id
172-
}
173-
query_parameters: {
174-
/**
175-
* Specifies the amount of time to wait for the inference request to complete.
176-
* @server_default 30s
177-
*/
178-
timeout?: Duration
179-
}
180-
body: {
181-
/**
182-
* A list of objects representing the conversation.
183-
*/
184-
messages: Array<Message>
185-
/**
186-
* The ID of the model to use.
187-
*/
188-
model?: string
189-
/**
190-
* The upper bound limit for the number of tokens that can be generated for a completion request.
191-
*/
192-
max_completion_tokens?: number
193-
/**
194-
* A sequence of strings to control when the model should stop generating additional tokens.
195-
*/
196-
stop?: Array<string>
197-
/**
198-
* The sampling temperature to use.
199-
*/
200-
temperature?: number
201-
/**
202-
* Controls which tool is called by the model.
203-
*/
204-
tool_choice?: string | CompletionToolChoice
205-
/**
206-
* A list of tools that the model can call.
207-
*/
208-
tools?: Array<CompletionTool>
209-
/**
210-
* Nucleus sampling, an alternative to sampling with temperature.
211-
*/
212-
top_p?: number
213-
}
214-
}

specification/inference/unified_inference/UnifiedResponse.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
* under the License.
1818
*/
1919

20-
import { UnifiedInferenceResult } from '@inference/_types/Results'
20+
import { StreamResult } from '@_types/Binary'
2121

2222
export class Response {
23-
body: UnifiedInferenceResult
23+
body: StreamResult
2424
}

0 commit comments

Comments (0)