Skip to content

Commit 6d5c7a0

Browse files
Merge pull request #221 from openai/erinkav/new-params
Add token controls, tool choice and response format to Assistants API
2 parents adeccb4 + 9055b4d commit 6d5c7a0

File tree

1 file changed

+226
-7
lines changed

1 file changed

+226
-7
lines changed

openapi.yaml

Lines changed: 226 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4268,6 +4268,7 @@ paths:
42684268
"last_error": null,
42694269
"model": "gpt-4-turbo",
42704270
"instructions": null,
4271+
"incomplete_details": null,
42714272
"tools": [
42724273
{
42734274
"type": "code_interpreter"
@@ -4283,7 +4284,15 @@ paths:
42834284
"completion_tokens": 456,
42844285
"total_tokens": 579
42854286
},
4286-
"temperature": 1
4287+
"temperature": 1,
4288+
"max_prompt_tokens": 1000,
4289+
"max_completion_tokens": 1000,
4290+
"truncation_strategy": {
4291+
"type": "auto",
4292+
"last_messages": null
4293+
},
4294+
"response_format": "auto",
4295+
"tool_choice": "auto"
42874296
},
42884297
{
42894298
"id": "run_abc456",
@@ -4300,6 +4309,7 @@ paths:
43004309
"last_error": null,
43014310
"model": "gpt-4-turbo",
43024311
"instructions": null,
4312+
"incomplete_details": null,
43034313
"tools": [
43044314
{
43054315
"type": "code_interpreter"
@@ -4315,7 +4325,15 @@ paths:
43154325
"completion_tokens": 456,
43164326
"total_tokens": 579
43174327
},
4318-
"temperature": 1
4328+
"temperature": 1,
4329+
"max_prompt_tokens": 1000,
4330+
"max_completion_tokens": 1000,
4331+
"truncation_strategy": {
4332+
"type": "auto",
4333+
"last_messages": null
4334+
},
4335+
"response_format": "auto",
4336+
"tool_choice": "auto"
43194337
}
43204338
],
43214339
"first_id": "run_abc123",
@@ -4404,6 +4422,7 @@ paths:
44044422
"last_error": null,
44054423
"model": "gpt-4-turbo",
44064424
"instructions": null,
4425+
"incomplete_details": null,
44074426
"tools": [
44084427
{
44094428
"type": "code_interpreter"
@@ -4415,7 +4434,15 @@ paths:
44154434
],
44164435
"metadata": {},
44174436
"usage": null,
4418-
"temperature": 1
4437+
"temperature": 1,
4438+
"max_prompt_tokens": 1000,
4439+
"max_completion_tokens": 1000,
4440+
"truncation_strategy": {
4441+
"type": "auto",
4442+
"last_messages": null
4443+
},
4444+
"response_format": "auto",
4445+
"tool_choice": "auto"
44194446
}
44204447
- title: Streaming
44214448
request:
@@ -4736,6 +4763,7 @@ paths:
47364763
"last_error": null,
47374764
"model": "gpt-4-turbo",
47384765
"instructions": null,
4766+
"incomplete_details": null,
47394767
"tools": [
47404768
{
47414769
"type": "code_interpreter"
@@ -4751,7 +4779,15 @@ paths:
47514779
"completion_tokens": 456,
47524780
"total_tokens": 579
47534781
},
4754-
"temperature": 1
4782+
"temperature": 1,
4783+
"max_prompt_tokens": 1000,
4784+
"max_completion_tokens": 1000,
4785+
"truncation_strategy": {
4786+
"type": "auto",
4787+
"last_messages": null
4788+
},
4789+
"response_format": "auto",
4790+
"tool_choice": "auto"
47554791
}
47564792
post:
47574793
operationId: modifyRun
@@ -4848,6 +4884,7 @@ paths:
48484884
"last_error": null,
48494885
"model": "gpt-4-turbo",
48504886
"instructions": null,
4887+
"incomplete_details": null,
48514888
"tools": [
48524889
{
48534890
"type": "code_interpreter"
@@ -4865,7 +4902,15 @@ paths:
48654902
"completion_tokens": 456,
48664903
"total_tokens": 579
48674904
},
4868-
"temperature": 1
4905+
"temperature": 1,
4906+
"max_prompt_tokens": 1000,
4907+
"max_completion_tokens": 1000,
4908+
"truncation_strategy": {
4909+
"type": "auto",
4910+
"last_messages": null
4911+
},
4912+
"response_format": "auto",
4913+
"tool_choice": "auto"
48694914
}
48704915
48714916
/threads/{thread_id}/runs/{run_id}/submit_tool_outputs:
@@ -4977,6 +5022,7 @@ paths:
49775022
"last_error": null,
49785023
"model": "gpt-4-turbo",
49795024
"instructions": null,
5025+
"incomplete_details": null,
49805026
"tools": [
49815027
{
49825028
"type": "function",
@@ -5003,7 +5049,15 @@ paths:
50035049
"file_ids": [],
50045050
"metadata": {},
50055051
"usage": null,
5006-
"temperature": 1
5052+
"temperature": 1,
5053+
"max_prompt_tokens": 1000,
5054+
"max_completion_tokens": 1000,
5055+
"truncation_strategy": {
5056+
"type": "auto",
5057+
"last_messages": null
5058+
},
5059+
"response_format": "auto",
5060+
"tool_choice": "auto"
50075061
}
50085062
50095063
- title: Streaming
@@ -8779,6 +8833,84 @@ components:
87798833
- type
87808834
- function
87818835

8836+
TruncationObject:
8837+
type: object
8838+
title: Thread Truncation Controls
8839+
properties:
8840+
type:
8841+
type: string
8842+
description: The truncation strategy to use for the thread. The default is `auto`. If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread. When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
8843+
enum: ["auto", "last_messages"]
8844+
last_messages:
8845+
type: integer
8846+
description: The number of most recent messages from the thread when constructing the context for the run.
8847+
minimum: 1
8848+
nullable: true
8849+
required:
8850+
- type
8851+
8852+
AssistantsApiToolChoiceOption:
8853+
description: |
8854+
Controls which (if any) tool is called by the model.
8855+
`none` means the model will not call any tools and instead generates a message.
8856+
`auto` is the default value and means the model can pick between generating a message or calling a tool.
8857+
Specifying a particular tool like `{"type": "TOOL_TYPE"}` or `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
8858+
8859+
oneOf:
8860+
- type: string
8861+
description: >
8862+
`none` means the model will not call a function and instead generates a message.
8863+
`auto` means the model can pick between generating a message or calling a function.
8864+
enum: [none, auto]
8865+
- $ref: "#/components/schemas/AssistantsApiNamedToolChoice"
8866+
x-oaiExpandable: true
8867+
8868+
AssistantsApiNamedToolChoice:
8869+
type: object
8870+
description: Specifies a tool the model should use. Use to force the model to call a specific tool.
8871+
properties:
8872+
type:
8873+
type: string
8874+
enum: ["function", "code_interpreter", "retrieval"]
8875+
description: The type of the tool. If type is `function`, the function name must be set
8876+
function:
8877+
type: object
8878+
properties:
8879+
name:
8880+
type: string
8881+
description: The name of the function to call.
8882+
required:
8883+
- name
8884+
required:
8885+
- type
8886+
8887+
AssistantsApiResponseFormatOption:
8888+
description: |
8889+
Specifies the format that the model must output. Compatible with [GPT-4 Turbo](/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
8890+
8891+
Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.
8892+
8893+
**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
8894+
oneOf:
8895+
- type: string
8896+
description: >
8897+
`auto` is the default value
8898+
enum: [none, auto]
8899+
- $ref: "#/components/schemas/AssistantsApiResponseFormat"
8900+
x-oaiExpandable: true
8901+
8902+
AssistantsApiResponseFormat:
8903+
type: object
8904+
description: |
8905+
An object describing the expected output of the model. If `json_object` only `function` type `tools` are allowed to be passed to the Run. If `text` the model can return text or any value needed.
8906+
properties:
8907+
type:
8908+
type: string
8909+
enum: ["text", "json_object"]
8910+
example: "json_object"
8911+
default: "text"
8912+
description: Must be one of `text` or `json_object`.
8913+
87828914
RunObject:
87838915
type: object
87848916
title: A run on a thread
@@ -8872,6 +9004,15 @@ components:
88729004
description: The Unix timestamp (in seconds) for when the run was completed.
88739005
type: integer
88749006
nullable: true
9007+
incomplete_details:
9008+
description: Details on why the run is incomplete. Will be `null` if the run is not incomplete.
9009+
type: object
9010+
nullable: true
9011+
properties:
9012+
reason:
9013+
description: The reason why the run is incomplete. This will point to which specific token limit was reached over the course of the run.
9014+
type: string
9015+
enum: ["max_completion_tokens", "max_prompt_tokens"]
88759016
model:
88769017
description: The model that the [assistant](/docs/api-reference/assistants) used for this run.
88779018
type: string
@@ -8906,6 +9047,27 @@ components:
89069047
description: The sampling temperature used for this run. If not set, defaults to 1.
89079048
type: number
89089049
nullable: true
9050+
max_prompt_tokens:
9051+
type: integer
9052+
nullable: true
9053+
description: |
9054+
The maximum number of prompt tokens specified to have been used over the course of the run.
9055+
minimum: 256
9056+
max_completion_tokens:
9057+
type: integer
9058+
nullable: true
9059+
description: |
9060+
The maximum number of completion tokens specified to have been used over the course of the run.
9061+
minimum: 256
9062+
truncation_strategy:
9063+
$ref: "#/components/schemas/TruncationObject"
9064+
nullable: true
9065+
tool_choice:
9066+
$ref: "#/components/schemas/AssistantsApiToolChoiceOption"
9067+
nullable: true
9068+
response_format:
9069+
$ref: "#/components/schemas/AssistantsApiResponseFormatOption"
9070+
nullable: true
89099071
required:
89109072
- id
89119073
- object
@@ -8926,6 +9088,12 @@ components:
89269088
- file_ids
89279089
- metadata
89289090
- usage
9091+
- incomplete_details
9092+
- max_prompt_tokens
9093+
- max_completion_tokens
9094+
- truncation_strategy
9095+
- tool_choice
9096+
- response_format
89299097
x-oaiMeta:
89309098
name: The run object
89319099
beta: true
@@ -8948,12 +9116,21 @@ components:
89489116
"tools": [{"type": "retrieval"}, {"type": "code_interpreter"}],
89499117
"file_ids": [],
89509118
"metadata": {},
9119+
"incomplete_details": null,
89519120
"usage": {
89529121
"prompt_tokens": 123,
89539122
"completion_tokens": 456,
89549123
"total_tokens": 579
89559124
},
8956-
"temperature": 1
9125+
"temperature": 1,
9126+
"max_prompt_tokens": 1000,
9127+
"max_completion_tokens": 1000,
9128+
"truncation_strategy": {
9129+
"type": "auto",
9130+
"last_messages": null
9131+
},
9132+
"response_format": "auto",
9133+
"tool_choice": "auto"
89579134
}
89589135
CreateRunRequest:
89599136
type: object
@@ -9035,6 +9212,27 @@ components:
90359212
nullable: true
90369213
description: |
90379214
If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
9215+
max_prompt_tokens:
9216+
type: integer
9217+
nullable: true
9218+
description: |
9219+
The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9220+
minimum: 256
9221+
max_completion_tokens:
9222+
type: integer
9223+
nullable: true
9224+
description: |
9225+
The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9226+
minimum: 256
9227+
truncation_strategy:
9228+
$ref: "#/components/schemas/TruncationObject"
9229+
nullable: true
9230+
tool_choice:
9231+
$ref: "#/components/schemas/AssistantsApiToolChoiceOption"
9232+
nullable: true
9233+
response_format:
9234+
$ref: "#/components/schemas/AssistantsApiResponseFormatOption"
9235+
nullable: true
90389236
required:
90399237
- thread_id
90409238
- assistant_id
@@ -9196,6 +9394,27 @@ components:
91969394
nullable: true
91979395
description: |
91989396
If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
9397+
max_prompt_tokens:
9398+
type: integer
9399+
nullable: true
9400+
description: |
9401+
The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9402+
minimum: 256
9403+
max_completion_tokens:
9404+
type: integer
9405+
nullable: true
9406+
description: |
9407+
The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9408+
minimum: 256
9409+
truncation_strategy:
9410+
$ref: "#/components/schemas/TruncationObject"
9411+
nullable: true
9412+
tool_choice:
9413+
$ref: "#/components/schemas/AssistantsApiToolChoiceOption"
9414+
nullable: true
9415+
response_format:
9416+
$ref: "#/components/schemas/AssistantsApiResponseFormatOption"
9417+
nullable: true
91999418
required:
92009419
- thread_id
92019420
- assistant_id

0 commit comments

Comments
 (0)