Skip to content

Commit 6d5c7a0

Browse files
Merge pull request #221 from openai/erinkav/new-params
Add token controls, tool choice and response format to Assistants API
2 parents adeccb4 + 9055b4d commit 6d5c7a0

File tree

1 file changed

+226
-7
lines changed

1 file changed

+226
-7
lines changed

openapi.yaml

Lines changed: 226 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4268,6 +4268,7 @@ paths:
42684268
"last_error": null,
42694269
"model": "gpt-4-turbo",
42704270
"instructions": null,
4271+
"incomplete_details": null,
42714272
"tools": [
42724273
{
42734274
"type": "code_interpreter"
@@ -4283,7 +4284,15 @@ paths:
42834284
"completion_tokens": 456,
42844285
"total_tokens": 579
42854286
},
4286-
"temperature": 1
4287+
"temperature": 1,
4288+
"max_prompt_tokens": 1000,
4289+
"max_completion_tokens": 1000,
4290+
"truncation_strategy": {
4291+
"type": "auto",
4292+
"last_messages": null
4293+
},
4294+
"response_format": "auto",
4295+
"tool_choice": "auto"
42874296
},
42884297
{
42894298
"id": "run_abc456",
@@ -4300,6 +4309,7 @@ paths:
43004309
"last_error": null,
43014310
"model": "gpt-4-turbo",
43024311
"instructions": null,
4312+
"incomplete_details": null,
43034313
"tools": [
43044314
{
43054315
"type": "code_interpreter"
@@ -4315,7 +4325,15 @@ paths:
43154325
"completion_tokens": 456,
43164326
"total_tokens": 579
43174327
},
4318-
"temperature": 1
4328+
"temperature": 1,
4329+
"max_prompt_tokens": 1000,
4330+
"max_completion_tokens": 1000,
4331+
"truncation_strategy": {
4332+
"type": "auto",
4333+
"last_messages": null
4334+
},
4335+
"response_format": "auto",
4336+
"tool_choice": "auto"
43194337
}
43204338
],
43214339
"first_id": "run_abc123",
@@ -4404,6 +4422,7 @@ paths:
44044422
"last_error": null,
44054423
"model": "gpt-4-turbo",
44064424
"instructions": null,
4425+
"incomplete_details": null,
44074426
"tools": [
44084427
{
44094428
"type": "code_interpreter"
@@ -4415,7 +4434,15 @@ paths:
44154434
],
44164435
"metadata": {},
44174436
"usage": null,
4418-
"temperature": 1
4437+
"temperature": 1,
4438+
"max_prompt_tokens": 1000,
4439+
"max_completion_tokens": 1000,
4440+
"truncation_strategy": {
4441+
"type": "auto",
4442+
"last_messages": null
4443+
},
4444+
"response_format": "auto",
4445+
"tool_choice": "auto"
44194446
}
44204447
- title: Streaming
44214448
request:
@@ -4736,6 +4763,7 @@ paths:
47364763
"last_error": null,
47374764
"model": "gpt-4-turbo",
47384765
"instructions": null,
4766+
"incomplete_details": null,
47394767
"tools": [
47404768
{
47414769
"type": "code_interpreter"
@@ -4751,7 +4779,15 @@ paths:
47514779
"completion_tokens": 456,
47524780
"total_tokens": 579
47534781
},
4754-
"temperature": 1
4782+
"temperature": 1,
4783+
"max_prompt_tokens": 1000,
4784+
"max_completion_tokens": 1000,
4785+
"truncation_strategy": {
4786+
"type": "auto",
4787+
"last_messages": null
4788+
},
4789+
"response_format": "auto",
4790+
"tool_choice": "auto"
47554791
}
47564792
post:
47574793
operationId: modifyRun
@@ -4848,6 +4884,7 @@ paths:
48484884
"last_error": null,
48494885
"model": "gpt-4-turbo",
48504886
"instructions": null,
4887+
"incomplete_details": null,
48514888
"tools": [
48524889
{
48534890
"type": "code_interpreter"
@@ -4865,7 +4902,15 @@ paths:
48654902
"completion_tokens": 456,
48664903
"total_tokens": 579
48674904
},
4868-
"temperature": 1
4905+
"temperature": 1,
4906+
"max_prompt_tokens": 1000,
4907+
"max_completion_tokens": 1000,
4908+
"truncation_strategy": {
4909+
"type": "auto",
4910+
"last_messages": null
4911+
},
4912+
"response_format": "auto",
4913+
"tool_choice": "auto"
48694914
}
48704915
48714916
/threads/{thread_id}/runs/{run_id}/submit_tool_outputs:
@@ -4977,6 +5022,7 @@ paths:
49775022
"last_error": null,
49785023
"model": "gpt-4-turbo",
49795024
"instructions": null,
5025+
"incomplete_details": null,
49805026
"tools": [
49815027
{
49825028
"type": "function",
@@ -5003,7 +5049,15 @@ paths:
50035049
"file_ids": [],
50045050
"metadata": {},
50055051
"usage": null,
5006-
"temperature": 1
5052+
"temperature": 1,
5053+
"max_prompt_tokens": 1000,
5054+
"max_completion_tokens": 1000,
5055+
"truncation_strategy": {
5056+
"type": "auto",
5057+
"last_messages": null
5058+
},
5059+
"response_format": "auto",
5060+
"tool_choice": "auto"
50075061
}
50085062
50095063
- title: Streaming
@@ -8779,6 +8833,84 @@ components:
87798833
- type
87808834
- function
87818835

8836+
TruncationObject:
8837+
type: object
8838+
title: Thread Truncation Controls
8839+
properties:
8840+
type:
8841+
type: string
8842+
description: The truncation strategy to use for the thread. The default is `auto`. If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread. When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
8843+
enum: ["auto", "last_messages"]
8844+
last_messages:
8845+
type: integer
8846+
description: The number of most recent messages from the thread when constructing the context for the run.
8847+
minimum: 1
8848+
nullable: true
8849+
required:
8850+
- type
8851+
8852+
AssistantsApiToolChoiceOption:
8853+
description: |
8854+
Controls which (if any) tool is called by the model.
8855+
`none` means the model will not call any tools and instead generates a message.
8856+
`auto` is the default value and means the model can pick between generating a message or calling a tool.
8857+
Specifying a particular tool like `{"type": "TOOL_TYPE"}` or `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
8858+
8859+
oneOf:
8860+
- type: string
8861+
description: >
8862+
`none` means the model will not call a function and instead generates a message.
8863+
`auto` means the model can pick between generating a message or calling a function.
8864+
enum: [none, auto]
8865+
- $ref: "#/components/schemas/AssistantsApiNamedToolChoice"
8866+
x-oaiExpandable: true
8867+
8868+
AssistantsApiNamedToolChoice:
8869+
type: object
8870+
description: Specifies a tool the model should use. Use to force the model to call a specific tool.
8871+
properties:
8872+
type:
8873+
type: string
8874+
enum: ["function", "code_interpreter", "retrieval"]
8875+
description: The type of the tool. If type is `function`, the function name must be set
8876+
function:
8877+
type: object
8878+
properties:
8879+
name:
8880+
type: string
8881+
description: The name of the function to call.
8882+
required:
8883+
- name
8884+
required:
8885+
- type
8886+
8887+
AssistantsApiResponseFormatOption:
8888+
description: |
8889+
Specifies the format that the model must output. Compatible with [GPT-4 Turbo](/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
8890+
8891+
Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.
8892+
8893+
**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
8894+
oneOf:
8895+
- type: string
8896+
description: >
8897+
`auto` is the default value
8898+
enum: [none, auto]
8899+
- $ref: "#/components/schemas/AssistantsApiResponseFormat"
8900+
x-oaiExpandable: true
8901+
8902+
AssistantsApiResponseFormat:
8903+
type: object
8904+
description: |
8905+
An object describing the expected output of the model. If `json_object` only `function` type `tools` are allowed to be passed to the Run. If `text` the model can return text or any value needed.
8906+
properties:
8907+
type:
8908+
type: string
8909+
enum: ["text", "json_object"]
8910+
example: "json_object"
8911+
default: "text"
8912+
description: Must be one of `text` or `json_object`.
8913+
87828914
RunObject:
87838915
type: object
87848916
title: A run on a thread
@@ -8872,6 +9004,15 @@ components:
88729004
description: The Unix timestamp (in seconds) for when the run was completed.
88739005
type: integer
88749006
nullable: true
9007+
incomplete_details:
9008+
description: Details on why the run is incomplete. Will be `null` if the run is not incomplete.
9009+
type: object
9010+
nullable: true
9011+
properties:
9012+
reason:
9013+
description: The reason why the run is incomplete. This will point to which specific token limit was reached over the course of the run.
9014+
type: string
9015+
enum: ["max_completion_tokens", "max_prompt_tokens"]
88759016
model:
88769017
description: The model that the [assistant](/docs/api-reference/assistants) used for this run.
88779018
type: string
@@ -8906,6 +9047,27 @@ components:
89069047
description: The sampling temperature used for this run. If not set, defaults to 1.
89079048
type: number
89089049
nullable: true
9050+
max_prompt_tokens:
9051+
type: integer
9052+
nullable: true
9053+
description: |
9054+
The maximum number of prompt tokens specified to have been used over the course of the run.
9055+
minimum: 256
9056+
max_completion_tokens:
9057+
type: integer
9058+
nullable: true
9059+
description: |
9060+
The maximum number of completion tokens specified to have been used over the course of the run.
9061+
minimum: 256
9062+
truncation_strategy:
9063+
$ref: "#/components/schemas/TruncationObject"
9064+
nullable: true
9065+
tool_choice:
9066+
$ref: "#/components/schemas/AssistantsApiToolChoiceOption"
9067+
nullable: true
9068+
response_format:
9069+
$ref: "#/components/schemas/AssistantsApiResponseFormatOption"
9070+
nullable: true
89099071
required:
89109072
- id
89119073
- object
@@ -8926,6 +9088,12 @@ components:
89269088
- file_ids
89279089
- metadata
89289090
- usage
9091+
- incomplete_details
9092+
- max_prompt_tokens
9093+
- max_completion_tokens
9094+
- truncation_strategy
9095+
- tool_choice
9096+
- response_format
89299097
x-oaiMeta:
89309098
name: The run object
89319099
beta: true
@@ -8948,12 +9116,21 @@ components:
89489116
"tools": [{"type": "retrieval"}, {"type": "code_interpreter"}],
89499117
"file_ids": [],
89509118
"metadata": {},
9119+
"incomplete_details": null,
89519120
"usage": {
89529121
"prompt_tokens": 123,
89539122
"completion_tokens": 456,
89549123
"total_tokens": 579
89559124
},
8956-
"temperature": 1
9125+
"temperature": 1,
9126+
"max_prompt_tokens": 1000,
9127+
"max_completion_tokens": 1000,
9128+
"truncation_strategy": {
9129+
"type": "auto",
9130+
"last_messages": null
9131+
},
9132+
"response_format": "auto",
9133+
"tool_choice": "auto"
89579134
}
89589135
CreateRunRequest:
89599136
type: object
@@ -9035,6 +9212,27 @@ components:
90359212
nullable: true
90369213
description: |
90379214
If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
9215+
max_prompt_tokens:
9216+
type: integer
9217+
nullable: true
9218+
description: |
9219+
The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9220+
minimum: 256
9221+
max_completion_tokens:
9222+
type: integer
9223+
nullable: true
9224+
description: |
9225+
The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9226+
minimum: 256
9227+
truncation_strategy:
9228+
$ref: "#/components/schemas/TruncationObject"
9229+
nullable: true
9230+
tool_choice:
9231+
$ref: "#/components/schemas/AssistantsApiToolChoiceOption"
9232+
nullable: true
9233+
response_format:
9234+
$ref: "#/components/schemas/AssistantsApiResponseFormatOption"
9235+
nullable: true
90389236
required:
90399237
- thread_id
90409238
- assistant_id
@@ -9196,6 +9394,27 @@ components:
91969394
nullable: true
91979395
description: |
91989396
If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
9397+
max_prompt_tokens:
9398+
type: integer
9399+
nullable: true
9400+
description: |
9401+
The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9402+
minimum: 256
9403+
max_completion_tokens:
9404+
type: integer
9405+
nullable: true
9406+
description: |
9407+
The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
9408+
minimum: 256
9409+
truncation_strategy:
9410+
$ref: "#/components/schemas/TruncationObject"
9411+
nullable: true
9412+
tool_choice:
9413+
$ref: "#/components/schemas/AssistantsApiToolChoiceOption"
9414+
nullable: true
9415+
response_format:
9416+
$ref: "#/components/schemas/AssistantsApiResponseFormatOption"
9417+
nullable: true
91999418
required:
92009419
- thread_id
92019420
- assistant_id

0 commit comments

Comments
 (0)