diff --git a/openapi.yaml b/openapi.yaml
index de97a6c..70a35b5 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -15,1072 +15,628 @@ servers:
 security:
   - bearerAuth: []
 paths:
-  /voices:
+  /deployments:
     get:
-      tags: ['Voices']
-      summary: Fetch available voices for each model
-      description: Fetch available voices for each model
-      operationId: fetchVoices
+      description: Get a list of all deployments in your project
       responses:
-        '200':
-          description: Success
+        "200":
+          description: List of deployments
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ListVoicesResponse'
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.audio.voices.list()
-
-            print(response.data)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.audio.voices.list()
-
-            print(response.data)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.audio.voices.list()
-
-            console.log(response.data);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.audio.voices.list()
-
-            console.log(response.data);
-  /videos/{id}:
-    get:
-      tags: ['Video']
-      summary: Fetch video metadata
-      description: Fetch video metadata
-      servers:
-        - url: https://api.together.xyz/v2
-      operationId: retrieveVideo
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.retrieve(video_id)
-
-            print(response.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.retrieve(video_id)
-
-            print(response.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.retrieve(videoId);
-
-            console.log(response.status);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.retrieve(videoId);
-
-            console.log(response.status);
-      parameters:
-        - in: path
-          name: id
-          schema:
-            type: string
-          required: true
-          description: Identifier of video from create response.
-      responses:
-        '200':
-          description: Success
+                $ref: "#/components/schemas/DeploymentListResponse"
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VideoJob'
-        '400':
-          description: Invalid request parameters.
-        '404':
-          description: Video ID not found.
-  /videos:
+                type: object
+      summary: Get the list of deployments
+      tags:
+        - Deployments
     post:
-      tags: ['Video']
-      summary: Create video
-      description: Create a video
-      operationId: createVideo
-      servers:
-        - url: https://api.together.xyz/v2
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.create(
-                model="together/video-model",
-                prompt="A cartoon of an astronaut riding a horse on the moon"
-            )
-
-            print(response.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.videos.create(
-                model="together/video-model",
-                prompt="A cartoon of an astronaut riding a horse on the moon"
-            )
-
-            print(response.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.create({
-              model: "together/video-model",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
-
-            console.log(response.id);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.videos.create({
-              model: "together/video-model",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
-
-            console.log(response.id);
+      description: Create a new deployment with specified configuration
       requestBody:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/CreateVideoBody'
+              $ref: "#/components/schemas/CreateDeploymentRequest"
+        description: Deployment configuration
+        required: true
       responses:
-        '200':
-          description: Success
+        "200":
+          description: Deployment created successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/VideoJob'
-  /chat/completions:
-    post:
-      tags: ['Chat']
-      summary: Create chat completion
-      description: Query a chat model.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.chat.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": "What are some fun things to do in New York?"},
-                ]
-            )
-
-            print(response.choices[0].message.content)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.chat.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                messages=[
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": "What are some fun things to do in New York?"},
-                ]
-            )
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.chat.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              messages: [
-                { role: "system", content: "You are a helpful assistant." },
-                { role: "user", "content": "What are some fun things to do in New York?" },
-              ],
-            });
-
-            console.log(response.choices[0].message?.content);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.chat.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              messages: [
-                { role: "system", content: "You are a helpful assistant." },
-                { role: "user", "content": "What are some fun things to do in New York?" },
-              ],
-            });
-
-            console.log(response.choices[0].message?.content);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/chat/completions" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                   "messages": [
-                     {"role": "system", "content": "You are a helpful assistant."},
-                     {"role": "user", "content": "What are some fun things to do in New York?"}
-                   ]
-                 }'
-      operationId: chat-completions
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionRequest'
-      responses:
-        '200':
-          description: '200'
+                $ref: "#/components/schemas/DeploymentResponseItem"
+        "400":
+          description: Invalid request
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ChatCompletionResponse'
-            text/event-stream:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
               schema:
-                $ref: '#/components/schemas/ChatCompletionStream'
-        '400':
-          description: 'BadRequest'
+                type: object
+      summary: Create a new deployment
+      tags:
+        - Deployments
+  /deployments/{id}:
+    delete:
+      description: Delete an existing deployment
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Deployment deleted successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                type: object
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                type: object
+      summary: Delete a deployment
+      tags:
+        - Deployments
+    get:
+      description: Retrieve details of a specific deployment by its ID or name
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Deployment details
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+                $ref: "#/components/schemas/DeploymentResponseItem"
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /completions:
-    post:
-      tags: ['Completion']
-      summary: Create completion
-      description: Query a language, code, or image model.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                prompt="The largest city in France is",
-                max_tokens=1
-            )
-
-            print(response.choices[0].text)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.completions.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                prompt="The largest city in France is",
-                max_tokens=1
-            )
-
-            print(response.choices[0].text)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              prompt: "The largest city in France is",
-              max_tokens: 1,
-            });
-
-            console.log(response.choices[0].text);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.completions.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              prompt: "The largest city in France is",
-              max_tokens: 1
-            });
-
-            console.log(response.choices[0].text);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/completions" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                   "prompt": "The largest city in France is",
-                   "max_tokens": 1
-                 }'
-      operationId: completions
+                type: object
+      summary: Get a deployment by ID or name
+      tags:
+        - Deployments
+    patch:
+      description: Update an existing deployment configuration
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
       requestBody:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/CompletionRequest'
+              $ref: "#/components/schemas/UpdateDeploymentRequest"
+        description: Updated deployment configuration
+        required: true
       responses:
-        '200':
-          description: '200'
+        "200":
+          description: Deployment updated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/CompletionResponse'
-            text/event-stream:
+                $ref: "#/components/schemas/DeploymentResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
               schema:
-                $ref: '#/components/schemas/CompletionStream'
-        '400':
-          description: 'BadRequest'
+                type: object
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                type: object
+      summary: Update a deployment
+      tags:
+        - Deployments
+  /deployments/{id}/logs:
+    get:
+      description: Retrieve logs from a deployment, optionally filtered by replica ID.
+        Use follow=true to stream logs in real-time.
+      parameters:
+        - description: Deployment ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+        - description: Replica ID to filter logs
+          in: query
+          name: replica_id
+          schema:
+            type: string
+        - description: Stream logs in real-time (ndjson format)
+          in: query
+          name: follow
+          schema:
+            type: boolean
+      responses:
+        "200":
+          description: Deployment logs
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+                $ref: "#/components/schemas/DeploymentLogs"
+        "404":
+          description: Deployment not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /embeddings:
-    post:
-      tags: ['Embeddings']
-      summary: Create embedding
-      description: Query an embedding model for a given string of text.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.embeddings.create(
-                model="BAAI/bge-large-en-v1.5",
-                input="New York City",
-            )
-
-            print(response.data[0].embedding)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.embeddings.create(
-                model="BAAI/bge-large-en-v1.5",
-                input="New York City",
-            )
-
-            print(response.data[0].embedding)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.embeddings.create({
-              model: "BAAI/bge-large-en-v1.5",
-              input: "New York City",
-            });
-
-            console.log(response.data[0].embedding);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.embeddings.create({
-              model: "BAAI/bge-large-en-v1.5",
-              input: "New York City",
-            });
-
-            console.log(response.data[0].embedding);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/embeddings" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "BAAI/bge-large-en-v1.5",
-                   "input": "New York City"
-                 }'
-      operationId: embeddings
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/EmbeddingsRequest'
+                type: object
+      summary: Get logs for a deployment
+      tags:
+        - Deployments
+  /image-repositories:
+    get:
+      description: Retrieve all container image repositories available in your project
       responses:
-        '200':
-          description: '200'
+        "200":
+          description: List of repositories
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/EmbeddingsResponse'
-        '400':
-          description: 'BadRequest'
+                $ref: "#/components/schemas/RepositoryListResponse"
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                type: object
+      summary: Get the list of image repositories in your project
+      tags:
+        - Images
+  /image-repositories/{id}/images:
+    get:
+      description: Retrieve all container images (tags) available in a specific repository
+      parameters:
+        - description: Repository ID
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: List of images
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                $ref: "#/components/schemas/ImageListResponse"
+        "404":
+          description: Repository not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+                type: object
+      summary: Get the list of images available under a repository
+      tags:
+        - Images
+  /secrets:
+    get:
+      description: Retrieve all secrets in your project
+      responses:
+        "200":
+          description: List of secrets
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                $ref: "#/components/schemas/ListSecretsResponse"
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /models:
-    get:
-      tags: ['Models']
-      summary: List all models
-      description: Lists all of Together's open-source models
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            models = client.models.list()
-
-            for model in models:
-                print(model.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            models = client.models.list()
-
-            for model in models:
-                print(model.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const models = await client.models.list();
-
-            for (const model of models) {
-              console.log(model.id);
-            }
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const models = await client.models.list();
-
-            for (const model of models) {
-              console.log(model.id);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/models" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: models
-      parameters:
-        - name: dedicated
-          in: query
-          description: Filter models to only return dedicated models
-          schema:
-            type: boolean
+                type: object
+      summary: Get the list of project secrets
+      tags:
+        - Secrets
+    post:
+      description: Create a new secret to store sensitive configuration values
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateSecretRequest"
+        description: Secret configuration
+        required: true
       responses:
-        '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ModelInfoList'
-        '400':
-          description: 'BadRequest'
+        "200":
+          description: Secret created successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                $ref: "#/components/schemas/SecretResponseItem"
+        "400":
+          description: Invalid request
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+                type: object
+        "500":
+          description: Internal server error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                type: object
+      summary: Create a new secret
+      tags:
+        - Secrets
+  /secrets/{id}:
+    delete:
+      description: Delete an existing secret
+      parameters:
+        - description: Secret ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Secret deleted successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+                type: object
+        "404":
+          description: Secret not found
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-    post:
-      tags: ['Models']
-      summary: Upload a custom model or adapter
-      description: Upload a custom model or adapter from Hugging Face or S3
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.models.upload(
-                "My-Fine-Tuned-Model",
-                "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            )
-
-            print(response.job_id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.models.upload(
-                model_name="My-Fine-Tuned-Model",
-                model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            )
-
-            print(response.data.job_id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.models.upload({
-              model_name: "My-Fine-Tuned-Model",
-              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            })
-
-            console.log(response);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.models.upload({
-              model_name: "My-Fine-Tuned-Model",
-              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
-            })
-
-            console.log(response);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/models" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                    "model_name": "My-Fine-Tuned-Model",
-                    "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz"
-                  }'
-      operationId: uploadModel
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Delete a secret
+      tags:
+        - Secrets
+    get:
+      description: Retrieve details of a specific secret by its ID or name
+      parameters:
+        - description: Secret ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Secret details
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/SecretResponseItem"
+        "404":
+          description: Secret not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Get a secret by ID or name
+      tags:
+        - Secrets
+    patch:
+      description: Update an existing secret's value or metadata
+      parameters:
+        - description: Secret ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
       requestBody:
-        required: true
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/ModelUploadRequest'
+              $ref: "#/components/schemas/UpdateSecretRequest"
+        description: Updated secret configuration
+        required: true
       responses:
-        '200':
-          description: Model / adapter upload job created successfully
+        "200":
+          description: Secret updated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ModelUploadSuccessResponse'
-
-  /jobs/{jobId}:
+                $ref: "#/components/schemas/SecretResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                type: object
+        "404":
+          description: Secret not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Update a secret
+      tags:
+        - Secrets
+  /storage/{filename}:
     get:
-      tags: ['Jobs']
-      summary: Get job status
-      description: Get the status of a specific job
-      operationId: getJob
+      description: Download a file by redirecting to a signed URL
       parameters:
-        - name: jobId
+        - description: Filename
           in: path
+          name: filename
           required: true
           schema:
             type: string
-          description: The ID of the job to retrieve
-          example: job-a15dad11-8d8e-4007-97c5-a211304de284
       responses:
-        '200':
-          description: Job status retrieved successfully
+        "307":
+          description: Redirect to signed download URL
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/JobInfoSuccessResponse'
-
-  /jobs:
+                type: string
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "404":
+          description: File not found
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                additionalProperties:
+                  type: string
+                type: object
+      summary: Download a file
+      tags:
+        - files
+  /storage/volumes:
     get:
-      tags: ['Jobs']
-      summary: List all jobs
-      description: List all jobs and their statuses
-      operationId: listJobs
+      description: Retrieve all volumes in your project
       responses:
-        '200':
-          description: Jobs retrieved successfully
+        "200":
+          description: List of volumes
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/JobsInfoSuccessResponse'
-
-  /images/generations:
+                $ref: "#/components/schemas/ListVolumesResponse"
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Get the list of project volumes
+      tags:
+        - Volumes
     post:
-      tags: ['Images']
-      summary: Create image
-      description: Use an image model to generate an image for a given prompt.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.images.generate(
-                model="black-forest-labs/FLUX.1-schnell",
-                steps=4,
-                prompt="A cartoon of an astronaut riding a horse on the moon",
-            )
-
-            print(response.data[0].url)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.images.generate(
-                model="black-forest-labs/FLUX.1-schnell",
-                steps=4,
-                prompt="A cartoon of an astronaut riding a horse on the moon",
-            )
-
-            print(response.data[0].url)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.images.generate({
-              model: "black-forest-labs/FLUX.1-schnell",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
-
-            console.log(response.data[0].url);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const response = await client.images.generate({
-              model: "black-forest-labs/FLUX.1-schnell",
-              prompt: "A cartoon of an astronaut riding a horse on the moon",
-            });
-
-            console.log(response.data[0].url);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X POST "https://api.together.xyz/v1/images/generations" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "black-forest-labs/FLUX.1-schnell",
-                   "prompt": "A cartoon of an astronaut riding a horse on the moon"
-                 }'
+      description: Create a new volume to preload files in deployments
       requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateVolumeRequest"
+        description: Volume configuration
         required: true
+      responses:
+        "200":
+          description: Volume created successfully
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/VolumeResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Create a new volume
+      tags:
+        - Volumes
+  /storage/volumes/{id}:
+    delete:
+      description: Delete an existing volume
+      parameters:
+        - description: Volume ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Volume deleted successfully
+          content:
+            application/json:
+              schema:
+                type: object
+        "404":
+          description: Volume not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Delete a volume
+      tags:
+        - Volumes
+    get:
+      description: Retrieve details of a specific volume by its ID or name
+      parameters:
+        - description: Volume ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: Volume details
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/VolumeResponseItem"
+        "404":
+          description: Volume not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Get a volume by ID or name
+      tags:
+        - Volumes
+    patch:
+      description: Update an existing volume's configuration or contents
+      parameters:
+        - description: Volume ID or name
+          in: path
+          name: id
+          required: true
+          schema:
+            type: string
+      requestBody:
         content:
           application/json:
             schema:
-              type: object
-              required:
-                - prompt
-                - model
-              properties:
-                prompt:
-                  type: string
-                  description: A description of the desired images. Maximum length varies by model.
-                  example: cat floating in space, cinematic
-                model:
-                  type: string
-                  description: >
-                    The model to use for image generation.<br>
-                    <br>
-                    [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models)
-                  example: black-forest-labs/FLUX.1-schnell
-                  anyOf:
-                    - type: string
-                      enum:
-                        - black-forest-labs/FLUX.1-schnell-Free
-                        - black-forest-labs/FLUX.1-schnell
-                        - black-forest-labs/FLUX.1.1-pro
-                    - type: string
-                steps:
-                  type: integer
-                  default: 20
-                  description: Number of generation steps.
-                image_url:
-                  type: string
-                  description: URL of an image to use for image models that support it.
-                seed:
-                  type: integer
-                  description: Seed used for generation. Can be used to reproduce image generations.
-                n:
-                  type: integer
-                  default: 1
-                  description: Number of image results to generate.
-                height:
-                  type: integer
-                  default: 1024
-                  description: Height of the image to generate in number of pixels.
-                width:
-                  type: integer
-                  default: 1024
-                  description: Width of the image to generate in number of pixels.
-                negative_prompt:
-                  type: string
-                  description: The prompt or prompts not to guide the image generation.
-                response_format:
-                  type: string
-                  description: Format of the image response. Can be either a base64 string or a URL.
-                  enum:
-                    - base64
-                    - url
-                guidance_scale:
-                  type: number
-                  description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom.
-                  default: 3.5
-                output_format:
-                  type: string
-                  description: The format of the image response. Can be either be `jpeg` or `png`. Defaults to `jpeg`.
-                  default: jpeg
-                  enum:
-                    - jpeg
-                    - png
-                image_loras:
-                  description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image.
-                  type: array
-                  items:
-                    type: object
-                    required: [path, scale]
-                    properties:
-                      path:
-                        type: string
-                        description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA).
-                      scale:
-                        type: number
-                        description: The strength of the LoRA's influence. Most LoRA's recommend a value of 1.
-                reference_images:
-                  description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation.
-                  type: array
-                  items:
-                    type: string
-                    description: URL of a reference image to guide the image generation.
-                disable_safety_checker:
-                  type: boolean
-                  description: If true, disables the safety checker for image generation.
+              $ref: "#/components/schemas/UpdateVolumeRequest"
+        description: Updated volume configuration
+        required: true
       responses:
-        '200':
-          description: Image generated successfully
+        "200":
+          description: Volume updated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ImageResponse'
-  /files:
+                $ref: "#/components/schemas/VolumeResponseItem"
+        "400":
+          description: Invalid request
+          content:
+            application/json:
+              schema:
+                type: object
+        "404":
+          description: Volume not found
+          content:
+            application/json:
+              schema:
+                type: object
+        "500":
+          description: Internal server error
+          content:
+            application/json:
+              schema:
+                type: object
+      summary: Update a volume
+      tags:
+        - Volumes
+  /voices:
     get:
-      tags: ['Files']
-      summary: List all files
-      description: List the metadata for all uploaded data files.
+      tags: ['Voices']
+      summary: Fetch available voices for each model
+      description: Fetch available voices for each model
+      operationId: fetchVoices
+      responses:
+        '200':
+          description: Success
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListVoicesResponse'
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1093,10 +649,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.list()
+            response = client.audio.voices.list()
 
-            for file in response.data:
-                print(file.id)
+            print(response.data)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1107,10 +662,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.list()
+            response = client.audio.voices.list()
 
-            for file in response.data:
-                print(file.id)
+            print(response.data)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1120,11 +674,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.list();
+            const response = await client.audio.voices.list()
 
-            for (const file of response.data) {
-              console.log(file.id);
-            }
+            console.log(response.data);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1134,29 +686,17 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.list();
+            const response = await client.audio.voices.list()
 
-            for (const file of response.data) {
-              console.log(file.id);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/files" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      responses:
-        '200':
-          description: List of files
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FileList'
-  /files/{id}:
+            console.log(response.data);
+  /videos/{id}:
     get:
-      tags: ['Files']
-      summary: List file
-      description: List the metadata for a single uploaded data file.
+      tags: ['Video']
+      summary: Fetch video metadata
+      description: Fetch video metadata
+      servers:
+        - url: https://api.together.xyz/v2
+      operationId: retrieveVideo
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1169,9 +709,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = client.files.retrieve(id="file-id")
+            response = client.videos.retrieve(video_id)
 
-            print(file)
+            print(response.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1182,9 +722,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = client.files.retrieve(id="file-id")
+            response = client.videos.retrieve(video_id)
 
-            print(file)
+            print(response.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1194,9 +734,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const file = await client.files.retrieve("file-id");
+            const response = await client.videos.retrieve(videoId);
 
-            console.log(file);
+            console.log(response.status);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1206,32 +746,35 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const file = await client.files.retrieve("file-id");
+            const response = await client.videos.retrieve(videoId);
 
-            console.log(file);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/files/ID" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
+            console.log(response.status);
       parameters:
-        - name: id
-          in: path
-          required: true
+        - in: path
+          name: id
           schema:
             type: string
+          required: true
+          description: Identifier of the video, as returned in the create-video response.
       responses:
         '200':
-          description: File retrieved successfully
+          description: Success
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileResponse'
-    delete:
-      tags: ['Files']
-      summary: Delete a file
-      description: Delete a previously uploaded data file.
+                $ref: '#/components/schemas/VideoJob'
+        '400':
+          description: Invalid request parameters.
+        '404':
+          description: Video ID not found.
+  /videos:
+    post:
+      tags: ['Video']
+      summary: Create video
+      description: Create a video
+      operationId: createVideo
+      servers:
+        - url: https://api.together.xyz/v2
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1244,9 +787,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.delete(id="file-id")
+            response = client.videos.create(
+                model="together/video-model",
+                prompt="A cartoon of an astronaut riding a horse on the moon"
+            )
 
-            print(response)
+            print(response.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1257,9 +803,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.files.delete(id="file-id")
+            response = client.videos.create(
+                model="together/video-model",
+                prompt="A cartoon of an astronaut riding a horse on the moon"
+            )
 
-            print(response)
+            print(response.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1269,9 +818,12 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.delete("file-id");
+            const response = await client.videos.create({
+              model: "together/video-model",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(response);
+            console.log(response.id);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1281,32 +833,29 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.delete("file-id");
+            const response = await client.videos.create({
+              model: "together/video-model",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(response);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+            console.log(response.id);
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateVideoBody'
       responses:
         '200':
-          description: File deleted successfully
+          description: Success
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileDeleteResponse'
-  /files/{id}/content:
-    get:
-      tags: ['Files']
-      summary: Get file contents
-      description: Get the contents of a single uploaded data file.
+                $ref: '#/components/schemas/VideoJob'
+  /chat/completions:
+    post:
+      tags: ['Chat']
+      summary: Create chat completion
+      description: Query a chat model.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1319,9 +868,15 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = client.files.retrieve_content(id="file-id")
+            response = client.chat.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What are some fun things to do in New York?"},
+                ]
+            )
 
-            print(file.filename)
+            print(response.choices[0].message.content)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1332,9 +887,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            with client.files.with_streaming_response.content(id="file-id") as response:
-              for line in response.iter_lines():
-                print(line)
+            response = client.chat.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What are some fun things to do in New York?"},
+                ]
+            )
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1344,10 +903,15 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.content("file-id");
-            const content = await response.text();
+            const response = await client.chat.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              messages: [
+                { role: "system", content: "You are a helpful assistant." },
+                { role: "user", "content": "What are some fun things to do in New York?" },
+              ],
+            });
 
-            console.log(content);
+            console.log(response.choices[0].message?.content);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1357,40 +921,86 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.files.content("file-id");
-            const content = await response.text();
+            const response = await client.chat.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              messages: [
+                { role: "system", content: "You are a helpful assistant." },
+                { role: "user", "content": "What are some fun things to do in New York?" },
+              ],
+            });
 
-            console.log(content);
+            console.log(response.choices[0].message?.content);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/files/file-id/content" \
+            curl -X POST "https://api.together.xyz/v1/chat/completions" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                   "messages": [
+                     {"role": "system", "content": "You are a helpful assistant."},
+                     {"role": "user", "content": "What are some fun things to do in New York?"}
+                   ]
+                 }'
+      operationId: chat-completions
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ChatCompletionRequest'
       responses:
         '200':
-          description: File content retrieved successfully
+          description: 'Success'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileObject'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/ChatCompletionResponse'
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/ChatCompletionStream'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /files/upload:
+        '401':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /completions:
     post:
-      tags: ['Files']
-      summary: Upload a file
-      description: Upload a file with specified purpose, file name, and file type.
+      tags: ['Completion']
+      summary: Create completion
+      description: Query a language, code, or image model.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1403,11 +1013,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            current_dir = os.path.dirname(os.path.abspath(__file__))
-            file_path = os.path.join(current_dir, "data.jsonl")
-            file = client.files.upload(file=file_path)
-
-            print(file.id)
+            response = client.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                prompt="The largest city in France is",
+                max_tokens=1
+            )
+
+            print(response.choices[0].text)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1418,98 +1030,114 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            current_dir = os.path.dirname(os.path.abspath(__file__))
-            file_path = os.path.join(current_dir, "data.jsonl")
-            file = client.files.upload(file=file_path)
+            response = client.completions.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                prompt="The largest city in France is",
+                max_tokens=1
+            )
 
-            print(file.id)
+            print(response.choices[0].text)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
-            import { upload } from "together-ai/lib/upload"
-            import path from "path";
-            import { fileURLToPath } from "url";
+            import Together from "together-ai";
 
-            const __filename = fileURLToPath(import.meta.url);
-            const __dirname = path.dirname(__filename);
-            const filepath = path.join(__dirname, "data.jsonl");
-            const file = await upload(filepath);
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-            console.log(file.id);
+            const response = await client.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              prompt: "The largest city in France is",
+              max_tokens: 1,
+            });
+
+            console.log(response.choices[0].text);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
-            import { upload } from "together-ai/lib/upload"
-            import path from "path";
-            import { fileURLToPath } from "url";
+            import Together from "together-ai";
 
-            const __filename = fileURLToPath(import.meta.url);
-            const __dirname = path.dirname(__filename);
-            const filepath = path.join(__dirname, "data.jsonl");
-            const file = await upload(filepath);
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-            console.log(file.id);
+            const response = await client.completions.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              prompt: "The largest city in France is",
+              max_tokens: 1
+            });
+
+            console.log(response.choices[0].text);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/files/upload" \
+            curl -X POST "https://api.together.xyz/v1/completions" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -F "file=@/path/to/data.jsonl" \
-                 -F "file_name=data.jsonl" \
-                 -F "purpose=fine-tune"
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                   "prompt": "The largest city in France is",
+                   "max_tokens": 1
+                 }'
+      operationId: completions
       requestBody:
-        required: true
         content:
-          multipart/form-data:
+          application/json:
             schema:
-              type: object
-              required:
-                - purpose
-                - file_name
-                - file
-              properties:
-                purpose:
-                  $ref: '#/components/schemas/FilePurpose'
-                file_name:
-                  type: string
-                  description: The name of the file being uploaded
-                  example: 'dataset.csv'
-                file_type:
-                  $ref: '#/components/schemas/FileType'
-                file:
-                  type: string
-                  format: binary
-                  description: The content of the file being uploaded
+              $ref: '#/components/schemas/CompletionRequest'
       responses:
         '200':
-          description: File uploaded successfully
+          description: 'Completion generated successfully'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FileResponse'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/CompletionResponse'
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/CompletionStream'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '400':
-          description: Bad Request
+        '401':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: Unauthorized
+        '404':
+          description: 'NotFound'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /fine-tunes:
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /embeddings:
     post:
-      tags: ['Fine-tuning']
-      summary: Create job
-      description: Create a fine-tuning job with the provided model and training data.
+      tags: ['Embeddings']
+      summary: Create embedding
+      description: Query an embedding model for a given string of text.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1522,12 +1150,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-                training_file="file-id"
+            response = client.embeddings.create(
+                model="BAAI/bge-large-en-v1.5",
+                input="New York City",
             )
 
-            print(response)
+            print(response.data[0].embedding)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1538,12 +1166,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-                training_file="file-id"
+            response = client.embeddings.create(
+                model="BAAI/bge-large-en-v1.5",
+                input="New York City",
             )
 
-            print(response)
+            print(response.data[0].embedding)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1553,12 +1181,12 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-              training_file: "file-id",
+            const response = await client.embeddings.create({
+              model: "BAAI/bge-large-en-v1.5",
+              input: "New York City",
             });
 
-            console.log(response);
+            console.log(response.data[0].embedding);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1568,148 +1196,77 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-              training_file: "file-id",
+            const response = await client.embeddings.create({
+              model: "BAAI/bge-large-en-v1.5",
+              input: "New York City",
             });
 
-            console.log(response);
+            console.log(response.data[0].embedding);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/fine-tunes" \
+            curl -X POST "https://api.together.xyz/v1/embeddings" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
-                   "training_file": "file-id"
+                   "model": "BAAI/bge-large-en-v1.5",
+                   "input": "New York City"
                  }'
+      operationId: embeddings
       requestBody:
-        required: true
         content:
           application/json:
             schema:
-              type: object
-              required:
-                - training_file
-                - model
-              properties:
-                training_file:
-                  type: string
-                  description: File-ID of a training file uploaded to the Together API
-                validation_file:
-                  type: string
-                  description: File-ID of a validation file uploaded to the Together API
-                model:
-                  type: string
-                  description: Name of the base model to run fine-tune job on
-                n_epochs:
-                  type: integer
-                  default: 1
-                  description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting)
-                n_checkpoints:
-                  type: integer
-                  default: 1
-                  description: Number of intermediate model versions saved during training for evaluation
-                n_evals:
-                  type: integer
-                  default: 0
-                  description: Number of evaluations to be run on a given validation set during training
-                batch_size:
-                  oneOf:
-                    - type: integer
-                    - type: string
-                      enum:
-                        - max
-                  default: 'max'
-                  description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set.
-                learning_rate:
-                  type: number
-                  format: float
-                  default: 0.00001
-                  description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence)
-                lr_scheduler:
-                  type: object
-                  default: none
-                  $ref: '#/components/schemas/LRScheduler'
-                  description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training.
-                warmup_ratio:
-                  type: number
-                  format: float
-                  default: 0.0
-                  description: The percent of steps at the start of training to linearly increase the learning rate.
-                max_grad_norm:
-                  type: number
-                  format: float
-                  default: 1.0
-                  description: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
-                weight_decay:
-                  type: number
-                  format: float
-                  default: 0.0
-                  description: Weight decay. Regularization parameter for the optimizer.
-                suffix:
-                  type: string
-                  description: Suffix that will be added to your fine-tuned model name
-                wandb_api_key:
-                  type: string
-                  description: Integration key for tracking experiments and model metrics on W&B platform
-                wandb_base_url:
-                  type: string
-                  description: The base URL of a dedicated Weights & Biases instance.
-                wandb_project_name:
-                  type: string
-                  description: The Weights & Biases project for your run. If not specified, will use `together` as the project name.
-                wandb_name:
-                  type: string
-                  description: The Weights & Biases name for your run.
-                train_on_inputs:
-                  oneOf:
-                    - type: boolean
-                    - type: string
-                      enum:
-                        - auto
-                  type: boolean
-                  default: auto
-                  description: Whether to mask the user messages in conversational data or prompts in instruction data.
-                  deprecated: true
-                training_method:
-                  type: object
-                  oneOf:
-                    - $ref: '#/components/schemas/TrainingMethodSFT'
-                    - $ref: '#/components/schemas/TrainingMethodDPO'
-                  description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
-                training_type:
-                  type: object
-                  oneOf:
-                    - $ref: '#/components/schemas/FullTrainingType'
-                    - $ref: '#/components/schemas/LoRATrainingType'
-                from_checkpoint:
-                  type: string
-                  description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used.
-                from_hf_model:
-                  type: string
-                  description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size.
-                hf_model_revision:
-                  type: string
-                  description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit).
-                hf_api_token:
-                  type: string
-                  description: The API token for the Hugging Face Hub.
-                hf_output_repo_name:
-                  type: string
-                  description: The name of the Hugging Face repository to upload the fine-tuned model to.
+              $ref: '#/components/schemas/EmbeddingsRequest'
       responses:
         '200':
-          description: Fine-tuning job initiated successfully
+          description: 'Embeddings generated successfully'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneResponseTruncated'
+                $ref: '#/components/schemas/EmbeddingsResponse'
+        '400':
+          description: 'BadRequest'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /models:
     get:
-      tags: ['Fine-tuning']
-      summary: List all jobs
-      description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects.
+      tags: ['Models']
+      summary: List all models
+      description: Lists all of Together's open-source models
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1722,10 +1279,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.list()
+            models = client.models.list()
 
-            for fine_tune in response.data:
-                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
+            for model in models:
+                print(model.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1736,10 +1293,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.list()
+            models = client.models.list()
 
-            for fine_tune in response.data:
-                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
+            for model in models:
+                print(model.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1749,10 +1306,10 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.list();
+            const models = await client.models.list();
 
-            for (const fineTune of response.data) {
-              console.log(fineTune.id, fineTune.status);
+            for (const model of models) {
+              console.log(model.id);
             }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
@@ -1763,104 +1320,66 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.list();
+            const models = await client.models.list();
 
-            for (const fineTune of response.data) {
-              console.log(fineTune.id, fineTune.status);
+            for (const model of models) {
+              console.log(model.id);
             }
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes" \
+            curl "https://api.together.xyz/v1/models" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
+      operationId: models
+      parameters:
+        - name: dedicated
+          in: query
+          description: Filter models to only return dedicated models
+          schema:
+            type: boolean
       responses:
         '200':
-          description: List of fine-tune jobs
+          description: 'Models retrieved successfully'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneTruncatedList'
-  /fine-tunes/estimate-price:
-    post:
-      tags: ['Fine-tuning']
-      summary: Estimate price
-      description: Estimate the price of a fine-tuning job.
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              type: object
-              required:
-                - training_file
-              properties:
-                training_file:
-                  type: string
-                  description: File-ID of a training file uploaded to the Together API
-                validation_file:
-                  type: string
-                  description: File-ID of a validation file uploaded to the Together API
-                model:
-                  type: string
-                  description: Name of the base model to run fine-tune job on
-                n_epochs:
-                  type: integer
-                  default: 1
-                  description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting)
-                n_evals:
-                  type: integer
-                  default: 0
-                  description: Number of evaluations to be run on a given validation set during training
-                training_method:
-                  type: object
-                  oneOf:
-                    - $ref: '#/components/schemas/TrainingMethodSFT'
-                    - $ref: '#/components/schemas/TrainingMethodDPO'
-                  description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
-                training_type:
-                  type: object
-                  oneOf:
-                    - $ref: '#/components/schemas/FullTrainingType'
-                    - $ref: '#/components/schemas/LoRATrainingType'
-                from_checkpoint:
-                  type: string
-                  description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used.
-      responses:
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/ModelInfoList'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '200':
-          description: Price estimated successfully
+        '401':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                type: object
-                properties:
-                  estimated_total_price:
-                    type: number
-                    description: The price of the fine-tuning job
-                  allowed_to_proceed:
-                    type: boolean
-                    description: Whether the user is allowed to proceed with the fine-tuning job
-                    example: true
-                  user_limit:
-                    type: number
-                    description: The user's credit limit in dollars
-                  estimated_train_token_count:
-                    type: number
-                    description: The estimated number of tokens to be trained
-                  estimated_eval_token_count:
-                    type: number
-                    description: The estimated number of tokens for evaluation
-  /fine-tunes/{id}:
-    get:
-      tags: ['Fine-tuning']
-      summary: List job
-      description: List the metadata for a single fine-tuning job.
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'NotFound'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+    post:
+      tags: ['Models']
+      summary: Upload a custom model or adapter
+      description: Upload a custom model or adapter from Hugging Face or S3
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1873,9 +1392,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            fine_tune = client.fine_tuning.retrieve(id="ft-id")
+            response = client.models.upload(
+                "My-Fine-Tuned-Model",
+                "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
+            )
 
-            print(fine_tune)
+            print(response.job_id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1886,9 +1408,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            fine_tune = client.fine_tuning.retrieve(id="ft-id")
+            response = client.models.upload(
+                model_name="My-Fine-Tuned-Model",
+                model_source="https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
+            )
 
-            print(fine_tune)
+            print(response.data.job_id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1898,9 +1423,12 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const fineTune = await client.fineTuning.retrieve("ft-id");
+            const response = await client.models.upload({
+              model_name: "My-Fine-Tuned-Model",
+              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
+            });
 
-            console.log(fineTune);
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1910,32 +1438,78 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const fineTune = await client.fineTuning.retrieve("ft-id");
+            const response = await client.models.upload({
+              model_name: "My-Fine-Tuned-Model",
+              model_source: "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz",
+            });
 
-            console.log(fineTune);
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id" \
+            curl -X POST "https://api.together.xyz/v1/models" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
-      responses:
-        '200':
-          description: Fine-tune job details retrieved successfully
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model_name": "My-Fine-Tuned-Model",
+                   "model_source": "https://ml-models.s3.us-west-2.amazonaws.com/models/my-fine-tuned-model.tar.gz"
+                 }'
+      operationId: uploadModel
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ModelUploadRequest'
+      responses:
+        '200':
+          description: Model / adapter upload job created successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneResponse'
-    delete:
-      tags: ['Fine-tuning']
-      summary: Delete a fine-tune job
-      description: Delete a fine-tuning job.
+                $ref: '#/components/schemas/ModelUploadSuccessResponse'
+
+  /jobs/{jobId}:
+    get:
+      tags: ['Jobs']
+      summary: Get job status
+      description: Get the status of a specific job
+      operationId: getJob
+      parameters:
+        - name: jobId
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the job to retrieve
+          example: job-a15dad11-8d8e-4007-97c5-a211304de284
+      responses:
+        '200':
+          description: Job status retrieved successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/JobInfoSuccessResponse'
+
+  /jobs:
+    get:
+      tags: ['Jobs']
+      summary: List all jobs
+      description: List all jobs and their statuses
+      operationId: listJobs
+      responses:
+        '200':
+          description: Jobs retrieved successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/JobsInfoSuccessResponse'
+
+  /images/generations:
+    post:
+      tags: ['Images']
+      summary: Create image
+      description: Use an image model to generate an image for a given prompt.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -1948,9 +1522,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.delete(id="ft-id")
+            response = client.images.generate(
+                model="black-forest-labs/FLUX.1-schnell",
+                steps=4,
+                prompt="A cartoon of an astronaut riding a horse on the moon",
+            )
 
-            print(response)
+            print(response.data[0].url)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -1961,9 +1539,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.delete(id="ft-id")
+            response = client.images.generate(
+                model="black-forest-labs/FLUX.1-schnell",
+                steps=4,
+                prompt="A cartoon of an astronaut riding a horse on the moon",
+            )
 
-            print(response)
+            print(response.data[0].url)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -1973,9 +1555,12 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.delete("ft-id");
+            const response = await client.images.generate({
+              model: "black-forest-labs/FLUX.1-schnell",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(response);
+            console.log(response.data[0].url);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -1985,50 +1570,126 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.delete("ft-id");
+            const response = await client.images.generate({
+              model: "black-forest-labs/FLUX.1-schnell",
+              prompt: "A cartoon of an astronaut riding a horse on the moon",
+            });
 
-            console.log(response);
+            console.log(response.data[0].url);
         - lang: Shell
           label: cURL
           source: |
-            curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \
+            curl -X POST "https://api.together.xyz/v1/images/generations" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
-        - name: force
-          in: query
-          schema:
-            type: boolean
-            default: false
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "black-forest-labs/FLUX.1-schnell",
+                   "prompt": "A cartoon of an astronaut riding a horse on the moon"
+                 }'
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - prompt
+                - model
+              properties:
+                prompt:
+                  type: string
+                  description: A description of the desired images. Maximum length varies by model.
+                  example: cat floating in space, cinematic
+                model:
+                  type: string
+                  description: >
+                    The model to use for image generation.<br>
+                    <br>
+                    [See all of Together AI's image models](https://docs.together.ai/docs/serverless-models#image-models)
+                  example: black-forest-labs/FLUX.1-schnell
+                  anyOf:
+                    - type: string
+                      enum:
+                        - black-forest-labs/FLUX.1-schnell-Free
+                        - black-forest-labs/FLUX.1-schnell
+                        - black-forest-labs/FLUX.1.1-pro
+                    - type: string
+                steps:
+                  type: integer
+                  default: 20
+                  description: Number of generation steps.
+                image_url:
+                  type: string
+                  description: URL of an image to use for image models that support it.
+                seed:
+                  type: integer
+                  description: Seed used for generation. Can be used to reproduce image generations.
+                n:
+                  type: integer
+                  default: 1
+                  description: Number of image results to generate.
+                height:
+                  type: integer
+                  default: 1024
+                  description: Height of the image to generate in number of pixels.
+                width:
+                  type: integer
+                  default: 1024
+                  description: Width of the image to generate in number of pixels.
+                negative_prompt:
+                  type: string
+                  description: The prompt or prompts not to guide the image generation.
+                response_format:
+                  type: string
+                  description: Format of the image response. Can be either a base64 string or a URL.
+                  enum:
+                    - base64
+                    - url
+                guidance_scale:
+                  type: number
+                  description: Adjusts the alignment of the generated image with the input prompt. Higher values (e.g., 8-10) make the output more faithful to the prompt, while lower values (e.g., 1-5) encourage more creative freedom.
+                  default: 3.5
+                output_format:
+                  type: string
+                  description: The format of the image response. Can be either `jpeg` or `png`. Defaults to `jpeg`.
+                  default: jpeg
+                  enum:
+                    - jpeg
+                    - png
+                image_loras:
+                  description: An array of objects that define LoRAs (Low-Rank Adaptations) to influence the generated image.
+                  type: array
+                  items:
+                    type: object
+                    required: [path, scale]
+                    properties:
+                      path:
+                        type: string
+                        description: The URL of the LoRA to apply (e.g. https://huggingface.co/strangerzonehf/Flux-Midjourney-Mix2-LoRA).
+                      scale:
+                        type: number
+                        description: The strength of the LoRA's influence. Most LoRAs recommend a value of 1.
+                reference_images:
+                  description: An array of image URLs that guide the overall appearance and style of the generated image. These reference images influence the visual characteristics consistently across the generation.
+                  type: array
+                  items:
+                    type: string
+                    description: URL of a reference image to guide the image generation.
+                disable_safety_checker:
+                  type: boolean
+                  description: If true, disables the safety checker for image generation.
       responses:
         '200':
-          description: Fine-tune job deleted successfully
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/FinetuneDeleteResponse'
-        '404':
-          description: Fine-tune job not found
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: Internal server error
+          description: Image generated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-  /fine-tunes/{id}/events:
+                $ref: '#/components/schemas/ImageResponse'
+  /files:
     get:
-      tags: ['Fine-tuning']
-      summary: List job events
-      description: List the events for a single fine-tuning job.
+      tags: ['Files']
+      summary: List all files
+      description: List the metadata for all uploaded data files.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2041,9 +1702,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            events = client.fine_tuning.list_events(id="ft-id")
+            response = client.files.list()
 
-            print(events)
+            for file in response.data:
+                print(file.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2054,10 +1716,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.list_events(id="ft-id")
+            response = client.files.list()
 
-            for event in response.data:
-                print(event)
+            for file in response.data:
+                print(file.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2067,9 +1729,11 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const events = await client.fineTuning.listEvents("ft-id");
+            const response = await client.files.list();
 
-            console.log(events);
+            for (const file of response.data) {
+              console.log(file.id);
+            }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2079,33 +1743,29 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const events = await client.fineTuning.listEvents("ft-id");
+            const response = await client.files.list();
 
-            console.log(events);
+            for (const file of response.data) {
+              console.log(file.id);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \
+            curl "https://api.together.xyz/v1/files" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
       responses:
         '200':
-          description: List of fine-tune events
+          description: List of files
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneListEvents'
-  /fine-tunes/{id}/checkpoints:
+                $ref: '#/components/schemas/FileList'
+  /files/{id}:
     get:
-      tags: ['Fine-tuning']
-      summary: List checkpoints
-      description: List the checkpoints for a single fine-tuning job.
+      tags: ['Files']
+      summary: Retrieve a file
+      description: Retrieve the metadata for a single uploaded data file.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2118,9 +1778,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+            file = client.files.retrieve(id="file-id")
 
-            print(checkpoints)
+            print(file)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2131,9 +1791,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
+            file = client.files.retrieve(id="file-id")
 
-            print(checkpoints)
+            print(file)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2143,9 +1803,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
+            const file = await client.files.retrieve("file-id");
 
-            console.log(checkpoints);
+            console.log(file);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2155,13 +1815,13 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
+            const file = await client.files.retrieve("file-id");
 
-            console.log(checkpoints);
+            console.log(file);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \
+            curl "https://api.together.xyz/v1/files/file-id" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
       parameters:
@@ -2172,16 +1832,15 @@ paths:
             type: string
       responses:
         '200':
-          description: List of fine-tune checkpoints
+          description: File retrieved successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneListCheckpoints'
-  /finetune/download:
-    get:
-      tags: ['Fine-tuning']
-      summary: Download model
-      description: Receive a compressed fine-tuned model or checkpoint.
+                $ref: '#/components/schemas/FileResponse'
+    delete:
+      tags: ['Files']
+      summary: Delete a file
+      description: Delete a previously uploaded data file.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2194,8 +1853,7 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            # This will download the content to a location on disk
-            response = client.fine_tuning.download(id="ft-id")
+            response = client.files.delete(id="file-id")
 
             print(response)
         - lang: Python
@@ -2208,13 +1866,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            # Using `with_streaming_response` gives you control to do what you want with the response.
-            stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id")
-
-            with stream as response:
-                for line in response.iter_lines():
-                    print(line)
+            response = client.files.delete(id="file-id")
 
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2224,11 +1878,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.content({
-              ft_id: "ft-id",
-            });
+            const response = await client.files.delete("file-id");
 
-            console.log(await response.blob());
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2238,56 +1890,32 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.content({
-              ft_id: "ft-id",
-            });
+            const response = await client.files.delete("file-id");
 
-            console.log(await response.blob());
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged"
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
+            curl -X "DELETE" "https://api.together.xyz/v1/files/file-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY"
       parameters:
-        - in: query
-          name: ft_id
-          schema:
-            type: string
+        - name: id
+          in: path
           required: true
-          description: Fine-tune ID to download. A string that starts with `ft-`.
-        - in: query
-          name: checkpoint_step
-          schema:
-            type: integer
-          required: false
-          description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set.
-        - in: query
-          name: checkpoint
           schema:
             type: string
-            enum:
-              - merged
-              - adapter
-              - model_output_path
-          description: Specifies checkpoint type to download - `merged` vs `adapter`. This field is required if the checkpoint_step is not set.
       responses:
         '200':
-          description: Successfully downloaded the fine-tuned model or checkpoint.
+          description: File deleted successfully
           content:
-            application/octet-stream:
+            application/json:
               schema:
-                type: string
-                format: binary
-        '400':
-          description: Invalid request parameters.
-        '404':
-          description: Fine-tune ID not found.
-  /fine-tunes/{id}/cancel:
-    post:
-      tags: ['Fine-tuning']
-      summary: Cancel job
-      description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object.
+                $ref: '#/components/schemas/FileDeleteResponse'
+  /files/{id}/content:
+    get:
+      tags: ['Files']
+      summary: Get file contents
+      description: Get the contents of a single uploaded data file.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2300,9 +1928,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.cancel(id="ft-id")
+            file = client.files.retrieve_content(id="file-id")
 
-            print(response)
+            print(file.filename)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2313,9 +1941,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.fine_tuning.cancel(id="ft-id")
-
-            print(response)
+            with client.files.with_streaming_response.content(id="file-id") as response:
+              for line in response.iter_lines():
+                print(line)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -2325,9 +1953,10 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.cancel("ft-id");
+            const response = await client.files.content("file-id");
+            const content = await response.text();
 
-            console.log(response);
+            console.log(content);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -2337,38 +1966,40 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.fineTuning.cancel("ft-id");
+            const response = await client.files.content("file-id");
+            const content = await response.text();
 
-            console.log(response);
+            console.log(content);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \
+            curl "https://api.together.xyz/v1/files/file-id/content" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
       parameters:
-        - in: path
-          name: id
+        - name: id
+          in: path
+          required: true
           schema:
             type: string
-          required: true
-          description: Fine-tune ID to cancel. A string that starts with `ft-`.
       responses:
         '200':
-          description: Successfully cancelled the fine-tuning job.
+          description: File content retrieved successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/FinetuneResponseTruncated'
-        '400':
-          description: Invalid request parameters.
-        '404':
-          description: Fine-tune ID not found.
-  /rerank:
+                $ref: '#/components/schemas/FileObject'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /files/upload:
     post:
-      tags: ['Rerank']
-      summary: Create a rerank request
-      description: Query a reranker model
+      tags: ['Files']
+      summary: Upload a file
+      description: Upload a file with specified purpose, file name, and file type.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2381,35 +2012,11 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            documents = [
-                {
-                    "title": "Llama",
-                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-                },
-                {
-                    "title": "Panda",
-                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-                },
-                {
-                    "title": "Guanaco",
-                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-                },
-                {
-                    "title": "Wild Bactrian camel",
-                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-                }
-            ]
-
-            response = client.rerank.create(
-                model="Salesforce/Llama-Rank-v1",
-                query="What animals can I find near Peru?",
-                documents=documents,
-            )
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            file_path = os.path.join(current_dir, "data.jsonl")
+            file = client.files.upload(file=file_path)
 
-            for result in response.results:
-                print(f"Rank: {result.index + 1}")
-                print(f"Title: {documents[result.index]['title']}")
-                print(f"Text: {documents[result.index]['text']}")
+            print(file.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2420,190 +2027,98 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            documents = [
-                {
-                    "title": "Llama",
-                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-                },
-                {
-                    "title": "Panda",
-                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-                },
-                {
-                    "title": "Guanaco",
-                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-                },
-                {
-                    "title": "Wild Bactrian camel",
-                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-                }
-            ]
-
-            response = client.rerank.create(
-                model="Salesforce/Llama-Rank-v1",
-                query="What animals can I find near Peru?",
-                documents=documents,
-            )
+            current_dir = os.path.dirname(os.path.abspath(__file__))
+            file_path = os.path.join(current_dir, "data.jsonl")
+            file = client.files.upload(file=file_path)
 
-            for result in response.results:
-                print(f"Rank: {result.index + 1}")
-                print(f"Title: {documents[result.index]['title']}")
-                print(f"Text: {documents[result.index]['text']}")
+            print(file.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
-            import Together from "together-ai";
+            import { upload } from "together-ai/lib/upload"
+            import path from "path";
+            import { fileURLToPath } from "url";
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+            const __filename = fileURLToPath(import.meta.url);
+            const __dirname = path.dirname(__filename);
+            const filepath = path.join(__dirname, "data.jsonl");
+            const file = await upload(filepath);
 
-            const documents = [{
-              "title": "Llama",
-              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-            },
-            {
-              "title": "Panda",
-              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-            },
-            {
-              "title": "Guanaco",
-              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-            },
-            {
-              "title": "Wild Bactrian camel",
-              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-            }];
-
-            const response = await client.rerank.create({
-              model: "Salesforce/Llama-Rank-v1",
-              query: "What animals can I find near Peru?",
-              documents,
-            });
-
-            for (const result of response.results) {
-              console.log(`Rank: ${result.index + 1}`);
-              console.log(`Title: ${documents[result.index].title}`);
-              console.log(`Text: ${documents[result.index].text}`);
-            }
+            console.log(file.id);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const documents = [{
-              "title": "Llama",
-              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-            },
-            {
-              "title": "Panda",
-              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-            },
-            {
-              "title": "Guanaco",
-              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-            },
-            {
-              "title": "Wild Bactrian camel",
-              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-            }];
+            import { upload } from "together-ai/lib/upload"
+            import path from "path";
+            import { fileURLToPath } from "url";
 
-            const response = await client.rerank.create({
-              model: "Salesforce/Llama-Rank-v1",
-              query: "What animals can I find near Peru?",
-              documents,
-            });
+            const __filename = fileURLToPath(import.meta.url);
+            const __dirname = path.dirname(__filename);
+            const filepath = path.join(__dirname, "data.jsonl");
+            const file = await upload(filepath);
 
-            for (const result of response.results) {
-              console.log(`Rank: ${result.index + 1}`);
-              console.log(`Title: ${documents[result.index].title}`);
-              console.log(`Text: ${documents[result.index].text}`);
-            }
+            console.log(file.id);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/rerank" \
+            curl "https://api.together.xyz/v1/files/upload" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "Salesforce/Llama-Rank-v1",
-                   "query": "What animals can I find near Peru?",
-                   "documents": [{
-                      "title": "Llama",
-                      "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
-                    },
-                    {
-                      "title": "Panda",
-                      "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
-                    },
-                    {
-                      "title": "Guanaco",
-                      "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
-                    },
-                    {
-                      "title": "Wild Bactrian camel",
-                      "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
-                    }]
-                 }'
-      operationId: rerank
+                 -F "file=@/path/to/data.jsonl" \
+                 -F "file_name=data.jsonl" \
+                 -F "purpose=fine-tune"
       requestBody:
+        required: true
         content:
-          application/json:
+          multipart/form-data:
             schema:
-              $ref: '#/components/schemas/RerankRequest'
+              type: object
+              required:
+                - purpose
+                - file_name
+                - file
+              properties:
+                purpose:
+                  $ref: '#/components/schemas/FilePurpose'
+                file_name:
+                  type: string
+                  description: The name of the file being uploaded
+                  example: 'dataset.csv'
+                file_type:
+                  $ref: '#/components/schemas/FileType'
+                file:
+                  type: string
+                  format: binary
+                  description: The content of the file being uploaded
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/RerankResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'NotFound'
+          description: File uploaded successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+                $ref: '#/components/schemas/FileResponse'
+        '500':
+          description: Internal Server Error
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '503':
-          description: 'Overloaded'
+        '400':
+          description: Bad Request
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '504':
-          description: 'Timeout'
+        '401':
+          description: Unauthorized
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-      deprecated: false
-  /audio/speech:
+  /fine-tunes:
     post:
-      tags: ['Audio']
-      summary: Create audio generation request
-      description: Generate audio from input text
+      tags: ['Fine-tuning']
+      summary: Create job
+      description: Create a fine-tuning job with the provided model and training data.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -2616,13 +2131,12 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.audio.speech.create(
-                model="cartesia/sonic-2",
-                input="The quick brown fox jumps over the lazy dog.",
-                voice="laidback woman",
+            response = client.fine_tuning.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                training_file="file-id"
             )
 
-            response.stream_to_file("audio.wav")
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -2633,545 +2147,331 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.audio.speech.with_streaming_response.create(
-                model="cartesia/sonic-2",
-                input="The quick brown fox jumps over the lazy dog.",
-                voice="laidback woman",
+            response = client.fine_tuning.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                training_file="file-id"
             )
 
-            with response as stream:
-              stream.stream_to_file("audio.wav")
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
-            import { createWriteStream } from "fs";
-            import { join } from "path";
-            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.audio.speech.create({
-              model: "cartesia/sonic-2",
-              input: "The quick brown fox jumps over the lazy dog.",
-              voice: "laidback woman",
+            const response = await client.fineTuning.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+              training_file: "file-id",
             });
 
-            const filepath = join(process.cwd(), "audio.wav");
-            const writeStream = createWriteStream(filepath);
-
-            if (response.body) {
-              await pipeline(response.body, writeStream);
-            }
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
-            import { createWriteStream } from "fs";
-            import { join } from "path";
-            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.audio.speech.create({
-              model: "cartesia/sonic-2",
-              input: "The quick brown fox jumps over the lazy dog.",
-              voice: "laidback woman",
+            const response = await client.fineTuning.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+              training_file: "file-id",
             });
 
-            const filepath = join(process.cwd(), "audio.wav");
-            const writeStream = createWriteStream(filepath);
-
-            if (response.body) {
-              await pipeline(response.body, writeStream);
-            }
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/audio/speech" \
+            curl -X POST "https://api.together.xyz/v1/fine-tunes" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
-                   "model": "cartesia/sonic-2",
-                   "input": "The quick brown fox jumps over the lazy dog.",
-                   "voice": "laidback woman"
-                 }' \
-                 --output audio.wav
-      operationId: audio-speech
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
+                   "training_file": "file-id"
+                 }'
       requestBody:
+        required: true
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/AudioSpeechRequest'
-      responses:
-        '200':
-          description: 'OK'
-          content:
-            application/octet-stream:
-              schema:
-                type: string
-                format: binary
-            audio/wav:
-              schema:
-                type: string
-                format: binary
-            audio/mpeg:
-              schema:
-                type: string
-                format: binary
-            text/event-stream:
-              schema:
-                $ref: '#/components/schemas/AudioSpeechStreamResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-  /audio/speech/websocket:
-    get:
-      tags: ['Audio']
-      summary: Real-time text-to-speech via WebSocket
-      description: |
-        Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication.
-
-        **Connection Setup:**
-        - Protocol: WebSocket (wss://)
-        - Authentication: Pass API key as Bearer token in Authorization header
-        - Parameters: Sent as query parameters (model, voice, max_partial_length)
-
-        **Client Events:**
-        - `tts_session.updated`: Update session parameters like voice
-          ```json
-          {
-            "type": "tts_session.updated",
-            "session": {
-              "voice": "tara"
-            }
-          }
-          ```
-        - `input_text_buffer.append`: Send text chunks for TTS generation
-          ```json
-          {
-            "type": "input_text_buffer.append",
-            "text": "Hello, this is a test."
-          }
-          ```
-        - `input_text_buffer.clear`: Clear the buffered text
-          ```json
-          {
-            "type": "input_text_buffer.clear"
-          }
-          ```
-        - `input_text_buffer.commit`: Signal end of text input and process remaining text
-          ```json
-          {
-            "type": "input_text_buffer.commit"
-          }
-          ```
-
-        **Server Events:**
-        - `session.created`: Initial session confirmation (sent first)
-          ```json
-          {
-            "event_id": "evt_123456",
-            "type": "session.created",
-            "session": {
-              "id": "session-id",
-              "object": "realtime.tts.session",
-              "modalities": ["text", "audio"],
-              "model": "hexgrad/Kokoro-82M",
-              "voice": "tara"
-            }
-          }
-          ```
-        - `conversation.item.input_text.received`: Acknowledgment that text was received
-          ```json
-          {
-            "type": "conversation.item.input_text.received",
-            "text": "Hello, this is a test."
-          }
-          ```
-        - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data
-          ```json
-          {
-            "type": "conversation.item.audio_output.delta",
-            "item_id": "tts_1",
-            "delta": "<base64_encoded_audio_chunk>"
-          }
-          ```
-        - `conversation.item.audio_output.done`: Audio generation complete for an item
-          ```json
-          {
-            "type": "conversation.item.audio_output.done",
-            "item_id": "tts_1"
-          }
-          ```
-        - `conversation.item.tts.failed`: Error occurred
-          ```json
-          {
-            "type": "conversation.item.tts.failed",
-            "error": {
-              "message": "Error description",
-              "type": "invalid_request_error",
-              "param": null,
-              "code": "invalid_api_key"
-            }
-          }
-          ```
-
-        **Text Processing:**
-        - Partial text (no sentence ending) is held in buffer until:
-          - We believe that the text is complete enough to be processed for TTS generation
-          - The partial text exceeds `max_partial_length` characters (default: 250)
-          - The `input_text_buffer.commit` event is received
+              type: object
+              required:
+                - training_file
+                - model
+              properties:
+                training_file:
+                  type: string
+                  description: File-ID of a training file uploaded to the Together API
+                validation_file:
+                  type: string
+                  description: File-ID of a validation file uploaded to the Together API
+                model:
+                  type: string
+                  description: Name of the base model to run fine-tune job on
+                n_epochs:
+                  type: integer
+                  default: 1
+                  description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting)
+                n_checkpoints:
+                  type: integer
+                  default: 1
+                  description: Number of intermediate model versions saved during training for evaluation
+                n_evals:
+                  type: integer
+                  default: 0
+                  description: Number of evaluations to be run on a given validation set during training
+                batch_size:
+                  oneOf:
+                    - type: integer
+                    - type: string
+                      enum:
+                        - max
+                  default: 'max'
+                  description: Number of training examples processed together (larger batches use more memory but may train faster). Defaults to "max". We use training optimizations like packing, so the effective batch size may be different than the value you set.
+                learning_rate:
+                  type: number
+                  format: float
+                  default: 0.00001
+                  description: Controls how quickly the model adapts to new information (too high may cause instability, too low may slow convergence)
+                lr_scheduler:
+                  # $ref is wrapped in allOf so the sibling description is not ignored (OpenAPI 3.0 drops $ref siblings); the former `default: none` was a string, not a valid object value.
+                  allOf:
+                    - $ref: '#/components/schemas/LRScheduler'
+                  description: The learning rate scheduler to use. It specifies how the learning rate is adjusted during training.
+                warmup_ratio:
+                  type: number
+                  format: float
+                  default: 0.0
+                  description: The percent of steps at the start of training to linearly increase the learning rate.
+                max_grad_norm:
+                  type: number
+                  format: float
+                  default: 1.0
+                  description: Max gradient norm to be used for gradient clipping. Set to 0 to disable.
+                weight_decay:
+                  type: number
+                  format: float
+                  default: 0.0
+                  description: Weight decay. Regularization parameter for the optimizer.
+                suffix:
+                  type: string
+                  description: Suffix that will be added to your fine-tuned model name
+                wandb_api_key:
+                  type: string
+                  description: Integration key for tracking experiments and model metrics on W&B platform
+                wandb_base_url:
+                  type: string
+                  description: The base URL of a dedicated Weights & Biases instance.
+                wandb_project_name:
+                  type: string
+                  description: The Weights & Biases project for your run. If not specified, will use `together` as the project name.
+                wandb_name:
+                  type: string
+                  description: The Weights & Biases name for your run.
+                train_on_inputs:
+                  # No sibling `type` here: the value is either a boolean or the literal string "auto" (the default), so a fixed `type: boolean` would reject the default.
+                  oneOf:
+                    - type: boolean
+                    - type: string
+                      enum:
+                        - auto
+                  default: auto
+                  description: Whether to mask the user messages in conversational data or prompts in instruction data.
+                  deprecated: true
+                training_method:
+                  type: object
+                  oneOf:
+                    - $ref: '#/components/schemas/TrainingMethodSFT'
+                    - $ref: '#/components/schemas/TrainingMethodDPO'
+                  description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
+                training_type:
+                  type: object
+                  oneOf:
+                    - $ref: '#/components/schemas/FullTrainingType'
+                    - $ref: '#/components/schemas/LoRATrainingType'
+                multimodal_params:
+                  $ref: '#/components/schemas/MultimodalParams'
+                from_checkpoint:
+                  type: string
+                  description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used.
+                from_hf_model:
+                  type: string
+                  description: The Hugging Face Hub repo to start training from. Should be as close as possible to the base model (specified by the `model` argument) in terms of architecture and size.
+                hf_model_revision:
+                  type: string
+                  description: The revision of the Hugging Face Hub model to continue training from. E.g., hf_model_revision=main (default, used if the argument is not provided) or hf_model_revision='607a30d783dfa663caf39e06633721c8d4cfcd7e' (specific commit).
+                hf_api_token:
+                  type: string
+                  description: The API token for the Hugging Face Hub.
+                hf_output_repo_name:
+                  type: string
+                  description: The name of the Hugging Face repository to upload the fine-tuned model to.
+      responses:
+        '200':
+          description: Fine-tuning job initiated successfully
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/FinetuneResponseTruncated'
+    get:
+      tags: ['Fine-tuning']
+      summary: List all jobs
+      description: List the metadata for all fine-tuning jobs. Returns a list of FinetuneResponseTruncated objects.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
-        **Audio Format:**
-        - Format: WAV (PCM s16le)
-        - Sample Rate: 24000 Hz
-        - Encoding: Base64
-        - Delivered via `conversation.item.audio_output.delta` events
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-        **Error Codes:**
-        - `invalid_api_key`: Invalid API key provided (401)
-        - `missing_api_key`: Authorization header missing (401)
-        - `model_not_available`: Invalid or unavailable model (400)
-        - Invalid text format errors (400)
+            response = client.fine_tuning.list()
 
-      operationId: realtime-tts
-      x-codeSamples:
+            for fine_tune in response.data:
+                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
         - lang: Python
-          label: Python WebSocket Client
+          label: Together AI SDK (v2)
           source: |
-            import asyncio
-            import websockets
-            import json
-            import base64
+            from together import Together
             import os
 
-            async def generate_speech():
-                api_key = os.environ.get("TOGETHER_API_KEY")
-                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara"
-
-                headers = {
-                    "Authorization": f"Bearer {api_key}"
-                }
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-                async with websockets.connect(url, additional_headers=headers) as ws:
-                    # Wait for session created
-                    session_msg = await ws.recv()
-                    session_data = json.loads(session_msg)
-                    print(f"Session created: {session_data['session']['id']}")
+            response = client.fine_tuning.list()
 
-                    # Send text for TTS
-                    text_chunks = [
-                        "Hello, this is a test.",
-                        "This is the second sentence.",
-                        "And this is the final one."
-                    ]
-
-                    async def send_text():
-                        for chunk in text_chunks:
-                            await ws.send(json.dumps({
-                                "type": "input_text_buffer.append",
-                                "text": chunk
-                            }))
-                            await asyncio.sleep(0.5)  # Simulate typing
-
-                        # Commit to process any remaining text
-                        await ws.send(json.dumps({
-                            "type": "input_text_buffer.commit"
-                        }))
-
-                    async def receive_audio():
-                        audio_data = bytearray()
-                        async for message in ws:
-                            data = json.loads(message)
-
-                            if data["type"] == "conversation.item.input_text.received":
-                                print(f"Text received: {data['text']}")
-                            elif data["type"] == "conversation.item.audio_output.delta":
-                                # Decode base64 audio chunk
-                                audio_chunk = base64.b64decode(data['delta'])
-                                audio_data.extend(audio_chunk)
-                                print(f"Received audio chunk for item {data['item_id']}")
-                            elif data["type"] == "conversation.item.audio_output.done":
-                                print(f"Audio generation complete for item {data['item_id']}")
-                            elif data["type"] == "conversation.item.tts.failed":
-                                error = data.get("error", {})
-                                print(f"Error: {error.get('message')}")
-                                break
-
-                        # Save the audio to a file
-                        with open("output.wav", "wb") as f:
-                            f.write(audio_data)
-                        print("Audio saved to output.wav")
-
-                    # Run send and receive concurrently
-                    await asyncio.gather(send_text(), receive_audio())
-
-            asyncio.run(generate_speech())
-        - lang: JavaScript
-          label: Node.js WebSocket Client
-          source: |
-            import WebSocket from 'ws';
-            import fs from 'fs';
-
-            const apiKey = process.env.TOGETHER_API_KEY;
-            const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara';
-
-            const ws = new WebSocket(url, {
-              headers: {
-                'Authorization': `Bearer ${apiKey}`
-              }
-            });
-
-            const audioData = [];
-
-            ws.on('open', () => {
-              console.log('WebSocket connection established!');
-            });
-
-            ws.on('message', (data) => {
-              const message = JSON.parse(data.toString());
-
-              if (message.type === 'session.created') {
-                console.log(`Session created: ${message.session.id}`);
-
-                // Send text chunks
-                const textChunks = [
-                  "Hello, this is a test.",
-                  "This is the second sentence.",
-                  "And this is the final one."
-                ];
-
-                textChunks.forEach((text, index) => {
-                  setTimeout(() => {
-                    ws.send(JSON.stringify({
-                      type: 'input_text_buffer.append',
-                      text: text
-                    }));
-                  }, index * 500);
-                });
-
-                // Commit after all chunks
-                setTimeout(() => {
-                  ws.send(JSON.stringify({
-                    type: 'input_text_buffer.commit'
-                  }));
-                }, textChunks.length * 500 + 100);
-
-              } else if (message.type === 'conversation.item.input_text.received') {
-                console.log(`Text received: ${message.text}`);
-              } else if (message.type === 'conversation.item.audio_output.delta') {
-                // Decode base64 audio chunk
-                const audioChunk = Buffer.from(message.delta, 'base64');
-                audioData.push(audioChunk);
-                console.log(`Received audio chunk for item ${message.item_id}`);
-              } else if (message.type === 'conversation.item.audio_output.done') {
-                console.log(`Audio generation complete for item ${message.item_id}`);
-              } else if (message.type === 'conversation.item.tts.failed') {
-                const errorMessage = message.error?.message ?? 'Unknown error';
-                console.error(`Error: ${errorMessage}`);
-                ws.close();
-              }
-            });
-
-            ws.on('close', () => {
-              // Save the audio to a file
-              if (audioData.length > 0) {
-                const completeAudio = Buffer.concat(audioData);
-                fs.writeFileSync('output.wav', completeAudio);
-                console.log('Audio saved to output.wav');
-              }
-            });
-
-            ws.on('error', (error) => {
-              console.error('WebSocket error:', error);
-            });
-      parameters:
-        - in: query
-          name: model
-          required: false
-          schema:
-            type: string
-            enum:
-              - hexgrad/Kokoro-82M
-              - cartesia/sonic-english
-            default: hexgrad/Kokoro-82M
-          description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event.
-        - in: query
-          name: voice
-          required: false
-          schema:
-            type: string
-            default: tara
-          description: |
-            The voice to use for speech generation. Default is 'tara'.
-            Available voices vary by model. Can also be updated via `tts_session.updated` event.
-        - in: query
-          name: max_partial_length
-          required: false
-          schema:
-            type: integer
-            default: 250
-          description: |
-            Maximum number of characters in partial text before forcing TTS generation
-            even without a sentence ending. Helps reduce latency for long text without punctuation.
-      responses:
-        '101':
-          description: |
-            Switching Protocols - WebSocket connection established successfully.
-
-            Error message format:
-            ```json
-            {
-              "type": "conversation.item.tts.failed",
-              "error": {
-                "message": "Error description",
-                "type": "invalid_request_error",
-                "param": null,
-                "code": "error_code"
-              }
-            }
-            ```
-  /audio/transcriptions:
-    post:
-      tags: ['Audio']
-      summary: Create audio transcription request
-      description: Transcribes audio into text
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            file = open("audio.wav", "rb")
-
-            response = client.audio.transcriptions.create(
-                model="openai/whisper-large-v3",
-                file=file,
-            )
-
-            print(response.text)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            file = open("audio.wav", "rb")
-
-            response = client.audio.transcriptions.create(
-                model="openai/whisper-large-v3",
-                file=file,
-            )
-
-            print(response.text)
+            for fine_tune in response.data:
+                print(f"ID: {fine_tune.id}, Status: {fine_tune.status}")
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.transcriptions.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-            });
+            const response = await client.fineTuning.list();
 
-            console.log(response.text);
+            for (const fineTune of response.data) {
+              console.log(fineTune.id, fineTune.status);
+            }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.transcriptions.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-            });
+            const response = await client.fineTuning.list();
 
-            console.log(response.text);
+            for (const fineTune of response.data) {
+              console.log(fineTune.id, fineTune.status);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
+            curl "https://api.together.xyz/v1/fine-tunes" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -F "file=@audio.wav" \
-                 -F "model=openai/whisper-large-v3"
-      operationId: audio-transcriptions
-      requestBody:
-        required: true
-        content:
-          multipart/form-data:
-            schema:
-              $ref: '#/components/schemas/AudioTranscriptionRequest'
+                 -H "Content-Type: application/json"
       responses:
         '200':
-          description: 'OK'
+          description: List of fine-tune jobs
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/AudioTranscriptionResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
+                $ref: '#/components/schemas/FinetuneTruncatedList'
+  /fine-tunes/estimate-price:
+    post:
+      tags: ['Fine-tuning']
+      summary: Estimate price
+      description: Estimate the price of a fine-tuning job.
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              required:
+                - training_file
+              properties:
+                training_file:
+                  type: string
+                  description: File-ID of a training file uploaded to the Together API
+                validation_file:
+                  type: string
+                  description: File-ID of a validation file uploaded to the Together API
+                model:
+                  type: string
+                  description: Name of the base model to run fine-tune job on
+                n_epochs:
+                  type: integer
+                  default: 1
+                  description: Number of complete passes through the training dataset (higher values may improve results but increase cost and risk of overfitting)
+                n_evals:
+                  type: integer
+                  default: 0
+                  description: Number of evaluations to be run on a given validation set during training
+                training_method:
+                  type: object
+                  oneOf:
+                    - $ref: '#/components/schemas/TrainingMethodSFT'
+                    - $ref: '#/components/schemas/TrainingMethodDPO'
+                  description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
+                training_type:
+                  type: object
+                  oneOf:
+                    - $ref: '#/components/schemas/FullTrainingType'
+                    - $ref: '#/components/schemas/LoRATrainingType'
+                from_checkpoint:
+                  type: string
+                  description: The checkpoint identifier to continue training from a previous fine-tuning job. Format is `{$JOB_ID}` or `{$OUTPUT_MODEL_NAME}` or `{$JOB_ID}:{$STEP}` or `{$OUTPUT_MODEL_NAME}:{$STEP}`. The step value is optional; without it, the final checkpoint will be used.
+      responses:
+        '500':
+          description: Internal Server Error
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
+        '200':
+          description: Price estimated successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-  /audio/translations:
-    post:
-      tags: ['Audio']
-      summary: Create audio translation request
-      description: Translates audio into English
+                type: object
+                properties:
+                  estimated_total_price:
+                    type: number
+                    description: The price of the fine-tuning job
+                  allowed_to_proceed:
+                    type: boolean
+                    description: Whether the user is allowed to proceed with the fine-tuning job
+                    example: true
+                  user_limit:
+                    type: number
+                    description: The user's credit limit in dollars
+                  estimated_train_token_count:
+                    type: number
+                    description: The estimated number of tokens to be trained
+                  estimated_eval_token_count:
+                    type: number
+                    description: The estimated number of tokens for evaluation
+  /fine-tunes/{id}:
+    get:
+      tags: ['Fine-tuning']
+      summary: List job
+      description: Retrieve the metadata for a single fine-tuning job.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3184,15 +2484,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = open("audio.wav", "rb")
-
-            response = client.audio.translations.create(
-                model="openai/whisper-large-v3",
-                file=file,
-                language="es",
-            )
+            fine_tune = client.fine_tuning.retrieve(id="ft-id")
 
-            print(response.text)
+            print(fine_tune)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3203,113 +2497,71 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            file = open("audio.wav", "rb")
-
-            response = client.audio.translations.create(
-                model="openai/whisper-large-v3",
-                file=file,
-                language="es",
-            )
+            fine_tune = client.fine_tuning.retrieve(id="ft-id")
 
-            print(response.text)
+            print(fine_tune)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.translations.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-              language: "es"
-            });
+            const fineTune = await client.fineTuning.retrieve("ft-id");
 
-            console.log(response.text);
+            console.log(fineTune);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
-            import { readFileSync } from "fs";
-            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const audioFilePath = join(process.cwd(), "audio.wav");
-            const audioBuffer = readFileSync(audioFilePath);
-            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
-
-            const response = await client.audio.translations.create({
-              model: "openai/whisper-large-v3",
-              file: audioFile,
-              language: "es"
-            });
+            const fineTune = await client.fineTuning.retrieve("ft-id");
 
-            console.log(response.text);
+            console.log(fineTune);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
+            curl "https://api.together.xyz/v1/fine-tunes/ft-id" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -F "file=@audio.wav" \
-                 -F "model=openai/whisper-large-v3" \
-                 -F "language=es"
-      operationId: audio-translations
-      requestBody:
-        required: true
-        content:
-          multipart/form-data:
-            schema:
-              $ref: '#/components/schemas/AudioTranslationRequest'
-      responses:
-        '200':
-          description: 'OK'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/AudioTranslationResponse'
-        '400':
-          description: 'BadRequest'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '401':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '429':
-          description: 'RateLimit'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-  /clusters/availability-zones:
-    get:
-      tags: ['endpoints']
-      summary: List all available availability zones.
-      description: List all available availability zones.
-      operationId: availabilityZones
+                 -H "Content-Type: application/json"
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
       responses:
         '200':
-          description: Success
+          description: Fine-tune job details retrieved successfully
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ListAvailibilityZonesResponse'
+                $ref: '#/components/schemas/FinetuneResponse'
+    delete:
+      tags: ['Fine-tuning']
+      summary: Delete a fine-tune job
+      description: Delete a fine-tuning job.
       x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.fine_tuning.delete(id="ft-id")
+
+            print(response)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3320,9 +2572,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.endpoints.list_avzones()
+            response = client.fine_tuning.delete(id="ft-id")
 
-            print(response.avzones)
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3332,9 +2584,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.endpoints.listAvzones();
+            const response = await client.fineTuning.delete("ft-id");
 
-            console.log(response.avzones);
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3344,158 +2596,50 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.endpoints.listAvzones();
+            const response = await client.fineTuning.delete("ft-id");
 
-            console.log(response.avzones);
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/clusters/availability-zones" \
+            curl -X "DELETE" "https://api.together.xyz/v1/fine-tunes/ft-id?force=false" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-  /endpoints:
-    get:
-      tags: ['Endpoints']
-      summary: List all endpoints, can be filtered by type
-      description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless).
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            endpoints = client.endpoints.list()
-
-            for endpoint in endpoints:
-                print(endpoint.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.endpoints.list()
-
-            for endpoint in response.data:
-                print(endpoint.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const endpoints = await client.endpoints.list();
-
-            for (const endpoint of endpoints.data) {
-              console.log(endpoint);
-            }
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const endpoints = await client.endpoints.list();
-
-            for (const endpoint of endpoints.data) {
-              console.log(endpoint);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/endpoints" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: listEndpoints
       parameters:
-        - name: type
-          in: query
-          required: false
-          schema:
-            type: string
-            enum:
-              - dedicated
-              - serverless
-          description: Filter endpoints by type
-          example: dedicated
-        - name: usage_type
-          in: query
-          required: false
+        - name: id
+          in: path
+          required: true
           schema:
             type: string
-            enum:
-              - on-demand
-              - reserved
-          description: Filter endpoints by usage type
-          example: on-demand
-        - name: mine
+        - name: force
           in: query
-          required: false
           schema:
             type: boolean
-          description: If true, return only endpoints owned by the caller
+            default: false
       responses:
         '200':
-          description: '200'
+          description: Fine-tune job deleted successfully
           content:
             application/json:
               schema:
-                type: object
-                required:
-                  - object
-                  - data
-                properties:
-                  object:
-                    type: string
-                    enum:
-                      - list
-                  data:
-                    type: array
-                    items:
-                      $ref: '#/components/schemas/ListEndpoint'
-                example:
-                  object: 'list'
-                  data:
-                    - object: 'endpoint'
-                      id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e'
-                      name: 'allenai/OLMo-7B'
-                      model: 'allenai/OLMo-7B'
-                      type: 'serverless'
-                      owner: 'together'
-                      state: 'STARTED'
-                      created_at: '2024-02-28T21:34:35.444Z'
-        '403':
-          description: 'Unauthorized'
+                $ref: '#/components/schemas/FinetuneDeleteResponse'
+        '404':
+          description: Fine-tune job not found
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
         '500':
-          description: 'Internal error'
+          description: Internal server error
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-    post:
-      tags: ['Endpoints']
-      summary: Create a dedicated endpoint, it will start automatically
-      description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements.
+  /fine-tunes/{id}/events:
+    get:
+      tags: ['Fine-tuning']
+      summary: List job events
+      description: List the events for a single fine-tuning job.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3508,14 +2652,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                hardware="1x_nvidia_a100_80gb_sxm",
-                min_replicas=2,
-                max_replicas=5,
-            )
+            events = client.fine_tuning.list_events(id="ft-id")
 
-            print(endpoint.id)
+            print(events)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3526,16 +2665,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.create(
-                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                hardware="1x_nvidia_a100_80gb_sxm",
-                autoscaling={
-                  "min_replicas": 2,
-                  "max_replicas": 5,
-                }
-            )
+            response = client.fine_tuning.list_events(id="ft-id")
 
-            print(endpoint.id)
+            for event in response.data:
+                print(event)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3545,16 +2678,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              hardware: "1x_nvidia_a100_80gb_sxm",
-              autoscaling: {
-                max_replicas: 5,
-                min_replicas: 2,
-              }
-            });
+            const events = await client.fineTuning.listEvents("ft-id");
 
-            console.log(endpoint.id);
+            console.log(events);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3564,62 +2690,33 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.create({
-              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-              hardware: "1x_nvidia_a100_80gb_sxm",
-              autoscaling: {
-                max_replicas: 5,
-                min_replicas: 2,
-              }
-            });
+            const events = await client.fineTuning.listEvents("ft-id");
 
-            console.log(endpoint.id);
+            console.log(events);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/endpoints" \
+            curl "https://api.together.xyz/v1/fine-tunes/ft-id/events" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
-                   "hardware": "1x_nvidia_a100_80gb_sxm",
-                   "autoscaling": {
-                     "max_replicas": 5,
-                     "min_replicas": 2
-                   }
-                 }'
-      operationId: createEndpoint
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CreateEndpointRequest'
+                 -H "Content-Type: application/json"
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            type: string
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/DedicatedEndpoint'
-        '403':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+          description: List of fine-tune events
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-
-  /endpoints/{endpointId}:
+                $ref: '#/components/schemas/FinetuneListEvents'
+  /fine-tunes/{id}/checkpoints:
     get:
-      tags: ['Endpoints']
-      summary: Get endpoint by ID
-      description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings.
+      tags: ['Fine-tuning']
+      summary: List checkpoints
+      description: List the checkpoints for a single fine-tuning job.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3632,9 +2729,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.get("endpoint-id")
+            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
 
-            print(endpoint.id)
+            print(checkpoints)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3645,9 +2742,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.retrieve("endpoint-id")
+            checkpoints = client.fine_tuning.list_checkpoints(id="ft-id")
 
-            print(endpoint.id)
+            print(checkpoints)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3657,9 +2754,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.retrieve("endpoint-id");
+            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
 
-            console.log(endpoint);
+            console.log(checkpoints);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3669,54 +2766,33 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.retrieve("endpoint-id");
+            const checkpoints = await client.fineTuning.listCheckpoints("ft-id");
 
-            console.log(endpoint);
+            console.log(checkpoints);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/endpoints/endpoint-id" \
+            curl "https://api.together.xyz/v1/fine-tunes/ft-id/checkpoints" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json"
-      operationId: getEndpoint
       parameters:
-        - name: endpointId
+        - name: id
           in: path
           required: true
           schema:
             type: string
-          description: The ID of the endpoint to retrieve
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/DedicatedEndpoint'
-        '403':
-          description: 'Unauthorized'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '404':
-          description: 'Not Found'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+          description: List of fine-tune checkpoints
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
-
-    patch:
-      tags: ['Endpoints']
-      summary: Update endpoint, this can also be used to start or stop a dedicated endpoint
-      description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or change the endpoint's state (start/stop).
+                $ref: '#/components/schemas/FinetuneListCheckpoints'
+  /finetune/download:
+    get:
+      tags: ['Fine-tuning']
+      summary: Download model
+      description: Receive a compressed fine-tuned model or checkpoint.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3729,12 +2805,27 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.update(
-                endpoint_id="endpoint-id",
-                state="STOPPED"
+            # This will download the content to a location on disk
+            response = client.fine_tuning.download(id="ft-id")
+
+            print(response)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            print(endpoint)
+            # Using `with_streaming_response` gives you control to do what you want with the response.
+            stream = client.fine_tuning.with_streaming_response.content(ft_id="ft-id")
+
+            with stream as response:
+                for line in response.iter_lines():
+                    print(line)
+
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3744,11 +2835,11 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.update("endpoint-id", {
-              state: "STOPPED"
+            const response = await client.fineTuning.content({
+              ft_id: "ft-id",
             });
 
-            console.log(endpoint);
+            console.log(await response.blob());
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3758,85 +2849,56 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.update("endpoint-id", {
-              state: "STOPPED"
+            const response = await client.fineTuning.content({
+              ft_id: "ft-id",
             });
 
-            console.log(endpoint);
+            console.log(await response.blob());
         - lang: Shell
           label: cURL
           source: |
-            curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \
+            curl "https://api.together.xyz/v1/finetune/download?ft_id=ft-id&checkpoint=merged" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "state": "STOPPED"
-                 }'
-      operationId: updateEndpoint
+                 -H "Content-Type: application/json"
       parameters:
-        - name: endpointId
-          in: path
+        - in: query
+          name: ft_id
+          schema:
+            type: string
           required: true
+          description: Fine-tune ID to download. A string that starts with `ft-`.
+        - in: query
+          name: checkpoint_step
+          schema:
+            type: integer
+          required: false
+          description: Specifies step number for checkpoint to download. Ignores `checkpoint` value if set.
+        - in: query
+          name: checkpoint
           schema:
             type: string
-          description: The ID of the endpoint to update
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              type: object
-              properties:
-                display_name:
-                  type: string
-                  description: A human-readable name for the endpoint
-                  example: My Llama3 70b endpoint
-                state:
-                  type: string
-                  description: The desired state of the endpoint
-                  enum:
-                    - STARTED
-                    - STOPPED
-                  example: STARTED
-                autoscaling:
-                  $ref: '#/components/schemas/Autoscaling'
-                  description: New autoscaling configuration for the endpoint
-                inactive_timeout:
-                  type: integer
-                  description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
-                  nullable: true
-                  example: 60
+            enum:
+              - merged
+              - adapter
+              - model_output_path
+          description: Specifies checkpoint type to download - `merged`, `adapter`, or `model_output_path`. This field is required if `checkpoint_step` is not set.
       responses:
         '200':
-          description: '200'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/DedicatedEndpoint'
-        '403':
-          description: 'Unauthorized'
+          description: Successfully downloaded the fine-tuned model or checkpoint.
           content:
-            application/json:
+            application/octet-stream:
               schema:
-                $ref: '#/components/schemas/ErrorData'
+                type: string
+                format: binary
+        '400':
+          description: Invalid request parameters.
         '404':
-          description: 'Not Found'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-
-    delete:
-      tags: ['Endpoints']
-      summary: Delete endpoint
-      description: Permanently deletes an endpoint. This action cannot be undone.
+          description: Fine-tune ID not found.
+  /fine-tunes/{id}/cancel:
+    post:
+      tags: ['Fine-tuning']
+      summary: Cancel job
+      description: Cancel a currently running fine-tuning job. Returns a FinetuneResponseTruncated object.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3849,11 +2911,22 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            endpoint = client.endpoints.delete(
-                endpoint_id="endpoint-id",
+            response = client.fine_tuning.cancel(id="ft-id")
+
+            print(response)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            print(endpoint)
+            response = client.fine_tuning.cancel(id="ft-id")
+
+            print(response)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -3863,9 +2936,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.delete("endpoint-id");
+            const response = await client.fineTuning.cancel("ft-id");
 
-            console.log(endpoint);
+            console.log(response);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3875,53 +2948,38 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const endpoint = await client.endpoints.delete("endpoint-id");
+            const response = await client.fineTuning.cancel("ft-id");
 
-            console.log(endpoint);
+            console.log(response);
         - lang: Shell
           label: cURL
           source: |
-            curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY"
-      operationId: deleteEndpoint
+            curl -X POST "https://api.together.xyz/v1/fine-tunes/ft-id/cancel" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
       parameters:
-        - name: endpointId
-          in: path
-          required: true
+        - in: path
+          name: id
           schema:
             type: string
-          description: The ID of the endpoint to delete
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+          required: true
+          description: Fine-tune ID to cancel. A string that starts with `ft-`.
       responses:
-        '204':
-          description: 'No Content - Endpoint successfully deleted'
-        '403':
-          description: 'Unauthorized'
+        '200':
+          description: Successfully cancelled the fine-tuning job.
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ErrorData'
+                $ref: '#/components/schemas/FinetuneResponseTruncated'
+        '400':
+          description: Invalid request parameters.
         '404':
-          description: 'Not Found'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ErrorData'
-
-  /hardware:
-    get:
-      tags: ['Hardware']
-      summary: List available hardware configurations
-      description: >
-        Returns a list of available hardware configurations for deploying models.
-        When a model parameter is provided, it returns only hardware configurations compatible
-        with that model, including their current availability status.
+          description: Fine-tune ID not found.
+  /rerank:
+    post:
+      tags: ['Rerank']
+      summary: Create a rerank request
+      description: Query a reranker model
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -3934,10 +2992,35 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.endpoints.list_hardware()
+            documents = [
+                {
+                    "title": "Llama",
+                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+                },
+                {
+                    "title": "Panda",
+                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+                },
+                {
+                    "title": "Guanaco",
+                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+                },
+                {
+                    "title": "Wild Bactrian camel",
+                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+                }
+            ]
 
-            for hardware in response:
-                print(hardware.id)
+            response = client.rerank.create(
+                model="Salesforce/Llama-Rank-v1",
+                query="What animals can I find near Peru?",
+                documents=documents,
+            )
+
+            for result in response.results:
+                print(f"Rank: {result.index + 1}")
+                print(f"Title: {documents[result.index]['title']}")
+                print(f"Text: {documents[result.index]['text']}")
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -3948,11 +3031,36 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.hardware.list()
+            documents = [
+                {
+                    "title": "Llama",
+                    "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+                },
+                {
+                    "title": "Panda",
+                    "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+                },
+                {
+                    "title": "Guanaco",
+                    "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+                },
+                {
+                    "title": "Wild Bactrian camel",
+                    "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+                }
+            ]
 
-            for hardware in response.data:
-                print(hardware.id)
-        - lang: TypeScript
+            response = client.rerank.create(
+                model="Salesforce/Llama-Rank-v1",
+                query="What animals can I find near Peru?",
+                documents=documents,
+            )
+
+            for rank, result in enumerate(response.results, start=1):  # rank = position in the returned results
+                print(f"Rank: {rank}")
+                print(f"Title: {documents[result.index]['title']}")  # result.index points back into the input list
+                print(f"Text: {documents[result.index]['text']}")
+        - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
@@ -3961,9 +3069,34 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const hardware = await client.hardware.list();
+            const documents = [{
+              "title": "Llama",
+              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+            },
+            {
+              "title": "Panda",
+              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+            },
+            {
+              "title": "Guanaco",
+              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+            },
+            {
+              "title": "Wild Bactrian camel",
+              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+            }];
 
-            console.log(hardware);
+            const response = await client.rerank.create({
+              model: "Salesforce/Llama-Rank-v1",
+              query: "What animals can I find near Peru?",
+              documents,
+            });
+
+            response.results.forEach((result, position) => {
+              console.log(`Rank: ${position + 1}`); // rank = position in the returned results
+              console.log(`Title: ${documents[result.index].title}`); // result.index points back into the input array
+              console.log(`Text: ${documents[result.index].text}`);
+            });
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -3973,64 +3106,115 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const hardware = await client.hardware.list();
+            const documents = [{
+              "title": "Llama",
+              "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+            },
+            {
+              "title": "Panda",
+              "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+            },
+            {
+              "title": "Guanaco",
+              "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+            },
+            {
+              "title": "Wild Bactrian camel",
+              "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+            }];
 
-            console.log(hardware);
+            const response = await client.rerank.create({
+              model: "Salesforce/Llama-Rank-v1",
+              query: "What animals can I find near Peru?",
+              documents,
+            });
+
+            response.results.forEach((result, position) => {
+              console.log(`Rank: ${position + 1}`); // rank = position in the returned results
+              console.log(`Title: ${documents[result.index].title}`); // result.index points back into the input array
+              console.log(`Text: ${documents[result.index].text}`);
+            });
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/hardware" \
+            curl -X POST "https://api.together.xyz/v1/rerank" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: listHardware
-      parameters:
-        - name: model
-          in: query
-          required: false
-          schema:
-            type: string
-          description: >
-            Filter hardware configurations by model compatibility. When provided,
-            the response includes availability status for each compatible configuration.
-          example: meta-llama/Llama-3-70b-chat-hf
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "Salesforce/Llama-Rank-v1",
+                   "query": "What animals can I find near Peru?",
+                   "documents": [{
+                      "title": "Llama",
+                      "text": "The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."
+                    },
+                    {
+                      "title": "Panda",
+                      "text": "The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China."
+                    },
+                    {
+                      "title": "Guanaco",
+                      "text": "The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."
+                    },
+                    {
+                      "title": "Wild Bactrian camel",
+                      "text": "The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia."
+                    }]
+                 }'
+      operationId: rerank
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/RerankRequest'
       responses:
         '200':
-          description: 'List of available hardware configurations'
+          description: 'Successful rerank response'
           content:
             application/json:
               schema:
-                type: object
-                required:
-                  - object
-                  - data
-                properties:
-                  object:
-                    type: string
-                    enum:
-                      - list
-                  data:
-                    type: array
-                    items:
-                      $ref: '#/components/schemas/HardwareWithStatus'
-        '403':
+                $ref: '#/components/schemas/RerankResponse'
+        '400':
+          description: 'BadRequest'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '401':
           description: 'Unauthorized'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-        '500':
-          description: 'Internal error'
+        '404':
+          description: 'NotFound'
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/ErrorData'
-  /tci/execute:
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '503':
+          description: 'Overloaded'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '504':
+          description: 'Timeout'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+      deprecated: false
+  /audio/speech:
     post:
-      tags: ['Code Interpreter']
-      callbacks: {}
-      description: |
-        Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted
-        between calls to the same session.
+      tags: ['Audio']
+      summary: Create audio generation request
+      description: Generate audio from input text
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4043,12 +3227,13 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.code_interpreter.run(
-                code="print('Hello world!')",
-                language="python",
+            response = client.audio.speech.create(
+                model="cartesia/sonic-2",
+                input="The quick brown fox jumps over the lazy dog.",
+                voice="laidback woman",
             )
 
-            print(response.data.outputs[0].data);
+            response.stream_to_file("audio.wav")
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4059,342 +3244,545 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.code_interpreter.execute(
-                code="print('Hello world!')",
-                language="python",
+            response = client.audio.speech.with_streaming_response.create(
+                model="cartesia/sonic-2",
+                input="The quick brown fox jumps over the lazy dog.",
+                voice="laidback woman",
             )
 
-            print(response.data.outputs[0].data);
+            with response as stream:
+                stream.stream_to_file("audio.wav")  # 4-space indent, matching the rest of the sample
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
+            import { createWriteStream } from "fs";
+            import { join } from "path";
+            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.codeInterpreter.execute({
-              code: "print('Hello world!')",
-              language: "python"
+            const response = await client.audio.speech.create({
+              model: "cartesia/sonic-2",
+              input: "The quick brown fox jumps over the lazy dog.",
+              voice: "laidback woman",
             });
 
-            console.log(response.data?.outputs?.[0]?.data);
+            const filepath = join(process.cwd(), "audio.wav");
+            const writeStream = createWriteStream(filepath);
+
+            if (response.body) {
+              await pipeline(response.body, writeStream);
+            }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
+            import { createWriteStream } from "fs";
+            import { join } from "path";
+            import { pipeline } from "stream/promises";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.codeInterpreter.execute({
-              code: "print('Hello world!')",
-              language: "python"
+            const response = await client.audio.speech.create({
+              model: "cartesia/sonic-2",
+              input: "The quick brown fox jumps over the lazy dog.",
+              voice: "laidback woman",
             });
 
-            console.log(response.data?.outputs?.[0]?.data);
+            const filepath = join(process.cwd(), "audio.wav");
+            const writeStream = createWriteStream(filepath);
+
+            if (response.body) {
+              await pipeline(response.body, writeStream);
+            }
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/tci/execute" \
+            curl -X POST "https://api.together.xyz/v1/audio/speech" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
                  -H "Content-Type: application/json" \
                  -d '{
-                   "code": "print(\'Hello world!\')",
-                   "language": "python"
-                 }'
-      operationId: tci/execute
-      parameters: []
+                   "model": "cartesia/sonic-2",
+                   "input": "The quick brown fox jumps over the lazy dog.",
+                   "voice": "laidback woman"
+                 }' \
+                 --output audio.wav
+      operationId: audio-speech
       requestBody:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/ExecuteRequest'
-        description: Execute Request
-        required: false
+              $ref: '#/components/schemas/AudioSpeechRequest'
       responses:
         '200':
+          description: 'OK'
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+            audio/wav:
+              schema:
+                type: string
+                format: binary
+            audio/mpeg:
+              schema:
+                type: string
+                format: binary
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/AudioSpeechStreamResponse'
+        '400':
+          description: 'BadRequest'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/ExecuteResponse'
-          description: Execute Response
-  /tci/sessions:
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /audio/speech/websocket:
     get:
-      tags: ['Code Interpreter']
-      callbacks: {}
+      tags: ['Audio']
+      summary: Real-time text-to-speech via WebSocket
       description: |
-        Lists all your currently active sessions.
-      x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            # together v1 does not support this method
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.code_interpreter.sessions.list()
-
-            for session in response.data.sessions:
-                print(session.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
+        Establishes a WebSocket connection for real-time text-to-speech generation. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/audio/speech/websocket) for bidirectional streaming communication.
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+        **Connection Setup:**
+        - Protocol: WebSocket (wss://)
+        - Authentication: Pass API key as Bearer token in Authorization header
+        - Parameters: Sent as query parameters (model, voice, max_partial_length)
 
-            const response = await client.codeInterpreter.sessions.list();
+        **Client Events:**
+        - `tts_session.updated`: Update session parameters like voice
+          ```json
+          {
+            "type": "tts_session.updated",
+            "session": {
+              "voice": "tara"
+            }
+          }
+          ```
+        - `input_text_buffer.append`: Send text chunks for TTS generation
+          ```json
+          {
+            "type": "input_text_buffer.append",
+            "text": "Hello, this is a test."
+          }
+          ```
+        - `input_text_buffer.clear`: Clear the buffered text
+          ```json
+          {
+            "type": "input_text_buffer.clear"
+          }
+          ```
+        - `input_text_buffer.commit`: Signal end of text input and process remaining text
+          ```json
+          {
+            "type": "input_text_buffer.commit"
+          }
+          ```
 
-            for (const session of response.data?.sessions) {
-              console.log(session.id);
+        **Server Events:**
+        - `session.created`: Initial session confirmation (sent first)
+          ```json
+          {
+            "event_id": "evt_123456",
+            "type": "session.created",
+            "session": {
+              "id": "session-id",
+              "object": "realtime.tts.session",
+              "modalities": ["text", "audio"],
+              "model": "hexgrad/Kokoro-82M",
+              "voice": "tara"
             }
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
+          }
+          ```
+        - `conversation.item.input_text.received`: Acknowledgment that text was received
+          ```json
+          {
+            "type": "conversation.item.input_text.received",
+            "text": "Hello, this is a test."
+          }
+          ```
+        - `conversation.item.audio_output.delta`: Audio chunks as base64-encoded data
+          ```json
+          {
+            "type": "conversation.item.audio_output.delta",
+            "item_id": "tts_1",
+            "delta": "<base64_encoded_audio_chunk>"
+          }
+          ```
+        - `conversation.item.audio_output.done`: Audio generation complete for an item
+          ```json
+          {
+            "type": "conversation.item.audio_output.done",
+            "item_id": "tts_1"
+          }
+          ```
+        - `conversation.item.tts.failed`: Error occurred
+          ```json
+          {
+            "type": "conversation.item.tts.failed",
+            "error": {
+              "message": "Error description",
+              "type": "invalid_request_error",
+              "param": null,
+              "code": "invalid_api_key"
+            }
+          }
+          ```
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+        **Text Processing:**
+        - Partial text (no sentence ending) is held in the buffer until one of the following occurs:
+          - The server determines that the text is complete enough to be processed for TTS generation
+          - The partial text exceeds `max_partial_length` characters (default: 250)
+          - The `input_text_buffer.commit` event is received
 
-            const response = await client.codeInterpreter.sessions.list();
+        **Audio Format:**
+        - Format: WAV (PCM s16le)
+        - Sample Rate: 24000 Hz
+        - Encoding: Base64
+        - Delivered via `conversation.item.audio_output.delta` events
 
-            for (const session of response.data?.sessions) {
-              console.log(session.id);
-            }
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/tci/sessions" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      operationId: sessions/list
-      parameters: []
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/SessionListResponse'
-          description: List Response
-  /batches:
-    get:
-      tags: ['Batches']
-      summary: List batch jobs
-      description: List all batch jobs for the authenticated user
+        **Error Codes:**
+        - `invalid_api_key`: Invalid API key provided (401)
+        - `missing_api_key`: Authorization header missing (401)
+        - `model_not_available`: Invalid or unavailable model (400)
+        - Invalid text format errors (400)
+
+      operationId: realtime-tts
       x-codeSamples:
         - lang: Python
-          label: Together AI SDK (v1)
+          label: Python WebSocket Client
           source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
+            import asyncio
+            import websockets
+            import json
+            import base64
             import os
 
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
+            async def generate_speech():
+                api_key = os.environ.get("TOGETHER_API_KEY")
+                url = "wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara"
 
-            batches = client.batches.list_batches()
+                headers = {
+                    "Authorization": f"Bearer {api_key}"
+                }
 
-            for batch in batches:
-                print(batch.id)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
+                async with websockets.connect(url, additional_headers=headers) as ws:
+                    # Wait for session created
+                    session_msg = await ws.recv()
+                    session_data = json.loads(session_msg)
+                    print(f"Session created: {session_data['session']['id']}")
 
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
+                    # Send text for TTS
+                    text_chunks = [
+                        "Hello, this is a test.",
+                        "This is the second sentence.",
+                        "And this is the final one."
+                    ]
 
-            batches = client.batches.list()
+                    async def send_text():
+                        for chunk in text_chunks:
+                            await ws.send(json.dumps({
+                                "type": "input_text_buffer.append",
+                                "text": chunk
+                            }))
+                            await asyncio.sleep(0.5)  # Simulate typing
 
-            for batch in batches:
-                print(batch.id)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
+                        # Commit to process any remaining text
+                        await ws.send(json.dumps({
+                            "type": "input_text_buffer.commit"
+                        }))
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
+                    async def receive_audio():
+                        audio_data = bytearray()
+                        async for message in ws:
+                            data = json.loads(message)
 
-            const batches = await client.batches.list();
+                            if data["type"] == "conversation.item.input_text.received":
+                                print(f"Text received: {data['text']}")
+                            elif data["type"] == "conversation.item.audio_output.delta":
+                                # Decode base64 audio chunk
+                                audio_chunk = base64.b64decode(data['delta'])
+                                audio_data.extend(audio_chunk)
+                                print(f"Received audio chunk for item {data['item_id']}")
+                            elif data["type"] == "conversation.item.audio_output.done":
+                                print(f"Audio generation complete for item {data['item_id']}")
+                            elif data["type"] == "conversation.item.tts.failed":
+                                error = data.get("error", {})
+                                print(f"Error: {error.get('message')}")
+                                break
 
-            console.log(batches);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
+                        # Save the audio to a file
+                        with open("output.wav", "wb") as f:
+                            f.write(audio_data)
+                        print("Audio saved to output.wav")
+
+                    # Run send and receive concurrently
+                    await asyncio.gather(send_text(), receive_audio())
+
+            asyncio.run(generate_speech())
+        - lang: JavaScript
+          label: Node.js WebSocket Client
           source: |
-            import Together from "together-ai";
+            import WebSocket from 'ws';
+            import fs from 'fs';
 
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
+            const apiKey = process.env.TOGETHER_API_KEY;
+            const url = 'wss://api.together.ai/v1/audio/speech/websocket?model=hexgrad/Kokoro-82M&voice=tara';
+
+            const ws = new WebSocket(url, {
+              headers: {
+                'Authorization': `Bearer ${apiKey}`
+              }
             });
 
-            const batches = await client.batches.list();
+            const audioData = [];
 
-            console.log(batches);
-        - lang: Shell
-          label: cURL
-          source: |
-            curl "https://api.together.xyz/v1/batches" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      security:
-        - bearerAuth: []
+            ws.on('open', () => {
+              console.log('WebSocket connection established!');
+            });
+
+            ws.on('message', (data) => {
+              const message = JSON.parse(data.toString());
+
+              if (message.type === 'session.created') {
+                console.log(`Session created: ${message.session.id}`);
+
+                // Send text chunks
+                const textChunks = [
+                  "Hello, this is a test.",
+                  "This is the second sentence.",
+                  "And this is the final one."
+                ];
+
+                textChunks.forEach((text, index) => {
+                  setTimeout(() => {
+                    ws.send(JSON.stringify({
+                      type: 'input_text_buffer.append',
+                      text: text
+                    }));
+                  }, index * 500);
+                });
+
+                // Commit after all chunks
+                setTimeout(() => {
+                  ws.send(JSON.stringify({
+                    type: 'input_text_buffer.commit'
+                  }));
+                }, textChunks.length * 500 + 100);
+
+              } else if (message.type === 'conversation.item.input_text.received') {
+                console.log(`Text received: ${message.text}`);
+              } else if (message.type === 'conversation.item.audio_output.delta') {
+                // Decode base64 audio chunk
+                const audioChunk = Buffer.from(message.delta, 'base64');
+                audioData.push(audioChunk);
+                console.log(`Received audio chunk for item ${message.item_id}`);
+              } else if (message.type === 'conversation.item.audio_output.done') {
+                console.log(`Audio generation complete for item ${message.item_id}`);
+              } else if (message.type === 'conversation.item.tts.failed') {
+                const errorMessage = message.error?.message ?? 'Unknown error';
+                console.error(`Error: ${errorMessage}`);
+                ws.close();
+              }
+            });
+
+            ws.on('close', () => {
+              // Save the audio to a file
+              if (audioData.length > 0) {
+                const completeAudio = Buffer.concat(audioData);
+                fs.writeFileSync('output.wav', completeAudio);
+                console.log('Audio saved to output.wav');
+              }
+            });
+
+            ws.on('error', (error) => {
+              console.error('WebSocket error:', error);
+            });
+      parameters:
+        - in: query
+          name: model
+          required: false
+          schema:
+            type: string
+            enum:
+              - hexgrad/Kokoro-82M
+              - cartesia/sonic-english
+            default: hexgrad/Kokoro-82M
+          description: The TTS model to use for speech generation. Can also be set via `tts_session.updated` event.
+        - in: query
+          name: voice
+          required: false
+          schema:
+            type: string
+            default: tara
+          description: |
+            The voice to use for speech generation. Default is 'tara'.
+            Available voices vary by model. Can also be updated via `tts_session.updated` event.
+        - in: query
+          name: max_partial_length
+          required: false
+          schema:
+            type: integer
+            default: 250
+          description: |
+            Maximum number of characters in partial text before forcing TTS generation
+            even without a sentence ending. Helps reduce latency for long text without punctuation.
       responses:
-        '200':
-          description: OK
-          content:
-            application/json:
-              schema:
-                type: array
-                items:
-                  $ref: '#/components/schemas/BatchJob'
-        '401':
-          description: Unauthorized
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+        '101':
+          description: |
+            Switching Protocols - WebSocket connection established successfully.
+
+            Error message format:
+            ```json
+            {
+              "type": "conversation.item.tts.failed",
+              "error": {
+                "message": "Error description",
+                "type": "invalid_request_error",
+                "param": null,
+                "code": "error_code"
+              }
+            }
+            ```
+  /audio/transcriptions:
     post:
-      tags: ['Batches']
-      summary: Create a batch job
-      description: Create a new batch job with the given input file and endpoint
+      tags: ['Audio']
+      summary: Create audio transcription request
+      description: Transcribes audio into text
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
           source: |
             # Docs for v2 can be found by changing the above selector ^
             from together import Together
-            import os
 
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions")
+            file = open("audio.wav", "rb")
 
-            print(batch.id)
+            response = client.audio.transcriptions.create(
+                model="openai/whisper-large-v3",
+                file=file,
+            )
+
+            print(response.text)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
             from together import Together
-            import os
 
             client = Together(
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions")
+            file = open("audio.wav", "rb")
 
-            print(batch.job)
+            response = client.audio.transcriptions.create(
+                model="openai/whisper-large-v3",
+                file=file,
+            )
+
+            print(response.text)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.create({
-              endpoint: "/v1/chat/completions",
-              input_file_id: "file-id",
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
+
+            const response = await client.audio.transcriptions.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
             });
 
-            console.log(batch);
+            console.log(response.text);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.create({
-              endpoint: "/v1/chat/completions",
-              input_file_id: "file-id",
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
+
+            const response = await client.audio.transcriptions.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
             });
 
-            console.log(batch);
+            console.log(response.text);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/batches" \
+            curl -X POST "https://api.together.xyz/v1/audio/transcriptions" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json" \
-                 -d '{
-                   "endpoint": "/v1/chat/completions",
-                   "input_file_id": "file-id"
-                 }'
-      security:
-        - bearerAuth: []
+                 -F "file=@audio.wav" \
+                 -F "model=openai/whisper-large-v3"
+      operationId: audio-transcriptions
       requestBody:
         required: true
         content:
-          application/json:
+          multipart/form-data:
             schema:
-              $ref: '#/components/schemas/CreateBatchRequest'
+              $ref: '#/components/schemas/AudioTranscriptionRequest'
       responses:
-        '201':
-          description: Job created (potentially with warnings)
+        '200':
+          description: 'OK'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchJobWithWarning'
+                $ref: '#/components/schemas/AudioTranscriptionResponse'
         '400':
-          description: Bad Request
+          description: 'BadRequest'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
         '401':
-          description: Unauthorized
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
         '429':
-          description: Too Many Requests
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
+          description: 'RateLimit'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-
-  /batches/{id}:
-    get:
-      tags: ['Batches']
-      summary: Get a batch job
-      description: Get details of a batch job by ID
+                $ref: '#/components/schemas/ErrorData'
+  /audio/translations:
+    post:
+      tags: ['Audio']
+      summary: Create audio translation request
+      description: Translates audio into English
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4407,9 +3795,15 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.get_batch("batch_id")
+            file = open("audio.wav", "rb")
 
-            print(batch)
+            response = client.audio.translations.create(
+                model="openai/whisper-large-v3",
+                file=file,
+                language="es",
+            )
+
+            print(response.text)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4420,225 +3814,378 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            batch = client.batches.retrieve("batch_id")
+            file = open("audio.wav", "rb")
 
-            print(batch)
+            response = client.audio.translations.create(
+                model="openai/whisper-large-v3",
+                file=file,
+                language="es",
+            )
+
+            print(response.text)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.retrieve("batch-id");
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
 
-            console.log(batch);
+            const response = await client.audio.translations.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+              language: "es"
+            });
+
+            console.log(response.text);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
             import Together from "together-ai";
+            import { readFileSync } from "fs";
+            import { join } from "path";
 
             const client = new Together({
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const batch = await client.batches.retrieve("batch-id");
+            const audioFilePath = join(process.cwd(), "audio.wav");
+            const audioBuffer = readFileSync(audioFilePath);
+            const audioFile = new File([audioBuffer], "audio.wav", { type: "audio/wav" });
 
-            console.log(batch);
+            const response = await client.audio.translations.create({
+              model: "openai/whisper-large-v3",
+              file: audioFile,
+              language: "es"
+            });
+
+            console.log(response.text);
         - lang: Shell
           label: cURL
           source: |
-            curl "https://api.together.xyz/v1/batches/ID" \
+            curl -X POST "https://api.together.xyz/v1/audio/translations" \
                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      security:
-        - bearerAuth: []
-      parameters:
-        - name: id
-          in: path
-          required: true
-          description: Job ID
-          schema:
-            type: string
-          example: 'batch_job_abc123def456'
+                 -F "file=@audio.wav" \
+                 -F "model=openai/whisper-large-v3" \
+                 -F "language=es"
+      operationId: audio-translations
+      requestBody:
+        required: true
+        content:
+          multipart/form-data:
+            schema:
+              $ref: '#/components/schemas/AudioTranslationRequest'
       responses:
         '200':
-          description: OK
+          description: 'OK'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchJob'
+                $ref: '#/components/schemas/AudioTranslationResponse'
         '400':
-          description: Bad Request
+          description: 'BadRequest'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
+                $ref: '#/components/schemas/ErrorData'
         '401':
-          description: Unauthorized
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '403':
-          description: Forbidden
+                $ref: '#/components/schemas/ErrorData'
+        '429':
+          description: 'RateLimit'
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '404':
-          description: Not Found
+                $ref: '#/components/schemas/ErrorData'
+  /clusters:
+    get:
+      tags: ['GPUClusterService']
+      summary: List all GPU clusters.
+      operationId: GPUClusterService_List
+      responses:
+        "200":
+          description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/GPUClusters'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X GET \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters
+    post:
+      tags: ['GPUClusterService']
+      summary: Create GPU Cluster
+      operationId: GPUClusterService_Create
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GPUClusterCreateRequest'
+        required: true
+      responses:
+        "200":
+          description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-  /batches/{id}/cancel:
-    post:
-      tags: ['Batches']
-      summary: Cancel a batch job
-      description: Cancel a batch job by ID
+                $ref: '#/components/schemas/GPUClusterCreateResponse'
       x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
-          source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            batch = client.batches.cancel("batch_id")
-
-            print(batch)
-        - lang: Python
-          label: Together AI SDK (v2)
-          source: |
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            batch = client.batches.cancel("batch_id")
-
-            print(batch)
-        - lang: TypeScript
-          label: Together AI SDK (TypeScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const batch = await client.batches.cancel("batch-id");
-
-            console.log(batch);
-        - lang: JavaScript
-          label: Together AI SDK (JavaScript)
-          source: |
-            import Together from "together-ai";
-
-            const client = new Together({
-              apiKey: process.env.TOGETHER_API_KEY,
-            });
-
-            const batch = await client.batches.cancel("batch-id");
-
-            console.log(batch);
         - lang: Shell
           label: cURL
           source: |
-            curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \
-                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
-                 -H "Content-Type: application/json"
-      security:
-        - bearerAuth: []
+            curl -X POST \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  --data '{ "region": "us-west-2", "gpu_type": "H100_SXM", "num_gpus": 8, "cluster_name": "my-gpu-cluster", "duration_days": 7, "driver_version": "CUDA_12_6_560" }' \
+                  https://api.together.ai/v1/clusters
+  /clusters/{cluster_id}:
+    get:
+      tags: ['GPUClusterService']
+      summary: Get GPU cluster by cluster ID
+      operationId: GPUClusterService_Get
       parameters:
-        - name: id
+        - name: cluster_id
           in: path
           required: true
-          description: Job ID
           schema:
             type: string
-          example: 'batch_job_abc123def456'
       responses:
-        '200':
+        "200":
           description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchJob'
-        '400':
-          description: Bad Request
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '401':
-          description: Unauthorized
+                $ref: '#/components/schemas/GPUClusterInfo'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X GET \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters/${CLUSTER_ID}
+    put:
+      tags: ['GPUClusterService']
+      summary: Update a GPU Cluster.
+      operationId: GPUClusterService_Update
+      parameters:
+        - name: cluster_id
+          in: path
+          required: true
+          schema:
+            type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GPUClusterUpdateRequest'
+        required: true
+      responses:
+        "200":
+          description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '403':
-          description: Forbidden
+                $ref: '#/components/schemas/GPUClusterUpdateResponse'
+      x-codeSamples:
+          - lang: TypeScript
+            label: Together AI SDK (TypeScript)
+            source: |
+              client.clusters.create()
+          - lang: Shell
+            label: cURL
+            source: |
+              curl -X PUT \
+                    -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                    --data '{ "cluster_id": "cluster id", "cluster_type": "kubernetes", "num_gpus": 24 }' \
+                    https://api.together.ai/v1/clusters/${CLUSTER_ID}
+    delete:
+      tags: ['GPUClusterService']
+      summary: Delete GPU cluster by cluster ID
+      operationId: GPUClusterService_Delete
+      parameters:
+        - name: cluster_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '404':
-          description: Not Found
+                $ref: '#/components/schemas/GPUCLusterDeleteResponse'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X DELETE \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters/${CLUSTER_ID}
+  /clusters/regions:
+    get:
+      tags: ['RegionService']
+      summary: List regions and corresponding supported driver versions
+      operationId: RegionService_List
+      responses:
+        "200":
+          description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-        '500':
-          description: Internal Server Error
+                $ref: '#/components/schemas/RegionListResponse'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X GET \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters/regions
+  /clusters/storages:
+    get:
+      tags: ['SharedVolumeService']
+      summary: List all shared volumes.
+      operationId: SharedVolumeService_List
+      responses:
+        "200":
+          description: OK
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/BatchErrorResponse'
-  /evaluation:
-    post:
-      tags:
-        - evaluation
-      summary: Create an evaluation job
-      operationId: createEvaluationJob
+                $ref: '#/components/schemas/SharedVolumes'
       x-codeSamples:
-        - lang: Python
-          label: Together AI SDK (v1)
+        - lang: Shell
+          label: cURL
           source: |
-            # Docs for v2 can be found by changing the above selector ^
-            from together import Together
-            import os
-
-            client = Together(
-                api_key=os.environ.get("TOGETHER_API_KEY"),
-            )
-
-            response = client.evaluation.create(
-                type="classify",
-                judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo",
-                judge_system_template="You are an expert evaluator...",
-                input_data_file_path="file-abc123",
-                labels=["good", "bad"],
-                pass_labels=["good"],
-                model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
-            )
-
-            print(response.workflow_id)
+            curl -X GET \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters/storages
+    put:
+      tags: ['SharedVolumeService']
+      summary: Update a shared volume.
+      operationId: SharedVolumeService_Update
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/SharedVolumeUpdateRequest'
+        required: true
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/SharedVolumeInfo'
+      x-codeSamples:
+          - lang: Shell
+            label: cURL
+            source: |
+              curl -X PUT \
+                    -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                    --data '{ "volume_id": "12345-67890-12345-67890", "size_tib": 3}' \
+                    https://api.together.ai/v1/clusters/storages
+    post:
+      tags: ['SharedVolumeService']
+      summary: Create a shared volume.
+      operationId: SharedVolumeService_Create
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/SharedVolumeCreateRequest'
+        required: true
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/SharedVolumeCreateResponse'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  --data '{ "volume_name": "my-shared-volume", "size_tib": 2, "region": "us-west-2" }' \
+                  https://api.together.ai/v1/clusters/storages
+  /clusters/storages/{volume_id}:
+    get:
+      tags: ['SharedVolumeService']
+      summary: Get shared volume by volume Id.
+      operationId: SharedVolumeService_Get
+      parameters:
+        - name: volume_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/SharedVolumeInfo'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X GET \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters/storages/${VOLUME_ID}
+    delete:
+      tags: ['SharedVolumeService']
+      summary: Delete shared volume by volume id.
+      operationId: SharedVolumeService_Delete
+      parameters:
+        - name: volume_id
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/SharedVolumeDeleteResponse'
+      x-codeSamples:
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X DELETE \
+                  -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                  https://api.together.ai/v1/clusters/storages/${VOLUME_ID}
+  /clusters/availability-zones:
+    get:
+      tags: ['endpoints']
+      summary: List all available availability zones.
+      description: List all available availability zones.
+      operationId: availabilityZones
+      responses:
+        '200':
+          description: Success
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListAvailibilityZonesResponse'
+      x-codeSamples:
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4649,22 +4196,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evals.create(
-                type="classify",
-                parameters=ParametersEvaluationClassifyParameters(
-                    judge=ParametersEvaluationClassifyParametersJudge(
-                        model="meta-llama/Llama-3.1-70B-Instruct-Turbo",
-                        model_source="serverless",
-                        system_template="You are an expert evaluator...",
-                    ),
-                    input_data_file_path="file-abc123",
-                    labels=["good", "bad"],
-                    pass_labels=["good"],
-                    model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
-                )
-            )
+            response = client.endpoints.list_avzones()
 
-            print(response.workflow_id)
+            print(response.avzones)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4674,22 +4208,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.create({
-              type: 'classify',
-              parameters: {
-                judge: {
-                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
-                  model_source: 'serverless',
-                  system_template: 'You are an expert evaluator...',
-                },
-                input_data_file_path: 'file-abc123',
-                labels: ['good', 'bad'],
-                pass_labels: ['good'],
-                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
-              },
-            });
+            const response = await client.endpoints.listAvzones();
 
-            console.log(response.workflow_id);
+            console.log(response.avzones);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4699,54 +4220,20 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.create({
-              type: 'classify',
-              parameters: {
-                judge: {
-                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
-                  model_source: 'serverless',
-                  system_template: 'You are an expert evaluator...',
-                },
-                input_data_file_path: 'file-abc123',
-                labels: ['good', 'bad'],
-                pass_labels: ['good'],
-                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
-              },
-            });
-
-            console.log(response.workflow_id);
-
+            const response = await client.endpoints.listAvzones();
 
-      requestBody:
-        required: true
-        content:
-          application/json:
-            schema:
-              $ref: "#/components/schemas/EvaluationTypedRequest"
-      responses:
-        "200":
-          description: "Evaluation job created successfully"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/EvaluationResponse"
-        "400":
-          description: "Invalid request format"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Failed to create evaluation job"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ErrorData"
+            console.log(response.avzones);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/clusters/availability-zones" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+  /endpoints:
     get:
-      tags:
-        - evaluation
-      summary: Get all evaluation jobs
-      operationId: getAllEvaluationJobs
+      tags: ['Endpoints']
+      summary: List all endpoints, can be filtered by type
+      description: Returns a list of all endpoints associated with your account. You can filter the results by type (dedicated or serverless).
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4759,10 +4246,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            jobs = client.evaluation.list()
+            endpoints = client.endpoints.list()
 
-            for job in jobs:
-                print(job.workflow_id)
+            for endpoint in endpoints:
+                print(endpoint.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4773,10 +4260,10 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evals.list()
+            response = client.endpoints.list()
 
-            for job in response:
-                print(job.workflow_id)
+            for endpoint in response.data:
+                print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4786,10 +4273,10 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.list();
+            const endpoints = await client.endpoints.list();
 
-            for (const job of response) {
-              console.log(job.workflow_id);
+            for (const endpoint of endpoints.data) {
+              console.log(endpoint);
             }
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
@@ -4800,95 +4287,91 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.list();
+            const endpoints = await client.endpoints.list();
 
-            for (const job of response) {
-              console.log(job.workflow_id);
+            for (const endpoint of endpoints.data) {
+              console.log(endpoint);
             }
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/endpoints" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: listEndpoints
       parameters:
-        - name: status
+        - name: type
           in: query
           required: false
           schema:
             type: string
-            default: "pending"
-        - name: limit
+            enum:
+              - dedicated
+              - serverless
+          description: Filter endpoints by type
+          example: dedicated
+        - name: usage_type
           in: query
           required: false
           schema:
-            type: integer
-            default: 10
-        - name: userId
+            type: string
+            enum:
+              - on-demand
+              - reserved
+          description: Filter endpoints by usage type
+          example: on-demand
+        - name: mine
           in: query
           required: false
-          description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs."
           schema:
-            type: string
+            type: boolean
+          description: If true, return only endpoints owned by the caller
       responses:
-        "200":
-          description: "evaluation jobs retrieved successfully"
-          content:
-            application/json:
-              schema:
-                type: array
-                items:
-                  $ref: "#/components/schemas/EvaluationJob"
-        "400":
-          description: "Invalid request format"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Error retrieving jobs from manager"
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ErrorData"
-  /evaluation/model-list:
-    get:
-      tags:
-        - evaluation
-      summary: Get model list
-      operationId: getModelList
-      parameters:
-        - name: model_source
-          in: query
-          required: false
-          schema:
-            type: string
-            default: "all"
-      responses:
-        "200":
-          description: "Model list retrieved successfully"
+        '200':
+          description: 'List of endpoints'
           content:
             application/json:
               schema:
                 type: object
+                required:
+                  - object
+                  - data
                 properties:
-                  model_list:
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  data:
                     type: array
                     items:
-                      type: string
-                      description: "The name of the model"
-        "400":
-          description: "Invalid request format"
+                      $ref: '#/components/schemas/ListEndpoint'
+                example:
+                  object: 'list'
+                  data:
+                    - object: 'endpoint'
+                      id: 'endpoint-5c0c20db-62fe-4f41-8ffc-d9e4ea1a264e'
+                      name: 'allenai/OLMo-7B'
+                      model: 'allenai/OLMo-7B'
+                      type: 'serverless'
+                      owner: 'together'
+                      state: 'STARTED'
+                      created_at: '2024-02-28T21:34:35.444Z'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Error retrieving model list"
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-  /evaluation/{id}:
-    get:
-      tags:
-        - evaluation
-      summary: Get evaluation job details
-      operationId: getEvaluationJobDetails
+                $ref: '#/components/schemas/ErrorData'
+    post:
+      tags: ['Endpoints']
+      summary: Create a dedicated endpoint that starts automatically
+      description: Creates a new dedicated endpoint for serving models. The endpoint will automatically start after creation. You can deploy any supported model on hardware configurations that meet the model's requirements.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4901,9 +4384,14 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evaluation.retrieve('eval_id')
+            endpoint = client.endpoints.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                hardware="1x_nvidia_a100_80gb_sxm",
+                min_replicas=2,
+                max_replicas=5,
+            )
 
-            print(response)
+            print(endpoint.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4914,9 +4402,16 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evals.retrieve('eval_id')
+            endpoint = client.endpoints.create(
+                model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                hardware="1x_nvidia_a100_80gb_sxm",
+                autoscaling={
+                  "min_replicas": 2,
+                  "max_replicas": 5,
+                }
+            )
 
-            print(response)
+            print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -4926,9 +4421,16 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.retrieve('eval_id');
+            const endpoint = await client.endpoints.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              hardware: "1x_nvidia_a100_80gb_sxm",
+              autoscaling: {
+                max_replicas: 5,
+                min_replicas: 2,
+              }
+            });
 
-            console.log(response);
+            console.log(endpoint.id);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -4938,41 +4440,62 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.retrieve('eval_id');
+            const endpoint = await client.endpoints.create({
+              model: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+              hardware: "1x_nvidia_a100_80gb_sxm",
+              autoscaling: {
+                max_replicas: 5,
+                min_replicas: 2,
+              }
+            });
 
-            console.log(response);
-      parameters:
-        - name: id
-          in: path
-          required: true
-          schema:
-            type: string
+            console.log(endpoint.id);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/endpoints" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "model": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+                   "hardware": "1x_nvidia_a100_80gb_sxm",
+                   "autoscaling": {
+                     "max_replicas": 5,
+                     "min_replicas": 2
+                   }
+                 }'
+      operationId: createEndpoint
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateEndpointRequest'
       responses:
-        "200":
-          description: "Evaluation job details retrieved successfully"
+        '200':
+          description: 'The newly created dedicated endpoint'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/EvaluationJob"
-        "404":
-          description: "Evaluation job not found"
+                $ref: '#/components/schemas/DedicatedEndpoint'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Failed to get evaluation job"
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
+                $ref: '#/components/schemas/ErrorData'
 
-  /evaluation/{id}/status:
+  /endpoints/{endpointId}:
     get:
-      tags:
-        - evaluation
-      summary: Get evaluation job status and results
-      operationId: getEvaluationJobStatusAndResults
+      tags: ['Endpoints']
+      summary: Get endpoint by ID
+      description: Retrieves details about a specific endpoint, including its current state, configuration, and scaling settings.
       x-codeSamples:
         - lang: Python
           label: Together AI SDK (v1)
@@ -4985,10 +4508,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evaluation.status('eval_id')
+            endpoint = client.endpoints.get("endpoint-id")
 
-            print(response.status)
-            print(response.results)
+            print(endpoint.id)
         - lang: Python
           label: Together AI SDK (v2)
           source: |
@@ -4999,10 +4521,9 @@ paths:
                 api_key=os.environ.get("TOGETHER_API_KEY"),
             )
 
-            response = client.evals.status('eval_id')
+            endpoint = client.endpoints.retrieve("endpoint-id")
 
-            print(response.status)
-            print(response.results)
+            print(endpoint.id)
         - lang: TypeScript
           label: Together AI SDK (TypeScript)
           source: |
@@ -5012,10 +4533,9 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.status('eval_id');
+            const endpoint = await client.endpoints.retrieve("endpoint-id");
 
-            console.log(response.status);
-            console.log(response.results);
+            console.log(endpoint);
         - lang: JavaScript
           label: Together AI SDK (JavaScript)
           source: |
@@ -5025,546 +4545,2237 @@ paths:
               apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            const response = await client.evals.status('eval_id');
+            const endpoint = await client.endpoints.retrieve("endpoint-id");
 
-            console.log(response.status);
-            console.log(response.results);
+            console.log(endpoint);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/endpoints/endpoint-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: getEndpoint
       parameters:
-        - name: id
+        - name: endpointId
           in: path
           required: true
           schema:
             type: string
+          description: The ID of the endpoint to retrieve
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
       responses:
-        "200":
-          description: "Evaluation job status and results retrieved successfully"
+        '200':
+          description: 'Details of the requested endpoint'
           content:
             application/json:
               schema:
-                type: object
-                properties:
-                  status:
-                    type: string
-                    description: "The status of the evaluation job"
-                    enum: ["completed", "error", "user_error", "running", "queued", "pending"]
-                  results:
-                    description: "The results of the evaluation job"
-                    oneOf:
-                      - $ref: "#/components/schemas/EvaluationClassifyResults"
-                      - $ref: "#/components/schemas/EvaluationScoreResults"
-                      - $ref: "#/components/schemas/EvaluationCompareResults"
-        "404":
-          description: "Evaluation job not found"
+                $ref: '#/components/schemas/DedicatedEndpoint'
+        '403':
+          description: 'Unauthorized'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
-        "500":
-          description: "Failed to get evaluation job"
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'Not Found'
           content:
             application/json:
               schema:
-                $ref: "#/components/schemas/ErrorData"
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
 
-  /realtime:
-    get:
-      tags: ['Audio']
-      summary: Real-time audio transcription via WebSocket
-      description: |
-        Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication.
+    patch:
+      tags: ['Endpoints']
+      summary: Update an endpoint, including starting or stopping a dedicated endpoint
+      description: Updates an existing endpoint's configuration. You can modify the display name, autoscaling settings, or inactivity timeout, or change the endpoint's state (start/stop).
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
-        **Connection Setup:**
-        - Protocol: WebSocket (wss://)
-        - Authentication: Pass API key as Bearer token in Authorization header
-        - Parameters: Sent as query parameters (model, input_audio_format)
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-        **Client Events:**
-        - `input_audio_buffer.append`: Send audio chunks as base64-encoded data
-          ```json
-          {
-            "type": "input_audio_buffer.append",
-            "audio": "<base64_encoded_audio_chunk>"
-          }
-          ```
-        - `input_audio_buffer.commit`: Signal end of audio stream
-          ```json
-          {
-            "type": "input_audio_buffer.commit"
-          }
-          ```
+            endpoint = client.endpoints.update(
+                endpoint_id="endpoint-id",
+                state="STOPPED"
+            )
 
-        **Server Events:**
-        - `session.created`: Initial session confirmation (sent first)
-          ```json
-          {
-            "type": "session.created",
-            "session": {
-              "id": "session-id",
-              "object": "realtime.session",
-              "modalities": ["audio"],
-              "model": "openai/whisper-large-v3"
-            }
-          }
-          ```
-        - `conversation.item.input_audio_transcription.delta`: Partial transcription results
-          ```json
-          {
-            "type": "conversation.item.input_audio_transcription.delta",
-            "delta": "The quick brown"
-          }
-          ```
-        - `conversation.item.input_audio_transcription.completed`: Final transcription
-          ```json
-          {
-            "type": "conversation.item.input_audio_transcription.completed",
-            "transcript": "The quick brown fox jumps over the lazy dog"
-          }
-          ```
-        - `conversation.item.input_audio_transcription.failed`: Error occurred
-          ```json
-          {
-            "type": "conversation.item.input_audio_transcription.failed",
-            "error": {
-              "message": "Error description",
-              "type": "invalid_request_error",
-              "param": null,
-              "code": "invalid_api_key"
-            }
-          }
-          ```
+            print(endpoint)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
 
-        **Error Codes:**
-        - `invalid_api_key`: Invalid API key provided (401)
-        - `missing_api_key`: Authorization header missing (401)
-        - `model_not_available`: Invalid or unavailable model (400)
-        - Unsupported audio format errors (400)
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-      operationId: realtime-transcription
+            const endpoint = await client.endpoints.update("endpoint-id", {
+              state: "STOPPED"
+            });
+
+            console.log(endpoint);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const endpoint = await client.endpoints.update("endpoint-id", {
+              state: "STOPPED"
+            });
+
+            console.log(endpoint);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X PATCH "https://api.together.xyz/v1/endpoints/endpoint-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "state": "STOPPED"
+                 }'
+      operationId: updateEndpoint
+      parameters:
+        - name: endpointId
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the endpoint to update
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                display_name:
+                  type: string
+                  description: A human-readable name for the endpoint
+                  example: My Llama3 70b endpoint
+                state:
+                  type: string
+                  description: The desired state of the endpoint
+                  enum:
+                    - STARTED
+                    - STOPPED
+                  example: STARTED
+                autoscaling:
+                  $ref: '#/components/schemas/Autoscaling'
+                  description: New autoscaling configuration for the endpoint
+                inactive_timeout:
+                  type: integer
+                  description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
+                  nullable: true
+                  example: 60
+      responses:
+        '200':
+          description: 'The updated endpoint'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/DedicatedEndpoint'
+        '403':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'Not Found'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+
+    delete:
+      tags: ['Endpoints']
+      summary: Delete endpoint
+      description: Permanently deletes an endpoint. This action cannot be undone.
       x-codeSamples:
         - lang: Python
-          label: Python WebSocket Client
+          label: Together AI SDK (v1)
           source: |
-            import asyncio
-            import websockets
-            import json
-            import base64
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
             import os
 
-            async def transcribe_audio():
-                api_key = os.environ.get("TOGETHER_API_KEY")
-                url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000"
-
-                headers = {
-                    "Authorization": f"Bearer {api_key}"
-                }
-
-                async with websockets.connect(url, additional_headers=headers) as ws:
-                    # Read audio file
-                    with open("audio.wav", "rb") as f:
-                        audio_data = f.read()
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-                    # Send audio in chunks with delay to simulate real-time
-                    chunk_size = 8192
-                    bytes_per_second = 16000 * 2  # 16kHz * 2 bytes (16-bit)
-                    delay_per_chunk = chunk_size / bytes_per_second
+            endpoint = client.endpoints.delete(
+                endpoint_id="endpoint-id",
+            )
 
-                    for i in range(0, len(audio_data), chunk_size):
-                        chunk = audio_data[i:i+chunk_size]
-                        base64_chunk = base64.b64encode(chunk).decode('utf-8')
-                        await ws.send(json.dumps({
-                            "type": "input_audio_buffer.append",
-                            "audio": base64_chunk
-                        }))
-                        # Simulate real-time streaming
-                        if i + chunk_size < len(audio_data):
-                            await asyncio.sleep(delay_per_chunk)
+            print(endpoint)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
 
-                    # Commit the audio buffer
-                    await ws.send(json.dumps({
-                        "type": "input_audio_buffer.commit"
-                    }))
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-                    # Receive transcription results
-                    async for message in ws:
-                        data = json.loads(message)
-                        if data["type"] == "conversation.item.input_audio_transcription.delta":
-                            print(f"Partial: {data['delta']}")
-                        elif data["type"] == "conversation.item.input_audio_transcription.completed":
-                            print(f"Final: {data['transcript']}")
-                            break
-                        elif data["type"] == "conversation.item.input_audio_transcription.failed":
-                            error = data.get("error", {})
-                            print(f"Error: {error.get('message')}")
-                            break
+            const endpoint = await client.endpoints.delete("endpoint-id");
 
-            asyncio.run(transcribe_audio())
+            console.log(endpoint);
         - lang: JavaScript
-          label: Node.js WebSocket Client
+          label: Together AI SDK (JavaScript)
           source: |
-            import WebSocket from 'ws';
-            import fs from 'fs';
-
-            const apiKey = process.env.TOGETHER_API_KEY;
-            const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000';
+            import Together from "together-ai";
 
-            const ws = new WebSocket(url, {
-              headers: {
-                'Authorization': `Bearer ${apiKey}`
-              }
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            ws.on('open', async () => {
-              console.log('WebSocket connection established!');
-
-              // Read audio file
-              const audioData = fs.readFileSync('audio.wav');
-
-              // Send audio in chunks with delay to simulate real-time
-              const chunkSize = 8192;
-              const bytesPerSecond = 16000 * 2;  // 16kHz * 2 bytes (16-bit)
-              const delayPerChunk = (chunkSize / bytesPerSecond) * 1000;  // Convert to ms
+            const endpoint = await client.endpoints.delete("endpoint-id");
 
-              for (let i = 0; i < audioData.length; i += chunkSize) {
-                const chunk = audioData.slice(i, i + chunkSize);
-                const base64Chunk = chunk.toString('base64');
-                ws.send(JSON.stringify({
-                  type: 'input_audio_buffer.append',
-                  audio: base64Chunk
-                }));
+            console.log(endpoint);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X "DELETE" "https://api.together.xyz/v1/endpoints/endpoint-id" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY"
+      operationId: deleteEndpoint
+      parameters:
+        - name: endpointId
+          in: path
+          required: true
+          schema:
+            type: string
+          description: The ID of the endpoint to delete
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+      responses:
+        '204':
+          description: 'No Content - Endpoint successfully deleted'
+        '403':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '404':
+          description: 'Not Found'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
 
-                // Simulate real-time streaming
-                if (i + chunkSize < audioData.length) {
-                  await new Promise(resolve => setTimeout(resolve, delayPerChunk));
-                }
-              }
+  /hardware:
+    get:
+      tags: ['Hardware']
+      summary: List available hardware configurations
+      description: >
+        Returns a list of available hardware configurations for deploying models.
+        When a model parameter is provided, it returns only hardware configurations compatible
+        with that model, including their current availability status.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
-              // Commit audio buffer
-              ws.send(JSON.stringify({
-                type: 'input_audio_buffer.commit'
-              }));
-            });
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-            ws.on('message', (data) => {
-              const message = JSON.parse(data.toString());
+            response = client.endpoints.list_hardware()
 
-              if (message.type === 'conversation.item.input_audio_transcription.delta') {
-                console.log(`Partial: ${message.delta}`);
-              } else if (message.type === 'conversation.item.input_audio_transcription.completed') {
-                console.log(`Final: ${message.transcript}`);
-                ws.close();
-              } else if (message.type === 'conversation.item.input_audio_transcription.failed') {
-                const errorMessage = message.error?.message ?? message.message ?? 'Unknown error';
-                console.error(`Error: ${errorMessage}`);
-                ws.close();
-              }
+            for hardware in response:
+                print(hardware.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.hardware.list()
+
+            for hardware in response.data:
+                print(hardware.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
             });
 
-            ws.on('error', (error) => {
-              console.error('WebSocket error:', error);
+            const hardware = await client.hardware.list();
+
+            console.log(hardware);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
             });
+
+            const hardware = await client.hardware.list();
+
+            console.log(hardware);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/hardware" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: listHardware
       parameters:
-        - in: query
-          name: model
-          required: true
-          schema:
-            type: string
-            enum:
-              - openai/whisper-large-v3
-            default: openai/whisper-large-v3
-          description: The Whisper model to use for transcription
-        - in: query
-          name: input_audio_format
-          required: true
+        - name: model
+          in: query
+          required: false
           schema:
             type: string
-            enum:
-              - pcm_s16le_16000
-            default: pcm_s16le_16000
-          description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate.
+          description: >
+            Filter hardware configurations by model compatibility. When provided,
+            the response includes availability status for each compatible configuration.
+          example: meta-llama/Llama-3-70b-chat-hf
       responses:
-        '101':
-          description: |
-            Switching Protocols - WebSocket connection established successfully.
+        '200':
+          description: 'List of available hardware configurations'
+          content:
+            application/json:
+              schema:
+                type: object
+                required:
+                  - object
+                  - data
+                properties:
+                  object:
+                    type: string
+                    enum:
+                      - list
+                  data:
+                    type: array
+                    items:
+                      $ref: '#/components/schemas/HardwareWithStatus'
+        '403':
+          description: 'Unauthorized'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Internal error'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /tci/execute:
+    post:
+      tags: ['Code Interpreter']
+      callbacks: {}
+      description: |
+        Executes the given code snippet and returns the output. Without a session_id, a new session will be created to run the code. If you do pass in a valid session_id, the code will be run in that session. This is useful for running multiple code snippets in the same environment, because dependencies and similar things are persisted
+        between calls to the same session.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
 
-            Error message format:
-            ```json
-            {
-              "type": "conversation.item.input_audio_transcription.failed",
-              "error": {
-                "message": "Error description",
-                "type": "invalid_request_error",
-                "param": null,
-                "code": "error_code"
-              }
-            }
-            ```
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-components:
-  securitySchemes:
-    bearerAuth:
-      type: http
-      scheme: bearer
-      x-bearer-format: bearer
-      x-default: default
+            response = client.code_interpreter.run(
+                code="print('Hello world!')",
+                language="python",
+            )
 
-  schemas:
-    ListVoicesResponse:
-      description: Response containing a list of models and their available voices.
-      type: object
-      required: ['data']
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/ModelVoices'
+            print(response.data.outputs[0].data)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
 
-    ModelVoices:
-      description: Represents a model with its available voices.
-      type: object
-      required: ['model', 'voices']
-      properties:
-        model:
-          type: string
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
 
-        voices:
-          type: array
-          items:
-            type: object
-            required: ['id', 'name']
-            properties:
-              id:
-                type: string
-              name:
-                type: string
-
-    ListAvailibilityZonesResponse:
-      description: List of unique availability zones
-      type: object
-      required: ['avzones']
-      properties:
-        avzones:
-          type: array
-          items:
-            type: string
-
-    RerankRequest:
-      type: object
-      properties:
-        model:
-          type: string
-          description: >
-            The model to be used for the rerank request.<br>
-            <br>
-            [See all of Together AI's rerank models](https://docs.together.ai/docs/serverless-models#rerank-models)
-          example: Salesforce/Llama-Rank-V1
-          anyOf:
-            - type: string
-              enum:
-                - Salesforce/Llama-Rank-v1
-            - type: string
-
-        query:
-          type: string
-          description: The search query to be used for ranking.
-          example: What animals can I find near Peru?
-        documents:
-          description: List of documents, which can be either strings or objects.
-          oneOf:
-            - type: array
-              items:
-                type: object
-                additionalProperties: true
-            - type: array
-              items:
-                type: string
-                example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
-          example:
-            - {
-                'title': 'Llama',
-                'text': 'The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.',
-              }
-            - {
-                'title': 'Panda',
-                'text': 'The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China.',
-              }
-            - {
-                'title': 'Guanaco',
-                'text': 'The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations.',
-              }
-            - {
-                'title': 'Wild Bactrian camel',
-                'text': 'The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia.',
-              }
-        top_n:
-          type: integer
-          description: The number of top results to return.
-          example: 2
-        return_documents:
-          type: boolean
-          description: Whether to return supplied documents with the response.
-          example: true
-        rank_fields:
-          type: array
-          items:
-            type: string
-          description: List of keys in the JSON Object document to rank by. Defaults to use all supplied keys for ranking.
-          example: ['title', 'text']
-      required:
-        - model
-        - query
-        - documents
-      additionalProperties: false
-
-    RerankResponse:
-      type: object
-      required:
-        - object
-        - model
-        - results
-      properties:
-        object:
-          type: string
-          description: Object type
-          enum:
-            - rerank
-          example: rerank
-        id:
-          type: string
-          description: Request ID
-          example: 9dfa1a09-5ebc-4a40-970f-586cb8f4ae47
-        model:
-          type: string
-          description: The model to be used for the rerank request.
-          example: salesforce/turboranker-0.8-3778-6328
-        results:
-          type: array
-          items:
-            type: object
-            required: [index, relevance_score, document]
-            properties:
-              index:
-                type: integer
-              relevance_score:
-                type: number
-              document:
-                type: object
-                properties:
-                  text:
-                    type: string
-                    nullable: true
-          example:
-            - {
-                'index': 0,
-                'relevance_score': 0.29980177813003117,
-                'document':
-                  {
-                    'text': '{"title":"Llama","text":"The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."}',
-                  },
-              }
-            - {
-                'index': 2,
-                'relevance_score': 0.2752447527354349,
-                'document':
-                  {
-                    'text': '{"title":"Guanaco","text":"The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."}',
-                  },
-              }
-        usage:
-          $ref: '#/components/schemas/UsageData'
-          example:
-            {
-              'prompt_tokens': 1837,
-              'completion_tokens': 0,
-              'total_tokens': 1837,
-            }
+            response = client.code_interpreter.execute(
+                code="print('Hello world!')",
+                language="python",
+            )
 
-    ErrorData:
-      type: object
-      required:
-        - error
-      properties:
-        error:
-          type: object
-          properties:
-            message:
-              type: string
-              nullable: false
-            type:
-              type: string
-              nullable: false
-            param:
-              type: string
-              nullable: true
-              default: null
-            code:
-              type: string
-              nullable: true
-              default: null
-          required:
-            - type
-            - message
+            print(response.data.outputs[0].data)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
 
-    FinishReason:
-      type: string
-      enum:
-        - stop
-        - eos
-        - length
-        - tool_calls
-        - function_call
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-    LogprobsPart:
-      type: object
-      properties:
-        token_ids:
-          type: array
-          items:
-            type: number
-          description: List of token IDs corresponding to the logprobs
-        tokens:
-          type: array
-          items:
-            type: string
-          description: List of token strings
-        token_logprobs:
-          type: array
-          items:
-            type: number
-          description: List of token log probabilities
+            const response = await client.codeInterpreter.execute({
+              code: "print('Hello world!')",
+              language: "python"
+            });
 
-    PromptPart:
-      type: array
-      items:
-        type: object
-        properties:
-          text:
-            type: string
-            example: <s>[INST] What is the capital of France? [/INST]
-          logprobs:
-            $ref: '#/components/schemas/LogprobsPart'
+            console.log(response.data?.outputs?.[0]?.data);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
 
-    InferenceWarning:
-      type: object
-      required:
-        - message
-      properties:
-        message:
-          type: string
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
 
-    UsageData:
-      type: object
-      properties:
-        prompt_tokens:
-          type: integer
-        completion_tokens:
-          type: integer
-        total_tokens:
-          type: integer
-      required:
-        - prompt_tokens
-        - completion_tokens
-        - total_tokens
-      nullable: true
+            const response = await client.codeInterpreter.execute({
+              code: "print('Hello world!')",
+              language: "python"
+            });
+
+            console.log(response.data?.outputs?.[0]?.data);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/tci/execute" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "code": "print(\"Hello world!\")",
+                   "language": "python"
+                 }'
+      operationId: tci/execute
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ExecuteRequest'
+        description: Execute Request
+        required: false
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ExecuteResponse'
+          description: Execute Response
+  /tci/sessions:
+    get:
+      tags: ['Code Interpreter']
+      callbacks: {}
+      description: |
+        Lists all your currently active sessions.
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            # together v1 does not support this method
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.code_interpreter.sessions.list()
+
+            for session in response.data.sessions:
+                print(session.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.codeInterpreter.sessions.list();
+
+            for (const session of response.data?.sessions ?? []) {
+              console.log(session.id);
+            }
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.codeInterpreter.sessions.list();
+
+            for (const session of response.data?.sessions ?? []) {
+              console.log(session.id);
+            }
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/tci/sessions" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      operationId: sessions/list
+      parameters: []
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/SessionListResponse'
+          description: List Response
+  /batches:
+    get:
+      tags: ['Batches']
+      summary: List batch jobs
+      description: List all batch jobs for the authenticated user
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batches = client.batches.list_batches()
+
+            for batch in batches:
+                print(batch.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batches = client.batches.list()
+
+            for batch in batches:
+                print(batch.id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batches = await client.batches.list();
+
+            console.log(batches);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batches = await client.batches.list();
+
+            console.log(batches);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/batches" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      security:
+        - bearerAuth: []
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/BatchJob'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+    post:
+      tags: ['Batches']
+      summary: Create a batch job
+      description: Create a new batch job with the given input file and endpoint
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.create_batch("file_id", endpoint="/v1/chat/completions")
+
+            print(batch.id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.create(input_file_id="file_id", endpoint="/v1/chat/completions")
+
+            print(batch.job)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.create({
+              endpoint: "/v1/chat/completions",
+              input_file_id: "file-id",
+            });
+
+            console.log(batch);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.create({
+              endpoint: "/v1/chat/completions",
+              input_file_id: "file-id",
+            });
+
+            console.log(batch);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/batches" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json" \
+                 -d '{
+                   "endpoint": "/v1/chat/completions",
+                   "input_file_id": "file-id"
+                 }'
+      security:
+        - bearerAuth: []
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+      responses:
+        '201':
+          description: Job created (potentially with warnings)
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchJobWithWarning'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '429':
+          description: Too Many Requests
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+
+  /batches/{id}:
+    get:
+      tags: ['Batches']
+      summary: Get a batch job
+      description: Get details of a batch job by ID
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.get_batch("batch_id")
+
+            print(batch)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.retrieve("batch_id")
+
+            print(batch)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.retrieve("batch-id");
+
+            console.log(batch);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.retrieve("batch-id");
+
+            console.log(batch);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl "https://api.together.xyz/v1/batches/ID" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      security:
+        - bearerAuth: []
+      parameters:
+        - name: id
+          in: path
+          required: true
+          description: Job ID
+          schema:
+            type: string
+          example: 'batch_job_abc123def456'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchJob'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '403':
+          description: Forbidden
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '404':
+          description: Not Found
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+  /batches/{id}/cancel:
+    post:
+      tags: ['Batches']
+      summary: Cancel a batch job
+      description: Cancel a batch job by ID
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.cancel("batch_id")
+
+            print(batch)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            batch = client.batches.cancel("batch_id")
+
+            print(batch)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.cancel("batch-id");
+
+            console.log(batch);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const batch = await client.batches.cancel("batch-id");
+
+            console.log(batch);
+        - lang: Shell
+          label: cURL
+          source: |
+            curl -X POST "https://api.together.xyz/v1/batches/ID/cancel" \
+                 -H "Authorization: Bearer $TOGETHER_API_KEY" \
+                 -H "Content-Type: application/json"
+      security:
+        - bearerAuth: []
+      parameters:
+        - name: id
+          in: path
+          required: true
+          description: Job ID
+          schema:
+            type: string
+          example: 'batch_job_abc123def456'
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchJob'
+        '400':
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '401':
+          description: Unauthorized
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '403':
+          description: Forbidden
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '404':
+          description: Not Found
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+        '500':
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/BatchErrorResponse'
+  /evaluation:
+    post:
+      tags:
+        - evaluation
+      summary: Create an evaluation job
+      operationId: createEvaluationJob
+      x-codeSamples:
+        - lang: Python
+          label: Together AI SDK (v1)
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evaluation.create(
+                type="classify",
+                judge_model_name="meta-llama/Llama-3.1-70B-Instruct-Turbo",
+                judge_system_template="You are an expert evaluator...",
+                input_data_file_path="file-abc123",
+                labels=["good", "bad"],
+                pass_labels=["good"],
+                model_to_evaluate="meta-llama/Llama-3.1-8B-Instruct-Turbo"
+            )
+
+            print(response.workflow_id)
+        - lang: Python
+          label: Together AI SDK (v2)
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.create(
+                type="classify",
+                parameters={
+                    "judge": {
+                        "model": "meta-llama/Llama-3.1-70B-Instruct-Turbo",
+                        "model_source": "serverless",
+                        "system_template": "You are an expert evaluator...",
+                    },
+                    "input_data_file_path": "file-abc123",
+                    "labels": ["good", "bad"],
+                    "pass_labels": ["good"],
+                    "model_to_evaluate": "meta-llama/Llama-3.1-8B-Instruct-Turbo",
+                },
+            )
+
+            print(response.workflow_id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.create({
+              type: 'classify',
+              parameters: {
+                judge: {
+                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
+                  model_source: 'serverless',
+                  system_template: 'You are an expert evaluator...',
+                },
+                input_data_file_path: 'file-abc123',
+                labels: ['good', 'bad'],
+                pass_labels: ['good'],
+                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
+              },
+            });
+
+            console.log(response.workflow_id);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.create({
+              type: 'classify',
+              parameters: {
+                judge: {
+                  model: 'meta-llama/Llama-3.1-70B-Instruct-Turbo',
+                  model_source: 'serverless',
+                  system_template: 'You are an expert evaluator...',
+                },
+                input_data_file_path: 'file-abc123',
+                labels: ['good', 'bad'],
+                pass_labels: ['good'],
+                model_to_evaluate: 'meta-llama/Llama-3.1-8B-Instruct-Turbo',
+              },
+            });
+
+            console.log(response.workflow_id);
+
+
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/EvaluationTypedRequest"
+      responses:
+        "200":
+          description: "Evaluation job created successfully"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/EvaluationResponse"
+        "400":
+          description: "Invalid request format"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Failed to create evaluation job"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+    get:  # Lists evaluation jobs; the filters below are optional query parameters.
+      tags:
+        - evaluation
+      summary: Get all evaluation jobs
+      operationId: getAllEvaluationJobs
+      x-codeSamples:  # Vendor extension holding one SDK usage sample per entry.
+        - lang: Python
+          label: Together AI SDK (v1)  # This sample calls client.evaluation.list().
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            jobs = client.evaluation.list()
+
+            for job in jobs:
+                print(job.workflow_id)
+        - lang: Python
+          label: Together AI SDK (v2)  # This sample calls client.evals.list().
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.list()
+
+            for job in response:
+                print(job.workflow_id)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.list();
+
+            for (const job of response) {
+              console.log(job.workflow_id);
+            }
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.list();
+
+            for (const job of response) {
+              console.log(job.workflow_id);
+            }
+      parameters:  # All three query parameters are declared with required: false.
+        - name: status
+          in: query
+          required: false
+          schema:
+            type: string
+            default: "pending"  # Documented default when `status` is omitted.
+        - name: limit
+          in: query
+          required: false
+          schema:
+            type: integer
+            default: 10  # Documented default when `limit` is omitted.
+        - name: userId
+          in: query
+          required: false
+          description: "Admin users can specify a user ID to filter jobs. Pass empty string to get all jobs."
+          schema:
+            type: string
+      responses:
+        "200":  # Body is a JSON array of EvaluationJob objects.
+          description: "evaluation jobs retrieved successfully"
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: "#/components/schemas/EvaluationJob"
+        "400":  # 400 and 500 both return the shared ErrorData schema.
+          description: "Invalid request format"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Error retrieving jobs from manager"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+  /evaluation/model-list:  # Returns model names; takes one optional `model_source` query parameter.
+    get:
+      operationId: getModelList
+      summary: Get model list
+      tags:
+        - evaluation
+      parameters:
+        - in: query
+          name: model_source
+          required: false
+          schema:
+            type: string
+            default: 'all'
+      responses:
+        '200':  # JSON object whose `model_list` property is an array of strings.
+          description: 'Model list retrieved successfully'
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  model_list:
+                    type: array
+                    items:
+                      type: string
+                      description: 'The name of the model'
+        '400':  # 400 and 500 both return the shared ErrorData schema.
+          description: 'Invalid request format'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+        '500':
+          description: 'Error retrieving model list'
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ErrorData'
+  /evaluation/{id}:  # Fetches a single evaluation job addressed by the `id` path segment.
+    get:
+      tags:
+        - evaluation
+      summary: Get evaluation job details
+      operationId: getEvaluationJobDetails
+      x-codeSamples:  # Vendor extension holding one SDK usage sample per entry.
+        - lang: Python
+          label: Together AI SDK (v1)  # This sample calls client.evaluation.retrieve(...).
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evaluation.retrieve('eval_id')
+
+            print(response)
+        - lang: Python
+          label: Together AI SDK (v2)  # This sample calls client.evals.retrieve(...).
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.retrieve('eval_id')
+
+            print(response)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.retrieve('eval_id');
+
+            console.log(response);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.retrieve('eval_id');
+
+            console.log(response);
+      parameters:
+        - name: id  # Fills the {id} placeholder in the path.
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":  # Body is a single EvaluationJob object.
+          description: "Evaluation job details retrieved successfully"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/EvaluationJob"
+        "404":  # 404 and 500 both return the shared ErrorData schema.
+          description: "Evaluation job not found"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Failed to get evaluation job"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+
+  /evaluation/{id}/status:  # Returns the status and results of the job addressed by `id`.
+    get:
+      tags:
+        - evaluation
+      summary: Get evaluation job status and results
+      operationId: getEvaluationJobStatusAndResults
+      x-codeSamples:  # Vendor extension holding one SDK usage sample per entry.
+        - lang: Python
+          label: Together AI SDK (v1)  # This sample calls client.evaluation.status(...).
+          source: |
+            # Docs for v2 can be found by changing the above selector ^
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evaluation.status('eval_id')
+
+            print(response.status)
+            print(response.results)
+        - lang: Python
+          label: Together AI SDK (v2)  # This sample calls client.evals.status(...).
+          source: |
+            from together import Together
+            import os
+
+            client = Together(
+                api_key=os.environ.get("TOGETHER_API_KEY"),
+            )
+
+            response = client.evals.status('eval_id')
+
+            print(response.status)
+            print(response.results)
+        - lang: TypeScript
+          label: Together AI SDK (TypeScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.status('eval_id');
+
+            console.log(response.status);
+            console.log(response.results);
+        - lang: JavaScript
+          label: Together AI SDK (JavaScript)
+          source: |
+            import Together from "together-ai";
+
+            const client = new Together({
+              apiKey: process.env.TOGETHER_API_KEY,
+            });
+
+            const response = await client.evals.status('eval_id');
+
+            console.log(response.status);
+            console.log(response.results);
+      parameters:
+        - name: id  # Fills the {id} placeholder in the path.
+          in: path
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":  # Inline object schema; neither `status` nor `results` is marked required.
+          description: "Evaluation job status and results retrieved successfully"
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  status:
+                    type: string
+                    description: "The status of the evaluation job"
+                    enum: ["completed", "error", "user_error", "running", "queued", "pending"]
+                  results:
+                    description: "The results of the evaluation job"
+                    oneOf:  # Exactly one of the classify / score / compare result schemas.
+                      - $ref: "#/components/schemas/EvaluationClassifyResults"
+                      - $ref: "#/components/schemas/EvaluationScoreResults"
+                      - $ref: "#/components/schemas/EvaluationCompareResults"
+        "404":  # 404 and 500 both return the shared ErrorData schema.
+          description: "Evaluation job not found"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+        "500":
+          description: "Failed to get evaluation job"
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/ErrorData"
+
+  /realtime:  # WebSocket transcription endpoint, modelled here as a GET operation.
+    get:
+      tags: ['Audio']
+      summary: Real-time audio transcription via WebSocket
+      description: |
+        Establishes a WebSocket connection for real-time audio transcription. This endpoint uses WebSocket protocol (wss://api.together.ai/v1/realtime) for bidirectional streaming communication.
+
+        **Connection Setup:**
+        - Protocol: WebSocket (wss://)
+        - Authentication: Pass API key as Bearer token in Authorization header
+        - Parameters: Sent as query parameters (model, input_audio_format)
+
+        **Client Events:**
+        - `input_audio_buffer.append`: Send audio chunks as base64-encoded data
+          ```json
+          {
+            "type": "input_audio_buffer.append",
+            "audio": "<base64_encoded_audio_chunk>"
+          }
+          ```
+        - `input_audio_buffer.commit`: Signal end of audio stream
+          ```json
+          {
+            "type": "input_audio_buffer.commit"
+          }
+          ```
+
+        **Server Events:**
+        - `session.created`: Initial session confirmation (sent first)
+          ```json
+          {
+            "type": "session.created",
+            "session": {
+              "id": "session-id",
+              "object": "realtime.session",
+              "modalities": ["audio"],
+              "model": "openai/whisper-large-v3"
+            }
+          }
+          ```
+        - `conversation.item.input_audio_transcription.delta`: Partial transcription results
+          ```json
+          {
+            "type": "conversation.item.input_audio_transcription.delta",
+            "delta": "The quick brown"
+          }
+          ```
+        - `conversation.item.input_audio_transcription.completed`: Final transcription
+          ```json
+          {
+            "type": "conversation.item.input_audio_transcription.completed",
+            "transcript": "The quick brown fox jumps over the lazy dog"
+          }
+          ```
+        - `conversation.item.input_audio_transcription.failed`: Error occurred
+          ```json
+          {
+            "type": "conversation.item.input_audio_transcription.failed",
+            "error": {
+              "message": "Error description",
+              "type": "invalid_request_error",
+              "param": null,
+              "code": "invalid_api_key"
+            }
+          }
+          ```
+
+        **Error Codes:**
+        - `invalid_api_key`: Invalid API key provided (401)
+        - `missing_api_key`: Authorization header missing (401)
+        - `model_not_available`: Invalid or unavailable model (400)
+        - Unsupported audio format errors (400)
+
+      operationId: realtime-transcription
+      x-codeSamples:  # Both samples use a raw WebSocket client rather than an SDK wrapper.
+        - lang: Python
+          label: Python WebSocket Client
+          source: |
+            import asyncio
+            import websockets
+            import json
+            import base64
+            import os
+
+            async def transcribe_audio():
+                api_key = os.environ.get("TOGETHER_API_KEY")
+                url = "wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000"
+
+                headers = {
+                    "Authorization": f"Bearer {api_key}"
+                }
+
+                async with websockets.connect(url, additional_headers=headers) as ws:
+                    # Read audio file
+                    with open("audio.wav", "rb") as f:
+                        audio_data = f.read()
+
+                    # Send audio in chunks with delay to simulate real-time
+                    chunk_size = 8192
+                    bytes_per_second = 16000 * 2  # 16kHz * 2 bytes (16-bit)
+                    delay_per_chunk = chunk_size / bytes_per_second
+
+                    for i in range(0, len(audio_data), chunk_size):
+                        chunk = audio_data[i:i+chunk_size]
+                        base64_chunk = base64.b64encode(chunk).decode('utf-8')
+                        await ws.send(json.dumps({
+                            "type": "input_audio_buffer.append",
+                            "audio": base64_chunk
+                        }))
+                        # Simulate real-time streaming
+                        if i + chunk_size < len(audio_data):
+                            await asyncio.sleep(delay_per_chunk)
+
+                    # Commit the audio buffer
+                    await ws.send(json.dumps({
+                        "type": "input_audio_buffer.commit"
+                    }))
+
+                    # Receive transcription results
+                    async for message in ws:
+                        data = json.loads(message)
+                        if data["type"] == "conversation.item.input_audio_transcription.delta":
+                            print(f"Partial: {data['delta']}")
+                        elif data["type"] == "conversation.item.input_audio_transcription.completed":
+                            print(f"Final: {data['transcript']}")
+                            break
+                        elif data["type"] == "conversation.item.input_audio_transcription.failed":
+                            error = data.get("error", {})
+                            print(f"Error: {error.get('message')}")
+                            break
+
+            asyncio.run(transcribe_audio())
+        - lang: JavaScript
+          label: Node.js WebSocket Client
+          source: |
+            import WebSocket from 'ws';
+            import fs from 'fs';
+
+            const apiKey = process.env.TOGETHER_API_KEY;
+            const url = 'wss://api.together.ai/v1/realtime?model=openai/whisper-large-v3&input_audio_format=pcm_s16le_16000';
+
+            const ws = new WebSocket(url, {
+              headers: {
+                'Authorization': `Bearer ${apiKey}`
+              }
+            });
+
+            ws.on('open', async () => {
+              console.log('WebSocket connection established!');
+
+              // Read audio file
+              const audioData = fs.readFileSync('audio.wav');
+
+              // Send audio in chunks with delay to simulate real-time
+              const chunkSize = 8192;
+              const bytesPerSecond = 16000 * 2;  // 16kHz * 2 bytes (16-bit)
+              const delayPerChunk = (chunkSize / bytesPerSecond) * 1000;  // Convert to ms
+
+              for (let i = 0; i < audioData.length; i += chunkSize) {
+                const chunk = audioData.slice(i, i + chunkSize);
+                const base64Chunk = chunk.toString('base64');
+                ws.send(JSON.stringify({
+                  type: 'input_audio_buffer.append',
+                  audio: base64Chunk
+                }));
+
+                // Simulate real-time streaming
+                if (i + chunkSize < audioData.length) {
+                  await new Promise(resolve => setTimeout(resolve, delayPerChunk));
+                }
+              }
+
+              // Commit audio buffer
+              ws.send(JSON.stringify({
+                type: 'input_audio_buffer.commit'
+              }));
+            });
+
+            ws.on('message', (data) => {
+              const message = JSON.parse(data.toString());
+
+              if (message.type === 'conversation.item.input_audio_transcription.delta') {
+                console.log(`Partial: ${message.delta}`);
+              } else if (message.type === 'conversation.item.input_audio_transcription.completed') {
+                console.log(`Final: ${message.transcript}`);
+                ws.close();
+              } else if (message.type === 'conversation.item.input_audio_transcription.failed') {
+                const errorMessage = message.error?.message ?? message.message ?? 'Unknown error';
+                console.error(`Error: ${errorMessage}`);
+                ws.close();
+              }
+            });
+
+            ws.on('error', (error) => {
+              console.error('WebSocket error:', error);
+            });
+      parameters:  # Both query parameters are required; each enum lists a single accepted value.
+        - in: query
+          name: model
+          required: true
+          schema:
+            type: string
+            enum:
+              - openai/whisper-large-v3
+            default: openai/whisper-large-v3
+          description: The Whisper model to use for transcription
+        - in: query
+          name: input_audio_format
+          required: true
+          schema:
+            type: string
+            enum:
+              - pcm_s16le_16000
+            default: pcm_s16le_16000
+          description: Audio format specification. Currently supports 16-bit PCM at 16kHz sample rate.
+      responses:  # Only the protocol-switch response is declared; the error shape is described in prose.
+        '101':
+          description: |
+            Switching Protocols - WebSocket connection established successfully.
+
+            Error message format:
+            ```json
+            {
+              "type": "conversation.item.input_audio_transcription.failed",
+              "error": {
+                "message": "Error description",
+                "type": "invalid_request_error",
+                "param": null,
+                "code": "error_code"
+              }
+            }
+            ```
+
+components:
+  securitySchemes:  # Defines the `bearerAuth` scheme named by the top-level `security` requirement.
+    bearerAuth:
+      type: http
+      scheme: bearer  # HTTP Bearer token authentication.
+      x-bearer-format: bearer  # Vendor extension; NOTE(review): the standard OpenAPI keyword is `bearerFormat` — confirm which tool reads this key.
+      x-default: default  # Vendor extension; NOTE(review): consumer not visible here.
+
+  schemas:
+    ErrorResponse:  # Error object with an integer `code` and a string `message`; neither is marked required.
+      type: object
+      properties:
+        code:
+          type: integer
+        message:
+          type: string
+    GPUCLusterDeleteResponse:  # NOTE(review): name is spelled "CLuster" (capital L), unlike sibling schemas; left unchanged because $refs elsewhere may use this exact key.
+      type: object
+      required: ['cluster_id']
+      properties:
+        cluster_id:  # The only field, and it is required.
+          type: string
+    GPUClusterControlPlaneNode:  # Describes one control-plane node; every declared property is required.
+      type: object
+      required:
+        - node_id
+        - node_name
+        - status
+        - host_name
+        - num_cpu_cores
+        - memory_gib
+        - network
+      properties:
+        node_id:
+          type: string
+        node_name:
+          type: string
+        status:  # Free-form string here (no enum is declared).
+          type: string
+        host_name:
+          type: string
+        num_cpu_cores:
+          type: integer
+        memory_gib:  # `number`, so fractional values are permitted by the schema.
+          type: number
+        network:  # Single string; GPUClusterGPUWorkerNode instead declares a `networks` array.
+          type: string
+    GPUClusterCreateRequest:  # Request body for creating a GPU cluster.
+      type: object
+      description: GPU Cluster create request
+      required:
+        - region
+        - gpu_type
+        - num_gpus
+        - cluster_name
+        - duration_days
+        - driver_version
+        - billing_type
+      properties:  # cluster_type, shared_volume and volume_id are the optional fields.
+        cluster_type:
+          type: string
+          enum: [KUBERNETES, SLURM]
+        region:
+          description: Region to create the GPU cluster in. Valid values are us-central-8 and us-central-4.
+          type: string
+          enum: [us-central-8, us-central-4]
+        gpu_type:
+          description: Type of GPU to use in the cluster
+          type: string
+          enum:
+            - H100_SXM
+            - H200_SXM
+            - RTX_6000_PCI
+            - L40_PCIE
+            - B200_SXM
+            - H100_SXM_INF
+        num_gpus:
+          description: Number of GPUs to allocate in the cluster. This must be multiple of 8. For example, 8, 16 or 24
+          type: integer
+        cluster_name:
+          description: Name of the GPU cluster.
+          type: string
+        duration_days:
+          description: Duration in days to keep the cluster running.
+          type: integer
+        driver_version:
+          description: NVIDIA driver version to use in the cluster.
+          type: string
+          enum:
+            - CUDA_12_5_555
+            - CUDA_12_6_560
+            - CUDA_12_6_565
+            - CUDA_12_8_570
+        shared_volume:
+          $ref: '#/components/schemas/SharedVolumeCreateRequest'
+        volume_id:
+          type: string
+        billing_type:
+          type: string
+          enum:
+            - RESERVED
+            - ON_DEMAND
+    GPUClusterCreateResponse:  # Response object carrying a single required `cluster_id`.
+      type: object
+      required: ['cluster_id']
+      properties:
+        cluster_id:
+          type: string
+    GPUClusterGPUWorkerNode:  # Describes one GPU worker node; every declared property is required.
+      type: object
+      required:
+        - node_id
+        - node_name
+        - status
+        - host_name
+        - num_cpu_cores
+        - num_gpus
+        - memory_gib
+        - networks
+      properties:
+        node_id:
+          type: string
+        node_name:
+          type: string
+        status:  # Free-form string here (no enum is declared).
+          type: string
+        host_name:
+          type: string
+        num_cpu_cores:
+          type: integer
+        num_gpus:
+          type: integer
+        memory_gib:  # `number`, so fractional values are permitted by the schema.
+          type: number
+        networks:  # Array of strings; GPUClusterControlPlaneNode instead has a single `network` string.
+          type: array
+          items:
+            type: string
+    GPUClusterInfo:  # Full description of a GPU cluster; every declared property is required.
+      type: object
+      required:
+        - cluster_id
+        - cluster_type
+        - region
+        - gpu_type
+        - cluster_name
+        - duration_hours
+        - driver_version
+        - volumes
+        - status
+        - control_plane_nodes
+        - gpu_worker_nodes
+        - kube_config
+        - num_gpus
+      properties:
+        cluster_id:
+          type: string
+        cluster_type:  # NOTE(review): enum without a `type` keyword; GPUClusterCreateRequest declares `type: string` for the same values.
+          enum: [KUBERNETES, SLURM]
+        region:  # Plain string here; GPUClusterCreateRequest restricts region to an enum.
+          type: string
+        gpu_type:  # NOTE(review): enum without a `type` keyword (see cluster_type).
+          enum:
+            - H100_SXM
+            - H200_SXM
+            - RTX_6000_PCI
+            - L40_PCIE
+            - B200_SXM
+            - H100_SXM_INF
+        cluster_name:
+          type: string
+        duration_hours:  # Hours here, while GPUClusterCreateRequest takes `duration_days`.
+          type: integer
+        driver_version:  # NOTE(review): enum without a `type` keyword (see cluster_type).
+          enum:
+            - CUDA_12_5_555
+            - CUDA_12_6_560
+            - CUDA_12_6_565
+            - CUDA_12_8_570
+        volumes:
+          type: array
+          items:
+            $ref: '#/components/schemas/GPUClusterVolume'
+        status:  # NOTE(review): enum without a `type` keyword (see cluster_type).
+          description: Current status of the GPU cluster.
+          enum:
+            - WaitingForControlPlaneNodes
+            - WaitingForDataPlaneNodes
+            - WaitingForSubnet
+            - WaitingForSharedVolume
+            - InstallingDrivers
+            - RunningAcceptanceTests
+            - Paused
+            - OnDemandComputePaused
+            - Ready
+            - Degraded
+            - Deleting
+        control_plane_nodes:
+          type: array
+          items:
+            $ref: '#/components/schemas/GPUClusterControlPlaneNode'
+        gpu_worker_nodes:
+          type: array
+          items:
+            $ref: '#/components/schemas/GPUClusterGPUWorkerNode'
+        kube_config:
+          type: string
+        num_gpus:
+          type: integer
+    GPUClusterUpdateRequest:  # Update payload; both fields are optional (no `required` list).
+      type: object
+      properties:
+        cluster_type:
+          enum: [KUBERNETES, SLURM]
+        num_gpus:
+          type: integer
+    GPUClusterUpdateResponse:  # Response object carrying a single required `cluster_id`.
+      type: object
+      required: [cluster_id]
+      properties:
+        cluster_id:
+          type: string
+    GPUClusterVolume:  # A volume as listed in GPUClusterInfo.volumes; every declared property is required.
+      type: object
+      required:
+        - volume_id
+        - volume_name
+        - size_tib
+        - status
+      properties:
+        volume_id:
+          type: string
+        volume_name:
+          type: string
+        size_tib:  # Integer, so only whole values are allowed by the schema.
+          type: integer
+        status:  # Free-form string here (no enum is declared).
+          type: string
+    GPUClusters:  # Wrapper object: a required `clusters` array of GPUClusterInfo.
+      type: object
+      required: [clusters]
+      properties:
+        clusters:
+          type: array
+          items:
+            $ref: '#/components/schemas/GPUClusterInfo'
+    InstanceTypesResponse:  # Object with an optional `types` array and an optional `error` object.
+      type: object
+      properties:
+        types:  # Array whose items are limited to the GPU type identifiers below.
+          type: array
+          items:
+            enum:
+              - H100_SXM
+              - H200_SXM
+              - RTX_6000_PCI
+              - L40_PCIE
+              - B200_SXM
+              - H100_SXM_INF
+        error:  # Property of the response object (a sibling of `types`), not a keyword of the array schema.
+          $ref: '#/components/schemas/ErrorResponse'
+    Region:  # One region entry; every declared property is required.
+      type: object
+      required:
+        - id
+        - name
+        - availability_zones
+        - driver_versions
+      properties:
+        id:
+          type: string
+        name:
+          type: string
+        availability_zones:  # Array of strings.
+          type: array
+          items:
+            type: string
+        driver_versions:  # Plain strings here; no enum is applied, unlike GPUClusterCreateRequest.driver_version.
+          type: array
+          items:
+            type: string
+    RegionListResponse:  # Wrapper object: a required `regions` array of Region.
+      type: object
+      required: [regions]
+      properties:
+        regions:
+          type: array
+          items:
+            $ref: '#/components/schemas/Region'
+    SharedVolumeCreateRequest:  # Payload for creating a shared volume; all three fields are required. Also referenced by GPUClusterCreateRequest.shared_volume.
+      type: object
+      required:
+        - volume_name
+        - size_tib
+        - region
+      properties:
+        volume_name:
+          type: string
+        size_tib:
+          description: Volume size in whole tebibytes (TiB).
+          type: integer
+        region:  # Plain string here; no enum is applied at the schema level.
+          type: string
+          description: Region name. Usable regions can be found from `client.clusters.list_regions()`
+    SharedVolumeCreateResponse:  # Response object carrying a single required `volume_id`.
+      type: object
+      required: [volume_id]
+      properties:
+        volume_id:
+          type: string
+    SharedVolumeDeleteResponse:  # Response object carrying a single required boolean `success`.
+      type: object
+      required: [success]
+      properties:
+        success:
+          type: boolean
+    SharedVolumeInfo:  # One shared volume as listed in SharedVolumes.volumes; every declared property is required.
+      type: object
+      required:
+        - volume_id
+        - volume_name
+        - size_tib
+      properties:
+        volume_id:
+          type: string
+        volume_name:
+          type: string
+        size_tib:  # Integer, so only whole values are allowed by the schema.
+          type: integer
+    SharedVolumeUpdateRequest:  # Update payload; no `required` list, so both fields are optional per the schema.
+      type: object
+      properties:
+        volume_id:
+          type: string
+        size_tib:
+          type: integer
+    SharedVolumes:  # Wrapper object: a required `volumes` array of SharedVolumeInfo.
+      type: object
+      required: [volumes]
+      properties:
+        volumes:
+          type: array
+          items:
+            $ref: '#/components/schemas/SharedVolumeInfo'
+    ListVoicesResponse:  # Wrapper object: a required `data` array of ModelVoices.
+      description: Response containing a list of models and their available voices.
+      type: object
+      required: ['data']
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/ModelVoices'
+
+    ModelVoices:  # Pairs a model name with its voice list; both fields are required.
+      description: Represents a model with its available voices.
+      type: object
+      required: ['model', 'voices']
+      properties:
+        model:
+          type: string
+
+        voices:  # Inline item schema: each voice needs both `id` and `name`.
+          type: array
+          items:
+            type: object
+            required: ['id', 'name']
+            properties:
+              id:
+                type: string
+              name:
+                type: string
+
+    ListAvailibilityZonesResponse:  # NOTE(review): "Availibility" is misspelled in the schema name; left unchanged because $refs elsewhere may use this exact key.
+      description: List of unique availability zones
+      type: object
+      required: ['avzones']
+      properties:
+        avzones:  # Required array of strings.
+          type: array
+          items:
+            type: string
+
+    RerankRequest:  # Rerank request body; `model`, `query` and `documents` are required and unknown fields are rejected.
+      type: object
+      properties:
+        model:
+          type: string
+          description: >
+            The model to be used for the rerank request.<br>
+            <br>
+            [See all of Together AI's rerank models](https://docs.together.ai/docs/serverless-models#rerank-models)
+          example: Salesforce/Llama-Rank-V1  # NOTE(review): capitalisation differs from the enum value below (`Llama-Rank-v1`); confirm which spelling the API expects.
+          anyOf:  # Accepts the listed enum value or any other string.
+            - type: string
+              enum:
+                - Salesforce/Llama-Rank-v1
+            - type: string
+
+        query:
+          type: string
+          description: The search query to be used for ranking.
+          example: What animals can I find near Peru?
+        documents:
+          description: List of documents, which can be either strings or objects.
+          oneOf:  # Either an array of free-form objects or an array of strings.
+            - type: array
+              items:
+                type: object
+                additionalProperties: true
+            - type: array
+              items:
+                type: string
+                example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
+          example:
+            - {
+                'title': 'Llama',
+                'text': 'The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era.',
+              }
+            - {
+                'title': 'Panda',
+                'text': 'The giant panda (Ailuropoda melanoleuca), also known as the panda bear or simply panda, is a bear species endemic to China.',
+              }
+            - {
+                'title': 'Guanaco',
+                'text': 'The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations.',
+              }
+            - {
+                'title': 'Wild Bactrian camel',
+                'text': 'The wild Bactrian camel (Camelus ferus) is an endangered species of camel endemic to Northwest China and southwestern Mongolia.',
+              }
+        top_n:
+          type: integer
+          description: The number of top results to return.
+          example: 2
+        return_documents:
+          type: boolean
+          description: Whether to return supplied documents with the response.
+          example: true
+        rank_fields:
+          type: array
+          items:
+            type: string
+          description: List of keys in the JSON Object document to rank by. Defaults to use all supplied keys for ranking.
+          example: ['title', 'text']
+      required:
+        - model
+        - query
+        - documents
+      additionalProperties: false  # Properties not declared above are disallowed by the schema.
+
+    RerankResponse:  # Rerank result; `object`, `model` and `results` are required, `id` and `usage` are optional.
+      type: object
+      required:
+        - object
+        - model
+        - results
+      properties:
+        object:  # Single-value enum, so the field is always the string `rerank`.
+          type: string
+          description: Object type
+          enum:
+            - rerank
+          example: rerank
+        id:
+          type: string
+          description: Request ID
+          example: 9dfa1a09-5ebc-4a40-970f-586cb8f4ae47
+        model:
+          type: string
+          description: The model to be used for the rerank request.
+          example: salesforce/turboranker-0.8-3778-6328
+        results:  # Each item requires index, relevance_score and document.
+          type: array
+          items:
+            type: object
+            required: [index, relevance_score, document]
+            properties:
+              index:
+                type: integer
+              relevance_score:
+                type: number
+              document:
+                type: object
+                properties:
+                  text:
+                    type: string
+                    nullable: true
+          example:
+            - {
+                'index': 0,
+                'relevance_score': 0.29980177813003117,
+                'document':
+                  {
+                    'text': '{"title":"Llama","text":"The llama is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the pre-Columbian era."}',
+                  },
+              }
+            - {
+                'index': 2,
+                'relevance_score': 0.2752447527354349,
+                'document':
+                  {
+                    'text': '{"title":"Guanaco","text":"The guanaco is a camelid native to South America, closely related to the llama. Guanacos are one of two wild South American camelids; the other species is the vicuña, which lives at higher elevations."}',
+                  },
+              }
+        usage:
+          $ref: '#/components/schemas/UsageData'  # NOTE(review): under OpenAPI 3.0, keys placed beside $ref (the `example` below) are ignored by tooling — confirm the spec version.
+          example:
+            {
+              'prompt_tokens': 1837,
+              'completion_tokens': 0,
+              'total_tokens': 1837,
+            }
+
+    ErrorData:  # Error envelope: one required `error` object
+      type: object
+      properties:
+        error:
+          type: object
+          required:
+            - message
+            - type
+          properties:
+            message:
+              nullable: false
+              type: string
+            type:
+              nullable: false
+              type: string
+            param:
+              default: null
+              nullable: true
+              type: string
+            code:
+              default: null
+              nullable: true
+              type: string
+      required:
+        - error
+
+    FinishReason:  # String enum; referenced by ChatCompletionChoice.finish_reason
+      type: string
+      enum:
+        - stop
+        - eos
+        - length
+        - tool_calls
+        - function_call
+
+    LogprobsPart:  # Token IDs, token strings and log probabilities, each as its own array
+      type: object
+      properties:
+        token_ids:
+          type: array
+          items:
+            type: integer  # IDs are whole numbers; matches `token_id: integer` on the ChatCompletionChoice delta
+          description: List of token IDs corresponding to the logprobs
+        tokens:
+          type: array
+          items:
+            type: string
+          description: List of token strings
+        token_logprobs:
+          type: array
+          items:
+            type: number
+          description: List of token log probabilities
+
+    PromptPart:  # Array of objects, each with optional `text` and `logprobs`
+      type: array
+      items:
+        type: object
+        properties:
+          text:
+            example: '<s>[INST] What is the capital of France? [/INST]'
+            type: string
+          logprobs:
+            $ref: "#/components/schemas/LogprobsPart"
+
+    InferenceWarning:  # Object carrying one required `message` string
+      type: object
+      properties:
+        message:
+          type: string
+      required:
+        - message
+
+    UsageData:  # Three required integer token counts; the object itself may be null
+      type: object
+      nullable: true
+      required:
+        - prompt_tokens
+        - completion_tokens
+        - total_tokens
+      properties:
+        prompt_tokens:
+          type: integer
+        completion_tokens:
+          type: integer
+        total_tokens:
+          type: integer
 
     CompletionChoicesData:
       type: array
@@ -5996,6 +7207,11 @@ components:
             - type: string
               example: 'tool_name'
             - $ref: '#/components/schemas/ToolChoice'
+        compliance:
+          enum: [hipaa]  # only allowed value; `enum` is valid in OpenAPI 3.0 and 3.1, whereas `const` is 3.1-only
+        chat_template_kwargs:
+          type: object
+          additionalProperties: true  # free-form object; the snake_case spelling is not a schema keyword and is ignored
         safety_model:
           type: string
           description: The name of the moderation model used to validate tokens. Choose from the available moderation models found [here](https://docs.together.ai/docs/inference-models#moderation-models).
@@ -6431,69 +7647,311 @@ components:
           type: string
         response_format:
           type: string
-          description: The format of audio output. Supported formats are mp3, wav, raw if streaming is false. If streaming is true, the only supported format is raw.
-          default: wav
-          enum:
-            - mp3
-            - wav
-            - raw
-        language:
+          description: The format of audio output. Supported formats are mp3, wav, raw if streaming is false. If streaming is true, the only supported format is raw.
+          default: wav
+          enum:
+            - mp3
+            - wav
+            - raw
+        language:
+          type: string
+          description: Language of input text.
+          default: en
+          enum:
+            - en
+            - de
+            - fr
+            - es
+            - hi
+            - it
+            - ja
+            - ko
+            - nl
+            - pl
+            - pt
+            - ru
+            - sv
+            - tr
+            - zh
+        response_encoding:
+          type: string
+          description: Audio encoding of response
+          default: pcm_f32le
+          enum:
+            - pcm_f32le
+            - pcm_s16le
+            - pcm_mulaw
+            - pcm_alaw
+        sample_rate:
+          type: integer
+          default: 44100
+          description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100.
+        stream:
+          type: boolean
+          default: false
+          description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream'
+
+    AudioTranscriptionRequest:
+      type: object
+      required:
+        - file
+      properties:
+        file:
+          oneOf:
+            - $ref: '#/components/schemas/AudioFileBinary'
+            - $ref: '#/components/schemas/AudioFileUrl'
+          description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
+        model:
+          type: string
+          description: Model to use for transcription
+          default: openai/whisper-large-v3
+          enum:
+            - openai/whisper-large-v3
+        language:
+          type: string
+          description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected.
+          default: en
+          example: en
+        prompt:
+          type: string
+          description: Optional text to bias decoding.
+        response_format:
+          type: string
+          description: The format of the response
+          default: json
+          enum:
+            - json
+            - verbose_json
+        temperature:
+          type: number
+          format: float
+          description: Sampling temperature between 0.0 and 1.0
+          default: 0.0
+          minimum: 0.0
+          maximum: 1.0
+        timestamp_granularities:
+          oneOf:
+            - type: string
+              enum:
+                - segment
+                - word
+            - type: array
+              items:
+                type: string
+                enum:
+                  - segment
+                  - word
+              uniqueItems: true
+              minItems: 1
+              maxItems: 2
+          description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels.
+          default: segment
+          example: ['word', 'segment']
+        diarize:
+          type: boolean
+          description: >
+            Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
+            In the response, in the words array, you will get the speaker id for each word.
+            In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
+            <br>
+            <br>
+            For example:
+            ...
+            "speaker_segments": [{
+              "speaker_id": "SPEAKER_00",
+              "start": 0,
+              "end": 30.02,
+              "words": [
+                {
+                  "id": 0,
+                  "word": "Tijana",
+                  "start": 0,
+                  "end": 11.475,
+                  "speaker_id": "SPEAKER_00"
+                },
+                ...
+          default: false
+        min_speakers:
+          type: integer
+          description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
+        max_speakers:
+          type: integer
+          description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
+
+    AudioTranscriptionResponse:
+      oneOf:
+        - $ref: '#/components/schemas/AudioTranscriptionJsonResponse'
+        - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse'
+
+    AudioTranscriptionJsonResponse:
+      type: object
+      required:
+        - text
+      properties:
+        text:
+          type: string
+          description: The transcribed text
+          example: Hello, world!
+
+    AudioTranscriptionVerboseJsonResponse:
+      type: object
+      required:
+        - task
+        - language
+        - duration
+        - text
+        - segments
+      properties:
+        task:
+          type: string
+          description: The task performed
+          enum:
+            - transcribe
+            - translate
+          example: transcribe
+        language:
+          type: string
+          description: The language of the audio
+          example: english
+        duration:
+          type: number
+          format: float
+          description: The duration of the audio in seconds
+          example: 3.5
+        text:
+          type: string
+          description: The transcribed text
+          example: Hello, world!
+        segments:
+          type: array
+          items:
+            $ref: '#/components/schemas/AudioTranscriptionSegment'
+          description: Array of transcription segments
+        words:
+          type: array
+          items:
+            $ref: '#/components/schemas/AudioTranscriptionWord'
+          description: Array of transcription words (only when timestamp_granularities includes 'word')
+        speaker_segments:
+          type: array
+          items:
+            $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment'
+          description: Array of transcription speaker segments (only when diarize is enabled)
+
+    AudioTranscriptionSegment:
+      type: object
+      required:
+        - id
+        - start
+        - end
+        - text
+      properties:
+        id:
+          type: integer
+          description: Unique identifier for the segment
+          example: 0
+        start:
+          type: number
+          format: float
+          description: Start time of the segment in seconds
+          example: 0.0
+        end:
+          type: number
+          format: float
+          description: End time of the segment in seconds
+          example: 3.5
+        text:
+          type: string
+          description: The text content of the segment
+          example: Hello, world!
+
+    AudioTranscriptionWord:
+      type: object
+      required:
+        - word
+        - start
+        - end
+      properties:
+        word:
+          type: string
+          description: The word
+          example: Hello
+        start:
+          type: number
+          format: float
+          description: Start time of the word in seconds
+          example: 0.0
+        end:
+          type: number
+          format: float
+          description: End time of the word in seconds
+          example: 0.5
+        speaker_id:
+          type: string
+          description: The speaker id for the word (only when diarize is enabled)
+          example: SPEAKER_00
+
+    AudioTranscriptionSpeakerSegment:
+      type: object
+      required:
+        - speaker_id
+        - start
+        - end
+        - words
+        - text
+        - id
+      properties:
+        speaker_id:
           type: string
-          description: Language of input text.
-          default: en
-          enum:
-            - en
-            - de
-            - fr
-            - es
-            - hi
-            - it
-            - ja
-            - ko
-            - nl
-            - pl
-            - pt
-            - ru
-            - sv
-            - tr
-            - zh
-        response_encoding:
+          description: The speaker identifier
+          example: SPEAKER_00
+        start:
+          type: number
+          format: float
+          description: Start time of the speaker segment in seconds
+          example: 0.0
+        end:
+          type: number
+          format: float
+          description: End time of the speaker segment in seconds
+          example: 30.02
+        words:
+          type: array
+          items:
+            $ref: '#/components/schemas/AudioTranscriptionWord'
+          description: Array of words spoken by this speaker in this segment
+        text:
           type: string
-          description: Audio encoding of response
-          default: pcm_f32le
-          enum:
-            - pcm_f32le
-            - pcm_s16le
-            - pcm_mulaw
-            - pcm_alaw
-        sample_rate:
+          description: The full text spoken by this speaker in this segment
+          example: "Hello, how are you doing today?"
+        id:
           type: integer
-          default: 44100
-          description: Sampling rate to use for the output audio. The default sampling rate for canopylabs/orpheus-3b-0.1-ft and hexgrad/Kokoro-82M is 24000 and for cartesia/sonic is 44100.
-        stream:
-          type: boolean
-          default: false
-          description: 'If true, output is streamed for several characters at a time instead of waiting for the full response. The stream terminates with `data: [DONE]`. If false, return the encoded audio as octet stream'
+          description: Unique identifier for the speaker segment
+          example: 1
 
-    AudioTranscriptionRequest:
+    AudioTranslationRequest:
       type: object
       required:
         - file
       properties:
         file:
           oneOf:
-            - $ref: '#/components/schemas/AudioFileBinary'
-            - $ref: '#/components/schemas/AudioFileUrl'
+            - type: string
+              format: binary
+              description: Audio file to translate
+            - type: string
+              format: uri
+              description: Public HTTP/HTTPS URL to audio file
           description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
         model:
           type: string
-          description: Model to use for transcription
+          description: Model to use for translation
           default: openai/whisper-large-v3
           enum:
             - openai/whisper-large-v3
         language:
           type: string
-          description: Optional ISO 639-1 language code. If `auto` is provided, language is auto-detected.
+          description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English.
           default: en
           example: en
         prompt:
@@ -6531,53 +7989,23 @@ components:
           description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels.
           default: segment
           example: ['word', 'segment']
-        diarize:
-          type: boolean
-          description: >
-            Whether to enable speaker diarization. When enabled, you will get the speaker id for each word in the transcription.
-            In the response, in the words array, you will get the speaker id for each word.
-            In addition, we also return the speaker_segments array which contains the speaker id for each speaker segment along with the start and end time of the segment along with all the words in the segment.
-            <br>
-            <br>
-            For eg -
-            ...
-            "speaker_segments": [
-              "speaker_id": "SPEAKER_00",
-              "start": 0,
-              "end": 30.02,
-              "words": [
-                {
-                  "id": 0,
-                  "word": "Tijana",
-                  "start": 0,
-                  "end": 11.475,
-                  "speaker_id": "SPEAKER_00"
-                },
-                ...
-          default: false
-        min_speakers:
-          type: integer
-          description: Minimum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
-        max_speakers:
-          type: integer
-          description: Maximum number of speakers expected in the audio. Used to improve diarization accuracy when the approximate number of speakers is known.
 
-    AudioTranscriptionResponse:
+    AudioTranslationResponse:
       oneOf:
-        - $ref: '#/components/schemas/AudioTranscriptionJsonResponse'
-        - $ref: '#/components/schemas/AudioTranscriptionVerboseJsonResponse'
+        - $ref: '#/components/schemas/AudioTranslationJsonResponse'
+        - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse'
 
-    AudioTranscriptionJsonResponse:
+    AudioTranslationJsonResponse:
       type: object
       required:
         - text
       properties:
         text:
           type: string
-          description: The transcribed text
+          description: The translated text
           example: Hello, world!
 
-    AudioTranscriptionVerboseJsonResponse:
+    AudioTranslationVerboseJsonResponse:
       type: object
       required:
         - task
@@ -6592,10 +8020,10 @@ components:
           enum:
             - transcribe
             - translate
-          example: transcribe
+          example: translate
         language:
           type: string
-          description: The language of the audio
+          description: The target language of the translation
           example: english
         duration:
           type: number
@@ -6604,611 +8032,855 @@ components:
           example: 3.5
         text:
           type: string
-          description: The transcribed text
+          description: The translated text
           example: Hello, world!
         segments:
           type: array
           items:
             $ref: '#/components/schemas/AudioTranscriptionSegment'
-          description: Array of transcription segments
+          description: Array of translation segments
         words:
           type: array
           items:
             $ref: '#/components/schemas/AudioTranscriptionWord'
-          description: Array of transcription words (only when timestamp_granularities includes 'word')
-        speaker_segments:
-          type: array
-          items:
-            $ref: '#/components/schemas/AudioTranscriptionSpeakerSegment'
-          description: Array of transcription speaker segments (only when diarize is enabled)
+          description: Array of translation words (only when timestamp_granularities includes 'word')
 
-    AudioTranscriptionSegment:
+    AudioSpeechStreamResponse:
+      oneOf:
+        - $ref: '#/components/schemas/AudioSpeechStreamEvent'
+        - $ref: '#/components/schemas/StreamSentinel'
+
+    AudioSpeechStreamEvent:
       type: object
-      required:
-        - id
-        - start
-        - end
-        - text
+      required: [data]
+      properties:
+        data:
+          $ref: '#/components/schemas/AudioSpeechStreamChunk'
+
+    AudioSpeechStreamChunk:
+      type: object
+      required: [object, model, b64]
+      properties:
+        object:
+          type: string
+          enum:
+            - audio.tts.chunk
+        model:
+          type: string
+          example: cartesia/sonic
+        b64:
+          type: string
+          description: base64 encoded audio stream
+
+    StreamSentinel:
+      type: object
+      required: [data]
+      properties:
+        data:
+          title: stream_signal
+          type: string
+          enum:
+            - '[DONE]'
+
+    ChatCompletionToken:
+      type: object
+      required: [id, text, logprob, special]
       properties:
         id:
           type: integer
-          description: Unique identifier for the segment
-          example: 0
-        start:
-          type: number
-          format: float
-          description: Start time of the segment in seconds
-          example: 0.0
-        end:
-          type: number
-          format: float
-          description: End time of the segment in seconds
-          example: 3.5
         text:
           type: string
-          description: The text content of the segment
-          example: Hello, world!
+        logprob:
+          type: number
+        special:
+          type: boolean
 
-    AudioTranscriptionWord:
+    ChatCompletionChoice:
+      type: object
+      required: [index, delta, finish_reason]
+      properties:
+        index:
+          type: integer
+        finish_reason:
+          $ref: '#/components/schemas/FinishReason'
+        logprobs:
+          $ref: '#/components/schemas/LogprobsPart'
+        delta:
+          title: ChatCompletionChoiceDelta
+          type: object
+          required: [role]
+          properties:
+            token_id:
+              type: integer
+            role:
+              type: string
+              enum: ['system', 'user', 'assistant', 'function', 'tool']
+            content:
+              type: string
+              nullable: true
+            tool_calls:
+              type: array
+              items:
+                $ref: '#/components/schemas/ToolChoice'
+            function_call:
+              type: object
+              deprecated: true
+              nullable: true
+              properties:
+                arguments:
+                  type: string
+                name:
+                  type: string
+              required:
+                - arguments
+                - name
+            reasoning:
+              type: string
+              nullable: true
+
+    EmbeddingsRequest:
       type: object
       required:
-        - word
-        - start
-        - end
+        - model
+        - input
       properties:
-        word:
-          type: string
-          description: The word
-          example: Hello
-        start:
-          type: number
-          format: float
-          description: Start time of the word in seconds
-          example: 0.0
-        end:
-          type: number
-          format: float
-          description: End time of the word in seconds
-          example: 0.5
-        speaker_id:
+        model:
           type: string
-          description: The speaker id for the word (only when diarize is enabled)
-          example: SPEAKER_00
+          description: >
+            The name of the embedding model to use.<br>
+            <br>
+            [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
+          example: togethercomputer/m2-bert-80M-8k-retrieval
+          anyOf:
+            - type: string
+              enum:
+                - WhereIsAI/UAE-Large-V1
+                - BAAI/bge-large-en-v1.5
+                - BAAI/bge-base-en-v1.5
+                - togethercomputer/m2-bert-80M-8k-retrieval
+            - type: string
+        input:
+          oneOf:
+            - type: string
+              description: A string providing the text for the model to embed.
+              example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
+            - type: array
+              items:
+                type: string
+                description: A string providing the text for the model to embed.
+                example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
+          example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
 
-    AudioTranscriptionSpeakerSegment:
+    EmbeddingsResponse:
       type: object
       required:
-        - speaker_id
-        - start
-        - end
-        - words
-        - text
-        - id
+        - object
+        - model
+        - data
       properties:
-        speaker_id:
+        object:
           type: string
-          description: The speaker identifier
-          example: SPEAKER_00
-        start:
-          type: number
-          format: float
-          description: Start time of the speaker segment in seconds
-          example: 0.0
-        end:
-          type: number
-          format: float
-          description: End time of the speaker segment in seconds
-          example: 30.02
-        words:
+          enum:
+            - list
+        model:
+          type: string
+        data:
           type: array
           items:
-            $ref: '#/components/schemas/AudioTranscriptionWord'
-          description: Array of words spoken by this speaker in this segment
-        text:
-          type: string
-          description: The full text spoken by this speaker in this segment
-          example: "Hello, how are you doing today?"
+            type: object
+            required: [index, object, embedding]
+            properties:
+              object:
+                type: string
+                enum:
+                  - embedding
+              embedding:
+                type: array
+                items:
+                  type: number
+              index:
+                type: integer
+
+    ModelInfoList:
+      type: array
+      items:
+        $ref: '#/components/schemas/ModelInfo'
+    ModelInfo:
+      type: object
+      required: [id, object, created, type]
+      properties:
         id:
+          type: string
+          example: 'Austism/chronos-hermes-13b'
+        object:
+          type: string
+          example: 'model'
+        created:
           type: integer
-          description: Unique identifier for the speaker segment
-          example: 1
+          example: 1692896905
+        type:
+          enum:
+            - chat
+            - language
+            - code
+            - image
+            - embedding
+            - moderation
+            - rerank
+          example: 'chat'
+        display_name:
+          type: string
+          example: 'Chronos Hermes (13B)'
+        organization:
+          type: string
+          example: 'Austism'
+        link:
+          type: string
+        license:
+          type: string
+          example: 'other'
+        context_length:
+          type: integer
+          example: 2048
+        pricing:
+          $ref: '#/components/schemas/Pricing'
 
-    AudioTranslationRequest:
+    ModelUploadRequest:
       type: object
       required:
-        - file
+        - model_name
+        - model_source
       properties:
-        file:
-          oneOf:
-            - type: string
-              format: binary
-              description: Audio file to translate
-            - type: string
-              format: uri
-              description: Public HTTP/HTTPS URL to audio file
-          description: Audio file upload or public HTTP/HTTPS URL. Supported formats .wav, .mp3, .m4a, .webm, .flac.
-        model:
+        model_name:
           type: string
-          description: Model to use for translation
-          default: openai/whisper-large-v3
+          description: The name to give to your uploaded model
+          example: 'Qwen2.5-72B-Instruct'
+        model_source:
+          type: string
+          description: The source location of the model (Hugging Face repo or S3 path)
+          example: 'unsloth/Qwen2.5-72B-Instruct'
+        model_type:
+          type: string
+          description: Whether the model is a full model or an adapter
+          default: 'model'
           enum:
-            - openai/whisper-large-v3
-        language:
+            - model
+            - adapter
+          example: 'model'
+        hf_token:
           type: string
-          description: Target output language. Optional ISO 639-1 language code. If omitted, language is set to English.
-          default: en
-          example: en
-        prompt:
+          description: Hugging Face token (if uploading from Hugging Face)
+          example: 'hf_examplehuggingfacetoken'
+        description:
           type: string
-          description: Optional text to bias decoding.
-        response_format:
+          description: A description of your model
+          example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
+        base_model:
           type: string
-          description: The format of the response
-          default: json
-          enum:
-            - json
-            - verbose_json
-        temperature:
-          type: number
-          format: float
-          description: Sampling temperature between 0.0 and 1.0
-          default: 0.0
-          minimum: 0.0
-          maximum: 1.0
-        timestamp_granularities:
-          oneOf:
-            - type: string
-              enum:
-                - segment
-                - word
-            - type: array
-              items:
-                type: string
-                enum:
-                  - segment
-                  - word
-              uniqueItems: true
-              minItems: 1
-              maxItems: 2
-          description: Controls level of timestamp detail in verbose_json. Only used when response_format is verbose_json. Can be a single granularity or an array to get multiple levels.
-          default: segment
-          example: ['word', 'segment']
+          description: The base model to use for an adapter if setting it to run against a serverless pool.  Only used for model_type `adapter`.
+          example: 'Qwen/Qwen2.5-72B-Instruct'
+        lora_model:
+          type: string
+          description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool.  Only used for model_type `adapter`.
+          example: 'my_username/Qwen2.5-72B-Instruct-lora'
 
-    AudioTranslationResponse:
-      oneOf:
-        - $ref: '#/components/schemas/AudioTranslationJsonResponse'
-        - $ref: '#/components/schemas/AudioTranslationVerboseJsonResponse'
+    ModelUploadSuccessResponse:  # Response envelope: a required `data` object plus a required `message` string.
+      type: object
+      required:
+        - data
+        - message
+      properties:
+        data:  # Identifiers for the upload; all four fields listed under `required` must be present.
+          type: object
+          required:
+            - job_id
+            - model_name
+            - model_id
+            - model_source
+          properties:
+            job_id:  # Example has the same 'job-…' shape as JobInfoSuccessResponse.job_id — presumably the id to poll; confirm.
+              type: string
+              example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
+            model_name:
+              type: string
+              example: 'necolinehubner/Qwen2.5-72B-Instruct'
+            model_id:
+              type: string
+              example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7'
+            model_source:
+              type: string
+              example: 'huggingface'
+        message:  # Free-form status text; no enum is declared.
+          type: string
+          example: 'Processing model weights. Job created.'
 
-    AudioTranslationJsonResponse:
+    ImageResponse:
       type: object
+      properties:
+        id:
+          type: string
+        model:
+          type: string
+        object:
+          enum:
+            - list
+          example: 'list'
+        data:
+          type: array
+          items:
+            oneOf:
+              - $ref: '#/components/schemas/ImageResponseDataB64'
+              - $ref: '#/components/schemas/ImageResponseDataUrl'
+            discriminator:
+              propertyName: type
       required:
-        - text
+        - id
+        - model
+        - object
+        - data
+
+    ImageResponseDataB64:
+      type: object
+      required: [index, b64_json, type]
       properties:
-        text:
+        index:
+          type: integer
+        b64_json:
           type: string
-          description: The translated text
-          example: Hello, world!
+        type:
+          type: string
+          enum: [b64_json]
 
-    AudioTranslationVerboseJsonResponse:
+    ImageResponseDataUrl:  # Image entry that carries a `url` string; one of the two item shapes of ImageResponse.data.
+      type: object
+      required: [index, url, type]
+      properties:
+        index:
+          type: integer
+        url:
+          type: string
+        type:  # Single-value enum; ImageResponse.data names `type` as its discriminator property.
+          type: string
+          enum: [url]
+
+    JobInfoSuccessResponse:
       type: object
       required:
-        - task
-        - language
-        - duration
-        - text
-        - segments
+        - type
+        - job_id
+        - status
+        - status_updates
+        - args
+        - created_at
+        - updated_at
       properties:
-        task:
+        type:
           type: string
-          description: The task performed
-          enum:
-            - transcribe
-            - translate
-          example: translate
-        language:
+          example: 'model_upload'
+        job_id:
           type: string
-          description: The target language of the translation
-          example: english
-        duration:
-          type: number
-          format: float
-          description: The duration of the audio in seconds
-          example: 3.5
-        text:
+          example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
+        status:
           type: string
-          description: The translated text
-          example: Hello, world!
-        segments:
+          enum: ['Queued', 'Running', 'Complete', 'Failed']
+          example: 'Complete'
+        status_updates:
           type: array
           items:
-            $ref: '#/components/schemas/AudioTranscriptionSegment'
-          description: Array of translation segments
-        words:
+            type: object
+            required:
+              - status
+              - message
+              - timestamp
+            properties:
+              status:
+                type: string
+                example: 'Complete'
+              message:
+                type: string
+                example: 'Job is Complete'
+              timestamp:
+                type: string
+                format: date-time
+                example: '2025-03-11T22:36:12Z'
+        args:
+          type: object
+          properties:
+            description:
+              type: string
+              example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
+            modelName:
+              type: string
+              example: 'necolinehubner/Qwen2.5-72B-Instruct'
+            modelSource:
+              type: string
+              example: 'unsloth/Qwen2.5-72B-Instruct'
+        created_at:
+          type: string
+          format: date-time
+          example: '2025-03-11T22:05:43Z'
+        updated_at:
+          type: string
+          format: date-time
+          example: '2025-03-11T22:36:12Z'
+
+    JobsInfoSuccessResponse:
+      type: object
+      required:
+        - data
+      properties:
+        data:
           type: array
           items:
-            $ref: '#/components/schemas/AudioTranscriptionWord'
-          description: Array of translation words (only when timestamp_granularities includes 'word')
-
-    AudioSpeechStreamResponse:
-      oneOf:
-        - $ref: '#/components/schemas/AudioSpeechStreamEvent'
-        - $ref: '#/components/schemas/StreamSentinel'
+            $ref: '#/components/schemas/JobInfoSuccessResponse'
 
-    AudioSpeechStreamEvent:
+    Pricing:
       type: object
-      required: [data]
+      required: [hourly, input, output, base, finetune]
       properties:
-        data:
-          $ref: '#/components/schemas/AudioSpeechStreamChunk'
+        hourly:
+          type: number
+          example: 0
+        input:
+          type: number
+          example: 0.3
+        output:
+          type: number
+          example: 0.3
+        base:
+          type: number
+          example: 0
+        finetune:
+          type: number
+          example: 0
 
-    AudioSpeechStreamChunk:
+    ToolsPart:
       type: object
-      required: [object, model, b64]
       properties:
-        object:
+        type:
           type: string
-          enum:
-            - audio.tts.chunk
-        model:
+          example: 'tool_type'
+        function:
+          type: object
+          properties:
+            description:
+              type: string
+              example: 'A description of the function.'
+            name:
+              type: string
+              example: 'function_name'
+            parameters:
+              type: object
+              additionalProperties: true
+              description: 'A map of parameter names to their values.'
+    ToolChoice:
+      type: object
+      required: [id, type, function, index]
+      properties:
+        # TODO: is this the right place for index?
+        index:
+          type: number
+        id:
           type: string
-          example: cartesia/sonic
-        b64:
+        type:
           type: string
-          description: base64 encoded audio stream
+          enum: ['function']
+        function:
+          type: object
+          required: [name, arguments]
+          properties:
+            name:
+              type: string
+              example: 'function_name'
+            arguments:
+              type: string
 
-    StreamSentinel:
+    FileResponse:  # Metadata record for one file; every property declared below is also listed as required.
+      type: object
+      required:
+        - id
+        - object
+        - created_at
+        - filename
+        - bytes
+        - purpose
+        - FileType  # NOTE(review): PascalCase, unlike the snake_case siblings — presumably mirrors the wire format; confirm before renaming.
+        - Processed
+        - LineCount
+      properties:
+        id:
+          type: string
+        object:
+          type: string
+          example: 'file'
+        created_at:  # Integer timestamp; the example looks like Unix epoch seconds — TODO confirm the unit.
+          type: integer
+          example: 1715021438
+        filename:
+          type: string
+          example: 'my_file.jsonl'
+        bytes:
+          type: integer
+          example: 2664
+        purpose:
+          $ref: '#/components/schemas/FilePurpose'
+        Processed:
+          type: boolean
+        FileType:
+          $ref: '#/components/schemas/FileType'
+        LineCount:
+          type: integer
+    FileList:  # List wrapper: a required `data` array whose items are FileResponse objects.
+      required:
+        - data
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/FileResponse'
+    FileObject:
       type: object
-      required: [data]
       properties:
-        data:
-          title: stream_signal
+        object:
           type: string
-          enum:
-            - '[DONE]'
-
-    ChatCompletionToken:
+        id:
+          type: string
+        filename:
+          type: string
+        size:
+          type: integer
+    FilePurpose:  # String enum referenced by FileResponse.purpose.
+      type: string
+      description: The purpose of the file
+      example: 'fine-tune'
+      enum:  # Closed set: values outside this list fail validation.
+        - fine-tune
+        - eval
+        - eval-sample
+        - eval-output
+        - eval-summary
+        - batch-generated
+        - batch-api
+    FileType:  # String enum referenced by FileResponse.FileType.
+      type: string
+      description: The type of the file
+      default: 'jsonl'  # The default is itself one of the enum members below.
+      example: 'jsonl'
+      enum:
+        - 'csv'
+        - 'jsonl'
+        - 'parquet'
+    FileDeleteResponse:
       type: object
-      required: [id, text, logprob, special]
       properties:
         id:
-          type: integer
-        text:
           type: string
-        logprob:
-          type: number
-        special:
+        deleted:
           type: boolean
-
-    ChatCompletionChoice:
-      type: object
-      required: [index, delta, finish_reason]
-      properties:
-        index:
-          type: integer
-        finish_reason:
-          $ref: '#/components/schemas/FinishReason'
-        logprobs:
-          $ref: '#/components/schemas/LogprobsPart'
-        delta:
-          title: ChatCompletionChoiceDelta
-          type: object
-          required: [role]
-          properties:
-            token_id:
-              type: integer
-            role:
-              type: string
-              enum: ['system', 'user', 'assistant', 'function', 'tool']
-            content:
-              type: string
-              nullable: true
-            tool_calls:
-              type: array
-              items:
-                $ref: '#/components/schemas/ToolChoice'
-            function_call:
-              type: object
-              deprecated: true
-              nullable: true
-              properties:
-                arguments:
-                  type: string
-                name:
-                  type: string
-              required:
-                - arguments
-                - name
-            reasoning:
-              type: string
-              nullable: true
-
-    EmbeddingsRequest:
+    FinetuneResponse:
       type: object
       required:
-        - model
-        - input
+        - id
+        - status
       properties:
+        id:
+          type: string
+          format: uuid
+        training_file:
+          type: string
+        validation_file:
+          type: string
         model:
           type: string
-          description: >
-            The name of the embedding model to use.<br>
-            <br>
-            [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
-          example: togethercomputer/m2-bert-80M-8k-retrieval
-          anyOf:
+        model_output_name:
+          type: string
+        model_output_path:
+          type: string
+        trainingfile_numlines:
+          type: integer
+        trainingfile_size:
+          type: integer
+        created_at:
+          type: string
+          format: date-time
+        updated_at:
+          type: string
+          format: date-time
+        n_epochs:
+          type: integer
+        n_checkpoints:
+          type: integer
+        n_evals:
+          type: integer
+        batch_size:
+          oneOf:
+            - type: integer
             - type: string
               enum:
-                - WhereIsAI/UAE-Large-V1
-                - BAAI/bge-large-en-v1.5
-                - BAAI/bge-base-en-v1.5
-                - togethercomputer/m2-bert-80M-8k-retrieval
-            - type: string
-        input:
+                - max
+          default: 'max'
+        learning_rate:
+          type: number
+        lr_scheduler:
+          type: object
+          $ref: '#/components/schemas/LRScheduler'
+        warmup_ratio:
+          type: number
+        max_grad_norm:
+          type: number
+          format: float
+        weight_decay:
+          type: number
+          format: float
+        eval_steps:
+          type: integer
+        train_on_inputs:
           oneOf:
+            - type: boolean
             - type: string
-              description: A string providing the text for the model to embed.
-              example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
-            - type: array
-              items:
-                type: string
-                description: A string providing the text for the model to embed.
-                example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
-          example: Our solar system orbits the Milky Way galaxy at about 515,000 mph
-
-    EmbeddingsResponse:
-      type: object
-      required:
-        - object
-        - model
-        - data
-      properties:
-        object:
-          type: string
-          enum:
-            - list
-        model:
+              enum:
+                - auto
+          default: auto
+        training_method:
+          type: object
+          oneOf:
+            - $ref: '#/components/schemas/TrainingMethodSFT'
+            - $ref: '#/components/schemas/TrainingMethodDPO'
+        training_type:
+          type: object
+          oneOf:
+            - $ref: '#/components/schemas/FullTrainingType'
+            - $ref: '#/components/schemas/LoRATrainingType'
+        multimodal_params:
+          $ref: '#/components/schemas/MultimodalParams'
+        status:
+          $ref: '#/components/schemas/FinetuneJobStatus'
+        job_id:
           type: string
-        data:
+        events:
           type: array
           items:
-            type: object
-            required: [index, object, embedding]
-            properties:
-              object:
-                type: string
-                enum:
-                  - embedding
-              embedding:
-                type: array
-                items:
-                  type: number
-              index:
-                type: integer
+            $ref: '#/components/schemas/FineTuneEvent'
+        token_count:
+          type: integer
+        param_count:
+          type: integer
+        total_price:
+          type: integer
+        epochs_completed:
+          type: integer
+        queue_depth:
+          type: integer
+        wandb_project_name:
+          type: string
+        wandb_url:
+          type: string
+        from_checkpoint:
+          type: string
+        from_hf_model:
+          type: string
+        hf_model_revision:
+          type: string
+        progress:
+          $ref: '#/components/schemas/FineTuneProgress'
 
-    ModelInfoList:
-      type: array
-      items:
-        $ref: '#/components/schemas/ModelInfo'
-    ModelInfo:
+    FinetuneResponseTruncated:
       type: object
-      required: [id, object, created, type]
+      description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints
+      required:
+        - id
+        - status
+        - created_at
+        - updated_at
+      example:
+        id: ft-01234567890123456789
+        status: completed
+        created_at: '2023-05-17T17:35:45.123Z'
+        updated_at: '2023-05-17T18:46:23.456Z'
+        user_id: 'user_01234567890123456789'
+        owner_address: 'user@example.com'
+        total_price: 1500
+        token_count: 850000
+        events: [] # FineTuneTruncated object has no events
+        model: 'meta-llama/Llama-2-7b-hf'
+        model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631'
+        n_epochs: 3
+        training_file: 'file-01234567890123456789'
+        wandb_project_name: 'my-finetune-project'
       properties:
         id:
           type: string
-          example: 'Austism/chronos-hermes-13b'
-        object:
-          type: string
-          example: 'model'
-        created:
-          type: integer
-          example: 1692896905
-        type:
-          enum:
-            - chat
-            - language
-            - code
-            - image
-            - embedding
-            - moderation
-            - rerank
-          example: 'chat'
-        display_name:
+          description: Unique identifier for the fine-tune job
+        status:
+          $ref: '#/components/schemas/FinetuneJobStatus'
+        created_at:
           type: string
-          example: 'Chronos Hermes (13B)'
-        organization:
+          format: date-time
+          description: Creation timestamp of the fine-tune job
+        updated_at:
           type: string
-          example: 'Austism'
-        link:
+          format: date-time
+          description: Last update timestamp of the fine-tune job
+        user_id:
           type: string
-        license:
+          description: Identifier for the user who created the job
+        owner_address:
           type: string
-          example: 'other'
-        context_length:
+          description: Owner address information
+        total_price:
           type: integer
-          example: 2048
-        pricing:
-          $ref: '#/components/schemas/Pricing'
-
-    ModelUploadRequest:
-      type: object
-      required:
-        - model_name
-        - model_source
-      properties:
-        model_name:
+          description: Total price for the fine-tuning job
+        token_count:
+          type: integer
+          description: Count of tokens processed
+        events:
+          type: array
+          items:
+            $ref: '#/components/schemas/FineTuneEvent'
+          description: Events related to this fine-tune job
+        # FineTuneUserParams fields
+        training_file:
           type: string
-          description: The name to give to your uploaded model
-          example: 'Qwen2.5-72B-Instruct'
-        model_source:
+          description: File-ID of the training file
+        validation_file:
           type: string
-          description: The source location of the model (Hugging Face repo or S3 path)
-          example: 'unsloth/Qwen2.5-72B-Instruct'
-        model_type:
+          description: File-ID of the validation file
+        model:
           type: string
-          description: Whether the model is a full model or an adapter
-          default: 'model'
-          enum:
-            - model
-            - adapter
-          example: 'model'
-        hf_token:
+          description: Base model used for fine-tuning
+        model_output_name:
           type: string
-          description: Hugging Face token (if uploading from Hugging Face)
-          example: 'hf_examplehuggingfacetoken'
-        description:
+        suffix:
           type: string
-          description: A description of your model
-          example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
-        base_model:
+          description: Suffix added to the fine-tuned model name
+        n_epochs:
+          type: integer
+          description: Number of training epochs
+        n_evals:
+          type: integer
+          description: Number of evaluations during training
+        n_checkpoints:
+          type: integer
+          description: Number of checkpoints saved during training
+        batch_size:
+          type: integer
+          description: Batch size used for training
+        training_type:
+          oneOf:
+            - $ref: '#/components/schemas/FullTrainingType'
+            - $ref: '#/components/schemas/LoRATrainingType'
+          description: Type of training used (full or LoRA)
+        training_method:
+          oneOf:
+            - $ref: '#/components/schemas/TrainingMethodSFT'
+            - $ref: '#/components/schemas/TrainingMethodDPO'
+          description: Method of training used
+        learning_rate:
+          type: number
+          format: float
+          description: Learning rate used for training
+        lr_scheduler:
+          $ref: '#/components/schemas/LRScheduler'
+          description: Learning rate scheduler configuration
+        warmup_ratio:
+          type: number
+          format: float
+          description: Ratio of warmup steps
+        max_grad_norm:
+          type: number
+          format: float
+          description: Maximum gradient norm for clipping
+        weight_decay:
+          type: number
+          format: float
+          description: Weight decay value used
+        wandb_project_name:
           type: string
-          description: The base model to use for an adapter if setting it to run against a serverless pool.  Only used for model_type `adapter`.
-          example: 'Qwen/Qwen2.5-72B-Instruct'
-        lora_model:
+          description: Weights & Biases project name
+        wandb_name:
           type: string
-          description: The lora pool to use for an adapter if setting it to run against, say, a dedicated pool.  Only used for model_type `adapter`.
-          example: 'my_username/Qwen2.5-72B-Instruct-lora'
-
-    ModelUploadSuccessResponse:
-      type: object
-      required:
-        - data
-        - message
-      properties:
-        data:
-          type: object
-          required:
-            - job_id
-            - model_name
-            - model_id
-            - model_source
-          properties:
-            job_id:
-              type: string
-              example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
-            model_name:
-              type: string
-              example: 'necolinehubner/Qwen2.5-72B-Instruct'
-            model_id:
-              type: string
-              example: 'model-c0e32dfc-637e-47b2-bf4e-e9b2e58c9da7'
-            model_source:
-              type: string
-              example: 'huggingface'
-        message:
+          description: Weights & Biases run name
+        from_checkpoint:
           type: string
-          example: 'Processing model weights. Job created.'
-
-    ImageResponse:
-      type: object
-      properties:
-        id:
+          description: Checkpoint used to continue training
+        from_hf_model:
           type: string
-        model:
+          description: Hugging Face Hub repo to start training from
+        hf_model_revision:
           type: string
-        object:
-          enum:
-            - list
-          example: 'list'
-        data:
-          type: array
-          items:
-            oneOf:
-              - $ref: '#/components/schemas/ImageResponseDataB64'
-              - $ref: '#/components/schemas/ImageResponseDataUrl'
-            discriminator:
-              propertyName: type
-      required:
-        - id
-        - model
-        - object
-        - data
-
-    ImageResponseDataB64:
+          description: The revision of the Hugging Face Hub model to continue training from
+        progress:
+          $ref: '#/components/schemas/FineTuneProgress'
+          description: Progress information for the fine-tuning job
+    FinetuneDeleteResponse:
       type: object
-      required: [index, b64_json, type]
       properties:
-        index:
-          type: integer
-        b64_json:
-          type: string
-        type:
+        message:
           type: string
-          enum: [b64_json]
+          description: Message indicating the result of the deletion
+    FinetuneJobStatus:  # String enum referenced by the `status` field of FinetuneResponse and FinetuneResponseTruncated.
+      type: string
+      enum:  # NOTE(review): the order reads like a job lifecycle, but the schema itself does not define transitions.
+        - pending
+        - queued
+        - running
+        - compressing
+        - uploading
+        - cancel_requested
+        - cancelled
+        - error
+        - completed
 
-    ImageResponseDataUrl:
-      type: object
-      required: [index, url, type]
-      properties:
-        index:
-          type: integer
-        url:
-          type: string
-        type:
-          type: string
-          enum: [url]
+    FinetuneEventLevels:  # String enum referenced by FineTuneEvent.level.
+      type: string
+      enum:
+        - null  # NOTE(review): unquoted, so this is YAML null rather than the string 'null'; it cannot validate against `type: string` — confirm whether a nullable level was intended.
+        - info
+        - warning
+        - error
+        - legacy_info
+        - legacy_iwarning  # NOTE(review): spelling looks odd next to legacy_info — verify against the backend before changing a wire value.
+        - legacy_ierror  # NOTE(review): same spelling question as legacy_iwarning.
+    FinetuneEventType:  # String enum referenced by FineTuneEvent.type.
+      type: string
+      enum:  # Closed set of event kinds; grouping comments below are a reading aid, not part of the contract.
+        - job_pending
+        - job_start
+        - job_stopped
+        - model_downloading  # `*_downloading` / `*_download_complete` values come in pairs for model, training data and validation data.
+        - model_download_complete
+        - training_data_downloading
+        - training_data_download_complete
+        - validation_data_downloading
+        - validation_data_download_complete
+        - wandb_init
+        - training_start
+        - checkpoint_save
+        - billing_limit
+        - epoch_complete
+        - training_complete
+        - model_compressing
+        - model_compression_complete
+        - model_uploading
+        - model_upload_complete
+        - job_complete
+        - job_error
+        - cancel_requested  # Same literal also appears in FinetuneJobStatus.
+        - job_restarted
+        - refund
+        - warning  # Same literal also appears in FinetuneEventLevels.
 
-    JobInfoSuccessResponse:
+    FinetuneTruncatedList:
       type: object
       required:
-        - type
-        - job_id
-        - status
-        - status_updates
-        - args
-        - created_at
-        - updated_at
+        - data
       properties:
-        type:
-          type: string
-          example: 'model_upload'
-        job_id:
-          type: string
-          example: 'job-a15dad11-8d8e-4007-97c5-a211304de284'
-        status:
-          type: string
-          enum: ['Queued', 'Running', 'Complete', 'Failed']
-          example: 'Complete'
-        status_updates:
+        data:
           type: array
           items:
-            type: object
-            required:
-              - status
-              - message
-              - timestamp
-            properties:
-              status:
-                type: string
-                example: 'Complete'
-              message:
-                type: string
-                example: 'Job is Complete'
-              timestamp:
-                type: string
-                format: date-time
-                example: '2025-03-11T22:36:12Z'
-        args:
-          type: object
-          properties:
-            description:
-              type: string
-              example: 'Finetuned Qwen2.5-72B-Instruct by Unsloth'
-            modelName:
-              type: string
-              example: 'necolinehubner/Qwen2.5-72B-Instruct'
-            modelSource:
-              type: string
-              example: 'unsloth/Qwen2.5-72B-Instruct'
-        created_at:
-          type: string
-          format: date-time
-          example: '2025-03-11T22:05:43Z'
-        updated_at:
-          type: string
-          format: date-time
-          example: '2025-03-11T22:36:12Z'
-
-    JobsInfoSuccessResponse:
+            $ref: '#/components/schemas/FinetuneResponseTruncated'
+    FinetuneListEvents:
       type: object
       required:
         - data
@@ -7216,1967 +8888,2214 @@ components:
         data:
           type: array
           items:
-            $ref: '#/components/schemas/JobInfoSuccessResponse'
-
-    Pricing:
-      type: object
-      required: [hourly, input, output, base, finetune]
-      properties:
-        hourly:
-          type: number
-          example: 0
-        input:
-          type: number
-          example: 0.3
-        output:
-          type: number
-          example: 0.3
-        base:
-          type: number
-          example: 0
-        finetune:
-          type: number
-          example: 0
-
-    ToolsPart:
-      type: object
-      properties:
-        type:
-          type: string
-          example: 'tool_type'
-        function:
-          type: object
-          properties:
-            description:
-              type: string
-              example: 'A description of the function.'
-            name:
-              type: string
-              example: 'function_name'
-            parameters:
-              type: object
-              additionalProperties: true
-              description: 'A map of parameter names to their values.'
-    ToolChoice:
-      type: object
-      required: [id, type, function, index]
-      properties:
-        # TODO: is this the right place for index?
-        index:
-          type: number
-        id:
-          type: string
-        type:
-          type: string
-          enum: ['function']
-        function:
-          type: object
-          required: [name, arguments]
-          properties:
-            name:
-              type: string
-              example: 'function_name'
-            arguments:
-              type: string
-
-    FileResponse:
+            $ref: '#/components/schemas/FineTuneEvent'
+    FineTuneEvent:
       type: object
       required:
-        - id
         - object
         - created_at
-        - filename
-        - bytes
-        - purpose
-        - FileType
-        - Processed
-        - LineCount
+        - message
+        - type
+        - param_count
+        - token_count
+        - total_steps
+        - wandb_url
+        - step
+        - checkpoint_path
+        - model_path
+        - training_offset
+        - hash
       properties:
-        id:
-          type: string
         object:
           type: string
-          example: 'file'
+          enum: [fine-tune-event]
         created_at:
-          type: integer
-          example: 1715021438
-        filename:
           type: string
-          example: 'my_file.jsonl'
-        bytes:
+        level:
+          anyOf:
+            - $ref: '#/components/schemas/FinetuneEventLevels'
+        message:
+          type: string
+        type:
+          $ref: '#/components/schemas/FinetuneEventType'
+        param_count:
           type: integer
-          example: 2664
-        purpose:
-          $ref: '#/components/schemas/FilePurpose'
-        Processed:
-          type: boolean
-        FileType:
-          $ref: '#/components/schemas/FileType'
-        LineCount:
+        token_count:
           type: integer
-    FileList:
-      required:
-        - data
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/FileResponse'
-    FileObject:
-      type: object
-      properties:
-        object:
+        total_steps:
+          type: integer
+        wandb_url:
           type: string
-        id:
+        step:
+          type: integer
+        checkpoint_path:
           type: string
-        filename:
+        model_path:
           type: string
-        size:
+        training_offset:
           type: integer
-    FilePurpose:
-      type: string
-      description: The purpose of the file
-      example: 'fine-tune'
-      enum:
-        - fine-tune
-        - eval
-        - eval-sample
-        - eval-output
-        - eval-summary
-        - batch-generated
-        - batch-api
-    FileType:
-      type: string
-      description: The type of the file
-      default: 'jsonl'
-      example: 'jsonl'
-      enum:
-        - 'csv'
-        - 'jsonl'
-        - 'parquet'
-    FileDeleteResponse:
+        hash:
+          type: string
+    FineTuneProgress:
       type: object
+      description: Progress information for a fine-tuning job
+      required:
+        - estimate_available
+        - seconds_remaining
       properties:
-        id:
-          type: string
-        deleted:
+        estimate_available:
           type: boolean
-    FinetuneResponse:
+          description: Whether time estimate is available
+        seconds_remaining:
+          type: integer
+          description: Estimated time remaining, in seconds, until the fine-tuning job reaches its next state
+    FinetuneListCheckpoints:  # List wrapper: a required `data` array whose items are FineTuneCheckpoint objects.
+      type: object
+      required:
+        - data
+      properties:
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/FineTuneCheckpoint'
+    FineTuneCheckpoint:
       type: object
       required:
-        - id
-        - status
+        - step
+        - path
+        - created_at
+        - checkpoint_type
       properties:
-        id:
-          type: string
-          format: uuid
-        training_file:
+        step:
+          type: integer
+        created_at:
           type: string
-        validation_file:
+        path:
           type: string
-        model:
+        checkpoint_type:
           type: string
-        model_output_name:
+
+    FullTrainingType:
+      type: object
+      properties:
+        type:
           type: string
-        model_output_path:
+          enum: ['Full']
+      required:
+        - type
+    LoRATrainingType:
+      type: object
+      properties:
+        type:
           type: string
-        trainingfile_numlines:
+          enum: ['Lora']
+        lora_r:
           type: integer
-        trainingfile_size:
+        lora_alpha:
           type: integer
-        created_at:
+        lora_dropout:
+          type: number
+          format: float
+          default: 0.0
+        lora_trainable_modules:
           type: string
-          format: date-time
-        updated_at:
+          default: 'all-linear'
+      required:
+        - type
+        - lora_r
+        - lora_alpha
+
+    TrainingMethodSFT:
+      type: object
+      properties:
+        method:
           type: string
-          format: date-time
-        n_epochs:
-          type: integer
-        n_checkpoints:
-          type: integer
-        n_evals:
-          type: integer
-        batch_size:
+          enum: ['sft']
+        train_on_inputs:
           oneOf:
-            - type: integer
+            - type: boolean
             - type: string
               enum:
-                - max
-          default: 'max'
-        learning_rate:
+                - auto
+          # Intentionally no sibling `type` keyword; the oneOf above already allows a boolean or the string 'auto'.
+          default: auto
+          description: Whether to mask the user messages in conversational data or prompts in instruction data.
+      required:
+        - method
+        - train_on_inputs
+    TrainingMethodDPO:
+      type: object
+      properties:
+        method:
+          type: string
+          enum: ['dpo']
+        dpo_beta:
           type: number
-        lr_scheduler:
-          type: object
-          $ref: '#/components/schemas/LRScheduler'
-        warmup_ratio:
+          format: float
+          default: 0.1
+        rpo_alpha:
           type: number
-        max_grad_norm:
+          format: float
+          default: 0.0
+        dpo_normalize_logratios_by_length:
+          type: boolean
+          default: false
+        dpo_reference_free:
+          type: boolean
+          default: false
+        simpo_gamma:
           type: number
           format: float
-        weight_decay:
+          default: 0.0
+      required:
+        - method
+
+    MultimodalParams:
+      type: object
+      properties:
+        train_vision:
+          type: boolean
+          description: Whether to train the vision encoder of the model. Only available for multimodal models.
+
+    LRScheduler:
+      type: object
+      properties:
+        lr_scheduler_type:
+          type: string
+          enum:
+            - linear
+            - cosine
+        lr_scheduler_args:
+          oneOf:
+            - $ref: '#/components/schemas/LinearLRSchedulerArgs'
+            - $ref: '#/components/schemas/CosineLRSchedulerArgs'
+      required:
+        - lr_scheduler_type
+    CosineLRSchedulerArgs:
+      type: object
+      properties:
+        min_lr_ratio:
           type: number
           format: float
-        eval_steps:
+          default: 0.0
+          description: The ratio of the final learning rate to the peak learning rate
+        num_cycles:
+          type: number
+          format: float
+          default: 0.5
+          description: Number or fraction of cycles for the cosine learning rate scheduler
+      required:
+        - min_lr_ratio
+        - num_cycles
+    LinearLRSchedulerArgs:
+      type: object
+      properties:
+        min_lr_ratio:
+          type: number
+          format: float
+          default: 0.0
+          description: The ratio of the final learning rate to the peak learning rate
+
+    Autoscaling:
+      type: object
+      description: Configuration for automatic scaling of replicas based on demand.
+      required:
+        - min_replicas
+        - max_replicas
+      properties:
+        min_replicas:
           type: integer
-        train_on_inputs:
-          oneOf:
-            - type: boolean
-            - type: string
-              enum:
-                - auto
-          default: auto
-        training_method:
-          type: object
-          oneOf:
-            - $ref: '#/components/schemas/TrainingMethodSFT'
-            - $ref: '#/components/schemas/TrainingMethodDPO'
-        training_type:
-          type: object
-          oneOf:
-            - $ref: '#/components/schemas/FullTrainingType'
-            - $ref: '#/components/schemas/LoRATrainingType'
+          format: int32
+          description: The minimum number of replicas to maintain, even when there is no load
+          examples:
+            - 2
+        max_replicas:
+          type: integer
+          format: int32
+          description: The maximum number of replicas to scale up to under load
+          examples:
+            - 5
+
+    HardwareSpec:
+      type: object
+      description: Detailed specifications of a hardware configuration
+      required:
+        - gpu_type
+        - gpu_link
+        - gpu_memory
+        - gpu_count
+      properties:
+        gpu_type:
+          type: string
+          description: The type/model of GPU
+          examples:
+            - a100-80gb
+        gpu_link:
+          type: string
+          description: The GPU interconnect technology
+          examples:
+            - sxm
+        gpu_memory:
+          type: number
+          format: float
+          description: Amount of GPU memory in GB
+          examples:
+            - 80
+        gpu_count:
+          type: integer
+          format: int32
+          description: Number of GPUs in this configuration
+          examples:
+            - 2
+
+    EndpointPricing:
+      type: object
+      description: Pricing details for using an endpoint
+      required:
+        - cents_per_minute
+      properties:
+        cents_per_minute:
+          type: number
+          format: float
+          description: Cost per minute of endpoint uptime in cents
+          examples:
+            - 5.42
+
+    HardwareAvailability:
+      type: object
+      description: Indicates the current availability status of a hardware configuration
+      required:
+        - status
+      properties:
         status:
-          $ref: '#/components/schemas/FinetuneJobStatus'
-        job_id:
           type: string
-        events:
-          type: array
-          items:
-            $ref: '#/components/schemas/FineTuneEvent'
-        token_count:
-          type: integer
-        param_count:
-          type: integer
-        total_price:
-          type: integer
-        epochs_completed:
-          type: integer
-        queue_depth:
-          type: integer
-        wandb_project_name:
+          description: The availability status of the hardware configuration
+          enum:
+            - available
+            - unavailable
+            - insufficient
+
+    HardwareWithStatus:
+      type: object
+      description: Hardware configuration details with optional availability status
+      required:
+        - object
+        - id
+        - pricing
+        - specs
+        - updated_at
+      properties:
+        object:
+          type: string
+          enum:
+            - hardware
+        id:
           type: string
-        wandb_url:
+          description: Unique identifier for the hardware configuration
+          examples:
+            - 2x_nvidia_a100_80gb_sxm
+        pricing:
+          $ref: '#/components/schemas/EndpointPricing'
+        specs:
+          $ref: '#/components/schemas/HardwareSpec'
+        availability:
+          $ref: '#/components/schemas/HardwareAvailability'
+        updated_at:
           type: string
-        from_checkpoint:
+          format: date-time
+          description: Timestamp of when the hardware status was last updated
+
+    CreateEndpointRequest:
+      type: object
+      required:
+        - model
+        - hardware
+        - autoscaling
+      properties:
+        display_name:
           type: string
-        from_hf_model:
+          description: A human-readable name for the endpoint
+          examples:
+            - My Llama3 70b endpoint
+        model:
           type: string
-        hf_model_revision:
+          description: The model to deploy on this endpoint
+          examples:
+            - meta-llama/Llama-3-8b-chat-hf
+        hardware:
           type: string
-        progress:
-          $ref: '#/components/schemas/FineTuneProgress'
+          description: The hardware configuration to use for this endpoint
+          examples:
+            - 1x_nvidia_a100_80gb_sxm
+        autoscaling:
+          $ref: '#/components/schemas/Autoscaling'
+          description: Configuration for automatic scaling of the endpoint
+        disable_prompt_cache:
+          type: boolean
+          description: Whether to disable the prompt cache for this endpoint
+          default: false
+        disable_speculative_decoding:
+          type: boolean
+          description: Whether to disable speculative decoding for this endpoint
+          default: false
+        state:
+          type: string
+          description: The desired state of the endpoint
+          enum:
+            - STARTED
+            - STOPPED
+          default: STARTED
+          example: STARTED
+        inactive_timeout:
+          type: integer
+          description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Omit this field, set it to null, or set it to 0 to disable the automatic timeout.
+          nullable: true
+          example: 60
+        availability_zone:
+          type: string
+          description: Create the endpoint in a specified availability zone (e.g., us-central-4b)
 
-    FinetuneResponseTruncated:
+    DedicatedEndpoint:
       type: object
-      description: A truncated version of the fine-tune response, used for POST /fine-tunes, GET /fine-tunes and POST /fine-tunes/{id}/cancel endpoints
+      description: Details about a dedicated endpoint deployment
       required:
+        - object
         - id
-        - status
+        - name
+        - display_name
+        - model
+        - hardware
+        - type
+        - owner
+        - state
+        - autoscaling
         - created_at
-        - updated_at
-      example:
-        id: ft-01234567890123456789
-        status: completed
-        created_at: '2023-05-17T17:35:45.123Z'
-        updated_at: '2023-05-17T18:46:23.456Z'
-        user_id: 'user_01234567890123456789'
-        owner_address: 'user@example.com'
-        total_price: 1500
-        token_count: 850000
-        events: [] # FineTuneTruncated object has no events
-        model: 'meta-llama/Llama-2-7b-hf'
-        model_output_name: 'mynamespace/meta-llama/Llama-2-7b-hf-32162631'
-        n_epochs: 3
-        training_file: 'file-01234567890123456789'
-        wandb_project_name: 'my-finetune-project'
       properties:
+        object:
+          type: string
+          enum:
+            - endpoint
+          description: The type of object
+          example: endpoint
         id:
           type: string
-          description: Unique identifier for the fine-tune job
-        status:
-          $ref: '#/components/schemas/FinetuneJobStatus'
-        created_at:
+          description: Unique identifier for the endpoint
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+        name:
           type: string
-          format: date-time
-          description: Creation timestamp of the fine-tune job
-        updated_at:
+          description: System name for the endpoint
+          example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1
+        display_name:
           type: string
-          format: date-time
-          description: Last update timestamp of the fine-tune job
-        user_id:
+          description: Human-readable name for the endpoint
+          example: My Llama3 70b endpoint
+        model:
           type: string
-          description: Identifier for the user who created the job
-        owner_address:
+          description: The model deployed on this endpoint
+          example: meta-llama/Llama-3-8b-chat-hf
+        hardware:
           type: string
-          description: Owner address information
-        total_price:
-          type: integer
-          description: Total price for the fine-tuning job
-        token_count:
-          type: integer
-          description: Count of tokens processed
-        events:
-          type: array
-          items:
-            $ref: '#/components/schemas/FineTuneEvent'
-          description: Events related to this fine-tune job
-        # FineTuneUserParams fields
-        training_file:
+          description: The hardware configuration used for this endpoint
+          example: 1x_nvidia_a100_80gb_sxm
+        type:
           type: string
-          description: File-ID of the training file
-        validation_file:
+          enum:
+            - dedicated
+          description: The type of endpoint
+          example: dedicated
+        owner:
           type: string
-          description: File-ID of the validation file
-        model:
+          description: The owner of this endpoint
+          example: devuser
+        state:
           type: string
-          description: Base model used for fine-tuning
-        model_output_name:
+          enum:
+            - PENDING
+            - STARTING
+            - STARTED
+            - STOPPING
+            - STOPPED
+            - ERROR
+          description: Current state of the endpoint
+          example: STARTED
+        autoscaling:
+          $ref: '#/components/schemas/Autoscaling'
+          description: Configuration for automatic scaling of the endpoint
+        created_at:
           type: string
-        suffix:
+          format: date-time
+          description: Timestamp when the endpoint was created
+          example: '2025-02-04T10:43:55.405Z'
+
+    ListEndpoint:
+      type: object
+      description: Details about an endpoint when listed via the list endpoint
+      required:
+        - id
+        - object
+        - name
+        - model
+        - type
+        - owner
+        - state
+        - created_at
+      properties:
+        object:
           type: string
-          description: Suffix added to the fine-tuned model name
-        n_epochs:
-          type: integer
-          description: Number of training epochs
-        n_evals:
-          type: integer
-          description: Number of evaluations during training
-        n_checkpoints:
-          type: integer
-          description: Number of checkpoints saved during training
-        batch_size:
-          type: integer
-          description: Batch size used for training
-        training_type:
-          oneOf:
-            - $ref: '#/components/schemas/FullTrainingType'
-            - $ref: '#/components/schemas/LoRATrainingType'
-          description: Type of training used (full or LoRA)
-        training_method:
-          oneOf:
-            - $ref: '#/components/schemas/TrainingMethodSFT'
-            - $ref: '#/components/schemas/TrainingMethodDPO'
-          description: Method of training used
-        learning_rate:
-          type: number
-          format: float
-          description: Learning rate used for training
-        lr_scheduler:
-          $ref: '#/components/schemas/LRScheduler'
-          description: Learning rate scheduler configuration
-        warmup_ratio:
-          type: number
-          format: float
-          description: Ratio of warmup steps
-        max_grad_norm:
-          type: number
-          format: float
-          description: Maximum gradient norm for clipping
-        weight_decay:
-          type: number
-          format: float
-          description: Weight decay value used
-        wandb_project_name:
+          enum:
+            - endpoint
+          description: The type of object
+          example: endpoint
+        id:
           type: string
-          description: Weights & Biases project name
-        wandb_name:
+          description: Unique identifier for the endpoint
+          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
+        name:
           type: string
-          description: Weights & Biases run name
-        from_checkpoint:
+          description: System name for the endpoint
+          example: allenai/OLMo-7B
+        model:
           type: string
-          description: Checkpoint used to continue training
-        from_hf_model:
+          description: The model deployed on this endpoint
+          example: allenai/OLMo-7B
+        type:
           type: string
-          description: Hugging Face Hub repo to start training from
-        hf_model_revision:
+          enum:
+            - serverless
+            - dedicated
+          description: The type of endpoint
+          example: serverless
+        owner:
           type: string
-          description: The revision of the Hugging Face Hub model to continue training from
-        progress:
-          $ref: '#/components/schemas/FineTuneProgress'
-          description: Progress information for the fine-tuning job
-    FinetuneDeleteResponse:
-      type: object
+          description: The owner of this endpoint
+          example: together
+        state:
+          type: string
+          enum:
+            - PENDING
+            - STARTING
+            - STARTED
+            - STOPPING
+            - STOPPED
+            - ERROR
+          description: Current state of the endpoint
+          example: STARTED
+        created_at:
+          type: string
+          format: date-time
+          description: Timestamp when the endpoint was created
+          example: '2024-02-28T21:34:35.444Z'
+
+    DisplayorExecuteOutput:
       properties:
-        message:
+        data:
+          properties:
+            application/geo+json:
+              type: object
+            application/javascript:
+              type: string
+            application/json:
+              type: object
+            application/pdf:
+              format: byte
+              type: string
+            application/vnd.vega.v5+json:
+              type: object
+            application/vnd.vegalite.v4+json:
+              type: object
+            image/gif:
+              format: byte
+              type: string
+            image/jpeg:
+              format: byte
+              type: string
+            image/png:
+              format: byte
+              type: string
+            image/svg+xml:
+              type: string
+            text/html:
+              type: string
+            text/latex:
+              type: string
+            text/markdown:
+              type: string
+            text/plain:
+              type: string
+          type: object
+        type:
+          enum:
+            - display_data
+            - execute_result
           type: string
-          description: Message indicating the result of the deletion
-    FinetuneJobStatus:
-      type: string
-      enum:
-        - pending
-        - queued
-        - running
-        - compressing
-        - uploading
-        - cancel_requested
-        - cancelled
-        - error
-        - completed
+      required:
+        - type
+        - data
+      title: DisplayorExecuteOutput
+
+    Error:
+      oneOf:
+        - type: string
+        - additionalProperties: true
+          type: object
+      title: Error
+
+    ErrorOutput:
+      title: ErrorOutput
+      description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
+      properties:
+        data:
+          type: string
+        type:
+          enum:
+            - error
+          type: string
+      required:
+        - type
+        - data
+
+    ExecuteRequest:
+      title: ExecuteRequest
+      required:
+        - language
+        - code
+      properties:
+        code:
+          description: 'Code snippet to execute.'
+          example: "print('Hello, world!')"
+          type: string
+        files:
+          description: Files to upload to the session. If present, files will be uploaded before executing the given code.
+          items:
+            properties:
+              content:
+                type: string
+              encoding:
+                description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images.
+                enum:
+                  - string
+                  - base64
+                type: string
+              name:
+                type: string
+            required:
+              - name
+              - encoding
+              - content
+            type: object
+          type: array
+        language:
+          default: python
+          description: Programming language for the code to execute. Currently only supports Python, but more will be added.
+          enum: [python]
+          type: string
+        session_id:
+          description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired.
+          example: ses_abcDEF123
+          nullable: false
+          type: string
+
+    ExecuteResponse:
+      title: ExecuteResponse
+      type: object
+      description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.'
+      oneOf:
+        - title: SuccessfulExecution
+          type: object
+          required: [data, errors]
+          properties:
+            errors:
+              type: 'null'
+            data:
+              type: object
+              nullable: false
+              required: [session_id, outputs]
+              properties:
+                outputs:
+                  type: array
+                  items:
+                    discriminator:
+                      propertyName: type
+                    oneOf:
+                      - title: StreamOutput
+                        description: Outputs that were printed to stdout or stderr
+                        type: object
+                        required: [type, data]
+                        properties:
+                          type:
+                            enum:
+                              - stdout
+                              - stderr
+                            type: string
+                          data:
+                            type: string
+                      - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
+                        properties:
+                          data:
+                            type: string
+                          type:
+                            enum:
+                              - error
+                            type: string
+                        required:
+                          - type
+                          - data
+                        title: ErrorOutput
+                      - properties:
+                          data:
+                            properties:
+                              application/geo+json:
+                                type: object
+                                additionalProperties: true
+                              application/javascript:
+                                type: string
+                              application/json:
+                                type: object
+                                additionalProperties: true
+                              application/pdf:
+                                format: byte
+                                type: string
+                              application/vnd.vega.v5+json:
+                                type: object
+                                additionalProperties: true
+                              application/vnd.vegalite.v4+json:
+                                type: object
+                                additionalProperties: true
+                              image/gif:
+                                format: byte
+                                type: string
+                              image/jpeg:
+                                format: byte
+                                type: string
+                              image/png:
+                                format: byte
+                                type: string
+                              image/svg+xml:
+                                type: string
+                              text/html:
+                                type: string
+                              text/latex:
+                                type: string
+                              text/markdown:
+                                type: string
+                              text/plain:
+                                type: string
+                            type: object
+                          type:
+                            enum:
+                              - display_data
+                              - execute_result
+                            type: string
+                        required:
+                          - type
+                          - data
+                        title: DisplayorExecuteOutput
+                    title: InterpreterOutput
+                session_id:
+                  type: string
+                  description: Identifier of the current session. Used to make follow-up calls.
+                  example: ses_abcDEF123
+                  nullable: false
+                status:
+                  type: string
+                  enum:
+                    - success
+                  description: Status of the execution. Currently only supports success.
+        - title: FailedExecution
+          type: object
+          required: [data, errors]
+          properties:
+            data:
+              type: 'null'
+            errors:
+              type: array
+              items:
+                title: Error
+                oneOf:
+                  - type: string
+                  - type: object
+                    additionalProperties: true
 
-    FinetuneEventLevels:
-      type: string
-      enum:
-        - null
-        - info
-        - warning
-        - error
-        - legacy_info
-        - legacy_iwarning
-        - legacy_ierror
-    FinetuneEventType:
-      type: string
-      enum:
-        - job_pending
-        - job_start
-        - job_stopped
-        - model_downloading
-        - model_download_complete
-        - training_data_downloading
-        - training_data_download_complete
-        - validation_data_downloading
-        - validation_data_download_complete
-        - wandb_init
-        - training_start
-        - checkpoint_save
-        - billing_limit
-        - epoch_complete
-        - training_complete
-        - model_compressing
-        - model_compression_complete
-        - model_uploading
-        - model_upload_complete
-        - job_complete
-        - job_error
-        - cancel_requested
-        - job_restarted
-        - refund
-        - warning
+    InterpreterOutput:
+      discriminator:
+        propertyName: type
+      oneOf:
+        - description: Outputs that were printed to stdout or stderr
+          properties:
+            data:
+              type: string
+            type:
+              enum:
+                - stdout
+                - stderr
+              type: string
+          required:
+            - type
+            - data
+          title: StreamOutput
+        - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
+          properties:
+            data:
+              type: string
+            type:
+              enum:
+                - error
+              type: string
+          required:
+            - type
+            - data
+          title: ErrorOutput
+        - properties:
+            data:
+              properties:
+                application/geo+json:
+                  type: object
+                application/javascript:
+                  type: string
+                application/json:
+                  type: object
+                application/pdf:
+                  format: byte
+                  type: string
+                application/vnd.vega.v5+json:
+                  type: object
+                application/vnd.vegalite.v4+json:
+                  type: object
+                image/gif:
+                  format: byte
+                  type: string
+                image/jpeg:
+                  format: byte
+                  type: string
+                image/png:
+                  format: byte
+                  type: string
+                image/svg+xml:
+                  type: string
+                text/html:
+                  type: string
+                text/latex:
+                  type: string
+                text/markdown:
+                  type: string
+                text/plain:
+                  type: string
+              type: object
+            type:
+              enum:
+                - display_data
+                - execute_result
+              type: string
+          required:
+            - type
+            - data
+          title: DisplayorExecuteOutput
+      title: InterpreterOutput
 
-    FinetuneTruncatedList:
-      type: object
-      required:
-        - data
+    Response:
       properties:
-        data:
-          type: array
+        errors:
           items:
-            $ref: '#/components/schemas/FinetuneResponseTruncated'
-    FinetuneListEvents:
-      type: object
-      required:
-        - data
-      properties:
-        data:
+            oneOf:
+              - type: string
+              - additionalProperties: true
+                type: object
+            title: Error
           type: array
-          items:
-            $ref: '#/components/schemas/FineTuneEvent'
-    FineTuneEvent:
-      type: object
-      required:
-        - object
-        - created_at
-        - message
-        - type
-        - param_count
-        - token_count
-        - total_steps
-        - wandb_url
-        - step
-        - checkpoint_path
-        - model_path
-        - training_offset
-        - hash
-      properties:
-        object:
-          type: string
-          enum: [fine-tune-event]
-        created_at:
-          type: string
-        level:
-          anyOf:
-            - $ref: '#/components/schemas/FinetuneEventLevels'
-        message:
-          type: string
-        type:
-          $ref: '#/components/schemas/FinetuneEventType'
-        param_count:
-          type: integer
-        token_count:
-          type: integer
-        total_steps:
-          type: integer
-        wandb_url:
-          type: string
-        step:
-          type: integer
-        checkpoint_path:
-          type: string
-        model_path:
-          type: string
-        training_offset:
-          type: integer
-        hash:
-          type: string
-    FineTuneProgress:
+      title: Response
       type: object
-      description: Progress information for a fine-tuning job
-      required:
-        - estimate_available
-        - seconds_remaining
-      properties:
-        estimate_available:
-          type: boolean
-          description: Whether time estimate is available
-        seconds_remaining:
-          type: integer
-          description: Estimated time remaining in seconds for the fine-tuning job to next state
-    FinetuneListCheckpoints:
+
+    SessionListResponse:
+      allOf:
+        - properties:
+            errors:
+              items:
+                oneOf:
+                  - type: string
+                  - additionalProperties: true
+                    type: object
+                title: Error
+              type: array
+          title: Response
+          type: object
+        - properties:
+            data:
+              properties:
+                sessions:
+                  items:
+                    properties:
+                      execute_count:
+                        type: integer
+                      expires_at:
+                        format: date-time
+                        type: string
+                      id:
+                        description: Session Identifier. Used to make follow-up calls.
+                        example: ses_abcDEF123
+                        type: string
+                      last_execute_at:
+                        format: date-time
+                        type: string
+                      started_at:
+                        format: date-time
+                        type: string
+                    required:
+                      - execute_count
+                      - expires_at
+                      - id
+                      - last_execute_at
+                      - started_at
+                    type: object
+                  type: array
+              required:
+                - sessions
+          type: object
+      title: SessionListResponse
       type: object
-      required:
-        - data
+
+    StreamOutput:
+      description: Outputs that were printed to stdout or stderr
       properties:
         data:
-          type: array
-          items:
-            $ref: '#/components/schemas/FineTuneCheckpoint'
-    FineTuneCheckpoint:
-      type: object
-      required:
-        - step
-        - path
-        - created_at
-        - checkpoint_type
-      properties:
-        step:
-          type: integer
-        created_at:
-          type: string
-        path:
-          type: string
-        checkpoint_type:
           type: string
-
-    FullTrainingType:
-      type: object
-      properties:
         type:
+          enum:
+            - stdout
+            - stderr
           type: string
-          enum: ['Full']
       required:
         - type
-    LoRATrainingType:
+        - data
+      title: StreamOutput
+
+    CreateBatchRequest:
       type: object
+      required: [endpoint, input_file_id]
       properties:
-        type:
+        endpoint:
           type: string
-          enum: ['Lora']
-        lora_r:
-          type: integer
-        lora_alpha:
+          description: The endpoint to use for batch processing
+          example: '/v1/chat/completions'
+        input_file_id:
+          type: string
+          description: ID of the uploaded input file containing batch requests
+          example: 'file-abc123def456ghi789'
+        completion_window:
+          type: string
+          description: Time window for batch completion (optional)
+          example: '24h'
+        priority:
           type: integer
-        lora_dropout:
-          type: number
-          format: float
-          default: 0.0
-        lora_trainable_modules:
+          description: Priority for batch processing (optional)
+          example: 1
+        model_id:
           type: string
-          default: 'all-linear'
-      required:
-        - type
-        - lora_r
-        - lora_alpha
-
-    TrainingMethodSFT:
+          description: 'Model to use for processing batch requests'
+          example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+    BatchErrorResponse:
       type: object
       properties:
-        method:
+        error:
           type: string
-          enum: ['sft']
-        train_on_inputs:
-          oneOf:
-            - type: boolean
-            - type: string
-              enum:
-                - auto
-          type: boolean
-          default: auto
-          description: Whether to mask the user messages in conversational data or prompts in instruction data.
-      required:
-        - method
-        - train_on_inputs
-    TrainingMethodDPO:
+    BatchJobWithWarning:
       type: object
       properties:
-        method:
+        job:
+          $ref: '#/components/schemas/BatchJob'
+        warning:
           type: string
-          enum: ['dpo']
-        dpo_beta:
-          type: number
-          format: float
-          default: 0.1
-        rpo_alpha:
-          type: number
-          format: float
-          default: 0.0
-        dpo_normalize_logratios_by_length:
-          type: boolean
-          default: false
-        dpo_reference_free:
-          type: boolean
-          default: false
-        simpo_gamma:
-          type: number
-          format: float
-          default: 0.0
-      required:
-        - method
-
-    LRScheduler:
+    BatchJob:
       type: object
       properties:
-        lr_scheduler_type:
+        id:
           type: string
-          enum:
-            - linear
-            - cosine
-        lr_scheduler_args:
-          oneOf:
-            - $ref: '#/components/schemas/LinearLRSchedulerArgs'
-            - $ref: '#/components/schemas/CosineLRSchedulerArgs'
-      required:
-        - lr_scheduler_type
-    CosineLRSchedulerArgs:
-      type: object
-      properties:
-        min_lr_ratio:
-          type: number
-          format: float
-          default: 0.0
-          description: The ratio of the final learning rate to the peak learning rate
-        num_cycles:
-          type: number
-          format: float
-          default: 0.5
-          description: Number or fraction of cycles for the cosine learning rate scheduler
-      required:
-        - min_lr_ratio
-        - num_cycles
-    LinearLRSchedulerArgs:
-      type: object
-      properties:
-        min_lr_ratio:
-          type: number
-          format: float
-          default: 0.0
-          description: The ratio of the final learning rate to the peak learning rate
-
-    Autoscaling:
-      type: object
-      description: Configuration for automatic scaling of replicas based on demand.
-      required:
-        - min_replicas
-        - max_replicas
-      properties:
-        min_replicas:
-          type: integer
-          format: int32
-          description: The minimum number of replicas to maintain, even when there is no load
-          examples:
-            - 2
-        max_replicas:
+          format: uuid
+          example: '01234567-8901-2345-6789-012345678901'
+        user_id:
+          type: string
+          example: 'user_789xyz012'
+        input_file_id:
+          type: string
+          example: 'file-input123abc456def'
+        file_size_bytes:
           type: integer
-          format: int32
-          description: The maximum number of replicas to scale up to under load
-          examples:
-            - 5
-
-    HardwareSpec:
-      type: object
-      description: Detailed specifications of a hardware configuration
-      required:
-        - gpu_type
-        - gpu_link
-        - gpu_memory
-        - gpu_count
-      properties:
-        gpu_type:
+          format: int64
+          example: 1048576
+          description: 'Size of input file in bytes'
+        status:
+          $ref: '#/components/schemas/BatchJobStatus'
+        job_deadline:
           type: string
-          description: The type/model of GPU
-          examples:
-            - a100-80gb
-        gpu_link:
+          format: date-time
+          example: '2024-01-15T15:30:00Z'
+        created_at:
           type: string
-          description: The GPU interconnect technology
-          examples:
-            - sxm
-        gpu_memory:
+          format: date-time
+          example: '2024-01-15T14:30:00Z'
+        endpoint:
+          type: string
+          example: '/v1/chat/completions'
+        progress:
           type: number
-          format: float
-          description: Amount of GPU memory in GB
-          examples:
-            - 80
-        gpu_count:
-          type: integer
-          format: int32
-          description: Number of GPUs in this configuration
-          examples:
-            - 2
+          format: double
+          example: 75.0
+          description: 'Completion progress (0.0 to 100)'
+        model_id:
+          type: string
+          example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
+          description: 'Model used for processing requests'
+        output_file_id:
+          type: string
+          example: 'file-output789xyz012ghi'
+        error_file_id:
+          type: string
+          example: 'file-errors456def789jkl'
+        error:
+          type: string
+        completed_at:
+          type: string
+          format: date-time
+          example: '2024-01-15T15:45:30Z'
+    BatchJobStatus:
+      type: string
+      enum:
+        - VALIDATING
+        - IN_PROGRESS
+        - COMPLETED
+        - FAILED
+        - EXPIRED
+        - CANCELLED
+      example: 'IN_PROGRESS'
+      description: 'Current status of the batch job'
 
-    EndpointPricing:
+    EvaluationTypedRequest:
       type: object
-      description: Pricing details for using an endpoint
       required:
-        - cents_per_minute
+        - type
+        - parameters
       properties:
-        cents_per_minute:
-          type: number
-          format: float
-          description: Cost per minute of endpoint uptime in cents
-          examples:
-            - 5.42
+        type:
+          type: string
+          enum: [classify, score, compare]
+          description: The type of evaluation to perform
+          example: 'classify'
+        parameters:
+          oneOf:
+            - $ref: '#/components/schemas/EvaluationClassifyParameters'
+            - $ref: '#/components/schemas/EvaluationScoreParameters'
+            - $ref: '#/components/schemas/EvaluationCompareParameters'
+          description: Type-specific parameters for the evaluation
 
-    HardwareAvailability:
+    EvaluationClassifyParameters:
       type: object
-      description: Indicates the current availability status of a hardware configuration
       required:
-        - status
+        - judge
+        - labels
+        - pass_labels
+        - input_data_file_path
       properties:
-        status:
+        judge:
+          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
+        labels:
+          type: array
+          items:
+            type: string
+          minItems: 2
+          description: List of possible classification labels
+          example: ['yes', 'no']
+        pass_labels:
+          type: array
+          items:
+            type: string
+          minItems: 1
+          description: List of labels that are considered passing
+          example: ['yes']
+        model_to_evaluate:
+          $ref: '#/components/schemas/EvaluationModelOrString'
+        input_data_file_path:
           type: string
-          description: The availability status of the hardware configuration
-          enum:
-            - available
-            - unavailable
-            - insufficient
+          description: Data file ID
+          example: 'file-1234-aefd'
 
-    HardwareWithStatus:
+    EvaluationScoreParameters:
       type: object
-      description: Hardware configuration details with optional availability status
       required:
-        - object
-        - id
-        - pricing
-        - specs
-        - updated_at
+        - judge
+        - min_score
+        - max_score
+        - pass_threshold
+        - input_data_file_path
       properties:
-        object:
-          type: string
-          enum:
-            - hardware
-        id:
-          type: string
-          description: Unique identifier for the hardware configuration
-          examples:
-            - 2x_nvidia_a100_80gb_sxm
-        pricing:
-          $ref: '#/components/schemas/EndpointPricing'
-        specs:
-          $ref: '#/components/schemas/HardwareSpec'
-        availability:
-          $ref: '#/components/schemas/HardwareAvailability'
-        updated_at:
+        judge:
+          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
+        min_score:
+          type: number
+          format: float
+          example: 0.0
+          description: Minimum possible score
+        max_score:
+          type: number
+          format: float
+          example: 10.0
+          description: Maximum possible score
+        pass_threshold:
+          type: number
+          format: float
+          example: 7.0
+          description: Score threshold for passing
+        model_to_evaluate:
+          $ref: '#/components/schemas/EvaluationModelOrString'
+        input_data_file_path:
           type: string
-          format: date-time
-          description: Timestamp of when the hardware status was last updated
+          example: 'file-01234567890123456789'
+          description: Data file ID
 
-    CreateEndpointRequest:
+    EvaluationCompareParameters:
       type: object
       required:
-        - model
-        - hardware
-        - autoscaling
+        - judge
+        - input_data_file_path
       properties:
-        display_name:
-          type: string
-          description: A human-readable name for the endpoint
-          examples:
-            - My Llama3 70b endpoint
-        model:
-          type: string
-          description: The model to deploy on this endpoint
-          examples:
-            - meta-llama/Llama-3-8b-chat-hf
-        hardware:
-          type: string
-          description: The hardware configuration to use for this endpoint
-          examples:
-            - 1x_nvidia_a100_80gb_sxm
-        autoscaling:
-          $ref: '#/components/schemas/Autoscaling'
-          description: Configuration for automatic scaling of the endpoint
-        disable_prompt_cache:
-          type: boolean
-          description: Whether to disable the prompt cache for this endpoint
-          default: false
-        disable_speculative_decoding:
-          type: boolean
-          description: Whether to disable speculative decoding for this endpoint
-          default: false
-        state:
-          type: string
-          description: The desired state of the endpoint
-          enum:
-            - STARTED
-            - STOPPED
-          default: STARTED
-          example: STARTED
-        inactive_timeout:
-          type: integer
-          description: The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to null, omit or set to 0 to disable automatic timeout.
-          nullable: true
-          example: 60
-        availability_zone:
+        judge:
+          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
+        model_a:
+          $ref: '#/components/schemas/EvaluationModelOrString'
+        model_b:
+          $ref: '#/components/schemas/EvaluationModelOrString'
+        input_data_file_path:
           type: string
-          description: Create the endpoint in a specified availability zone (e.g., us-central-4b)
+          description: Data file ID
 
-    DedicatedEndpoint:
+    EvaluationJudgeModelConfig:
       type: object
-      description: Details about a dedicated endpoint deployment
       required:
-        - object
-        - id
-        - name
-        - display_name
         - model
-        - hardware
-        - type
-        - owner
-        - state
-        - autoscaling
-        - created_at
+        - system_template
+        - model_source
       properties:
-        object:
-          type: string
-          enum:
-            - endpoint
-          description: The type of object
-          example: endpoint
-        id:
-          type: string
-          description: Unique identifier for the endpoint
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
-        name:
-          type: string
-          description: System name for the endpoint
-          example: devuser/meta-llama/Llama-3-8b-chat-hf-a32b82a1
-        display_name:
-          type: string
-          description: Human-readable name for the endpoint
-          example: My Llama3 70b endpoint
         model:
           type: string
-          description: The model deployed on this endpoint
-          example: meta-llama/Llama-3-8b-chat-hf
-        hardware:
-          type: string
-          description: The hardware configuration used for this endpoint
-          example: 1x_nvidia_a100_80gb_sxm
-        type:
-          type: string
-          enum:
-            - dedicated
-          description: The type of endpoint
-          example: dedicated
-        owner:
+          description: Name of the judge model
+          example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
+        system_template:
           type: string
-          description: The owner of this endpoint
-          example: devuser
-        state:
+          description: System prompt template for the judge
+          example: 'Imagine you are a helpful assistant'
+        model_source:
           type: string
-          enum:
-            - PENDING
-            - STARTING
-            - STARTED
-            - STOPPING
-            - STOPPED
-            - ERROR
-          description: Current state of the endpoint
-          example: STARTED
-        autoscaling:
-          $ref: '#/components/schemas/Autoscaling'
-          description: Configuration for automatic scaling of the endpoint
-        created_at:
+          description: "Source of the judge model."
+          enum: [serverless, dedicated, external]
+        external_api_token:
           type: string
-          format: date-time
-          description: Timestamp when the endpoint was created
-          example: 2025-02-04T10:43:55.405Z
+          description: "Bearer/API token for external judge models."
+        external_base_url:
+          type: string
+          description: "Base URL for external judge models. Must be an OpenAI-compatible base URL."
 
-    ListEndpoint:
+    EvaluationModelOrString:
+      oneOf:
+        - type: string
+          description: Field name in the input data
+        - $ref: '#/components/schemas/EvaluationModelRequest'
+
+    EvaluationModelRequest:
       type: object
-      description: Details about an endpoint when listed via the list endpoint
       required:
-        - id
-        - object
-        - name
         - model
-        - type
-        - owner
-        - state
-        - created_at
+        - max_tokens
+        - temperature
+        - system_template
+        - input_template
+        - model_source
       properties:
-        object:
+        model:
           type: string
-          enum:
-            - endpoint
-          description: The type of object
-          example: endpoint
-        id:
+          description: Name of the model to evaluate
+          example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
+        max_tokens:
+          type: integer
+          minimum: 1
+          description: Maximum number of tokens to generate
+          example: 512
+        temperature:
+          type: number
+          format: float
+          minimum: 0
+          maximum: 2
+          description: Sampling temperature
+          example: 0.7
+        system_template:
           type: string
-          description: Unique identifier for the endpoint
-          example: endpoint-d23901de-ef8f-44bf-b3e7-de9c1ca8f2d7
-        name:
+          description: System prompt template
+          example: 'Imagine you are a helpful assistant'
+        input_template:
           type: string
-          description: System name for the endpoint
-          example: allenai/OLMo-7B
-        model:
+          description: Input prompt template
+          example: 'Please classify {{prompt}} based on the labels below'
+        model_source:
           type: string
-          description: The model deployed on this endpoint
-          example: allenai/OLMo-7B
-        type:
+          description: "Source of the model."
+          enum: [serverless, dedicated, external]
+        external_api_token:
           type: string
-          enum:
-            - serverless
-            - dedicated
-          description: The type of endpoint
-          example: serverless
-        owner:
+          description: "Bearer/API token for external models."
+        external_base_url:
           type: string
-          description: The owner of this endpoint
-          example: together
-        state:
+          description: "Base URL for external models. Must be an OpenAI-compatible base URL."
+
+    EvaluationResponse:
+      type: object
+      properties:
+        workflow_id:
           type: string
-          enum:
-            - PENDING
-            - STARTING
-            - STARTED
-            - STOPPING
-            - STOPPED
-            - ERROR
-          description: Current state of the endpoint
-          example: STARTED
-        created_at:
+          description: The ID of the created evaluation job
+          example: 'eval-1234-1244513'
+        status:
           type: string
-          format: date-time
-          description: Timestamp when the endpoint was created
-          example: 2024-02-28T21:34:35.444Z
+          enum: [pending]
+          description: Initial status of the job
 
-    DisplayorExecuteOutput:
+    EvaluationJob:
+      type: object
       properties:
-        data:
-          properties:
-            application/geo+json:
-              type: object
-            application/javascript:
-              type: string
-            application/json:
-              type: object
-            application/pdf:
-              format: byte
-              type: string
-            application/vnd.vega.v5+json:
-              type: object
-            application/vnd.vegalite.v4+json:
-              type: object
-            image/gif:
-              format: byte
-              type: string
-            image/jpeg:
-              format: byte
-              type: string
-            image/png:
-              format: byte
-              type: string
-            image/svg+xml:
-              type: string
-            text/html:
-              type: string
-            text/latex:
-              type: string
-            text/markdown:
-              type: string
-            text/plain:
-              type: string
-          type: object
+        workflow_id:
+          type: string
+          description: The evaluation job ID
+          example: 'eval-1234aedf'
         type:
-          enum:
-            - display_data
-            - execute_result
           type: string
-      required:
-        - type
-        - data
-      title: DisplayorExecuteOutput
-
-    Error:
-      oneOf:
-        - type: string
-        - additionalProperties: true
+          enum: [classify, score, compare]
+          description: The type of evaluation
+          example: classify
+        owner_id:
+          type: string
+          description: ID of the job owner (admin only)
+        status:
+          type: string
+          enum: [pending, queued, running, completed, error, user_error]
+          description: Current status of the job
+          example: completed
+        status_updates:
+          type: array
+          items:
+            $ref: '#/components/schemas/EvaluationJobStatusUpdate'
+          description: History of status updates (admin only)
+        parameters:
           type: object
-      title: Error
-
-    ErrorOutput:
-      title: ErrorOutput
-      description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
-      properties:
-        data:
+          description: The parameters used for this evaluation
+          additionalProperties: true
+        created_at:
           type: string
-        type:
-          enum:
-            - error
+          format: date-time
+          description: When the job was created
+          example: '2025-07-23T17:10:04.837888Z'
+        updated_at:
           type: string
-      required:
-        - type
-        - data
+          format: date-time
+          description: When the job was last updated
+          example: '2025-07-23T17:10:04.837888Z'
+        results:
+          oneOf:
+            - $ref: '#/components/schemas/EvaluationClassifyResults'
+            - $ref: '#/components/schemas/EvaluationScoreResults'
+            - $ref: '#/components/schemas/EvaluationCompareResults'
+            - type: object
+              properties:
+                error:
+                  type: string
+          nullable: true
+          description: Results of the evaluation (when completed)
 
-    ExecuteRequest:
-      title: ExecuteRequest
-      required:
-        - language
-        - code
+    EvaluationJobStatusUpdate:
+      type: object
       properties:
-        code:
-          description: 'Code snippet to execute.'
-          example: "print('Hello, world!')"
+        status:
           type: string
-        files:
-          description: Files to upload to the session. If present, files will be uploaded before executing the given code.
-          items:
-            properties:
-              content:
-                type: string
-              encoding:
-                description: Encoding of the file content. Use `string` for text files such as code, and `base64` for binary files, such as images.
-                enum:
-                  - string
-                  - base64
-                type: string
-              name:
-                type: string
-            required:
-              - name
-              - encoding
-              - content
-            type: object
-          type: array
-        language:
-          default: python
-          description: Programming language for the code to execute. Currently only supports Python, but more will be added.
-          enum:
-            - python
-        session_id:
-          description: Identifier of the current session. Used to make follow-up calls. Requests will return an error if the session does not belong to the caller or has expired.
-          example: ses_abcDEF123
-          nullable: false
+          description: The status at this update
+          example: pending
+        message:
+          type: string
+          description: Additional message for this update
+          example: Job is pending evaluation
+        timestamp:
+          type: string
+          format: date-time
+          description: When this update occurred
+          example: '2025-07-23T17:10:04.837888Z'
+
+    EvaluationClassifyResults:
+      type: object
+      properties:
+        generation_fail_count:
+          type: integer
+          minimum: 0
+          nullable: true
+          description: Number of failed generations.
+          example: 0
+        judge_fail_count:
+          type: integer
+          minimum: 0
+          nullable: true
+          description: Number of failed judge generations
+          example: 0
+        invalid_label_count:
+          type: integer
+          minimum: 0
+          nullable: true
+          description: Number of invalid labels
+          example: 0
+        result_file_id:
+          type: string
+          description: Data File ID
+          example: file-1234-aefd
+        pass_percentage:
+          type: number
+          format: float
+          nullable: true
+          description: Percentage of pass labels.
+          example: 10
+        label_counts:
           type: string
+          description: JSON string representing label counts
+          example: '{"yes": 10, "no": 0}'
 
-    ExecuteResponse:
-      title: ExecuteResponse
+    EvaluationScoreResults:
       type: object
-      description: 'The result of the execution. If successful, `data` contains the result and `errors` will be null. If unsuccessful, `data` will be null and `errors` will contain the errors.'
-      oneOf:
-        - title: SuccessfulExecution
-          type: object
-          required: [data, errors]
-          properties:
-            errors:
-              type: 'null'
-            data:
-              type: object
-              nullable: false
-              required: [session_id, outputs]
-              properties:
-                outputs:
-                  type: array
-                  items:
-                    discriminator:
-                      propertyName: type
-                    oneOf:
-                      - title: StreamOutput
-                        description: Outputs that were printed to stdout or stderr
-                        type: object
-                        required: [type, data]
-                        properties:
-                          type:
-                            enum:
-                              - stdout
-                              - stderr
-                            type: string
-                          data:
-                            type: string
-                      - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
-                        properties:
-                          data:
-                            type: string
-                          type:
-                            enum:
-                              - error
-                            type: string
-                        required:
-                          - type
-                          - data
-                        title: ErrorOutput
-                      - properties:
-                          data:
-                            properties:
-                              application/geo+json:
-                                type: object
-                                additionalProperties: true
-                              application/javascript:
-                                type: string
-                              application/json:
-                                type: object
-                                additionalProperties: true
-                              application/pdf:
-                                format: byte
-                                type: string
-                              application/vnd.vega.v5+json:
-                                type: object
-                                additionalProperties: true
-                              application/vnd.vegalite.v4+json:
-                                type: object
-                                additionalProperties: true
-                              image/gif:
-                                format: byte
-                                type: string
-                              image/jpeg:
-                                format: byte
-                                type: string
-                              image/png:
-                                format: byte
-                                type: string
-                              image/svg+xml:
-                                type: string
-                              text/html:
-                                type: string
-                              text/latex:
-                                type: string
-                              text/markdown:
-                                type: string
-                              text/plain:
-                                type: string
-                            type: object
-                          type:
-                            enum:
-                              - display_data
-                              - execute_result
-                            type: string
-                        required:
-                          - type
-                          - data
-                        title: DisplayorExecuteOutput
-                    title: InterpreterOutput
-                session_id:
-                  type: string
-                  description: Identifier of the current session. Used to make follow-up calls.
-                  example: ses_abcDEF123
-                  nullable: false
-                status:
-                  type: string
-                  enum:
-                    - success
-                  description: Status of the execution. Currently only supports success.
-        - title: FailedExecution
+      properties:
+        aggregated_scores:
           type: object
-          required: [data, errors]
-          properties:
-            data:
-              type: 'null'
-            errors:
-              type: array
-              items:
-                title: Error
-                oneOf:
-                  - type: string
-                  - type: object
-                    additionalProperties: true
-
-    InterpreterOutput:
-      discriminator:
-        propertyName: type
-      oneOf:
-        - description: Outputs that were printed to stdout or stderr
-          properties:
-            data:
-              type: string
-            type:
-              enum:
-                - stdout
-                - stderr
-              type: string
-          required:
-            - type
-            - data
-          title: StreamOutput
-        - description: Errors and exceptions that occurred. If this output type is present, your code did not execute successfully.
           properties:
-            data:
-              type: string
-            type:
-              enum:
-                - error
-              type: string
-          required:
-            - type
-            - data
-          title: ErrorOutput
-        - properties:
-            data:
-              properties:
-                application/geo+json:
-                  type: object
-                application/javascript:
-                  type: string
-                application/json:
-                  type: object
-                application/pdf:
-                  format: byte
-                  type: string
-                application/vnd.vega.v5+json:
-                  type: object
-                application/vnd.vegalite.v4+json:
-                  type: object
-                image/gif:
-                  format: byte
-                  type: string
-                image/jpeg:
-                  format: byte
-                  type: string
-                image/png:
-                  format: byte
-                  type: string
-                image/svg+xml:
-                  type: string
-                text/html:
-                  type: string
-                text/latex:
-                  type: string
-                text/markdown:
-                  type: string
-                text/plain:
-                  type: string
-              type: object
-            type:
-              enum:
-                - display_data
-                - execute_result
-              type: string
-          required:
-            - type
-            - data
-          title: DisplayorExecuteOutput
-      title: InterpreterOutput
+            mean_score:
+              type: number
+              format: float
+            std_score:
+              type: number
+              format: float
+            pass_percentage:
+              type: number
+              format: float
+        generation_fail_count:
+          type: number
+          format: integer
+          nullable: true
+          description: Number of failed generations.
+          example: 0
+        judge_fail_count:
+          type: number
+          format: integer
+          nullable: true
+          description: Number of failed judge generations
+          example: 0
+        invalid_score_count:
+          type: number
+          format: integer
+          description: number of invalid scores generated from model
+        failed_samples:
+          type: number
+          format: integer
+          description: number of failed samples generated from model
+        result_file_id:
+          type: string
+          description: Data File ID
+          example: file-1234-aefd
 
-    Response:
+    EvaluationCompareResults:
+      type: object
       properties:
-        errors:
+        num_samples:
+          type: integer
+          description: Total number of samples compared
+        A_wins:
+          type: integer
+          description: Number of times model A won
+        B_wins:
+          type: integer
+          description: Number of times model B won
+        Ties:
+          type: integer
+          description: Number of ties
+        generation_fail_count:
+          type: integer
+          format: int64
+          nullable: true
+          description: Number of failed generations.
+          example: 0
+        judge_fail_count:
+          type: integer
+          format: int64
+          nullable: true
+          description: Number of failed judge generations.
+          example: 0
+        result_file_id:
+          type: string
+          description: Data File ID
+
+    AudioFileBinary:
+      type: string
+      format: binary
+      description: Audio file to transcribe
+
+    AudioFileUrl:
+      type: string
+      format: uri
+      description: Public HTTPS URL to audio file
+
+    CreateVideoBody:
+      title: Create video request
+      description: Parameters for creating a new video generation job.
+      type: object
+      required:
+        - model
+      properties:
+        model:
+          type: string
+          description: The model to be used for the video creation request.
+        prompt:
+          type: string
+          maxLength: 32000
+          minLength: 1
+          description: Text prompt that describes the video to generate.
+        height:
+          type: integer
+        width:
+          type: integer
+        seconds:
+          type: string
+          description: Clip duration in seconds.
+        fps:
+          type: integer
+          description: Frames per second. Defaults to 24.
+        steps:
+          type: integer
+          minimum: 10
+          maximum: 50
+          description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time.
+        seed:
+          type: integer
+          description: Seed to use in initializing the video generation.  Using the same seed allows deterministic video generation.  If not provided a random seed is generated for each request.
+        guidance_scale:
+          type: integer
+          description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidance_scale affects both visual content and temporal consistency. Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns.
+        output_format:
+          $ref: '#/components/schemas/VideoOutputFormat'
+          description: Specifies the format of the output video. Defaults to MP4.
+        output_quality:
+          type: integer
+          description: Compression quality. Defaults to 20.
+        negative_prompt:
+          type: string
+          description: Similar to prompt, but specifies what to avoid instead of what to include
+        frame_images:
+          description: Array of images to guide video generation, similar to keyframes.
+          example:
+            [
+              {
+                "input_image": "aac49721-1964-481a-ae78-8a4e29b91402",
+                "frame": 0
+              },
+              {
+                "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7",
+                "frame": 48
+              },
+              {
+                "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe",
+                "frame": "last"
+              }
+            ]
+          type: array
           items:
-            oneOf:
-              - type: string
-              - additionalProperties: true
-                type: object
-            title: Error
+            $ref: '#/components/schemas/VideoFrameImageInput'
+        reference_images:
+          description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video.
           type: array
-      title: Response
-      type: object
+          items:
+            type: string
+    VideoStatus:
+      description: Current lifecycle status of the video job.
+      type: string
+      enum:
+        - in_progress
+        - completed
+        - failed
 
-    SessionListResponse:
-      allOf:
-        - properties:
-            errors:
-              items:
-                oneOf:
-                  - type: string
-                  - additionalProperties: true
-                    type: object
-                title: Error
-              type: array
-          title: Response
-          type: object
-        - properties:
-            data:
-              properties:
-                sessions:
-                  items:
-                    properties:
-                      execute_count:
-                        type: integer
-                      expires_at:
-                        format: date-time
-                        type: string
-                      id:
-                        description: Session Identifier. Used to make follow-up calls.
-                        example: ses_abcDEF123
-                        type: string
-                      last_execute_at:
-                        format: date-time
-                        type: string
-                      started_at:
-                        format: date-time
-                        type: string
-                    required:
-                      - execute_count
-                      - expires_at
-                      - id
-                      - last_execute_at
-                      - started_at
-                    type: object
-                  type: array
-              required:
-                - sessions
-          type: object
-      title: SessionListResponse
+    VideoFrameImageInput:
       type: object
+      required: ['input_image']
+      properties:
+        input_image:
+          type: string
+          description: URL path to hosted image that is used for a frame
+        frame:
+          description: |
+            Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied:
+            - frame_images size is one, frame is first.
+            - If size is two, frames are first and last.
+            - If size is larger, frames are first, last and evenly spaced between.
+          anyOf:
+            - type: number
+            - type: string
+              enum:
+                - first
+                - last
 
-    StreamOutput:
-      description: Outputs that were printed to stdout or stderr
+    VideoOutputFormat:
+      type: string
+      enum:
+        - MP4
+        - WEBM
+
+    VideoJob:
       properties:
-        data:
+        id:
+          type: string
+          description: Unique identifier for the video job.
+        object:
+          description: The object type, which is always video.
           type: string
-        type:
           enum:
-            - stdout
-            - stderr
+            - video
+        model:
           type: string
-      required:
-        - type
-        - data
-      title: StreamOutput
-
-    CreateBatchRequest:
+          description: The video generation model that produced the job.
+        status:
+          $ref: '#/components/schemas/VideoStatus'
+          description: Current lifecycle status of the video job.
+        created_at:
+          type: number
+          description: Unix timestamp (seconds) for when the job was created.
+        completed_at:
+          type: number
+          description: Unix timestamp (seconds) for when the job completed, if finished.
+        size:
+          type: string
+          description: The resolution of the generated video.
+        seconds:
+          type: string
+          description: Duration of the generated clip in seconds.
+        error:
+          description: Error payload that explains why generation failed, if applicable.
+          type: object
+          properties:
+            code:
+              type: string
+            message:
+              type: string
+          required:
+            - message
+        outputs:
+          description: Available upon completion. Provides the cost charged and the hosted URL for accessing the video.
+          type: object
+          properties:
+            cost:
+              type: integer
+              description: The cost of the generated video, charged to the owner's account.
+            video_url:
+              type: string
+              description: URL hosting the generated video
+          required:
+            - cost
+            - video_url
       type: object
-      required: [endpoint, input_file_id]
+      required:
+        - id
+        - model
+        - status
+        - size
+        - seconds
+        - created_at
+      title: Video job
+      description: Structured information describing a generated video job.
+    ContainerStatus:
       properties:
-        endpoint:
+        finishedAt:
+          description: FinishedAt is the timestamp when the container finished execution
+            (if terminated)
           type: string
-          description: The endpoint to use for batch processing
-          example: '/v1/chat/completions'
-        input_file_id:
+        message:
+          description: Message provides a human-readable message with details about the
+            container's status
           type: string
-          description: ID of the uploaded input file containing batch requests
-          example: 'file-abc123def456ghi789'
-        completion_window:
+        name:
+          description: Name is the name of the container
           type: string
-          description: Time window for batch completion (optional)
-          example: '24h'
-        priority:
-          type: integer
-          description: Priority for batch processing (optional)
-          example: 1
-        model_id:
+        reason:
+          description: Reason provides a brief machine-readable reason for the container's
+            current status
           type: string
-          description: 'Model to use for processing batch requests'
-          example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
-    BatchErrorResponse:
-      type: object
-      properties:
-        error:
+        startedAt:
+          description: StartedAt is the timestamp when the container started execution
           type: string
-    BatchJobWithWarning:
-      type: object
-      properties:
-        job:
-          $ref: '#/components/schemas/BatchJob'
-        warning:
+        status:
+          description: Status is the current state of the container (e.g., "Running",
+            "Terminated", "Waiting")
           type: string
-    BatchJob:
       type: object
+    CreateDeploymentRequest:
       properties:
-        id:
-          type: string
-          format: uuid
-          example: '01234567-8901-2345-6789-012345678901'
-        user_id:
-          type: string
-          example: 'user_789xyz012'
-        input_file_id:
+        args:
+          description: Args overrides the container's CMD. Provide as an array of
+            arguments (e.g., ["python", "app.py"])
+          items:
+            type: string
+          type: array
+        autoscaling:
+          additionalProperties:
+            type: string
+          description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
+            "QueueBacklogPerWorker", "target": "10"} to scale based on queue
+            backlog'
+          type: object
+        command:
+          description: Command overrides the container's ENTRYPOINT. Provide as an array
+            (e.g., ["/bin/sh", "-c"])
+          items:
+            type: string
+          type: array
+        cpu:
+          description: CPU is the number of CPU cores to allocate per container instance
+            (e.g., 0.1 = 100 milli cores)
+          minimum: 0.1
+          type: number
+        description:
+          description: Description is an optional human-readable description of your
+            deployment
           type: string
-          example: 'file-input123abc456def'
-        file_size_bytes:
+        environment_variables:
+          description: EnvironmentVariables is a list of environment variables to set in
+            the container. Each must have a name and either a value or
+            value_from_secret
+          items:
+            $ref: "#/components/schemas/EnvironmentVariable"
+          type: array
+        gpu_count:
+          description: GPUCount is the number of GPUs to allocate per container instance.
+            Defaults to 0 if not specified
           type: integer
-          format: int64
-          example: 1048576
-          description: 'Size of input file in bytes'
-        status:
-          $ref: '#/components/schemas/BatchJobStatus'
-        job_deadline:
+        gpu_type:
+          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb").
+          enum:
+            - h100-80gb
+            - a100-80gb
           type: string
-          format: date-time
-          example: '2024-01-15T15:30:00Z'
-        created_at:
+        health_check_path:
+          description: HealthCheckPath is the HTTP path for health checks (e.g.,
+            "/health"). If set, the platform will check this endpoint to
+            determine container health
           type: string
-          format: date-time
-          example: '2024-01-15T14:30:00Z'
-        endpoint:
+        image:
+          description: Image is the container image to deploy from registry.together.ai.
           type: string
-          example: '/v1/chat/completions'
-        progress:
+        max_replicas:
+          description: MaxReplicas is the maximum number of container instances that can
+            be scaled up to. If not set, will be set to MinReplicas
+          type: integer
+        memory:
+          description: Memory is the amount of RAM to allocate per container instance in
+            GiB (e.g., 0.5 = 512MiB)
+          minimum: 0.1
           type: number
-          format: float64
-          example: 75.0
-          description: 'Completion progress (0.0 to 100)'
-        model_id:
+        min_replicas:
+          description: MinReplicas is the minimum number of container instances to run.
+            Defaults to 1 if not specified
+          type: integer
+        name:
+          description: Name is the unique identifier for your deployment. Must contain
+            only alphanumeric characters, underscores, or hyphens (1-100
+            characters)
+          maxLength: 100
+          minLength: 1
           type: string
-          example: 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
-          description: 'Model used for processing requests'
-        output_file_id:
+        port:
+          description: Port is the container port your application listens on (e.g., 8080
+            for web servers). Required if your application serves traffic
+          type: integer
+        storage:
+          description: Storage is the amount of ephemeral disk storage to allocate per
+            container instance (e.g., 10 = 10GiB)
+          type: integer
+        termination_grace_period_seconds:
+          description: TerminationGracePeriodSeconds is the time in seconds to wait for
+            graceful shutdown before forcefully terminating the replica
+          type: integer
+        volumes:
+          description: Volumes is a list of volume mounts to attach to the container. Each
+            mount must reference an existing volume by name
+          items:
+            $ref: "#/components/schemas/VolumeMount"
+          type: array
+      required:
+        - gpu_type
+        - image
+        - name
+      type: object
+    CreateSecretRequest:
+      properties:
+        description:
+          description: Description is an optional human-readable description of the
+            secret's purpose (max 500 characters)
+          maxLength: 500
           type: string
-          example: 'file-output789xyz012ghi'
-        error_file_id:
+        name:
+          description: Name is the unique identifier for the secret. Can contain
+            alphanumeric characters, underscores, hyphens, forward slashes, and
+            periods (1-100 characters)
+          maxLength: 100
+          minLength: 1
           type: string
-          example: 'file-errors456def789jkl'
-        error:
+        project_id:
+          description: ProjectID is ignored - the project is automatically determined from
+            your authentication
           type: string
-        completed_at:
+        value:
+          description: Value is the sensitive data to store securely (e.g., API keys,
+            passwords, tokens). This value will be encrypted at rest
+          minLength: 1
           type: string
-          format: date-time
-          example: '2024-01-15T15:45:30Z'
-    BatchJobStatus:
-      type: string
-      enum:
-        - VALIDATING
-        - IN_PROGRESS
-        - COMPLETED
-        - FAILED
-        - EXPIRED
-        - CANCELLED
-      example: 'IN_PROGRESS'
-      description: 'Current status of the batch job'
-
-    EvaluationTypedRequest:
+      required:
+        - name
+        - value
       type: object
+    CreateVolumeRequest:
+      properties:
+        content:
+          allOf:
+            - $ref: "#/components/schemas/VolumeContent"
+          description: Content specifies the content configuration for this volume
+        name:
+          description: Name is the unique identifier for the volume within the project
+          type: string
+        type:
+          allOf:
+            - $ref: "#/components/schemas/VolumeType"
+          description: Type is the volume type (currently only "readOnly" is supported)
       required:
+        - content
+        - name
         - type
-        - parameters
+      type: object
+    DeploymentListResponse:
       properties:
-        type:
+        data:
+          description: Data is the array of deployment items
+          items:
+            $ref: "#/components/schemas/DeploymentResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
           type: string
-          enum: [classify, score, compare]
-          description: The type of evaluation to perform
-          example: 'classify'
-        parameters:
-          oneOf:
-            - $ref: '#/components/schemas/EvaluationClassifyParameters'
-            - $ref: '#/components/schemas/EvaluationScoreParameters'
-            - $ref: '#/components/schemas/EvaluationCompareParameters'
-          description: Type-specific parameters for the evaluation
-
-    EvaluationClassifyParameters:
       type: object
-      required:
-        - judge
-        - labels
-        - pass_labels
-        - input_data_file_path
+    DeploymentLogs:
       properties:
-        judge:
-          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
-        labels:
+        lines:
+          items:
+            type: string
           type: array
+      type: object
+    DeploymentResponseItem:
+      properties:
+        args:
+          description: Args are the arguments passed to the container's command
           items:
             type: string
-          minItems: 2
-          description: List of possible classification labels
-          example: ['yes', 'no']
-        pass_labels:
           type: array
+        autoscaling:
+          additionalProperties:
+            type: string
+          description: Autoscaling contains autoscaling configuration parameters for this
+            deployment
+          type: object
+        command:
+          description: Command is the entrypoint command run in the container
           items:
             type: string
-          minItems: 1
-          description: List of labels that are considered passing
-          example: ['yes']
-        model_to_evaluate:
-          $ref: '#/components/schemas/EvaluationModelOrString'
-        input_data_file_path:
-          type: string
-          description: Data file ID
-          example: 'file-1234-aefd'
-
-    EvaluationScoreParameters:
-      type: object
-      required:
-        - judge
-        - min_score
-        - max_score
-        - pass_threshold
-        - input_data_file_path
-      properties:
-        judge:
-          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
-        min_score:
-          type: number
-          format: float
-          example: 0.0
-          description: Minimum possible score
-        max_score:
+          type: array
+        cpu:
+          description: CPU is the amount of CPU resource allocated to each replica in
+            cores (fractional value is allowed)
           type: number
-          format: float
-          example: 10.0
-          description: Maximum possible score
-        pass_threshold:
+        created_at:
+          description: CreatedAt is the ISO8601 timestamp when this deployment was created
+          type: string
+        description:
+          description: Description provides a human-readable explanation of the
+            deployment's purpose or content
+          type: string
+        desired_replicas:
+          description: DesiredReplicas is the number of replicas that the orchestrator is
+            targeting
+          type: integer
+        environment_variables:
+          description: EnvironmentVariables is a list of environment variables set in the
+            container
+          items:
+            $ref: "#/components/schemas/EnvironmentVariable"
+          type: array
+        gpu_count:
+          description: GPUCount is the number of GPUs allocated to each replica in this
+            deployment
+          type: integer
+        gpu_type:
+          description: GPUType specifies the type of GPU requested (if any) for this
+            deployment
+          enum:
+            - h100-80gb
+            - a100-80gb
+          type: string
+        health_check_path:
+          description: HealthCheckPath is the HTTP path used for health checks of the
+            application
+          type: string
+        id:
+          description: ID is the unique identifier of the deployment
+          type: string
+        image:
+          description: Image specifies the container image used for this deployment
+          type: string
+        max_replicas:
+          description: MaxReplicas is the maximum number of replicas to run for this
+            deployment
+          type: integer
+        memory:
+          description: Memory is the amount of memory allocated to each replica in GiB
+            (fractional value is allowed)
           type: number
-          format: float
-          example: 7.0
-          description: Score threshold for passing
-        model_to_evaluate:
-          $ref: '#/components/schemas/EvaluationModelOrString'
-        input_data_file_path:
+        min_replicas:
+          description: MinReplicas is the minimum number of replicas to run for this
+            deployment
+          type: integer
+        name:
+          description: Name is the name of the deployment
+          type: string
+        object:
+          description: Object is the type identifier for this response (always "deployment")
+          type: string
+        port:
+          description: Port is the container port that the deployment exposes
+          type: integer
+        ready_replicas:
+          description: ReadyReplicas is the current number of replicas that are in the
+            Ready state
+          type: integer
+        replica_events:
+          additionalProperties:
+            $ref: "#/components/schemas/ReplicaEvent"
+          description: ReplicaEvents is a mapping of replica names or IDs to their status
+            events
+          type: object
+        status:
+          allOf:
+            - $ref: "#/components/schemas/DeploymentStatus"
+          description: Status represents the overall status of the deployment (e.g.,
+            Updating, Scaling, Ready, Failed)
+          enum:
+            - Updating
+            - Scaling
+            - Ready
+            - Failed
+        storage:
+          description: Storage is the amount of storage (in MB or units as defined by the
+            platform) allocated to each replica
+          type: integer
+        updated_at:
+          description: UpdatedAt is the ISO8601 timestamp when this deployment was last
+            updated
           type: string
-          example: 'file-01234567890123456789'
-          description: Data file ID
-
-    EvaluationCompareParameters:
+        volumes:
+          description: Volumes is a list of volume mounts for this deployment
+          items:
+            $ref: "#/components/schemas/VolumeMount"
+          type: array
       type: object
-      required:
-        - judge
-        - input_data_file_path
+    DeploymentStatus:
+      enum:
+        - Updating
+        - Scaling
+        - Ready
+        - Failed
+      type: string
+      x-enum-varnames:
+        - DeploymentStatusUpdating
+        - DeploymentStatusScaling
+        - DeploymentStatusReady
+        - DeploymentStatusFailed
+    EnvironmentVariable:
       properties:
-        judge:
-          $ref: '#/components/schemas/EvaluationJudgeModelConfig'
-        model_a:
-          $ref: '#/components/schemas/EvaluationModelOrString'
-        model_b:
-          $ref: '#/components/schemas/EvaluationModelOrString'
-        input_data_file_path:
+        name:
+          description: Name is the environment variable name (e.g., "DATABASE_URL"). Must
+            start with a letter or underscore, followed by letters, numbers, or
+            underscores
+          type: string
+        value:
+          description: Value is the plain text value for the environment variable. Use
+            this for non-sensitive values. Either Value or ValueFromSecret must
+            be set, but not both
+          type: string
+        value_from_secret:
+          description: ValueFromSecret references a secret by name or ID to use as the
+            value. Use this for sensitive values like API keys or passwords.
+            Either Value or ValueFromSecret must be set, but not both
           type: string
-          description: Data file name
-
-    EvaluationJudgeModelConfig:
-      type: object
       required:
-        - model
-        - system_template
-        - model_source
+        - name
+      type: object
+    ImageListResponse:
       properties:
-        model:
-          type: string
-          description: Name of the judge model
-          example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
-        system_template:
+        data:
+          description: Data is the array of image items
+          items:
+            $ref: "#/components/schemas/ImageResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
           type: string
-          description: System prompt template for the judge
-          example: 'Imagine you are a helpful assistant'
-        model_source:
+      type: object
+    ImageResponseItem:
+      properties:
+        object:
+          description: Object is the type identifier for this response (always "image")
           type: string
-          description: "Source of the judge model."
-          enum: [serverless, dedicated, external]
-        external_api_token:
+        tag:
+          description: Tag is the image tag/version identifier (e.g., "latest", "v1.0.0")
           type: string
-          description: "Bearer/API token for external judge models."
-        external_base_url:
+        url:
+          description: URL is the full registry URL for this image including tag (e.g.,
+            "registry.together.ai/project-id/repository:tag")
           type: string
-          description: "Base URL for external judge models. Must be OpenAI-compatible base URL."
-
-    EvaluationModelOrString:
-      oneOf:
-        - type: string
-          description: Field name in the input data
-        - $ref: '#/components/schemas/EvaluationModelRequest'
-
-    EvaluationModelRequest:
       type: object
-      required:
-        - model
-        - max_tokens
-        - temperature
-        - system_template
-        - input_template
-        - model_source
+    KubernetesEvent:
       properties:
-        model:
+        action:
+          description: Action is the action taken or reported by this event
           type: string
-          description: Name of the model to evaluate
-          example: 'meta-llama/Llama-3-70B-Instruct-Turbo'
-        max_tokens:
+        count:
+          description: Count is the number of times this event has occurred
           type: integer
-          minimum: 1
-          description: Maximum number of tokens to generate
-          example: 512
-        temperature:
-          type: number
-          format: float
-          minimum: 0
-          maximum: 2
-          description: Sampling temperature
-          example: 0.7
-        system_template:
-          type: string
-          description: System prompt template
-          example: 'Imagine you are helpful assistant'
-        input_template:
+        first_seen:
+          description: FirstSeen is the timestamp when this event was first observed
           type: string
-          description: Input prompt template
-          example: 'Please classify {{prompt}} based on the labels below'
-        model_source:
+        last_seen:
+          description: LastSeen is the timestamp when this event was last observed
           type: string
-          description: "Source of the model."
-          enum: [serverless, dedicated, external]
-        external_api_token:
+        message:
+          description: Message is a human-readable description of the event
           type: string
-          description: "Bearer/API token for external models."
-        external_base_url:
+        reason:
+          description: Reason is a brief machine-readable reason for this event (e.g.,
+            "Pulling", "Started", "Failed")
           type: string
-          description: "Base URL for external models. Must be OpenAI-compatible base URL"
-
-    EvaluationResponse:
       type: object
+    ListSecretsResponse:
       properties:
-        workflow_id:
+        data:
+          description: Data is the array of secret items
+          items:
+            $ref: "#/components/schemas/SecretResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
           type: string
-          description: The ID of the created evaluation job
-          example: 'eval-1234-1244513'
-        status:
+      type: object
+    ListVolumesResponse:
+      properties:
+        data:
+          description: Data is the array of volume items
+          items:
+            $ref: "#/components/schemas/VolumeResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
           type: string
-          enum: [pending]
-          description: Initial status of the job
-
-    EvaluationJob:
       type: object
+    ReplicaEvent:
       properties:
-        workflow_id:
+        container_status:
+          allOf:
+            - $ref: "#/components/schemas/ContainerStatus"
+          description: ContainerStatus provides detailed status information about the
+            container within this replica
+        events:
+          description: Events is a list of Kubernetes events related to this replica for
+            troubleshooting
+          items:
+            $ref: "#/components/schemas/KubernetesEvent"
+          type: array
+        replica_completed_at:
+          description: ReplicaCompletedAt is the timestamp when the replica finished
+            execution
           type: string
-          description: The evaluation job ID
-          example: 'eval-1234aedf'
-        type:
+        replica_marked_for_termination_at:
+          description: ReplicaMarkedForTerminationAt is the timestamp when the replica was
+            marked for termination
           type: string
-          enum: [classify, score, compare]
-          description: The type of evaluation
-          example: classify
-        owner_id:
+        replica_ready_since:
+          description: ReplicaReadySince is the timestamp when the replica became ready to
+            serve traffic
           type: string
-          description: ID of the job owner (admin only)
-        status:
+        replica_running_since:
+          description: ReplicaRunningSince is the timestamp when the replica entered the
+            running state
           type: string
-          enum: [pending, queued, running, completed, error, user_error]
-          description: Current status of the job
-          example: completed
-        status_updates:
-          type: array
-          items:
-            $ref: '#/components/schemas/EvaluationJobStatusUpdate'
-          description: History of status updates (admin only)
-        parameters:
-          type: object
-          description: The parameters used for this evaluation
-          additionalProperties: true
-        created_at:
+        replica_started_at:
+          description: ReplicaStartedAt is the timestamp when the replica was created
           type: string
-          format: date-time
-          description: When the job was created
-          example: '2025-07-23T17:10:04.837888Z'
-        updated_at:
+        replica_status:
+          description: ReplicaStatus is the current status of the replica (e.g.,
+            "Running", "Pending", "Failed")
           type: string
-          format: date-time
-          description: When the job was last updated
-          example: '2025-07-23T17:10:04.837888Z'
-        results:
-          oneOf:
-            - $ref: '#/components/schemas/EvaluationClassifyResults'
-            - $ref: '#/components/schemas/EvaluationScoreResults'
-            - $ref: '#/components/schemas/EvaluationCompareResults'
-            - type: object
-              properties:
-                error:
-                  type: string
-          nullable: true
-          description: Results of the evaluation (when completed)
-
-    EvaluationJobStatusUpdate:
-      type: object
-      properties:
-        status:
+        replica_status_message:
+          description: ReplicaStatusMessage provides a human-readable message explaining
+            the replica's status
           type: string
-          description: The status at this update
-          example: pending
-        message:
+        replica_status_reason:
+          description: ReplicaStatusReason provides a brief machine-readable reason for
+            the replica's status
           type: string
-          description: Additional message for this update
-          example: Job is pending evaluation
-        timestamp:
+        scheduled_on_cluster:
+          description: ScheduledOnCluster identifies which cluster this replica is
+            scheduled on
           type: string
-          format: date-time
-          description: When this update occurred
-          example: '2025-07-23T17:10:04.837888Z'
-
-    EvaluationClassifyResults:
       type: object
+    RepositoryListResponse:
       properties:
-        generation_fail_count:
-          type: number
-          format: integer
-          nullable: true
-          description: Number of failed generations.
-          example: 0
-        judge_fail_count:
-          type: number
-          format: integer
-          nullable: true
-          description: Number of failed judge generations
-          example: 0
-        invalid_label_count:
-          type: number
-          format: float
-          nullable: true
-          description: Number of invalid labels
-          example: 0
-        result_file_id:
-          type: string
-          description: Data File ID
-          example: file-1234-aefd
-        pass_percentage:
-          type: number
-          format: integer
-          nullable: true
-          description: Pecentage of pass labels.
-          example: 10
-        label_counts:
+        data:
+          description: Data is the array of repository items
+          items:
+            $ref: "#/components/schemas/RepositoryResponseItem"
+          type: array
+        object:
+          description: Object is the type identifier for this response (always "list")
           type: string
-          description: JSON string representing label counts
-          example: '{"yes": 10, "no": 0}'
-
-    EvaluationScoreResults:
       type: object
+    RepositoryResponseItem:
       properties:
-        aggregated_scores:
-          type: object
-          properties:
-            mean_score:
-              type: number
-              format: float
-            std_score:
-              type: number
-              format: float
-            pass_percentage:
-              type: number
-              format: float
-        generation_fail_count:
-          type: number
-          format: integer
-          nullable: true
-          description: Number of failed generations.
-          example: 0
-        judge_fail_count:
-          type: number
-          format: integer
-          nullable: true
-          description: Number of failed judge generations
-          example: 0
-        invalid_score_count:
-          type: number
-          format: integer
-          description: number of invalid scores generated from model
-        failed_samples:
-          type: number
-          format: integer
-          description: number of failed samples generated from model
-        result_file_id:
+        id:
+          description: ID is the unique identifier for this repository (repository name
+            with slashes replaced by "___")
+          type: string
+        object:
+          description: Object is the type identifier for this response (always
+            "image-repository")
+          type: string
+        url:
+          description: URL is the full registry URL for this repository (e.g.,
+            "registry.together.ai/project-id/repository-name")
           type: string
-          description: Data File ID
-          example: file-1234-aefd
-
-    EvaluationCompareResults:
       type: object
+    SecretResponseItem:
       properties:
-        num_samples:
-          type: integer
-          description: Total number of samples compared
-        A_wins:
-          type: integer
-          description: Number of times model A won
-        B_wins:
-          type: integer
-          description: Number of times model B won
-        Ties:
-          type: integer
-          description: Number of ties
-        generation_fail_count:
-          type: number
-          format: integer
-          nullable: true
-          description: Number of failed generations.
-          example: 0
-        judge_fail_count:
-          type: number
-          format: integer
-          nullable: true
-          description: Number of failed judge generations
-          example: 0
-        result_file_id:
+        created_at:
+          description: CreatedAt is the ISO8601 timestamp when this secret was created
+          type: string
+        created_by:
+          description: CreatedBy is the identifier of the user who created this secret
+          type: string
+        description:
+          description: Description is a human-readable description of the secret's purpose
+          type: string
+        id:
+          description: ID is the unique identifier for this secret
+          type: string
+        last_updated_by:
+          description: LastUpdatedBy is the identifier of the user who last updated this
+            secret
+          type: string
+        name:
+          description: Name is the name/key of the secret
+          type: string
+        object:
+          description: Object is the type identifier for this response (always "secret")
+          type: string
+        updated_at:
+          description: UpdatedAt is the ISO8601 timestamp when this secret was last updated
           type: string
-          description: Data File ID
-
-    AudioFileBinary:
-      type: string
-      format: binary
-      description: Audio file to transcribe
-
-    AudioFileUrl:
-      type: string
-      format: uri
-      description: Public HTTPS URL to audio file
-
-    CreateVideoBody:
-      title: Create video request
-      description: Parameters for creating a new video generation job.
       type: object
-      required:
-        - model
+    UpdateDeploymentRequest:
       properties:
-        model:
-          type: string
-          description: The model to be used for the video creation request.
-        prompt:
+        args:
+          description: Args overrides the container's CMD. Provide as an array of
+            arguments (e.g., ["python", "app.py"])
+          items:
+            type: string
+          type: array
+        autoscaling:
+          additionalProperties:
+            type: string
+          description: 'Autoscaling configuration as key-value pairs. Example: {"metric":
+            "QueueBacklogPerWorker", "target": "10"} to scale based on queue
+            backlog'
+          type: object
+        command:
+          description: Command overrides the container's ENTRYPOINT. Provide as an array
+            (e.g., ["/bin/sh", "-c"])
+          items:
+            type: string
+          type: array
+        cpu:
+          description: CPU is the number of CPU cores to allocate per container instance
+            (e.g., 0.1 = 100 millicores)
+          minimum: 0.1
+          type: number
+        description:
+          description: Description is an optional human-readable description of your
+            deployment
           type: string
-          maxLength: 32000
-          minLength: 1
-          description: Text prompt that describes the video to generate.
-        height:
-          type: integer
-        width:
+        environment_variables:
+          description: EnvironmentVariables is a list of environment variables to set in
+            the container. This will replace all existing environment variables
+          items:
+            $ref: "#/components/schemas/EnvironmentVariable"
+          type: array
+        gpu_count:
+          description: GPUCount is the number of GPUs to allocate per container instance
           type: integer
-        seconds:
+        gpu_type:
+          description: GPUType specifies the GPU hardware to use (e.g., "h100-80gb")
+          enum:
+            - h100-80gb
+            - a100-80gb
           type: string
-          description: Clip duration in seconds.
-        fps:
+        health_check_path:
+          description: HealthCheckPath is the HTTP path for health checks (e.g.,
+            "/health"). Set to empty string to disable health checks
+          type: string
+        image:
+          description: Image is the container image to deploy from registry.together.ai.
+          type: string
+        max_replicas:
+          description: MaxReplicas is the maximum number of replicas the deployment can
+            scale up to.
           type: integer
-          description: Frames per second. Defaults to 24.
-        steps:
+        memory:
+          description: Memory is the amount of RAM to allocate per container instance in
+            GiB (e.g., 0.5 = 512MiB)
+          minimum: 0.1
+          type: number
+        min_replicas:
+          description: MinReplicas is the minimum number of replicas to run
           type: integer
-          minimum: 10
-          maximum: 50
-          description: The number of denoising steps the model performs during video generation. More steps typically result in higher quality output but require longer processing time.
-        seed:
+        name:
+          description: Name is the new unique identifier for your deployment. Must contain
+            only alphanumeric characters, underscores, or hyphens (1-100
+            characters)
+          maxLength: 100
+          minLength: 1
+          type: string
+        port:
+          description: Port is the container port your application listens on (e.g., 8080
+            for web servers)
           type: integer
-          description: Seed to use in initializing the video generation.  Using the same seed allows deterministic video generation.  If not provided a random seed is generated for each request.
-        guidance_scale:
+        storage:
+          description: Storage is the amount of ephemeral disk storage to allocate per
+            container instance (e.g., 10 = 10GiB)
           type: integer
-          description: Controls how closely the video generation follows your prompt. Higher values make the model adhere more strictly to your text description, while lower values allow more creative freedom. guidence_scale affects both visual content and temporal consistency.Recommended range is 6.0-10.0 for most video models. Values above 12 may cause over-guidance artifacts or unnatural motion patterns.
-        output_format:
-          $ref: '#/components/schemas/VideoOutputFormat'
-          description: Specifies the format of the output video. Defaults to MP4.
-        output_quality:
+        termination_grace_period_seconds:
+          description: TerminationGracePeriodSeconds is the time in seconds to wait for
+            graceful shutdown before forcefully terminating the replica
           type: integer
-          description: Compression quality. Defaults to 20.
-        negative_prompt:
-          type: string
-          description: Similar to prompt, but specifies what to avoid instead of what to include
-        frame_images:
-          description: Array of images to guide video generation, similar to keyframes.
-          example:
-            - [
-              {
-                "input_image": "aac49721-1964-481a-ae78-8a4e29b91402",
-                "frame": 0
-              },
-              {
-                "input_image": "c00abf5f-6cdb-4642-a01d-1bfff7bc3cf7",
-                "frame": 48
-              },
-              {
-                "input_image": "3ad204c3-a9de-4963-8a1a-c3911e3afafe",
-                "frame": "last"
-              }
-            ]
-          type: array
+        volumes:
+          description: Volumes is a list of volume mounts to attach to the container. This
+            will replace all existing volumes
           items:
-            $ref: '#/components/schemas/VideoFrameImageInput'
-        reference_images:
-          description: Unlike frame_images which constrain specific timeline positions, reference images guide the general appearance that should appear consistently across the video.
+            $ref: "#/components/schemas/VolumeMount"
           type: array
-          items:
-            type: string
-    VideoStatus:
-      description: Current lifecycle status of the video job.
-      type: string
-      enum:
-        - in_progress
-        - completed
-        - failed
-
-    VideoFrameImageInput:
       type: object
-      required: ['input_image']
+    UpdateSecretRequest:
       properties:
-        input_image:
+        description:
+          description: Description is an optional human-readable description of the
+            secret's purpose (max 500 characters)
+          maxLength: 500
           type: string
-          description: URL path to hosted image that is used for a frame
-        frame:
-          description: |
-            Optional param to specify where to insert the frame. If this is omitted, the following heuristics are applied:
-            - frame_images size is one, frame is first.
-            - If size is two, frames are first and last.
-            - If size is larger, frames are first, last and evenly spaced between.
-          anyOf:
-            - type: number
-            - type: string
-              enum:
-                - first
-                - last
-
-    VideoOutputFormat:
-      type: string
-      enum:
-        - MP4
-        - WEBM
-
-    VideoJob:
+        name:
+          description: Name is the new unique identifier for the secret. Can contain
+            alphanumeric characters, underscores, hyphens, forward slashes, and
+            periods (1-100 characters)
+          maxLength: 100
+          minLength: 1
+          type: string
+        project_id:
+          description: ProjectID is ignored - the project is automatically determined from
+            your authentication
+          type: string
+        value:
+          description: Value is the new sensitive data to store securely. Updating this
+            will replace the existing secret value
+          minLength: 1
+          type: string
+      type: object
+    UpdateVolumeRequest:
+      properties:
+        content:
+          allOf:
+            - $ref: "#/components/schemas/VolumeContent"
+          description: Content specifies the new content that will be preloaded to this
+            volume
+        name:
+          description: Name is the new unique identifier for the volume within the project
+          type: string
+        type:
+          allOf:
+            - $ref: "#/components/schemas/VolumeType"
+          description: Type is the new volume type (currently only "readOnly" is supported)
+      type: object
+    VolumeMount:
+      properties:
+        mount_path:
+          description: MountPath is the path in the container where the volume will be
+            mounted (e.g., "/data")
+          type: string
+        name:
+          description: Name is the name of the volume to mount. Must reference an existing
+            volume by name or ID
+          type: string
+      required:
+        - mount_path
+        - name
+      type: object
+    VolumeResponseItem:
       properties:
+        content:
+          allOf:
+            - $ref: "#/components/schemas/VolumeContent"
+          description: Content specifies the content that will be preloaded to this volume
+        created_at:
+          description: CreatedAt is the ISO8601 timestamp when this volume was created
+          type: string
         id:
+          description: ID is the unique identifier for this volume
+          type: string
+        name:
+          description: Name is the name of the volume
           type: string
-          description: Unique identifier for the video job.
         object:
-          description: The object type, which is always video.
+          description: Object is the type identifier for this response (always "volume")
           type: string
-          enum:
-            - video
-        model:
+        type:
+          allOf:
+            - $ref: "#/components/schemas/VolumeType"
+          description: Type is the volume type (e.g., "readOnly")
+        updated_at:
+          description: UpdatedAt is the ISO8601 timestamp when this volume was last updated
           type: string
-          description: The video generation model that produced the job.
-        status:
-          $ref: '#/components/schemas/VideoStatus'
-          description: Current lifecycle status of the video job.
-        created_at:
-          type: number
-          description: Unix timestamp (seconds) for when the job was created.
-        completed_at:
-          type: number
-          description: Unix timestamp (seconds) for when the job completed, if finished.
-        size:
+      type: object
+    VolumeContent:
+      properties:
+        source_prefix:
+          description: SourcePrefix is the file path prefix for the content to be
+            preloaded into the volume
+          example: models/
           type: string
-          description: The resolution of the generated video.
-        seconds:
+        type:
+          description: Type is the content type (currently only "files" is supported which
+            allows preloading files uploaded via Files API into the volume)
+          enum:
+            - files
+          example: files
           type: string
-          description: Duration of the generated clip in seconds.
-        error:
-          description: Error payload that explains why generation failed, if applicable.
-          type: object
-          properties:
-            code:
-              type: string
-            message:
-              type: string
-          required:
-            - message
-        outputs:
-          description: Available upon completion, the outputs provides the cost charged and the hosted url to access the video
-          type: object
-          properties:
-            cost:
-              type: integer
-              description: The cost of generated video charged to the owners account.
-            video_url:
-              type: string
-              description: URL hosting the generated video
-          required:
-            - cost
-            - video_url
       type: object
-      required:
-        - id
-        - model
-        - status
-        - size
-        - seconds
-        - created_at
-      title: Video job
-      description: Structured information describing a generated video job.
+    VolumeType:
+      enum:
+        - readOnly
+      type: string
+      x-enum-varnames:
+        - VolumeTypeReadOnly
\ No newline at end of file