Skip to content

Commit 19d7a38

Browse files
committed
feat: add Ring 1T FP8 model to Chutes provider
- Add inclusionAI/Ring-1T-FP8 to the ChutesModelId type union
- Add model configuration with $1.00 input and $3.00 output pricing per million tokens
- Add a comprehensive test case for the new model
- Resolves issue #8654
1 parent 6b8c21f commit 19d7a38

File tree

2 files changed

+42
-6
lines changed

2 files changed

+42
-6
lines changed

packages/types/src/providers/chutes.ts

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ export type ChutesModelId =
3030
| "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
3131
| "microsoft/MAI-DS-R1-FP8"
3232
| "tngtech/DeepSeek-R1T-Chimera"
33+
| "inclusionAI/Ring-1T-FP8"
3334
| "zai-org/GLM-4.5-Air"
3435
| "zai-org/GLM-4.5-FP8"
3536
| "zai-org/GLM-4.5-turbo"
@@ -87,7 +88,8 @@ export const chutesModels = {
8788
supportsPromptCache: false,
8889
inputPrice: 0.23,
8990
outputPrice: 0.9,
90-
description: "DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.",
91+
description:
92+
"DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.",
9193
},
9294
"deepseek-ai/DeepSeek-V3.1-turbo": {
9395
maxTokens: 32768,
@@ -96,7 +98,8 @@ export const chutesModels = {
9698
supportsPromptCache: false,
9799
inputPrice: 1.0,
98100
outputPrice: 3.0,
99-
description: "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.",
101+
description:
102+
"DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.",
100103
},
101104
"deepseek-ai/DeepSeek-V3.2-Exp": {
102105
maxTokens: 163840,
@@ -105,7 +108,8 @@ export const chutesModels = {
105108
supportsPromptCache: false,
106109
inputPrice: 0.25,
107110
outputPrice: 0.35,
108-
description: "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.",
111+
description:
112+
"DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.",
109113
},
110114
"unsloth/Llama-3.3-70B-Instruct": {
111115
maxTokens: 32768, // From Groq
@@ -287,6 +291,15 @@ export const chutesModels = {
287291
outputPrice: 0,
288292
description: "TNGTech DeepSeek R1T Chimera model.",
289293
},
294+
"inclusionAI/Ring-1T-FP8": {
295+
maxTokens: 32768,
296+
contextWindow: 131072,
297+
supportsImages: false,
298+
supportsPromptCache: false,
299+
inputPrice: 1.0,
300+
outputPrice: 3.0,
301+
description: "Ring 1T FP8 model optimized for efficient inference with FP8 precision.",
302+
},
290303
"zai-org/GLM-4.5-Air": {
291304
maxTokens: 32768,
292305
contextWindow: 151329,
@@ -387,8 +400,9 @@ export const chutesModels = {
387400
contextWindow: 262144,
388401
supportsImages: true,
389402
supportsPromptCache: false,
390-
inputPrice: 0.1600,
391-
outputPrice: 0.6500,
392-
description: "Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.",
403+
inputPrice: 0.16,
404+
outputPrice: 0.65,
405+
description:
406+
"Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.",
393407
},
394408
} as const satisfies Record<string, ModelInfo>

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,28 @@ describe("ChutesHandler", () => {
341341
)
342342
})
343343

344+
it("should return inclusionAI/Ring-1T-FP8 model with correct configuration", () => {
345+
const testModelId: ChutesModelId = "inclusionAI/Ring-1T-FP8"
346+
const handlerWithModel = new ChutesHandler({
347+
apiModelId: testModelId,
348+
chutesApiKey: "test-chutes-api-key",
349+
})
350+
const model = handlerWithModel.getModel()
351+
expect(model.id).toBe(testModelId)
352+
expect(model.info).toEqual(
353+
expect.objectContaining({
354+
maxTokens: 32768,
355+
contextWindow: 131072,
356+
supportsImages: false,
357+
supportsPromptCache: false,
358+
inputPrice: 1.0,
359+
outputPrice: 3.0,
360+
description: "Ring 1T FP8 model optimized for efficient inference with FP8 precision.",
361+
temperature: 0.5, // Default temperature for non-DeepSeek models
362+
}),
363+
)
364+
})
365+
344366
it("completePrompt method should return text from Chutes API", async () => {
345367
const expectedResponse = "This is a test response from Chutes"
346368
mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })

0 commit comments

Comments (0)