Skip to content

Commit 19d7a38

Browse files
committed
feat: add Ring 1T FP8 model to Chutes provider
- Add inclusionAI/Ring-1T-FP8 to the ChutesModelId type union
- Add model configuration with $1.00 input and $3.00 output pricing per million tokens
- Add a comprehensive test case for the new model
- Resolves issue #8654
1 parent 6b8c21f commit 19d7a38

File tree

2 files changed

+42
-6
lines changed

2 files changed

+42
-6
lines changed

packages/types/src/providers/chutes.ts

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ export type ChutesModelId =
3030
| "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
3131
| "microsoft/MAI-DS-R1-FP8"
3232
| "tngtech/DeepSeek-R1T-Chimera"
33+
| "inclusionAI/Ring-1T-FP8"
3334
| "zai-org/GLM-4.5-Air"
3435
| "zai-org/GLM-4.5-FP8"
3536
| "zai-org/GLM-4.5-turbo"
@@ -87,7 +88,8 @@ export const chutesModels = {
8788
supportsPromptCache: false,
8889
inputPrice: 0.23,
8990
outputPrice: 0.9,
90-
description: "DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.",
91+
description:
92+
"DeepSeek‑V3.1‑Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix‑ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance.",
9193
},
9294
"deepseek-ai/DeepSeek-V3.1-turbo": {
9395
maxTokens: 32768,
@@ -96,7 +98,8 @@ export const chutesModels = {
9698
supportsPromptCache: false,
9799
inputPrice: 1.0,
98100
outputPrice: 3.0,
99-
description: "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.",
101+
description:
102+
"DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2× quota per request and not intended for bulk workloads.",
100103
},
101104
"deepseek-ai/DeepSeek-V3.2-Exp": {
102105
maxTokens: 163840,
@@ -105,7 +108,8 @@ export const chutesModels = {
105108
supportsPromptCache: false,
106109
inputPrice: 0.25,
107110
outputPrice: 0.35,
108-
description: "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.",
111+
description:
112+
"DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long‑context training and inference efficiency while maintaining performance comparable to V3.1‑Terminus.",
109113
},
110114
"unsloth/Llama-3.3-70B-Instruct": {
111115
maxTokens: 32768, // From Groq
@@ -287,6 +291,15 @@ export const chutesModels = {
287291
outputPrice: 0,
288292
description: "TNGTech DeepSeek R1T Chimera model.",
289293
},
294+
"inclusionAI/Ring-1T-FP8": {
295+
maxTokens: 32768,
296+
contextWindow: 131072,
297+
supportsImages: false,
298+
supportsPromptCache: false,
299+
inputPrice: 1.0,
300+
outputPrice: 3.0,
301+
description: "Ring 1T FP8 model optimized for efficient inference with FP8 precision.",
302+
},
290303
"zai-org/GLM-4.5-Air": {
291304
maxTokens: 32768,
292305
contextWindow: 151329,
@@ -387,8 +400,9 @@ export const chutesModels = {
387400
contextWindow: 262144,
388401
supportsImages: true,
389402
supportsPromptCache: false,
390-
inputPrice: 0.1600,
391-
outputPrice: 0.6500,
392-
description: "Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.",
403+
inputPrice: 0.16,
404+
outputPrice: 0.65,
405+
description:
406+
"Qwen3‑VL‑235B‑A22B‑Thinking is an open‑weight MoE vision‑language model (235B total, ~22B activated) optimized for deliberate multi‑step reasoning with strong text‑image‑video understanding and long‑context capabilities.",
393407
},
394408
} as const satisfies Record<string, ModelInfo>

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,28 @@ describe("ChutesHandler", () => {
341341
)
342342
})
343343

344+
it("should return inclusionAI/Ring-1T-FP8 model with correct configuration", () => {
345+
const testModelId: ChutesModelId = "inclusionAI/Ring-1T-FP8"
346+
const handlerWithModel = new ChutesHandler({
347+
apiModelId: testModelId,
348+
chutesApiKey: "test-chutes-api-key",
349+
})
350+
const model = handlerWithModel.getModel()
351+
expect(model.id).toBe(testModelId)
352+
expect(model.info).toEqual(
353+
expect.objectContaining({
354+
maxTokens: 32768,
355+
contextWindow: 131072,
356+
supportsImages: false,
357+
supportsPromptCache: false,
358+
inputPrice: 1.0,
359+
outputPrice: 3.0,
360+
description: "Ring 1T FP8 model optimized for efficient inference with FP8 precision.",
361+
temperature: 0.5, // Default temperature for non-DeepSeek models
362+
}),
363+
)
364+
})
365+
344366
it("completePrompt method should return text from Chutes API", async () => {
345367
const expectedResponse = "This is a test response from Chutes"
346368
mockCreate.mockResolvedValueOnce({ choices: [{ message: { content: expectedResponse } }] })

0 commit comments

Comments (0)