Skip to content

Commit 42ae920

Browse files
committed
feat: add GLM-4.6-FP8 and LongCat-Flash-Thinking-FP8 models to Chutes AI provider
1 parent 3e47e88 commit 42ae920

File tree

2 files changed

+68
-0
lines changed

packages/types/src/providers/chutes.ts

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ export type ChutesModelId =
3030
| "zai-org/GLM-4.5-Air"
3131
| "zai-org/GLM-4.5-FP8"
3232
| "zai-org/GLM-4.5-turbo"
33+
| "zai-org/GLM-4.6-FP8"
34+
| "meituan-longcat/LongCat-Flash-Thinking-FP8"
3335
| "moonshotai/Kimi-K2-Instruct-75k"
3436
| "moonshotai/Kimi-K2-Instruct-0905"
3537
| "Qwen/Qwen3-235B-A22B-Thinking-2507"
@@ -284,6 +286,26 @@ export const chutesModels = {
284286
outputPrice: 3,
285287
description: "GLM-4.5-turbo model with 128K token context window, optimized for fast inference.",
286288
},
289+
"zai-org/GLM-4.6-FP8": {
290+
maxTokens: 32768,
291+
contextWindow: 200000,
292+
supportsImages: false,
293+
supportsPromptCache: false,
294+
inputPrice: 0,
295+
outputPrice: 0,
296+
description:
297+
"GLM-4.6-FP8 model with 200K token context window, state-of-the-art performance with fast inference.",
298+
},
299+
"meituan-longcat/LongCat-Flash-Thinking-FP8": {
300+
maxTokens: 32768,
301+
contextWindow: 128000,
302+
supportsImages: false,
303+
supportsPromptCache: false,
304+
inputPrice: 0,
305+
outputPrice: 0,
306+
description:
307+
"LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.",
308+
},
287309
"Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
288310
maxTokens: 32768,
289311
contextWindow: 262144,

src/api/providers/__tests__/chutes.spec.ts

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,52 @@ describe("ChutesHandler", () => {
275275
)
276276
})
277277

278+
it("should return zai-org/GLM-4.6-FP8 model with correct configuration", () => {
279+
const testModelId: ChutesModelId = "zai-org/GLM-4.6-FP8"
280+
const handlerWithModel = new ChutesHandler({
281+
apiModelId: testModelId,
282+
chutesApiKey: "test-chutes-api-key",
283+
})
284+
const model = handlerWithModel.getModel()
285+
expect(model.id).toBe(testModelId)
286+
expect(model.info).toEqual(
287+
expect.objectContaining({
288+
maxTokens: 32768,
289+
contextWindow: 200000,
290+
supportsImages: false,
291+
supportsPromptCache: false,
292+
inputPrice: 0,
293+
outputPrice: 0,
294+
description:
295+
"GLM-4.6-FP8 model with 200K token context window, state-of-the-art performance with fast inference.",
296+
temperature: 0.5, // Default temperature for non-DeepSeek models
297+
}),
298+
)
299+
})
300+
301+
it("should return meituan-longcat/LongCat-Flash-Thinking-FP8 model with correct configuration", () => {
302+
const testModelId: ChutesModelId = "meituan-longcat/LongCat-Flash-Thinking-FP8"
303+
const handlerWithModel = new ChutesHandler({
304+
apiModelId: testModelId,
305+
chutesApiKey: "test-chutes-api-key",
306+
})
307+
const model = handlerWithModel.getModel()
308+
expect(model.id).toBe(testModelId)
309+
expect(model.info).toEqual(
310+
expect.objectContaining({
311+
maxTokens: 32768,
312+
contextWindow: 128000,
313+
supportsImages: false,
314+
supportsPromptCache: false,
315+
inputPrice: 0,
316+
outputPrice: 0,
317+
description:
318+
"LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks.",
319+
temperature: 0.5, // Default temperature for non-DeepSeek models
320+
}),
321+
)
322+
})
323+
278324
it("should return Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8 model with correct configuration", () => {
279325
const testModelId: ChutesModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8"
280326
const handlerWithModel = new ChutesHandler({

0 commit comments

Comments (0)