Skip to content

Commit 953dd0f

Browse files
rcogal, claude, steebchen
authored
feat(models): add Novita Qwen models and Kimi K2.5 (#1514)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Luca Steeb <contact@luca-steeb.com>
1 parent 8e0083e commit 953dd0f

File tree

6 files changed

+481
-0
lines changed

6 files changed

+481
-0
lines changed

apps/gateway/src/chat/chat.ts

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2312,6 +2312,38 @@ chat.openapi(completions, async (c) => {
23122312
}
23132313
}
23142314

2315+
// For Moonshot provider, enrich assistant messages with cached reasoning_content
2316+
// This is needed for multi-turn tool call conversations with thinking models
2317+
// Moonshot requires reasoning_content in assistant messages with tool_calls
2318+
if (usedProvider === "moonshot") {
2319+
const { redisClient } = await import("@llmgateway/cache");
2320+
for (const message of messages) {
2321+
if (
2322+
message.role === "assistant" &&
2323+
message.tool_calls &&
2324+
Array.isArray(message.tool_calls) &&
2325+
message.tool_calls.length > 0 &&
2326+
!(message as any).reasoning_content // Only add if not already present
2327+
) {
2328+
// Get reasoning_content from the first tool call (all tool calls share the same reasoning)
2329+
const firstToolCall = message.tool_calls[0];
2330+
if (firstToolCall?.id) {
2331+
try {
2332+
const cachedReasoningContent = await redisClient.get(
2333+
`reasoning_content:${firstToolCall.id}`,
2334+
);
2335+
if (cachedReasoningContent) {
2336+
// Add reasoning_content to the message for Moonshot
2337+
(message as any).reasoning_content = cachedReasoningContent;
2338+
}
2339+
} catch {
2340+
// Silently fail - reasoning_content caching is optional
2341+
}
2342+
}
2343+
}
2344+
}
2345+
}
2346+
23152347
const requestBody: ProviderRequestBody = await prepareRequestBody(
23162348
usedProvider,
23172349
usedModel,

apps/gateway/src/chat/tools/parse-provider-response.ts

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -715,6 +715,31 @@ export function parseProviderResponse(
715715
break;
716716
}
717717

718+
// Cache reasoning_content for Moonshot thinking models when tool_calls are present
719+
// This is needed for multi-turn tool call conversations because Moonshot requires
720+
// reasoning_content to be included in assistant messages with tool_calls
721+
if (
722+
usedProvider === "moonshot" &&
723+
reasoningContent &&
724+
toolResults &&
725+
Array.isArray(toolResults) &&
726+
toolResults.length > 0
727+
) {
728+
for (const toolCall of toolResults) {
729+
if (toolCall.id) {
730+
redisClient
731+
.setex(
732+
`reasoning_content:${toolCall.id}`,
733+
86400, // 1 day expiration
734+
reasoningContent,
735+
)
736+
.catch((err) => {
737+
logger.error("Failed to cache reasoning_content", { err });
738+
});
739+
}
740+
}
741+
}
742+
718743
return {
719744
content,
720745
reasoningContent,

packages/models/src/models/alibaba.ts

Lines changed: 258 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,20 @@ export const alibabaModels = [
292292
tools: true,
293293
jsonOutput: true,
294294
},
295+
{
296+
providerId: "novita",
297+
modelName: "qwen/qwen3-235b-a22b-instruct-2507",
298+
inputPrice: 0.09 / 1e6,
299+
outputPrice: 0.58 / 1e6,
300+
requestPrice: 0,
301+
contextSize: 131072,
302+
maxOutput: 16384,
303+
reasoning: false,
304+
streaming: true,
305+
vision: false,
306+
tools: true,
307+
jsonOutput: true,
308+
},
295309
],
296310
},
297311
{
@@ -316,6 +330,43 @@ export const alibabaModels = [
316330
stability: "unstable",
317331
jsonOutput: true,
318332
},
333+
{
334+
providerId: "novita",
335+
modelName: "qwen/qwen3-235b-a22b-thinking-2507",
336+
inputPrice: 0.3 / 1e6,
337+
outputPrice: 3 / 1e6,
338+
requestPrice: 0,
339+
contextSize: 131072,
340+
maxOutput: 32768,
341+
reasoning: false,
342+
streaming: true,
343+
vision: false,
344+
tools: true,
345+
jsonOutput: false,
346+
},
347+
],
348+
},
349+
{
350+
id: "qwen3-235b-a22b-fp8",
351+
name: "Qwen3 235B A22B FP8",
352+
description: "Qwen 3 235B MoE with FP8 quantization.",
353+
family: "alibaba",
354+
releasedAt: new Date("2025-04-28"),
355+
providers: [
356+
{
357+
providerId: "novita",
358+
modelName: "qwen/qwen3-235b-a22b-fp8",
359+
inputPrice: 0.2 / 1e6,
360+
outputPrice: 0.8 / 1e6,
361+
requestPrice: 0,
362+
contextSize: 40960,
363+
maxOutput: 20000,
364+
reasoning: false,
365+
streaming: true,
366+
vision: false,
367+
tools: false,
368+
jsonOutput: true,
369+
},
319370
],
320371
},
321372
{
@@ -555,6 +606,19 @@ export const alibabaModels = [
555606
tools: true,
556607
jsonOutput: true,
557608
},
609+
{
610+
providerId: "novita",
611+
modelName: "qwen/qwen3-coder-480b-a35b-instruct",
612+
inputPrice: 0.3 / 1e6,
613+
outputPrice: 1.3 / 1e6,
614+
requestPrice: 0,
615+
contextSize: 262144,
616+
maxOutput: 65536,
617+
streaming: true,
618+
vision: false,
619+
tools: true,
620+
jsonOutput: true,
621+
},
558622
],
559623
},
560624
{
@@ -577,6 +641,19 @@ export const alibabaModels = [
577641
tools: true,
578642
jsonOutput: true,
579643
},
644+
{
645+
providerId: "novita",
646+
modelName: "qwen/qwen3-coder-30b-a3b-instruct",
647+
inputPrice: 0.07 / 1e6,
648+
outputPrice: 0.27 / 1e6,
649+
requestPrice: 0,
650+
contextSize: 160000,
651+
maxOutput: 32768,
652+
streaming: true,
653+
vision: false,
654+
tools: true,
655+
jsonOutput: true,
656+
},
580657
],
581658
},
582659
{
@@ -691,6 +768,20 @@ export const alibabaModels = [
691768
tools: true,
692769
jsonOutput: false,
693770
},
771+
{
772+
providerId: "novita",
773+
modelName: "qwen/qwen3-next-80b-a3b-thinking",
774+
inputPrice: 0.15 / 1e6,
775+
outputPrice: 1.5 / 1e6,
776+
requestPrice: 0,
777+
contextSize: 131072,
778+
maxOutput: 32768,
779+
reasoning: true,
780+
streaming: true,
781+
vision: false,
782+
tools: true,
783+
jsonOutput: false,
784+
},
694785
],
695786
},
696787
{
@@ -713,6 +804,19 @@ export const alibabaModels = [
713804
tools: true,
714805
jsonOutput: true,
715806
},
807+
{
808+
providerId: "novita",
809+
modelName: "qwen/qwen3-next-80b-a3b-instruct",
810+
inputPrice: 0.15 / 1e6,
811+
outputPrice: 1.5 / 1e6,
812+
requestPrice: 0,
813+
contextSize: 131072,
814+
maxOutput: 32768,
815+
streaming: true,
816+
vision: false,
817+
tools: true,
818+
jsonOutput: true,
819+
},
716820
],
717821
},
718822
{
@@ -739,6 +843,19 @@ export const alibabaModels = [
739843
tools: true,
740844
jsonOutput: true,
741845
},
846+
{
847+
providerId: "novita",
848+
modelName: "qwen/qwen3-max",
849+
inputPrice: 0.845 / 1e6,
850+
outputPrice: 3.38 / 1e6,
851+
requestPrice: 0,
852+
contextSize: 262144,
853+
maxOutput: 65536,
854+
streaming: true,
855+
vision: false,
856+
tools: true,
857+
jsonOutput: true,
858+
},
742859
],
743860
},
744861
{
@@ -764,6 +881,120 @@ export const alibabaModels = [
764881
},
765882
],
766883
},
884+
{
885+
id: "qwen3-vl-30b-a3b-instruct",
886+
name: "Qwen3 VL 30B A3B Instruct",
887+
description: "Vision-language Qwen 3 MoE model, 30B size.",
888+
family: "alibaba",
889+
releasedAt: new Date("2025-10-05"),
890+
providers: [
891+
{
892+
providerId: "novita",
893+
modelName: "qwen/qwen3-vl-30b-a3b-instruct",
894+
inputPrice: 0.2 / 1e6,
895+
outputPrice: 0.7 / 1e6,
896+
requestPrice: 0,
897+
contextSize: 131072,
898+
maxOutput: 32768,
899+
streaming: true,
900+
vision: true,
901+
tools: true,
902+
jsonOutput: false,
903+
},
904+
],
905+
},
906+
{
907+
id: "qwen3-vl-30b-a3b-thinking",
908+
name: "Qwen3 VL 30B A3B Thinking",
909+
description: "Vision-language Qwen 3 MoE with reasoning capabilities.",
910+
family: "alibaba",
911+
releasedAt: new Date("2025-10-11"),
912+
providers: [
913+
{
914+
providerId: "novita",
915+
modelName: "qwen/qwen3-vl-30b-a3b-thinking",
916+
inputPrice: 0.2 / 1e6,
917+
outputPrice: 1 / 1e6,
918+
requestPrice: 0,
919+
contextSize: 131072,
920+
maxOutput: 32768,
921+
reasoning: true,
922+
streaming: true,
923+
vision: true,
924+
tools: true,
925+
jsonOutput: true,
926+
},
927+
],
928+
},
929+
{
930+
id: "qwen3-32b-fp8",
931+
name: "Qwen3 32B FP8",
932+
description: "Qwen 3 32B with FP8 quantization.",
933+
family: "alibaba",
934+
releasedAt: new Date("2025-04-28"),
935+
providers: [
936+
{
937+
providerId: "novita",
938+
modelName: "qwen/qwen3-32b-fp8",
939+
inputPrice: 0.1 / 1e6,
940+
outputPrice: 0.45 / 1e6,
941+
requestPrice: 0,
942+
contextSize: 40960,
943+
maxOutput: 20000,
944+
reasoning: false,
945+
streaming: true,
946+
vision: false,
947+
tools: false,
948+
jsonOutput: false,
949+
},
950+
],
951+
},
952+
{
953+
id: "qwen3-30b-a3b-fp8",
954+
name: "Qwen3 30B A3B FP8",
955+
description: "Qwen 3 30B MoE with FP8 quantization.",
956+
family: "alibaba",
957+
releasedAt: new Date("2025-04-28"),
958+
providers: [
959+
{
960+
providerId: "novita",
961+
modelName: "qwen/qwen3-30b-a3b-fp8",
962+
inputPrice: 0.09 / 1e6,
963+
outputPrice: 0.45 / 1e6,
964+
requestPrice: 0,
965+
contextSize: 40960,
966+
maxOutput: 20000,
967+
reasoning: false,
968+
streaming: true,
969+
vision: false,
970+
tools: false,
971+
jsonOutput: false,
972+
},
973+
],
974+
},
975+
{
976+
id: "qwen3-4b-fp8",
977+
name: "Qwen3 4B FP8",
978+
description: "Lightweight Qwen 3 4B with FP8 quantization.",
979+
family: "alibaba",
980+
releasedAt: new Date("2025-04-28"),
981+
providers: [
982+
{
983+
providerId: "novita",
984+
modelName: "qwen/qwen3-4b-fp8",
985+
inputPrice: 0.03 / 1e6,
986+
outputPrice: 0.03 / 1e6,
987+
requestPrice: 0,
988+
contextSize: 128000,
989+
maxOutput: 20000,
990+
reasoning: false,
991+
streaming: true,
992+
vision: false,
993+
tools: false,
994+
jsonOutput: false,
995+
},
996+
],
997+
},
767998
{
768999
id: "qwen-image-plus",
7691000
name: "Qwen Image Plus",
@@ -1081,6 +1312,19 @@ export const alibabaModels = [
10811312
tools: true,
10821313
jsonOutput: true,
10831314
},
1315+
{
1316+
providerId: "novita",
1317+
modelName: "qwen/qwen3-vl-235b-a22b-instruct",
1318+
inputPrice: 0.3 / 1e6,
1319+
outputPrice: 1.5 / 1e6,
1320+
requestPrice: 0,
1321+
contextSize: 131072,
1322+
maxOutput: 32768,
1323+
streaming: true,
1324+
vision: true,
1325+
tools: true,
1326+
jsonOutput: true,
1327+
},
10841328
],
10851329
},
10861330
{
@@ -1106,6 +1350,20 @@ export const alibabaModels = [
11061350
tools: false,
11071351
jsonOutput: false,
11081352
},
1353+
{
1354+
providerId: "novita",
1355+
modelName: "qwen/qwen3-vl-235b-a22b-thinking",
1356+
inputPrice: 0.98 / 1e6,
1357+
outputPrice: 3.95 / 1e6,
1358+
requestPrice: 0,
1359+
contextSize: 131072,
1360+
maxOutput: 32768,
1361+
reasoning: true,
1362+
streaming: true,
1363+
vision: true,
1364+
tools: false,
1365+
jsonOutput: false,
1366+
},
11091367
],
11101368
},
11111369
{

0 commit comments

Comments
 (0)