Skip to content

Commit 401dc60

Browse files
authored
Add BytePlus and Z.ai Coding Plan Cloud support (UI disabled for now) (#1618)
<img width="2212" height="1300" alt="CleanShot 2026-03-26 at 22 31 44@2x" src="https://github.com/user-attachments/assets/02e6bd16-5444-47cc-8dce-eaebee3a5730" />
2 parents b2a98e2 + 061422e commit 401dc60

File tree

14 files changed

+505
-42
lines changed

14 files changed

+505
-42
lines changed

src/app/api/openrouter/models/route.ts

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,32 @@ import { NextResponse } from 'next/server';
33
import { captureException } from '@sentry/nextjs';
44
import type { OpenRouterModelsResponse } from '@/lib/organizations/organization-types';
55
import { getEnhancedOpenRouterModels } from '@/lib/providers/openrouter';
6+
import { getUserFromAuth } from '@/lib/user.server';
7+
import { getCodingPlanModelsForUser } from '@/lib/providers/coding-plans';
8+
import { unstable_cache } from 'next/cache';
9+
import { ENABLE_CODING_PLANS_UI } from '@/lib/constants';
10+
11+
// Per-user coding-plan model lookup memoized with Next.js unstable_cache.
// keyParts is undefined, so the cache key is derived from the function and its
// arguments (the userId); entries revalidate every 60 seconds, matching the
// route-level `revalidate = 60` below.
const getCodingPlanModelsForUser_cached = unstable_cache(
  (userId: string) => getCodingPlanModelsForUser(userId),
  undefined,
  { revalidate: 60 }
);
16+
17+
async function getCodingPlanModels() {
18+
try {
19+
const { user } = await getUserFromAuth({ adminOnly: false });
20+
if (user) {
21+
console.debug('[getCodingPlanModels] authenticated request, fetching coding plan models');
22+
return await getCodingPlanModelsForUser_cached(user.id);
23+
} else {
24+
console.debug('[getCodingPlanModels] anonymous request, no coding plan models');
25+
return [];
26+
}
27+
} catch (e) {
28+
console.debug('[getCodingPlanModels] error, database unavailable?', e);
29+
return [];
30+
}
31+
}
632

733
export const revalidate = 60;
834

@@ -15,7 +41,11 @@ export async function GET(
1541
): Promise<NextResponse<{ error: string; message: string } | OpenRouterModelsResponse>> {
1642
try {
1743
const data = await getEnhancedOpenRouterModels();
18-
return NextResponse.json(data);
44+
return NextResponse.json(
45+
ENABLE_CODING_PLANS_UI && Array.isArray(data.data)
46+
? { data: data.data.concat(await getCodingPlanModels()) }
47+
: data
48+
);
1949
} catch (error) {
2050
captureException(error, {
2151
tags: { endpoint: 'openrouter/models' },

src/components/organizations/byok/BYOKKeysManager.tsx

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,41 @@ import {
3838
} from 'lucide-react';
3939
import { toast } from 'sonner';
4040
import {
41-
AutocompleteUserByokProviderIdSchema,
41+
DirectUserByokInferenceProviderIdSchema,
4242
VercelUserByokInferenceProviderIdSchema,
4343
AwsCredentialsSchema,
4444
} from '@/lib/providers/openrouter/inference-provider-id';
45+
import CODING_PLANS from '@/lib/providers/coding-plans/coding-plan-definitions';
4546
import * as z from 'zod';
47+
import { ENABLE_CODING_PLANS_UI } from '@/lib/constants';
4648

4749
// Hardcoded BYOK providers list
48-
const BYOK_PROVIDERS = [
50+
const VERCEL_BYOK_PROVIDERS = [
4951
{ id: VercelUserByokInferenceProviderIdSchema.enum.anthropic, name: 'Anthropic' },
5052
{ id: VercelUserByokInferenceProviderIdSchema.enum.bedrock, name: 'AWS Bedrock' },
5153
{ id: VercelUserByokInferenceProviderIdSchema.enum.openai, name: 'OpenAI' },
5254
{ id: VercelUserByokInferenceProviderIdSchema.enum.inception, name: 'Inception' },
5355
{ id: VercelUserByokInferenceProviderIdSchema.enum.google, name: 'Google AI Studio' },
5456
{ id: VercelUserByokInferenceProviderIdSchema.enum.minimax, name: 'MiniMax' },
55-
{ id: AutocompleteUserByokProviderIdSchema.enum.codestral, name: 'Mistral AI: Codestral' },
56-
{ id: VercelUserByokInferenceProviderIdSchema.enum.mistral, name: 'Mistral AI: Devstral' },
57+
{ id: DirectUserByokInferenceProviderIdSchema.enum.codestral, name: 'Mistral AI (Codestral)' },
58+
{ id: VercelUserByokInferenceProviderIdSchema.enum.mistral, name: 'Mistral AI (other models)' },
5759
{ id: VercelUserByokInferenceProviderIdSchema.enum.xai, name: 'xAI' },
58-
{ id: VercelUserByokInferenceProviderIdSchema.enum.zai, name: 'Z.AI' },
60+
{
61+
id: VercelUserByokInferenceProviderIdSchema.enum.zai,
62+
name: ENABLE_CODING_PLANS_UI ? 'Z.ai (pay as you go)' : 'Z.ai',
63+
},
5964
] as const;
6065

66+
const CODING_PLAN_PROVIDERS = CODING_PLANS.map(plan => ({
67+
id: plan.id,
68+
name: plan.name,
69+
}));
70+
71+
const BYOK_PROVIDERS = [
72+
...(ENABLE_CODING_PLANS_UI ? CODING_PLAN_PROVIDERS : []),
73+
...VERCEL_BYOK_PROVIDERS,
74+
].toSorted((a, b) => a.name.localeCompare(b.name));
75+
6176
function BYOKDescription() {
6277
return (
6378
<p className="text-muted-foreground">

src/lib/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ export const AUTOCOMPLETE_MODEL = 'codestral-2508';
3636

3737
export const ENABLE_DEPLOY_FEATURE = true;

// Coding-plan cloud providers (BytePlus / Z.ai) are implemented server-side
// but hidden from the UI until the CLI supports them.
export const ENABLE_CODING_PLANS_UI = false; // Requires CLI update: https://github.com/Kilo-Org/kilocode/pull/7728

export const IS_DEVELOPMENT = process.env.NODE_ENV === 'development';

// Cloud Agent WebSocket URL (client-side, inlined at build time)
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
import type { CodingPlanProvider } from '@/lib/providers/coding-plans/types';
import { isReasoningExplicitlyDisabled } from '@/lib/providers/openrouter/request-helpers';

/**
 * Static catalog of coding-plan cloud providers and the models each plan
 * exposes. Each provider is an OpenAI-compatible endpoint; `transformRequest`
 * injects the vendor-specific `thinking` field, enabling reasoning unless the
 * incoming request explicitly disabled it.
 *
 * NOTE(review): context_length / max_completion_tokens values are taken from
 * the vendors' published limits — verify against provider docs when updating.
 */
export default [
  // BytePlus (ByteDance) coding plan — Ark OpenAI-compatible endpoint.
  {
    id: 'byteplus-coding',
    name: 'BytePlus Coding Plan',
    base_url: 'https://ark.ap-southeast.bytepluses.com/api/coding/v3',
    ai_sdk_provider: 'openai-compatible',
    // Reasoning is on by default; honor an explicit opt-out from the caller.
    transformRequest(context) {
      context.request.body.thinking = {
        type: isReasoningExplicitlyDisabled(context.request) ? 'disabled' : 'enabled',
      };
    },
    models: [
      {
        id: 'bytedance-seed-code',
        name: 'Seed-Code',
        description:
          "ByteDance's latest code model has been deeply optimized for agentic programming tasks.",
        flags: ['recommended', 'vision'],
        context_length: 262144,
        max_completion_tokens: 32768,
      },
      {
        id: 'kimi-k2.5',
        name: 'Kimi-K2.5',
        description:
          'Open-source SoTA native multimodal model with text-only input (for now), stronger code/UI generation.',
        flags: ['recommended'],
        context_length: 262144,
        max_completion_tokens: 32768,
      },
      {
        id: 'glm-4.7',
        name: 'GLM-4.7',
        description:
          "Z.AI's latest flagship model, enhanced programming capabilities and more stable multi-step reasoning/execution.",
        flags: ['recommended'],
        context_length: 204800,
        max_completion_tokens: 131072,
      },
      {
        id: 'deepseek-v3.2',
        name: 'DeepSeek-V3.2',
        description:
          'Designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance.',
        flags: [],
        context_length: 131072,
        max_completion_tokens: 32768,
      },
      {
        id: 'gpt-oss-120b',
        name: 'GPT-OSS-120B',
        description:
          "OpenAI's open-weight model, 117B parameters with 5.1B active parameters for production, general purpose, high reasoning use cases.",
        flags: [],
        context_length: 131072,
        max_completion_tokens: 65536,
      },
      {
        id: 'dola-seed-2.0-pro',
        name: 'Dola-Seed-2.0-Pro',
        description:
          'Focused on long-chain reasoning and stability in complex task execution, designed for complex real-world business scenarios.',
        flags: ['vision'],
        context_length: 262144,
        max_completion_tokens: 131072,
      },
      {
        id: 'dola-seed-2.0-lite',
        name: 'Dola-Seed-2.0-Lite',
        description:
          'Balances generation quality and response speed, making it a strong general-purpose production model.',
        flags: ['vision'],
        context_length: 262144,
        max_completion_tokens: 131072,
      },
    ],
  },
  // Z.ai coding plan — same OpenAI-compatible shape and thinking toggle.
  {
    id: 'zai-coding',
    name: 'Z.ai Coding Plan',
    base_url: 'https://api.z.ai/api/coding/paas/v4',
    ai_sdk_provider: 'openai-compatible',
    // Reasoning is on by default; honor an explicit opt-out from the caller.
    transformRequest(context) {
      context.request.body.thinking = {
        type: isReasoningExplicitlyDisabled(context.request) ? 'disabled' : 'enabled',
      };
    },
    models: [
      {
        id: 'glm-5-turbo',
        name: 'GLM-5 Turbo',
        description:
          'GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw scenarios. It is deeply optimized for real-world agent workflows involving long execution chains, with improved complex instruction decomposition, tool use, scheduled and persistent execution, and overall stability across extended tasks.',
        flags: ['recommended'],
        context_length: 202752,
        max_completion_tokens: 131072,
      },
      {
        id: 'glm-5',
        name: 'GLM-5',
        description:
          "GLM-5 is Z.ai's flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading closed-source models. With advanced agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 moves beyond code generation to full-system construction and autonomous execution.",
        flags: ['recommended'],
        context_length: 204800,
        max_completion_tokens: 131072,
      },
      {
        id: 'glm-4.7',
        name: 'GLM-4.7',
        description:
          "GLM-4.7 is Z.ai's latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.",
        flags: ['recommended'],
        context_length: 204800,
        max_completion_tokens: 131072,
      },
      {
        id: 'glm-4.7-flash',
        name: 'GLM-4.7-Flash',
        description:
          'As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.',
        flags: [],
        context_length: 200000,
        max_completion_tokens: 131072,
      },
      {
        id: 'glm-4.7-flashx',
        name: 'GLM-4.7-FlashX',
        description:
          'GLM-4.7-FlashX is an enhanced variant of GLM-4.7-Flash, offering higher throughput and improved performance for agentic coding workflows. It combines the compact 30B-class efficiency of the Flash series with additional capacity for complex instruction following and multi-step tool use.',
        flags: [],
        context_length: 200000,
        max_completion_tokens: 131072,
      },
      {
        id: 'glm-4.6',
        name: 'GLM-4.6',
        description:
          'GLM-4.6 brings several key improvements over GLM-4.5: an expanded context window from 128K to 200K tokens for more complex agentic tasks; superior coding performance on code benchmarks and better real-world performance in agentic coding applications; advanced reasoning with tool use support during inference; stronger capability in tool-use and search-based agents; and refined writing that aligns more naturally with human preferences in style and readability.',
        flags: [],
        context_length: 204800,
        max_completion_tokens: 131072,
      },
      {
        id: 'glm-4.6v',
        name: 'GLM-4.6V',
        description:
          'GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.',
        flags: ['vision'],
        context_length: 128000,
        max_completion_tokens: 32768,
      },
      {
        id: 'glm-4.5',
        name: 'GLM-4.5',
        description:
          'GLM-4.5 is Z.ai\'s flagship foundation model purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128K tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment, with a hybrid inference mode offering a "thinking mode" for complex reasoning and tool use and a "non-thinking mode" optimized for instant responses.',
        flags: [],
        context_length: 131072,
        max_completion_tokens: 98304,
      },
      {
        id: 'glm-4.5-air',
        name: 'GLM-4.5-Air',
        description:
          'GLM-4.5-Air is the lightweight variant of Z.ai\'s latest flagship model family, purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air supports hybrid inference modes, offering a "thinking mode" for advanced reasoning and tool use and a "non-thinking mode" for real-time interaction.',
        flags: [],
        context_length: 131072,
        max_completion_tokens: 98304,
      },
      {
        id: 'glm-4.5-flash',
        name: 'GLM-4.5-Flash',
        description:
          'GLM-4.5-Flash is the free, high-speed variant of the GLM-4.5 model family, optimized for low-latency agentic coding tasks. It shares the MoE architecture of GLM-4.5 in a smaller, faster form factor, retaining reasoning and tool-use capabilities at no cost.',
        flags: [],
        context_length: 131072,
        max_completion_tokens: 98304,
      },
      {
        id: 'glm-4.5v',
        name: 'GLM-4.5V',
        description:
          'GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It supports a hybrid inference mode with "thinking" and "non-thinking" options.',
        flags: ['vision'],
        context_length: 64000,
        max_completion_tokens: 16384,
      },
    ],
  },
] satisfies ReadonlyArray<CodingPlanProvider>;

0 commit comments

Comments
 (0)