Skip to content

Commit 7ec2c5f

Browse files
authored
Gateway Coding Plan adjacent improvements (#1648)
commits can be reviewed separately
2 parents 00ecc2b + 13f101a commit 7ec2c5f

File tree

8 files changed

+86
-31
lines changed

8 files changed

+86
-31
lines changed

src/app/api/openrouter/[...path]/route.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ import {
3939
wrapInSafeNextResponse,
4040
forbiddenFreeModelResponse,
4141
storeAndPreviousResponseIdIsNotSupported,
42+
apiKindNotSupportedResponse,
4243
} from '@/lib/llm-proxy-helpers';
4344
import { getBalanceAndOrgSettings } from '@/lib/organizations/organization-usage';
4445
import { ENABLE_TOOL_REPAIR, repairTools } from '@/lib/tool-calling';
@@ -302,13 +303,16 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
302303
// Use new shared helper for fraud & project headers
303304
const { fraudHeaders, projectId } = extractFraudAndProjectHeaders(request);
304305
const taskId = extractHeaderAndLimitLength(request, 'x-kilocode-taskid') ?? undefined;
305-
const { provider, userByok, customLlm } = await getProvider(
306+
const { provider, userByok, bypassAccessCheck } = await getProvider(
306307
originalModelIdLowerCased,
307308
requestBodyParsed,
308309
user,
309310
organizationId,
310311
taskId
311312
);
313+
if (!provider.supportedChatApis.includes(requestBodyParsed.kind)) {
314+
return apiKindNotSupportedResponse(requestBodyParsed.kind, provider.supportedChatApis);
315+
}
312316

313317
console.debug(`Routing request to ${provider.id}`);
314318

@@ -377,9 +381,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
377381
setTag('ui.ai_model', requestBodyParsed.body.model);
378382

379383
// Skip balance/org checks for anonymous users - they can only use free models
380-
const bypassAccessCheckForCustomLlm =
381-
!!customLlm && !!organizationId && customLlm.organization_ids.includes(organizationId);
382-
if (!isAnonymousContext(user) && !bypassAccessCheckForCustomLlm) {
384+
if (!isAnonymousContext(user) && !bypassAccessCheck) {
383385
const { balance, settings, plan } = await balanceAndSettingsPromise;
384386

385387
if (

src/lib/llm-proxy-helpers.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import type {
3232
} from '@/lib/processUsage.types';
3333
import { getMaxTokens } from '@/lib/providers/openrouter/request-helpers';
3434
import { KILO_AUTO_BALANCED_MODEL, KILO_AUTO_FREE_MODEL } from '@/lib/kilo-auto-model';
35+
import type { GatewayChatApiKind } from '@/lib/providers/types';
3536

3637
// FIM suffix markers for tracking purposes - used to wrap suffix in a fake system prompt format
3738
// This allows FIM requests to be tracked consistently with chat requests
@@ -102,6 +103,14 @@ export function dataCollectionRequiredResponse() {
102103
);
103104
}
104105

106+
/**
 * Builds the HTTP 400 response returned when the resolved provider does not
 * support the API kind of the incoming request.
 *
 * The same text is placed in both the `error` and `message` fields of the JSON body.
 *
 * @param apiKind - The API kind the caller attempted to use.
 * @param supportedApiKinds - The API kinds the provider accepts. May be empty
 *   (a provider can declare no supported chat APIs), in which case no
 *   alternatives are suggested instead of emitting a dangling "any of:".
 * @returns A JSON response with status 400.
 */
export function apiKindNotSupportedResponse(
  apiKind: GatewayChatApiKind,
  supportedApiKinds: ReadonlyArray<GatewayChatApiKind>
) {
  const unsupported = `This model does not support the ${apiKind} API`;
  // Join with ", " so the list reads naturally; the default join() separator is a bare comma.
  const error =
    supportedApiKinds.length > 0
      ? `${unsupported}, please use any of: ${supportedApiKinds.join(', ')}`
      : unsupported;
  return NextResponse.json({ error, message: error }, { status: 400 });
}
113+
105114
export function alphaPeriodEndedResponse() {
106115
// https://github.com/Kilo-Org/kilocode/blob/50d6bd482bec6fae7d1c80b14ffb064de3761507/src/shared/kilocode/errorUtils.ts#L13
107116
const error = `The alpha period for this model has ended.`;

src/lib/providers/coding-plans/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { getBYOKforOrganization, getBYOKforUser } from '@/lib/byok';
55
import { readDb } from '@/lib/drizzle';
66
import { preferredModels } from '@/lib/models';
77
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
8+
import type { OpenCodeSettings } from '@kilocode/db';
9+
import { BINARY_THINKING_VARIANTS } from '@/lib/providers/model-settings';
810

911
export function formatCodingPlanModelId(provider: CodingPlanProvider, model: CodingPlanModel) {
1012
return provider.id + '/' + model.id;
@@ -50,6 +52,10 @@ function convertModel(
5052
supported_parameters: ['max_tokens', 'temperature', 'tools', 'reasoning', 'include_reasoning'],
5153
default_parameters: {},
5254
preferredIndex: model.flags.includes('recommended') ? preferredIndex : undefined,
55+
opencode: {
56+
ai_sdk_provider: provider.ai_sdk_provider,
57+
variants: BINARY_THINKING_VARIANTS,
58+
} satisfies OpenCodeSettings,
5359
};
5460
}
5561

src/lib/providers/index.ts

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import {
1818
isHaikuModel,
1919
} from '@/lib/providers/anthropic';
2020
import { getBYOKforOrganization, getBYOKforUser, getModelUserByokProviders } from '@/lib/byok';
21-
import type { CustomLlm } from '@kilocode/db/schema';
2221
import { custom_llm, type User } from '@kilocode/db/schema';
2322
import { OpenRouterInferenceProviderIdSchema } from '@/lib/providers/openrouter/inference-provider-id';
2423
import { hasAttemptCompletionTool } from '@/lib/tool-calling';
@@ -35,6 +34,15 @@ import { isXiaomiModel } from '@/lib/providers/xiaomi';
3534
import type { BYOKResult, Provider } from '@/lib/providers/types';
3635
import PROVIDERS from '@/lib/providers/provider-definitions';
3736
import { getCodingPlanModel } from '@/lib/providers/coding-plans';
37+
import type { CustomLlmProvider } from '@kilocode/db';
38+
39+
/**
 * Derives the list of supported chat API kinds from an AI SDK provider identifier.
 *
 * - `'anthropic'` maps to `['messages']`
 * - `'openai'` maps to `['responses']`
 * - any other value maps to `['chat_completions']`
 *
 * The result is used as the `supportedChatApis` value of the provider objects
 * built in this file for coding-plan and custom LLM providers.
 */
function inferSupportedChatApis(aiSdkProvider: CustomLlmProvider) {
40+
return aiSdkProvider === 'anthropic'
41+
? (['messages'] as const)
42+
: aiSdkProvider === 'openai'
43+
? (['responses'] as const)
44+
: (['chat_completions'] as const);
45+
}
3846

3947
async function checkCodingPlanBYOK(
4048
user: User | AnonymousUserContext,
@@ -56,13 +64,14 @@ async function checkCodingPlanBYOK(
5664
id: 'coding-plan',
5765
apiUrl: codingPlan.base_url,
5866
apiKey: userByok[0].decryptedAPIKey,
67+
supportedChatApis: inferSupportedChatApis(codingPlan.ai_sdk_provider),
5968
transformRequest(context) {
6069
context.request.body.model = codingPlanModel.id;
6170
codingPlan.transformRequest(context);
6271
},
6372
} satisfies Provider,
6473
userByok,
65-
customLlm: null,
74+
bypassAccessCheck: false,
6675
};
6776
}
6877

@@ -85,7 +94,7 @@ export async function getProvider(
8594
user: User | AnonymousUserContext,
8695
organizationId: string | undefined,
8796
taskId: string | undefined
88-
): Promise<{ provider: Provider; userByok: BYOKResult[] | null; customLlm: CustomLlm | null }> {
97+
): Promise<{ provider: Provider; userByok: BYOKResult[] | null; bypassAccessCheck: boolean }> {
8998
const codingPlanByok = await checkCodingPlanBYOK(user, requestedModel, organizationId);
9099
if (codingPlanByok) {
91100
return codingPlanByok;
@@ -96,7 +105,7 @@ export async function getProvider(
96105
return {
97106
provider: PROVIDERS.VERCEL_AI_GATEWAY,
98107
userByok: vercelByok,
99-
customLlm: null,
108+
bypassAccessCheck: false,
100109
};
101110
}
102111

@@ -111,6 +120,7 @@ export async function getProvider(
111120
id: 'custom',
112121
apiUrl: customLlm.base_url,
113122
apiKey: customLlm.api_key,
123+
supportedChatApis: inferSupportedChatApis(customLlm.provider),
114124
transformRequest(context) {
115125
Object.assign(context.request.body, customLlm?.extra_body ?? {});
116126
for (const [key, value] of Object.entries(customLlm.extra_headers ?? {})) {
@@ -120,13 +130,13 @@ export async function getProvider(
120130
},
121131
},
122132
userByok: null,
123-
customLlm,
133+
bypassAccessCheck: true,
124134
};
125135
}
126136
}
127137

128138
if (await shouldRouteToVercel(requestedModel, request, taskId || user.id)) {
129-
return { provider: PROVIDERS.VERCEL_AI_GATEWAY, userByok: null, customLlm: null };
139+
return { provider: PROVIDERS.VERCEL_AI_GATEWAY, userByok: null, bypassAccessCheck: false };
130140
}
131141

132142
const kiloFreeModel = kiloFreeModels.find(m => m.public_id === requestedModel);
@@ -135,7 +145,7 @@ export async function getProvider(
135145
return {
136146
provider: freeModelProvider ?? PROVIDERS.OPENROUTER,
137147
userByok: null,
138-
customLlm: null,
148+
bypassAccessCheck: false,
139149
};
140150
}
141151

src/lib/providers/model-settings.ts

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,44 +43,52 @@ export function getVersionedModelSettings(model: string): VersionedSettings | un
4343
return undefined;
4444
}
4545

46+
/**
 * Variant set for models that expose reasoning as a simple on/off choice.
 *
 * - `instant`: reasoning disabled, effort `'none'`
 * - `thinking`: reasoning enabled, effort `'medium'`
 *
 * Declared `as const` so the variant keys and literal values keep their narrow types.
 */
export const BINARY_THINKING_VARIANTS = {
47+
instant: { reasoning: { enabled: false, effort: 'none' } },
48+
thinking: { reasoning: { enabled: true, effort: 'medium' } },
49+
} as const;
50+
4651
export function getModelVariants(model: string): OpenCodeSettings['variants'] {
4752
// Inlined to avoid importing anthropic.ts (which transitively pulls in Node.js crypto)
4853
if (model.startsWith('anthropic/')) {
4954
return {
50-
none: { reasoning: { enabled: false } },
55+
none: { reasoning: { enabled: false, effort: 'none' } },
5156
low: { reasoning: { enabled: true, effort: 'low' }, verbosity: 'low' },
5257
medium: { reasoning: { enabled: true, effort: 'medium' }, verbosity: 'medium' },
5358
high: { reasoning: { enabled: true, effort: 'high' }, verbosity: 'high' },
5459
max: { reasoning: { enabled: true, effort: 'xhigh' }, verbosity: 'max' },
5560
};
5661
}
57-
if (isOpenAiModel(model) || isGemini3Model(model)) {
58-
const filterNone = model.includes('codex') || isGemini3Model(model);
59-
const efforts = filterNone
60-
? ReasoningEffortSchema.options.filter(e => e !== 'none')
61-
: ReasoningEffortSchema.options;
62+
if (model.includes('codex') || isGemini3Model(model)) {
6263
return Object.fromEntries(
63-
efforts.map(effort => [effort, { reasoning: { enabled: effort !== 'none', effort } }])
64+
ReasoningEffortSchema.options
65+
.filter(e => e !== 'none')
66+
.map(effort => [effort, { reasoning: { enabled: true, effort } }])
67+
);
68+
}
69+
if (isOpenAiModel(model)) {
70+
return Object.fromEntries(
71+
ReasoningEffortSchema.options.map(effort => [
72+
effort,
73+
{ reasoning: { enabled: effort !== 'none', effort } },
74+
])
6475
);
6576
}
6677
if (isMoonshotModel(model) || isZaiModel(model) || isXiaomiModel(model)) {
67-
return {
68-
instant: { reasoning: { enabled: false } },
69-
thinking: { reasoning: { enabled: true } },
70-
};
78+
return BINARY_THINKING_VARIANTS;
7179
}
7280
if (model.startsWith('inception/mercury-2')) {
7381
return {
74-
instant: { reasoning: { enabled: false } },
82+
instant: { reasoning: { enabled: false, effort: 'none' } },
7583
low: { reasoning: { enabled: true, effort: 'low' } },
7684
medium: { reasoning: { enabled: true, effort: 'medium' } },
7785
high: { reasoning: { enabled: true, effort: 'high' } },
7886
};
7987
}
8088
if (model.startsWith('x-ai/grok-4')) {
8189
return {
82-
'non-reasoning': { reasoning: { enabled: false } },
83-
reasoning: { reasoning: { enabled: true } },
90+
'non-reasoning': { reasoning: { enabled: false, effort: 'none' } },
91+
reasoning: { reasoning: { enabled: true, effort: 'medium' } },
8492
};
8593
}
8694
return undefined;

src/lib/providers/provider-definitions.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,14 @@ export default {
1212
id: 'openrouter',
1313
apiUrl: 'https://openrouter.ai/api/v1',
1414
apiKey: getEnvVariable('OPENROUTER_API_KEY'),
15+
supportedChatApis: ['chat_completions', 'messages', 'responses'],
1516
transformRequest() {},
1617
},
1718
ALIBABA: {
1819
id: 'alibaba',
1920
apiUrl: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
2021
apiKey: getEnvVariable('ALIBABA_API_KEY'),
22+
supportedChatApis: ['chat_completions'],
2123
transformRequest(context) {
2224
if (context.request.kind === 'chat_completions' || context.request.kind === 'responses') {
2325
context.request.body.enable_thinking = true;
@@ -29,6 +31,7 @@ export default {
2931
id: 'bytedance',
3032
apiUrl: 'https://ark.ap-southeast.bytepluses.com/api/v3',
3133
apiKey: getEnvVariable('BYTEDANCE_API_KEY'),
34+
supportedChatApis: ['chat_completions'],
3235
transformRequest(context) {
3336
if (context.request.kind === 'chat_completions' || context.request.kind === 'responses') {
3437
context.request.body.thinking = { type: 'enabled' };
@@ -45,6 +48,7 @@ export default {
4548
id: 'corethink',
4649
apiUrl: 'https://api.corethink.ai/v1/code',
4750
apiKey: getEnvVariable('CORETHINK_API_KEY'),
51+
supportedChatApis: ['chat_completions'],
4852
transformRequest(context) {
4953
if (context.request.kind !== 'chat_completions') {
5054
return;
@@ -60,6 +64,10 @@ export default {
6064
id: 'martian',
6165
apiUrl: 'https://api.withmartian.com/v1',
6266
apiKey: getEnvVariable('MARTIAN_API_KEY'),
67+
supportedChatApis: [
68+
'chat_completions', // through our custom wrapper
69+
'responses',
70+
],
6371
transformRequest(context) {
6472
if (context.request.kind === 'chat_completions') {
6573
delete context.request.body.reasoning;
@@ -70,18 +78,21 @@ export default {
7078
id: 'mistral',
7179
apiUrl: 'https://api.mistral.ai/v1',
7280
apiKey: getEnvVariable('MISTRAL_API_KEY'),
81+
supportedChatApis: [],
7382
transformRequest() {},
7483
},
7584
MORPH: {
7685
id: 'morph',
7786
apiUrl: 'https://api.morphllm.com/v1',
7887
apiKey: getEnvVariable('MORPH_API_KEY'),
88+
supportedChatApis: ['chat_completions'],
7989
transformRequest() {},
8090
},
8191
VERCEL_AI_GATEWAY: {
8292
id: 'vercel',
8393
apiUrl: 'https://ai-gateway.vercel.sh/v1',
8494
apiKey: getEnvVariable('VERCEL_AI_GATEWAY_API_KEY'),
95+
supportedChatApis: ['chat_completions', 'messages', 'responses'],
8596
transformRequest(context) {
8697
applyVercelSettings(context.model, context.request, context.userByok);
8798
},

src/lib/providers/types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@ export type TransformRequestContext = {
2626
userByok: BYOKResult[] | null;
2727
};
2828

29+
export type GatewayChatApiKind = GatewayRequest['kind'];
30+
2931
export type Provider = {
3032
id: ProviderId;
3133
apiUrl: string;
3234
apiKey: string;
35+
supportedChatApis: ReadonlyArray<GatewayChatApiKind>;
3336
transformRequest(context: TransformRequestContext): void;
3437
};

src/tests/openrouter-models-sorting.approved.json

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,12 +193,14 @@
193193
"variants": {
194194
"instant": {
195195
"reasoning": {
196-
"enabled": false
196+
"enabled": false,
197+
"effort": "none"
197198
}
198199
},
199200
"thinking": {
200201
"reasoning": {
201-
"enabled": true
202+
"enabled": true,
203+
"effort": "medium"
202204
}
203205
}
204206
}
@@ -237,7 +239,8 @@
237239
"variants": {
238240
"none": {
239241
"reasoning": {
240-
"enabled": false
242+
"enabled": false,
243+
"effort": "none"
241244
}
242245
},
243246
"low": {
@@ -304,7 +307,8 @@
304307
"variants": {
305308
"none": {
306309
"reasoning": {
307-
"enabled": false
310+
"enabled": false,
311+
"effort": "none"
308312
}
309313
},
310314
"low": {
@@ -661,12 +665,14 @@
661665
"variants": {
662666
"instant": {
663667
"reasoning": {
664-
"enabled": false
668+
"enabled": false,
669+
"effort": "none"
665670
}
666671
},
667672
"thinking": {
668673
"reasoning": {
669-
"enabled": true
674+
"enabled": true,
675+
"effort": "medium"
670676
}
671677
}
672678
}

0 commit comments

Comments
 (0)