Gateway Coding Plan adjacent improvements (#1648)

chrarnoldus · web-flow · commit 7ec2c5f453b0 · 2026-03-27T16:08:27.000+01:00
Commits can be reviewed separately.
diff --git a/src/app/api/openrouter/[...path]/route.ts b/src/app/api/openrouter/[...path]/route.ts
@@ -39,6 +39,7 @@ import {
   wrapInSafeNextResponse,
   forbiddenFreeModelResponse,
   storeAndPreviousResponseIdIsNotSupported,
+  apiKindNotSupportedResponse,
 } from '@/lib/llm-proxy-helpers';
 import { getBalanceAndOrgSettings } from '@/lib/organizations/organization-usage';
 import { ENABLE_TOOL_REPAIR, repairTools } from '@/lib/tool-calling';
@@ -302,13 +303,16 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
   // Use new shared helper for fraud & project headers
   const { fraudHeaders, projectId } = extractFraudAndProjectHeaders(request);
   const taskId = extractHeaderAndLimitLength(request, 'x-kilocode-taskid') ?? undefined;
-  const { provider, userByok, customLlm } = await getProvider(
+  const { provider, userByok, bypassAccessCheck } = await getProvider(
     originalModelIdLowerCased,
     requestBodyParsed,
     user,
     organizationId,
     taskId
   );
+  if (!provider.supportedChatApis.includes(requestBodyParsed.kind)) {
+    return apiKindNotSupportedResponse(requestBodyParsed.kind, provider.supportedChatApis);
+  }
 
   console.debug(`Routing request to ${provider.id}`);
 
@@ -377,9 +381,7 @@ export async function POST(request: NextRequest): Promise<NextResponseType<unkno
   setTag('ui.ai_model', requestBodyParsed.body.model);
 
   // Skip balance/org checks for anonymous users - they can only use free models
-  const bypassAccessCheckForCustomLlm =
-    !!customLlm && !!organizationId && customLlm.organization_ids.includes(organizationId);
-  if (!isAnonymousContext(user) && !bypassAccessCheckForCustomLlm) {
+  if (!isAnonymousContext(user) && !bypassAccessCheck) {
     const { balance, settings, plan } = await balanceAndSettingsPromise;
 
     if (
diff --git a/src/lib/llm-proxy-helpers.ts b/src/lib/llm-proxy-helpers.ts
@@ -32,6 +32,7 @@ import type {
 } from '@/lib/processUsage.types';
 import { getMaxTokens } from '@/lib/providers/openrouter/request-helpers';
 import { KILO_AUTO_BALANCED_MODEL, KILO_AUTO_FREE_MODEL } from '@/lib/kilo-auto-model';
+import type { GatewayChatApiKind } from '@/lib/providers/types';
 
 // FIM suffix markers for tracking purposes - used to wrap suffix in a fake system prompt format
 // This allows FIM requests to be tracked consistently with chat requests
@@ -102,6 +103,14 @@ export function dataCollectionRequiredResponse() {
   );
 }
 
+/**
+ * Builds the 400 response returned when a request uses a chat API kind that
+ * the selected provider does not list as supported.
+ *
+ * @param apiKind - The API kind of the incoming request.
+ * @param supportedApiKinds - The API kinds the provider accepts; may be empty.
+ * @returns A JSON response with status 400 carrying the same text in `error` and `message`.
+ */
+export function apiKindNotSupportedResponse(
+  apiKind: GatewayChatApiKind,
+  supportedApiKinds: ReadonlyArray<GatewayChatApiKind>
+) {
+  // An empty list would leave a dangling "please use any of:" with no options, so omit the hint.
+  const hint =
+    supportedApiKinds.length > 0 ? `, please use any of: ${supportedApiKinds.join(', ')}` : '';
+  const error = `This model does not support the ${apiKind} API${hint}`;
+  return NextResponse.json({ error, message: error }, { status: 400 });
+}
+
 export function alphaPeriodEndedResponse() {
   // https://github.com/Kilo-Org/kilocode/blob/50d6bd482bec6fae7d1c80b14ffb064de3761507/src/shared/kilocode/errorUtils.ts#L13
   const error = `The alpha period for this model has ended.`;
diff --git a/src/lib/providers/coding-plans/index.ts b/src/lib/providers/coding-plans/index.ts
@@ -5,6 +5,8 @@ import { getBYOKforOrganization, getBYOKforUser } from '@/lib/byok';
 import { readDb } from '@/lib/drizzle';
 import { preferredModels } from '@/lib/models';
 import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
+import type { OpenCodeSettings } from '@kilocode/db';
+import { BINARY_THINKING_VARIANTS } from '@/lib/providers/model-settings';
 
 export function formatCodingPlanModelId(provider: CodingPlanProvider, model: CodingPlanModel) {
   return provider.id + '/' + model.id;
@@ -50,6 +52,10 @@ function convertModel(
     supported_parameters: ['max_tokens', 'temperature', 'tools', 'reasoning', 'include_reasoning'],
     default_parameters: {},
     preferredIndex: model.flags.includes('recommended') ? preferredIndex : undefined,
+    opencode: {
+      ai_sdk_provider: provider.ai_sdk_provider,
+      variants: BINARY_THINKING_VARIANTS,
+    } satisfies OpenCodeSettings,
   };
 }
 
diff --git a/src/lib/providers/index.ts b/src/lib/providers/index.ts
@@ -18,7 +18,6 @@ import {
   isHaikuModel,
 } from '@/lib/providers/anthropic';
 import { getBYOKforOrganization, getBYOKforUser, getModelUserByokProviders } from '@/lib/byok';
-import type { CustomLlm } from '@kilocode/db/schema';
 import { custom_llm, type User } from '@kilocode/db/schema';
 import { OpenRouterInferenceProviderIdSchema } from '@/lib/providers/openrouter/inference-provider-id';
 import { hasAttemptCompletionTool } from '@/lib/tool-calling';
@@ -35,6 +34,15 @@ import { isXiaomiModel } from '@/lib/providers/xiaomi';
 import type { BYOKResult, Provider } from '@/lib/providers/types';
 import PROVIDERS from '@/lib/providers/provider-definitions';
 import { getCodingPlanModel } from '@/lib/providers/coding-plans';
+import type { CustomLlmProvider } from '@kilocode/db';
+
+/**
+ * Picks the supported chat API kinds from an AI SDK provider name:
+ * `anthropic` maps to `messages`, `openai` maps to `responses`, and any
+ * other value maps to `chat_completions`.
+ */
+function inferSupportedChatApis(aiSdkProvider: CustomLlmProvider) {
+  switch (aiSdkProvider) {
+    case 'anthropic':
+      return ['messages'] as const;
+    case 'openai':
+      return ['responses'] as const;
+    default:
+      return ['chat_completions'] as const;
+  }
+}
 
 async function checkCodingPlanBYOK(
   user: User | AnonymousUserContext,
@@ -56,13 +64,14 @@ async function checkCodingPlanBYOK(
       id: 'coding-plan',
       apiUrl: codingPlan.base_url,
       apiKey: userByok[0].decryptedAPIKey,
+      supportedChatApis: inferSupportedChatApis(codingPlan.ai_sdk_provider),
       transformRequest(context) {
         context.request.body.model = codingPlanModel.id;
         codingPlan.transformRequest(context);
       },
     } satisfies Provider,
     userByok,
-    customLlm: null,
+    bypassAccessCheck: false,
   };
 }
 
@@ -85,7 +94,7 @@ export async function getProvider(
   user: User | AnonymousUserContext,
   organizationId: string | undefined,
   taskId: string | undefined
-): Promise<{ provider: Provider; userByok: BYOKResult[] | null; customLlm: CustomLlm | null }> {
+): Promise<{ provider: Provider; userByok: BYOKResult[] | null; bypassAccessCheck: boolean }> {
   const codingPlanByok = await checkCodingPlanBYOK(user, requestedModel, organizationId);
   if (codingPlanByok) {
     return codingPlanByok;
@@ -96,7 +105,7 @@ export async function getProvider(
     return {
       provider: PROVIDERS.VERCEL_AI_GATEWAY,
       userByok: vercelByok,
-      customLlm: null,
+      bypassAccessCheck: false,
     };
   }
 
@@ -111,6 +120,7 @@ export async function getProvider(
           id: 'custom',
           apiUrl: customLlm.base_url,
           apiKey: customLlm.api_key,
+          supportedChatApis: inferSupportedChatApis(customLlm.provider),
           transformRequest(context) {
             Object.assign(context.request.body, customLlm?.extra_body ?? {});
             for (const [key, value] of Object.entries(customLlm.extra_headers ?? {})) {
@@ -120,13 +130,13 @@ export async function getProvider(
           },
         },
         userByok: null,
-        customLlm,
+        bypassAccessCheck: true,
       };
     }
   }
 
   if (await shouldRouteToVercel(requestedModel, request, taskId || user.id)) {
-    return { provider: PROVIDERS.VERCEL_AI_GATEWAY, userByok: null, customLlm: null };
+    return { provider: PROVIDERS.VERCEL_AI_GATEWAY, userByok: null, bypassAccessCheck: false };
   }
 
   const kiloFreeModel = kiloFreeModels.find(m => m.public_id === requestedModel);
@@ -135,7 +145,7 @@ export async function getProvider(
   return {
     provider: freeModelProvider ?? PROVIDERS.OPENROUTER,
     userByok: null,
-    customLlm: null,
+    bypassAccessCheck: false,
   };
 }
 
diff --git a/src/lib/providers/model-settings.ts b/src/lib/providers/model-settings.ts
@@ -43,44 +43,52 @@ export function getVersionedModelSettings(model: string): VersionedSettings | un
   return undefined;
 }
 
+export const BINARY_THINKING_VARIANTS = {
+  instant: { reasoning: { enabled: false, effort: 'none' } },
+  thinking: { reasoning: { enabled: true, effort: 'medium' } },
+} as const;
+
 export function getModelVariants(model: string): OpenCodeSettings['variants'] {
   // Inlined to avoid importing anthropic.ts (which transitively pulls in Node.js crypto)
   if (model.startsWith('anthropic/')) {
     return {
-      none: { reasoning: { enabled: false } },
+      none: { reasoning: { enabled: false, effort: 'none' } },
       low: { reasoning: { enabled: true, effort: 'low' }, verbosity: 'low' },
       medium: { reasoning: { enabled: true, effort: 'medium' }, verbosity: 'medium' },
       high: { reasoning: { enabled: true, effort: 'high' }, verbosity: 'high' },
       max: { reasoning: { enabled: true, effort: 'xhigh' }, verbosity: 'max' },
     };
   }
-  if (isOpenAiModel(model) || isGemini3Model(model)) {
-    const filterNone = model.includes('codex') || isGemini3Model(model);
-    const efforts = filterNone
-      ? ReasoningEffortSchema.options.filter(e => e !== 'none')
-      : ReasoningEffortSchema.options;
+  if (model.includes('codex') || isGemini3Model(model)) {
     return Object.fromEntries(
-      efforts.map(effort => [effort, { reasoning: { enabled: effort !== 'none', effort } }])
+      ReasoningEffortSchema.options
+        .filter(e => e !== 'none')
+        .map(effort => [effort, { reasoning: { enabled: true, effort } }])
+    );
+  }
+  if (isOpenAiModel(model)) {
+    return Object.fromEntries(
+      ReasoningEffortSchema.options.map(effort => [
+        effort,
+        { reasoning: { enabled: effort !== 'none', effort } },
+      ])
     );
   }
   if (isMoonshotModel(model) || isZaiModel(model) || isXiaomiModel(model)) {
-    return {
-      instant: { reasoning: { enabled: false } },
-      thinking: { reasoning: { enabled: true } },
-    };
+    return BINARY_THINKING_VARIANTS;
   }
   if (model.startsWith('inception/mercury-2')) {
     return {
-      instant: { reasoning: { enabled: false } },
+      instant: { reasoning: { enabled: false, effort: 'none' } },
       low: { reasoning: { enabled: true, effort: 'low' } },
       medium: { reasoning: { enabled: true, effort: 'medium' } },
       high: { reasoning: { enabled: true, effort: 'high' } },
     };
   }
   if (model.startsWith('x-ai/grok-4')) {
     return {
-      'non-reasoning': { reasoning: { enabled: false } },
-      reasoning: { reasoning: { enabled: true } },
+      'non-reasoning': { reasoning: { enabled: false, effort: 'none' } },
+      reasoning: { reasoning: { enabled: true, effort: 'medium' } },
     };
   }
   return undefined;
diff --git a/src/lib/providers/provider-definitions.ts b/src/lib/providers/provider-definitions.ts
@@ -12,12 +12,14 @@ export default {
     id: 'openrouter',
     apiUrl: 'https://openrouter.ai/api/v1',
     apiKey: getEnvVariable('OPENROUTER_API_KEY'),
+    supportedChatApis: ['chat_completions', 'messages', 'responses'],
     transformRequest() {},
   },
   ALIBABA: {
     id: 'alibaba',
     apiUrl: 'https://dashscope-intl.aliyuncs.com/compatible-mode/v1',
     apiKey: getEnvVariable('ALIBABA_API_KEY'),
+    supportedChatApis: ['chat_completions'],
     transformRequest(context) {
       if (context.request.kind === 'chat_completions' || context.request.kind === 'responses') {
         context.request.body.enable_thinking = true;
@@ -29,6 +31,7 @@ export default {
     id: 'bytedance',
     apiUrl: 'https://ark.ap-southeast.bytepluses.com/api/v3',
     apiKey: getEnvVariable('BYTEDANCE_API_KEY'),
+    supportedChatApis: ['chat_completions'],
     transformRequest(context) {
       if (context.request.kind === 'chat_completions' || context.request.kind === 'responses') {
         context.request.body.thinking = { type: 'enabled' };
@@ -45,6 +48,7 @@ export default {
     id: 'corethink',
     apiUrl: 'https://api.corethink.ai/v1/code',
     apiKey: getEnvVariable('CORETHINK_API_KEY'),
+    supportedChatApis: ['chat_completions'],
     transformRequest(context) {
       if (context.request.kind !== 'chat_completions') {
         return;
@@ -60,6 +64,10 @@ export default {
     id: 'martian',
     apiUrl: 'https://api.withmartian.com/v1',
     apiKey: getEnvVariable('MARTIAN_API_KEY'),
+    supportedChatApis: [
+      'chat_completions', // through our custom wrapper
+      'responses',
+    ],
     transformRequest(context) {
       if (context.request.kind === 'chat_completions') {
         delete context.request.body.reasoning;
@@ -70,18 +78,21 @@ export default {
     id: 'mistral',
     apiUrl: 'https://api.mistral.ai/v1',
     apiKey: getEnvVariable('MISTRAL_API_KEY'),
+    supportedChatApis: [],
     transformRequest() {},
   },
   MORPH: {
     id: 'morph',
     apiUrl: 'https://api.morphllm.com/v1',
     apiKey: getEnvVariable('MORPH_API_KEY'),
+    supportedChatApis: ['chat_completions'],
     transformRequest() {},
   },
   VERCEL_AI_GATEWAY: {
     id: 'vercel',
     apiUrl: 'https://ai-gateway.vercel.sh/v1',
     apiKey: getEnvVariable('VERCEL_AI_GATEWAY_API_KEY'),
+    supportedChatApis: ['chat_completions', 'messages', 'responses'],
     transformRequest(context) {
       applyVercelSettings(context.model, context.request, context.userByok);
     },
diff --git a/src/lib/providers/types.ts b/src/lib/providers/types.ts
@@ -26,9 +26,12 @@ export type TransformRequestContext = {
   userByok: BYOKResult[] | null;
 };
 
+export type GatewayChatApiKind = GatewayRequest['kind'];
+
 export type Provider = {
   id: ProviderId;
   apiUrl: string;
   apiKey: string;
+  supportedChatApis: ReadonlyArray<GatewayChatApiKind>;
   transformRequest(context: TransformRequestContext): void;
 };
diff --git a/src/tests/openrouter-models-sorting.approved.json b/src/tests/openrouter-models-sorting.approved.json
@@ -193,12 +193,14 @@
         "variants": {
           "instant": {
             "reasoning": {
-              "enabled": false
+              "enabled": false,
+              "effort": "none"
             }
           },
           "thinking": {
             "reasoning": {
-              "enabled": true
+              "enabled": true,
+              "effort": "medium"
             }
           }
         }
@@ -237,7 +239,8 @@
         "variants": {
           "none": {
             "reasoning": {
-              "enabled": false
+              "enabled": false,
+              "effort": "none"
             }
           },
           "low": {
@@ -304,7 +307,8 @@
         "variants": {
           "none": {
             "reasoning": {
-              "enabled": false
+              "enabled": false,
+              "effort": "none"
             }
           },
           "low": {
@@ -661,12 +665,14 @@
         "variants": {
           "instant": {
             "reasoning": {
-              "enabled": false
+              "enabled": false,
+              "effort": "none"
             }
           },
           "thinking": {
             "reasoning": {
-              "enabled": true
+              "enabled": true,
+              "effort": "medium"
             }
           }
         }

Original file line number	Diff line number	Diff line change
`@@ -193,12 +193,14 @@`
`193`	`193`	`"variants": {`
`194`	`194`	`"instant": {`
`195`	`195`	`"reasoning": {`
`196`		`- "enabled": false`
	`196`	`+ "enabled": false,`
	`197`	`+ "effort": "none"`
`197`	`198`	`}`
`198`	`199`	`},`
`199`	`200`	`"thinking": {`
`200`	`201`	`"reasoning": {`
`201`		`- "enabled": true`
	`202`	`+ "enabled": true,`
	`203`	`+ "effort": "medium"`
`202`	`204`	`}`
`203`	`205`	`}`
`204`	`206`	`}`
`@@ -237,7 +239,8 @@`
`237`	`239`	`"variants": {`
`238`	`240`	`"none": {`
`239`	`241`	`"reasoning": {`
`240`		`- "enabled": false`
	`242`	`+ "enabled": false,`
	`243`	`+ "effort": "none"`
`241`	`244`	`}`
`242`	`245`	`},`
`243`	`246`	`"low": {`
`@@ -304,7 +307,8 @@`
`304`	`307`	`"variants": {`
`305`	`308`	`"none": {`
`306`	`309`	`"reasoning": {`
`307`		`- "enabled": false`
	`310`	`+ "enabled": false,`
	`311`	`+ "effort": "none"`
`308`	`312`	`}`
`309`	`313`	`},`
`310`	`314`	`"low": {`
`@@ -661,12 +665,14 @@`
`661`	`665`	`"variants": {`
`662`	`666`	`"instant": {`
`663`	`667`	`"reasoning": {`
`664`		`- "enabled": false`
	`668`	`+ "enabled": false,`
	`669`	`+ "effort": "none"`
`665`	`670`	`}`
`666`	`671`	`},`
`667`	`672`	`"thinking": {`
`668`	`673`	`"reasoning": {`
`669`		`- "enabled": true`
	`674`	`+ "enabled": true,`
	`675`	`+ "effort": "medium"`
`670`	`676`	`}`
`671`	`677`	`}`
`672`	`678`	`}`