Skip to content

Commit ce90707

Browse files
feat(fim): add Mercury Edit FIM support with provider-based routing (#1693)
* feat(fim): add Mercury Edit FIM support with provider-based routing
* style: format files with oxfmt
* fix(types): add inception to ProviderId union type
* Remove fake type assertion
* Use startsWith

---------

Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com>
Co-authored-by: Christiaan Arnoldus <christiaan@kilocode.ai>
Co-authored-by: Christiaan Arnoldus <christiaan.arnoldus@outlook.com>
1 parent 9d33aa3 commit ce90707

File tree

4 files changed

+112
-62
lines changed

4 files changed

+112
-62
lines changed

src/app/api/fim/completions/route.ts

Lines changed: 75 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { MISTRAL_API_KEY } from '@/lib/config.server';
1+
import { MISTRAL_API_KEY, INCEPTION_API_KEY } from '@/lib/config.server';
22
import type { NextRequest } from 'next/server';
33
import { NextResponse } from 'next/server';
44
import z from 'zod';
@@ -25,12 +25,45 @@ import { debugSaveProxyRequest } from '@/lib/debugUtils';
2525
import { sentryLogger } from '@/lib/utils.server';
2626
import { getBYOKforOrganization, getBYOKforUser } from '@/lib/byok';
2727

28-
const MISTRAL_URL = 'https://api.mistral.ai/v1/fim/completions';
28+
const MISTRAL_FIM_URL = 'https://api.mistral.ai/v1/fim/completions';
29+
const INCEPTION_FIM_URL = 'https://api.inceptionlabs.ai/v1/fim/completions';
2930
const FIM_MAX_TOKENS_LIMIT = 1000;
3031

32+
type FimProvider = 'mistral' | 'inception';
33+
34+
function resolveFimProvider(model: string): {
35+
provider: FimProvider;
36+
upstreamModel: string;
37+
upstreamUrl: string;
38+
} | null {
39+
if (model.startsWith('mistralai/')) {
40+
return {
41+
provider: 'mistral',
42+
upstreamModel: model.slice('mistralai/'.length),
43+
upstreamUrl: MISTRAL_FIM_URL,
44+
};
45+
}
46+
if (model.startsWith('inception/')) {
47+
return {
48+
provider: 'inception',
49+
upstreamModel: model.slice('inception/'.length),
50+
upstreamUrl: INCEPTION_FIM_URL,
51+
};
52+
}
53+
return null;
54+
}
55+
56+
function getSystemApiKey(provider: FimProvider): string | null {
57+
switch (provider) {
58+
case 'mistral':
59+
return MISTRAL_API_KEY || null;
60+
case 'inception':
61+
return INCEPTION_API_KEY || null;
62+
}
63+
}
64+
3165
const FIMRequestBody = z.object({
3266
//ref: https://docs.mistral.ai/api/endpoint/fim#operation-fim_completion_v1_fim_completions_post
33-
provider: z.enum(['mistral', 'inceptionlabs']).optional(),
3467
model: z.string(),
3568
prompt: z.string(),
3669
suffix: z.string().optional(),
@@ -82,17 +115,15 @@ export async function POST(request: NextRequest) {
82115
return invalidRequestResponse();
83116
}
84117

85-
if ((requestBody.provider ?? 'mistral') !== 'mistral') {
118+
// Resolve provider from model name
119+
const resolved = resolveFimProvider(requestBody.model);
120+
if (!resolved) {
86121
return NextResponse.json(
87-
{ error: requestBody.provider + ' provider not yet supported' },
122+
{ error: requestBody.model + ' is not a supported FIM model' },
88123
{ status: 400 }
89124
);
90-
//NOTE: mistral does not do data collection on paid org accounts like ours.
91-
//If we ever support OTHER providers, we need to either ensure they don't
92-
//either, or at least enforce the rules the org settings configure
93-
//see getBalanceAndOrgSettings below and its usage in the openrouter proxy.
94-
//ref: https://help.mistral.ai/en/articles/347617-do-you-use-my-user-data-to-train-your-artificial-intelligence-models
95125
}
126+
const { provider: fimProvider, upstreamModel, upstreamUrl } = resolved;
96127

97128
// Validate max_tokens
98129
if (!requestBody.max_tokens || requestBody.max_tokens > FIM_MAX_TOKENS_LIMIT) {
@@ -102,35 +133,24 @@ export async function POST(request: NextRequest) {
102133
return temporarilyUnavailableResponse();
103134
}
104135

105-
// Map FIM model to OpenRouter format for org settings compatibility
106-
const fimModel_withOpenRouterStyleProviderPrefix = requestBody.model;
107-
108-
const requiredModelPrefix = 'mistralai/';
109-
if (!fimModel_withOpenRouterStyleProviderPrefix.startsWith(requiredModelPrefix)) {
110-
return NextResponse.json(
111-
{ error: fimModel_withOpenRouterStyleProviderPrefix + ' is not a mistralai model' },
112-
{ status: 400 }
113-
);
114-
}
115-
116-
const mistralModel = fimModel_withOpenRouterStyleProviderPrefix.slice(requiredModelPrefix.length);
117-
118136
// Use new shared helper for fraud & project headers
119137
const { fraudHeaders, projectId } = extractFraudAndProjectHeaders(request);
120138
const taskId = extractHeaderAndLimitLength(request, 'x-kilocode-taskid') ?? undefined;
121139

122140
// Extract properties for usage context
123141
const promptInfo = extractFimPromptInfo(requestBody);
124142

143+
const byokProviderKey = fimProvider === 'mistral' ? 'codestral' : 'inception';
144+
125145
const userByok = organizationId
126-
? await getBYOKforOrganization(readDb, organizationId, ['codestral'])
127-
: await getBYOKforUser(readDb, user.id, ['codestral']);
146+
? await getBYOKforOrganization(readDb, organizationId, [byokProviderKey])
147+
: await getBYOKforUser(readDb, user.id, [byokProviderKey]);
128148

129149
const usageContext: MicrodollarUsageContext = {
130150
api_kind: 'fim_completions',
131151
kiloUserId: user.id,
132-
provider: 'mistral',
133-
requested_model: fimModel_withOpenRouterStyleProviderPrefix,
152+
provider: fimProvider,
153+
requested_model: requestBody.model,
134154
promptInfo,
135155
max_tokens: requestBody.max_tokens ?? null,
136156
has_middle_out_transform: null, // N/A for FIM
@@ -151,44 +171,57 @@ export async function POST(request: NextRequest) {
151171
auto_model: null,
152172
};
153173

154-
setTag('ui.ai_model', fimModel_withOpenRouterStyleProviderPrefix);
174+
setTag('ui.ai_model', requestBody.model);
155175
// Use read replica for balance check - this is a read-only operation that can tolerate
156176
// slight replication lag, and provides lower latency for US users
157177
const { balance, settings, plan } = await getBalanceAndOrgSettings(organizationId, user, readDb);
158178

159-
if (balance <= 0 && !isFreeModel(fimModel_withOpenRouterStyleProviderPrefix) && !userByok) {
179+
if (balance <= 0 && !isFreeModel(requestBody.model) && !userByok) {
160180
return NextResponse.json({ error: { message: 'Insufficient credits' } }, { status: 402 });
161181
}
162182

163183
// Use shared helper for organization model restrictions
164184
// Model allow list only applies to Enterprise plans
165185
// Provider allow list applies to Enterprise plans; data collection applies to all plans (but FIM doesn't use provider config)
166186
const { error: modelRestrictionError } = checkOrganizationModelRestrictions({
167-
modelId: fimModel_withOpenRouterStyleProviderPrefix,
187+
modelId: requestBody.model,
168188
settings,
169189
organizationPlan: plan,
170190
});
171191
if (modelRestrictionError) return modelRestrictionError;
172192

193+
const systemKey = getSystemApiKey(fimProvider);
194+
const apiKey = userByok?.at(0)?.decryptedAPIKey ?? systemKey;
195+
196+
if (!apiKey) {
197+
return NextResponse.json(
198+
{
199+
error: 'This model requires a BYOK API key. Please configure your API key in settings.',
200+
},
201+
{ status: 400 }
202+
);
203+
}
204+
173205
sentryRootSpan()?.setAttribute(
174-
'mistral-fim.time_to_request_start_ms',
206+
'fim.time_to_request_start_ms',
175207
performance.now() - requestStartedAt
176208
);
177209

178-
const mistralRequestSpan = startInactiveSpan({
179-
name: 'mistral-fim-request-start',
210+
const fimRequestSpan = startInactiveSpan({
211+
name: 'fim-request-start',
180212
op: 'http.client',
181213
});
182214

183-
const bodyWithCorrectedModel = { ...requestBody, model: mistralModel };
184-
// Make upstream request to Mistral
185-
const proxyRes = await fetch(MISTRAL_URL, {
215+
const bodyForUpstream = { ...requestBody, model: upstreamModel };
216+
217+
// Make upstream request to the resolved provider
218+
const proxyRes = await fetch(upstreamUrl, {
186219
method: 'POST',
187220
headers: {
188221
'Content-Type': 'application/json',
189-
Authorization: `Bearer ${userByok?.at(0)?.decryptedAPIKey ?? MISTRAL_API_KEY}`,
222+
Authorization: `Bearer ${apiKey}`,
190223
},
191-
body: JSON.stringify(bodyWithCorrectedModel),
224+
body: JSON.stringify(bodyForUpstream),
192225
});
193226
usageContext.status_code = proxyRes.status;
194227

@@ -200,19 +233,19 @@ export async function POST(request: NextRequest) {
200233
if (proxyRes.status >= 400) {
201234
await captureProxyError({
202235
user,
203-
request: bodyWithCorrectedModel,
236+
request: bodyForUpstream,
204237
response: proxyRes,
205238
organizationId,
206-
model: fimModel_withOpenRouterStyleProviderPrefix,
207-
errorMessage: `Mistral FIM returned error ${proxyRes.status}`,
239+
model: requestBody.model,
240+
errorMessage: `FIM provider returned error ${proxyRes.status}`,
208241
trackInSentry: proxyRes.status >= 500,
209242
});
210243
}
211244

212245
const clonedResponse = proxyRes.clone(); // reading from body is side-effectful
213246

214247
// Account for usage using FIM-specific parser
215-
countAndStoreFimUsage(clonedResponse, usageContext, mistralRequestSpan);
248+
countAndStoreFimUsage(clonedResponse, usageContext, fimRequestSpan);
216249

217250
return wrapInSafeNextResponse(proxyRes);
218251
}

src/lib/config.server.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ export const NEXTAUTH_SECRET = getEnvVariable('NEXTAUTH_SECRET');
2828
export const OPENROUTER_API_KEY = getEnvVariable('OPENROUTER_API_KEY');
2929
export const MISTRAL_API_KEY = getEnvVariable('MISTRAL_API_KEY');
3030
export const OPENAI_API_KEY = getEnvVariable('OPENAI_API_KEY');
31+
export const INCEPTION_API_KEY = getEnvVariable('INCEPTION_API_KEY');
3132
export const INTERNAL_API_SECRET = getEnvVariable('INTERNAL_API_SECRET');
3233
export const CODE_REVIEW_WORKER_AUTH_TOKEN = getEnvVariable('CODE_REVIEW_WORKER_AUTH_TOKEN');
3334

src/lib/llm-proxy-helpers.ts

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import type {
3737
} from '@/lib/processUsage.types';
3838
import { getMaxTokens } from '@/lib/providers/openrouter/request-helpers';
3939
import { KILO_AUTO_BALANCED_MODEL, KILO_AUTO_FREE_MODEL } from '@/lib/kilo-auto-model';
40-
import type { GatewayChatApiKind } from '@/lib/providers/types';
40+
import type { GatewayChatApiKind, ProviderId } from '@/lib/providers/types';
4141

4242
// FIM suffix markers for tracking purposes - used to wrap suffix in a fake system prompt format
4343
// This allows FIM requests to be tracked consistently with chat requests
@@ -389,7 +389,7 @@ export function extractFimPromptInfo(body: { prompt: string; suffix?: string | n
389389
// FIM-Specific Code
390390
// ============================================================================
391391

392-
export type MistralFimUsage = {
392+
export type FimUsage = {
393393
prompt_tokens: number;
394394
completion_tokens: number;
395395
total_tokens: number;
@@ -399,7 +399,7 @@ export type MistralFimCompletion = {
399399
id: string;
400400
object: 'fim.completion';
401401
model: string;
402-
usage: MistralFimUsage;
402+
usage: FimUsage;
403403
created: number;
404404
choices: Array<{
405405
index: number;
@@ -420,23 +420,38 @@ export type MistralFimStreamChunk = {
420420
};
421421
finish_reason: string | null;
422422
}>;
423-
usage?: MistralFimUsage; // Only present in final chunk
423+
usage?: FimUsage; // Only present in final chunk
424424
};
425425

426-
function computeMistralFimMicrodollarCost(usage: MistralFimUsage): number {
427-
return Math.round(usage.prompt_tokens * 0.3 + usage.completion_tokens * 0.9);
426+
function computeInceptionFimMicrodollarCost(usage: FimUsage): number {
427+
return Math.round(usage.prompt_tokens * 0.25 + usage.completion_tokens * 0.75);
428428
}
429429

430-
function parseMistralFimUsageFromString(response: string): MicrodollarUsageStats {
430+
function computeFimMicrodollarCost(usage: FimUsage, provider: ProviderId): number {
431+
switch (provider) {
432+
case 'mistral':
433+
return Math.round(usage.prompt_tokens * 0.3 + usage.completion_tokens * 0.9);
434+
case 'inception':
435+
return computeInceptionFimMicrodollarCost(usage);
436+
default:
437+
console.error('Unknown provider for FIM cost calculation', provider);
438+
return 0;
439+
}
440+
}
441+
442+
function parseMistralFimUsageFromString(
443+
response: string,
444+
provider: ProviderId
445+
): MicrodollarUsageStats {
431446
const json: MistralFimCompletion = JSON.parse(response);
432-
const cost_mUsd = computeMistralFimMicrodollarCost(json.usage);
447+
const cost_mUsd = computeFimMicrodollarCost(json.usage, provider);
433448

434449
return {
435450
messageId: json.id,
436451
model: json.model,
437452
responseContent: json.choices[0]?.text || '',
438453
hasError: !json.model,
439-
inference_provider: 'mistral',
454+
inference_provider: provider,
440455
inputTokens: json.usage.prompt_tokens,
441456
outputTokens: json.usage.completion_tokens,
442457
cacheHitTokens: 0,
@@ -455,11 +470,12 @@ function parseMistralFimUsageFromString(response: string): MicrodollarUsageStats
455470

456471
async function parseMistralFimUsageFromStream(
457472
stream: ReadableStream,
458-
requestSpan: Span | undefined
473+
requestSpan: Span | undefined,
474+
provider: ProviderId
459475
): Promise<MicrodollarUsageStats> {
460476
requestSpan?.end();
461477
const streamProcessingSpan = startInactiveSpan({
462-
name: 'mistral-fim-stream-processing',
478+
name: 'fim-stream-processing',
463479
op: 'performance',
464480
});
465481
const timeToFirstTokenSpan = startInactiveSpan({
@@ -473,18 +489,15 @@ async function parseMistralFimUsageFromStream(
473489
let reportedError = false;
474490
const startedAt = performance.now();
475491
let firstTokenReceived = false;
476-
let usage: MistralFimUsage | undefined;
492+
let usage: FimUsage | undefined;
477493

478494
const reader = stream.getReader();
479495
const decoder = new TextDecoder();
480496

481497
const sseStreamParser = createParser({
482498
onEvent(event: EventSourceMessage) {
483499
if (!firstTokenReceived) {
484-
sentryRootSpan()?.setAttribute(
485-
'mistral.time_to_first_token_ms',
486-
performance.now() - startedAt
487-
);
500+
sentryRootSpan()?.setAttribute('fim.time_to_first_token_ms', performance.now() - startedAt);
488501
firstTokenReceived = true;
489502
timeToFirstTokenSpan.end();
490503
}
@@ -536,12 +549,12 @@ async function parseMistralFimUsageFromStream(
536549
model,
537550
responseContent,
538551
hasError: reportedError,
539-
inference_provider: 'mistral',
552+
inference_provider: provider,
540553
inputTokens: usage?.prompt_tokens ?? 0,
541554
outputTokens: usage?.completion_tokens ?? 0,
542555
cacheHitTokens: 0,
543556
cacheWriteTokens: 0,
544-
cost_mUsd: usage ? computeMistralFimMicrodollarCost(usage) : 0,
557+
cost_mUsd: usage ? computeFimMicrodollarCost(usage, provider) : 0,
545558
is_byok: null,
546559
upstream_id: null,
547560
finish_reason: null,
@@ -564,8 +577,10 @@ export function countAndStoreFimUsage(
564577
const usageStatsPromise = !clonedResponse.body
565578
? Promise.resolve(null)
566579
: usageContext.isStreaming
567-
? parseMistralFimUsageFromStream(clonedResponse.body, requestSpan)
568-
: clonedResponse.text().then(content => parseMistralFimUsageFromString(content));
580+
? parseMistralFimUsageFromStream(clonedResponse.body, requestSpan, usageContext.provider)
581+
: clonedResponse
582+
.text()
583+
.then(content => parseMistralFimUsageFromString(content, usageContext.provider));
569584

570585
after(
571586
usageStatsPromise.then(usageStats => {

src/lib/providers/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ export type ProviderId =
77
| 'bytedance'
88
| 'coding-plan'
99
| 'corethink'
10+
| 'inception'
1011
| 'martian'
1112
| 'mistral'
1213
| 'morph'

0 commit comments

Comments (0)