Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 75 additions & 42 deletions src/app/api/fim/completions/route.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { MISTRAL_API_KEY } from '@/lib/config.server';
import { MISTRAL_API_KEY, INCEPTION_API_KEY } from '@/lib/config.server';
import type { NextRequest } from 'next/server';
import { NextResponse } from 'next/server';
import z from 'zod';
Expand All @@ -25,12 +25,45 @@ import { debugSaveProxyRequest } from '@/lib/debugUtils';
import { sentryLogger } from '@/lib/utils.server';
import { getBYOKforOrganization, getBYOKforUser } from '@/lib/byok';

const MISTRAL_URL = 'https://api.mistral.ai/v1/fim/completions';
const MISTRAL_FIM_URL = 'https://api.mistral.ai/v1/fim/completions';
const INCEPTION_FIM_URL = 'https://api.inceptionlabs.ai/v1/fim/completions';
const FIM_MAX_TOKENS_LIMIT = 1000;

type FimProvider = 'mistral' | 'inception';

// Maps an OpenRouter-style model id onto the upstream FIM provider,
// the model name that provider expects, and the endpoint URL to call.
// Returns null when no known FIM provider serves the requested model.
function resolveFimProvider(model: string): {
  provider: FimProvider;
  upstreamModel: string;
  upstreamUrl: string;
} | null {
  const mistralPrefix = 'mistralai/';
  if (model.startsWith(mistralPrefix)) {
    // Mistral models arrive prefixed; the upstream API wants the bare name.
    return {
      provider: 'mistral',
      upstreamModel: model.substring(mistralPrefix.length),
      upstreamUrl: MISTRAL_FIM_URL,
    };
  }
  // Inception currently exposes exactly one FIM model.
  return model === 'inception/mercury-edit'
    ? { provider: 'inception', upstreamModel: 'mercury-edit', upstreamUrl: INCEPTION_FIM_URL }
    : null;
}

// Returns the platform-owned API key for the given FIM provider, or null
// when the corresponding config value is empty/unset (callers then require
// a BYOK key from the user or organization).
function getSystemApiKey(provider: FimProvider): string | null {
  if (provider === 'mistral') {
    return MISTRAL_API_KEY ? MISTRAL_API_KEY : null;
  }
  return INCEPTION_API_KEY ? INCEPTION_API_KEY : null;
}

const FIMRequestBody = z.object({
//ref: https://docs.mistral.ai/api/endpoint/fim#operation-fim_completion_v1_fim_completions_post
provider: z.enum(['mistral', 'inceptionlabs']).optional(),
model: z.string(),
prompt: z.string(),
suffix: z.string().optional(),
Expand Down Expand Up @@ -82,17 +115,15 @@ export async function POST(request: NextRequest) {
return invalidRequestResponse();
}

if ((requestBody.provider ?? 'mistral') !== 'mistral') {
// Resolve provider from model name
const resolved = resolveFimProvider(requestBody.model);
if (!resolved) {
return NextResponse.json(
{ error: requestBody.provider + ' provider not yet supported' },
{ error: requestBody.model + ' is not a supported FIM model' },
{ status: 400 }
);
//NOTE: mistral does not do data collection on paid org accounts like ours.
//If we ever support OTHER providers, we need to either ensure they don't
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this note relevant?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we just remove it and collect now?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

inception doesn't do collection either

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there are real gaps — we don't enforce the provider blocklist, for example — but that's not a regression

//either, or at least enforce the rules the org settings configure
//see getBalanceAndOrgSettings below and its usage in the openrouter proxy.
//ref: https://help.mistral.ai/en/articles/347617-do-you-use-my-user-data-to-train-your-artificial-intelligence-models
}
const { provider: fimProvider, upstreamModel, upstreamUrl } = resolved;

// Validate max_tokens
if (!requestBody.max_tokens || requestBody.max_tokens > FIM_MAX_TOKENS_LIMIT) {
Expand All @@ -102,35 +133,24 @@ export async function POST(request: NextRequest) {
return temporarilyUnavailableResponse();
}

// Map FIM model to OpenRouter format for org settings compatibility
const fimModel_withOpenRouterStyleProviderPrefix = requestBody.model;

const requiredModelPrefix = 'mistralai/';
if (!fimModel_withOpenRouterStyleProviderPrefix.startsWith(requiredModelPrefix)) {
return NextResponse.json(
{ error: fimModel_withOpenRouterStyleProviderPrefix + ' is not a mistralai model' },
{ status: 400 }
);
}

const mistralModel = fimModel_withOpenRouterStyleProviderPrefix.slice(requiredModelPrefix.length);

// Use new shared helper for fraud & project headers
const { fraudHeaders, projectId } = extractFraudAndProjectHeaders(request);
const taskId = extractHeaderAndLimitLength(request, 'x-kilocode-taskid') ?? undefined;

// Extract properties for usage context
const promptInfo = extractFimPromptInfo(requestBody);

const byokProviderKey = fimProvider === 'mistral' ? 'codestral' : 'inception';
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not just make the fim provider be codestral in the first place?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's called mistral everywhere, but this hack is needed for the "not all API keys support all models" feature that mistral has.


const userByok = organizationId
? await getBYOKforOrganization(readDb, organizationId, ['codestral'])
: await getBYOKforUser(readDb, user.id, ['codestral']);
? await getBYOKforOrganization(readDb, organizationId, [byokProviderKey])
: await getBYOKforUser(readDb, user.id, [byokProviderKey]);

const usageContext: MicrodollarUsageContext = {
api_kind: 'fim_completions',
kiloUserId: user.id,
provider: 'mistral',
requested_model: fimModel_withOpenRouterStyleProviderPrefix,
provider: fimProvider,
requested_model: requestBody.model,
promptInfo,
max_tokens: requestBody.max_tokens ?? null,
has_middle_out_transform: null, // N/A for FIM
Expand All @@ -151,44 +171,57 @@ export async function POST(request: NextRequest) {
auto_model: null,
};

setTag('ui.ai_model', fimModel_withOpenRouterStyleProviderPrefix);
setTag('ui.ai_model', requestBody.model);
// Use read replica for balance check - this is a read-only operation that can tolerate
// slight replication lag, and provides lower latency for US users
const { balance, settings, plan } = await getBalanceAndOrgSettings(organizationId, user, readDb);

if (balance <= 0 && !isFreeModel(fimModel_withOpenRouterStyleProviderPrefix) && !userByok) {
if (balance <= 0 && !isFreeModel(requestBody.model) && !userByok) {
return NextResponse.json({ error: { message: 'Insufficient credits' } }, { status: 402 });
}

// Use shared helper for organization model restrictions
// Model allow list only applies to Enterprise plans
// Provider allow list applies to Enterprise plans; data collection applies to all plans (but FIM doesn't use provider config)
const { error: modelRestrictionError } = checkOrganizationModelRestrictions({
modelId: fimModel_withOpenRouterStyleProviderPrefix,
modelId: requestBody.model,
settings,
organizationPlan: plan,
});
if (modelRestrictionError) return modelRestrictionError;

const systemKey = getSystemApiKey(fimProvider);
const apiKey = userByok?.at(0)?.decryptedAPIKey ?? systemKey;

if (!apiKey) {
return NextResponse.json(
{
error: 'This model requires a BYOK API key. Please configure your API key in settings.',
},
{ status: 400 }
);
}

sentryRootSpan()?.setAttribute(
'mistral-fim.time_to_request_start_ms',
'fim.time_to_request_start_ms',
performance.now() - requestStartedAt
);

const mistralRequestSpan = startInactiveSpan({
name: 'mistral-fim-request-start',
const fimRequestSpan = startInactiveSpan({
name: 'fim-request-start',
op: 'http.client',
});

const bodyWithCorrectedModel = { ...requestBody, model: mistralModel };
// Make upstream request to Mistral
const proxyRes = await fetch(MISTRAL_URL, {
const bodyForUpstream = { ...requestBody, model: upstreamModel };

// Make upstream request to the resolved provider
const proxyRes = await fetch(upstreamUrl, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${userByok?.at(0)?.decryptedAPIKey ?? MISTRAL_API_KEY}`,
Authorization: `Bearer ${apiKey}`,
},
body: JSON.stringify(bodyWithCorrectedModel),
body: JSON.stringify(bodyForUpstream),
});
usageContext.status_code = proxyRes.status;

Expand All @@ -200,19 +233,19 @@ export async function POST(request: NextRequest) {
if (proxyRes.status >= 400) {
await captureProxyError({
user,
request: bodyWithCorrectedModel,
request: bodyForUpstream,
response: proxyRes,
organizationId,
model: fimModel_withOpenRouterStyleProviderPrefix,
errorMessage: `Mistral FIM returned error ${proxyRes.status}`,
model: requestBody.model,
errorMessage: `FIM provider returned error ${proxyRes.status}`,
trackInSentry: proxyRes.status >= 500,
});
}

const clonedResponse = proxyRes.clone(); // reading from body is side-effectful

// Account for usage using FIM-specific parser
countAndStoreFimUsage(clonedResponse, usageContext, mistralRequestSpan);
countAndStoreFimUsage(clonedResponse, usageContext, fimRequestSpan);

return wrapInSafeNextResponse(proxyRes);
}
1 change: 1 addition & 0 deletions src/lib/config.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const NEXTAUTH_SECRET = getEnvVariable('NEXTAUTH_SECRET');
export const OPENROUTER_API_KEY = getEnvVariable('OPENROUTER_API_KEY');
export const MISTRAL_API_KEY = getEnvVariable('MISTRAL_API_KEY');
export const OPENAI_API_KEY = getEnvVariable('OPENAI_API_KEY');
export const INCEPTION_API_KEY = getEnvVariable('INCEPTION_API_KEY');
export const INTERNAL_API_SECRET = getEnvVariable('INTERNAL_API_SECRET');
export const CODE_REVIEW_WORKER_AUTH_TOKEN = getEnvVariable('CODE_REVIEW_WORKER_AUTH_TOKEN');

Expand Down
55 changes: 35 additions & 20 deletions src/lib/llm-proxy-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import type {
} from '@/lib/processUsage.types';
import { getMaxTokens } from '@/lib/providers/openrouter/request-helpers';
import { KILO_AUTO_BALANCED_MODEL, KILO_AUTO_FREE_MODEL } from '@/lib/kilo-auto-model';
import type { GatewayChatApiKind } from '@/lib/providers/types';
import type { GatewayChatApiKind, ProviderId } from '@/lib/providers/types';

// FIM suffix markers for tracking purposes - used to wrap suffix in a fake system prompt format
// This allows FIM requests to be tracked consistently with chat requests
Expand Down Expand Up @@ -378,7 +378,7 @@ export function extractFimPromptInfo(body: { prompt: string; suffix?: string | n
// FIM-Specific Code
// ============================================================================

export type MistralFimUsage = {
export type FimUsage = {
prompt_tokens: number;
completion_tokens: number;
total_tokens: number;
Expand All @@ -388,7 +388,7 @@ export type MistralFimCompletion = {
id: string;
object: 'fim.completion';
model: string;
usage: MistralFimUsage;
usage: FimUsage;
created: number;
choices: Array<{
index: number;
Expand All @@ -409,23 +409,38 @@ export type MistralFimStreamChunk = {
};
finish_reason: string | null;
}>;
usage?: MistralFimUsage; // Only present in final chunk
usage?: FimUsage; // Only present in final chunk
};

function computeMistralFimMicrodollarCost(usage: MistralFimUsage): number {
return Math.round(usage.prompt_tokens * 0.3 + usage.completion_tokens * 0.9);
// Inception FIM pricing in microdollars per token:
// 0.25 µ$ per prompt token plus 0.75 µ$ per completion token,
// rounded to the nearest whole microdollar.
function computeInceptionFimMicrodollarCost(usage: FimUsage): number {
  const inputCost = 0.25 * usage.prompt_tokens;
  const outputCost = 0.75 * usage.completion_tokens;
  return Math.round(inputCost + outputCost);
}

function parseMistralFimUsageFromString(response: string): MicrodollarUsageStats {
// Dispatches microdollar cost computation to per-provider pricing.
// Mistral: 0.3 µ$ per prompt token + 0.9 µ$ per completion token.
// Inception: delegated to computeInceptionFimMicrodollarCost.
// An unrecognized provider is logged and billed at zero rather than throwing,
// so a pricing gap never breaks the request path.
function computeFimMicrodollarCost(usage: FimUsage, provider: ProviderId): number {
  if (provider === 'mistral') {
    return Math.round(0.3 * usage.prompt_tokens + 0.9 * usage.completion_tokens);
  }
  if (provider === 'inception') {
    return computeInceptionFimMicrodollarCost(usage);
  }
  console.error('Unknown provider for FIM cost calculation', provider);
  return 0;
}

function parseMistralFimUsageFromString(
response: string,
provider: ProviderId
): MicrodollarUsageStats {
const json: MistralFimCompletion = JSON.parse(response);
const cost_mUsd = computeMistralFimMicrodollarCost(json.usage);
const cost_mUsd = computeFimMicrodollarCost(json.usage, provider);

return {
messageId: json.id,
model: json.model,
responseContent: json.choices[0]?.text || '',
hasError: !json.model,
inference_provider: 'mistral',
inference_provider: provider,
inputTokens: json.usage.prompt_tokens,
outputTokens: json.usage.completion_tokens,
cacheHitTokens: 0,
Expand All @@ -444,11 +459,12 @@ function parseMistralFimUsageFromString(response: string): MicrodollarUsageStats

async function parseMistralFimUsageFromStream(
stream: ReadableStream,
requestSpan: Span | undefined
requestSpan: Span | undefined,
provider: ProviderId
): Promise<MicrodollarUsageStats> {
requestSpan?.end();
const streamProcessingSpan = startInactiveSpan({
name: 'mistral-fim-stream-processing',
name: 'fim-stream-processing',
op: 'performance',
});
const timeToFirstTokenSpan = startInactiveSpan({
Expand All @@ -462,18 +478,15 @@ async function parseMistralFimUsageFromStream(
let reportedError = false;
const startedAt = performance.now();
let firstTokenReceived = false;
let usage: MistralFimUsage | undefined;
let usage: FimUsage | undefined;

const reader = stream.getReader();
const decoder = new TextDecoder();

const sseStreamParser = createParser({
onEvent(event: EventSourceMessage) {
if (!firstTokenReceived) {
sentryRootSpan()?.setAttribute(
'mistral.time_to_first_token_ms',
performance.now() - startedAt
);
sentryRootSpan()?.setAttribute('fim.time_to_first_token_ms', performance.now() - startedAt);
firstTokenReceived = true;
timeToFirstTokenSpan.end();
}
Expand Down Expand Up @@ -525,12 +538,12 @@ async function parseMistralFimUsageFromStream(
model,
responseContent,
hasError: reportedError,
inference_provider: 'mistral',
inference_provider: provider,
inputTokens: usage?.prompt_tokens ?? 0,
outputTokens: usage?.completion_tokens ?? 0,
cacheHitTokens: 0,
cacheWriteTokens: 0,
cost_mUsd: usage ? computeMistralFimMicrodollarCost(usage) : 0,
cost_mUsd: usage ? computeFimMicrodollarCost(usage, provider) : 0,
is_byok: null,
upstream_id: null,
finish_reason: null,
Expand All @@ -553,8 +566,10 @@ export function countAndStoreFimUsage(
const usageStatsPromise = !clonedResponse.body
? Promise.resolve(null)
: usageContext.isStreaming
? parseMistralFimUsageFromStream(clonedResponse.body, requestSpan)
: clonedResponse.text().then(content => parseMistralFimUsageFromString(content));
? parseMistralFimUsageFromStream(clonedResponse.body, requestSpan, usageContext.provider)
: clonedResponse
.text()
.then(content => parseMistralFimUsageFromString(content, usageContext.provider));

after(
usageStatsPromise.then(usageStats => {
Expand Down
1 change: 1 addition & 0 deletions src/lib/providers/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export type ProviderId =
| 'bytedance'
| 'coding-plan'
| 'corethink'
| 'inception'
| 'martian'
| 'mistral'
| 'morph'
Expand Down
Loading