Skip to content

Commit 47a259b

Browse files
feat(byok): byok for hosted model capabilities (#2574)
* feat(byok): byok for hosted model capabilities * fix type * add ignore lint * accidentally added feature flags * centralize byok fetch for LLM calls * remove feature flags ts * fix tests * update docs
1 parent 40a6bf5 commit 47a259b

File tree

35 files changed

+9657
-182
lines changed

35 files changed

+9657
-182
lines changed

apps/docs/content/docs/en/execution/costs.mdx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ The model breakdown shows:
104104
Pricing shown reflects rates as of September 10, 2025. Check provider documentation for current pricing.
105105
</Callout>
106106

107+
## Bring Your Own Key (BYOK)
108+
109+
You can add your own API keys for hosted models (OpenAI, Anthropic, Google, Mistral) under **Settings → BYOK** to pay the providers' base prices. Keys are encrypted and apply to the entire workspace.
110+
107111
## Cost Optimization Strategies
108112

109113
- **Model Selection**: Choose models based on task complexity. Simple tasks can use GPT-4.1-nano while complex reasoning might need o1 or Claude Opus.

apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/[chunkId]/route.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,12 @@ export async function PUT(
100100
try {
101101
const validatedData = UpdateChunkSchema.parse(body)
102102

103-
const updatedChunk = await updateChunk(chunkId, validatedData, requestId)
103+
const updatedChunk = await updateChunk(
104+
chunkId,
105+
validatedData,
106+
requestId,
107+
accessCheck.knowledgeBase?.workspaceId
108+
)
104109

105110
logger.info(
106111
`[${requestId}] Chunk updated: ${chunkId} in document ${documentId} in knowledge base ${knowledgeBaseId}`

apps/sim/app/api/knowledge/[id]/documents/[documentId]/chunks/route.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ export async function POST(
184184
documentId,
185185
docTags,
186186
validatedData,
187-
requestId
187+
requestId,
188+
accessCheck.knowledgeBase?.workspaceId
188189
)
189190

190191
let cost = null

apps/sim/app/api/knowledge/search/route.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,11 +183,11 @@ export async function POST(request: NextRequest) {
183183
)
184184
}
185185

186-
// Generate query embedding only if query is provided
186+
const workspaceId = accessChecks.find((ac) => ac?.hasAccess)?.knowledgeBase?.workspaceId
187+
187188
const hasQuery = validatedData.query && validatedData.query.trim().length > 0
188-
// Start embedding generation early and await when needed
189189
const queryEmbeddingPromise = hasQuery
190-
? generateSearchEmbedding(validatedData.query!)
190+
? generateSearchEmbedding(validatedData.query!, undefined, workspaceId)
191191
: Promise.resolve(null)
192192

193193
// Check if any requested knowledge bases were not accessible

apps/sim/app/api/knowledge/utils.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ export interface EmbeddingData {
9999

100100
export interface KnowledgeBaseAccessResult {
101101
hasAccess: true
102-
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId'>
102+
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId' | 'workspaceId'>
103103
}
104104

105105
export interface KnowledgeBaseAccessDenied {
@@ -113,7 +113,7 @@ export type KnowledgeBaseAccessCheck = KnowledgeBaseAccessResult | KnowledgeBase
113113
export interface DocumentAccessResult {
114114
hasAccess: true
115115
document: DocumentData
116-
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId'>
116+
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId' | 'workspaceId'>
117117
}
118118

119119
export interface DocumentAccessDenied {
@@ -128,7 +128,7 @@ export interface ChunkAccessResult {
128128
hasAccess: true
129129
chunk: EmbeddingData
130130
document: DocumentData
131-
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId'>
131+
knowledgeBase: Pick<KnowledgeBaseData, 'id' | 'userId' | 'workspaceId'>
132132
}
133133

134134
export interface ChunkAccessDenied {

apps/sim/app/api/providers/route.ts

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import { createLogger } from '@/lib/logs/console/logger'
77
import { refreshTokenIfNeeded } from '@/app/api/auth/oauth/utils'
88
import type { StreamingExecution } from '@/executor/types'
99
import { executeProviderRequest } from '@/providers'
10-
import { getApiKey } from '@/providers/utils'
1110

1211
const logger = createLogger('ProvidersAPI')
1312

@@ -80,23 +79,20 @@ export async function POST(request: NextRequest) {
8079
verbosity,
8180
})
8281

83-
let finalApiKey: string
82+
let finalApiKey: string | undefined = apiKey
8483
try {
8584
if (provider === 'vertex' && vertexCredential) {
8685
finalApiKey = await resolveVertexCredential(requestId, vertexCredential)
87-
} else {
88-
finalApiKey = getApiKey(provider, model, apiKey)
8986
}
9087
} catch (error) {
91-
logger.error(`[${requestId}] Failed to get API key:`, {
88+
logger.error(`[${requestId}] Failed to resolve Vertex credential:`, {
9289
provider,
9390
model,
9491
error: error instanceof Error ? error.message : String(error),
95-
hasProvidedApiKey: !!apiKey,
9692
hasVertexCredential: !!vertexCredential,
9793
})
9894
return NextResponse.json(
99-
{ error: error instanceof Error ? error.message : 'API key error' },
95+
{ error: error instanceof Error ? error.message : 'Credential error' },
10096
{ status: 400 }
10197
)
10298
}
@@ -108,7 +104,6 @@ export async function POST(request: NextRequest) {
108104
hasApiKey: !!finalApiKey,
109105
})
110106

111-
// Execute provider request directly with the managed key
112107
const response = await executeProviderRequest(provider, {
113108
model,
114109
systemPrompt,

apps/sim/app/api/tools/search/route.ts

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { type NextRequest, NextResponse } from 'next/server'
22
import { z } from 'zod'
3+
import { getBYOKKey } from '@/lib/api-key/byok'
34
import { checkHybridAuth } from '@/lib/auth/hybrid'
45
import { SEARCH_TOOL_COST } from '@/lib/billing/constants'
56
import { env } from '@/lib/core/config/env'
@@ -10,6 +11,7 @@ const logger = createLogger('search')
1011

1112
const SearchRequestSchema = z.object({
1213
query: z.string().min(1),
14+
workspaceId: z.string().optional(),
1315
})
1416

1517
export const maxDuration = 60
@@ -39,8 +41,20 @@ export async function POST(request: NextRequest) {
3941
const body = await request.json()
4042
const validated = SearchRequestSchema.parse(body)
4143

42-
if (!env.EXA_API_KEY) {
43-
logger.error(`[${requestId}] EXA_API_KEY not configured`)
44+
let exaApiKey = env.EXA_API_KEY
45+
let isBYOK = false
46+
47+
if (validated.workspaceId) {
48+
const byokResult = await getBYOKKey(validated.workspaceId, 'exa')
49+
if (byokResult) {
50+
exaApiKey = byokResult.apiKey
51+
isBYOK = true
52+
logger.info(`[${requestId}] Using workspace BYOK key for Exa search`)
53+
}
54+
}
55+
56+
if (!exaApiKey) {
57+
logger.error(`[${requestId}] No Exa API key available`)
4458
return NextResponse.json(
4559
{ success: false, error: 'Search service not configured' },
4660
{ status: 503 }
@@ -50,14 +64,15 @@ export async function POST(request: NextRequest) {
5064
logger.info(`[${requestId}] Executing search`, {
5165
userId,
5266
query: validated.query,
67+
isBYOK,
5368
})
5469

5570
const result = await executeTool('exa_search', {
5671
query: validated.query,
5772
type: 'auto',
5873
useAutoprompt: true,
5974
highlights: true,
60-
apiKey: env.EXA_API_KEY,
75+
apiKey: exaApiKey,
6176
})
6277

6378
if (!result.success) {
@@ -85,7 +100,7 @@ export async function POST(request: NextRequest) {
85100
const cost = {
86101
input: 0,
87102
output: 0,
88-
total: SEARCH_TOOL_COST,
103+
total: isBYOK ? 0 : SEARCH_TOOL_COST,
89104
tokens: {
90105
input: 0,
91106
output: 0,
@@ -104,6 +119,7 @@ export async function POST(request: NextRequest) {
104119
userId,
105120
resultCount: results.length,
106121
cost: cost.total,
122+
isBYOK,
107123
})
108124

109125
return NextResponse.json({

apps/sim/app/api/wand/route.ts

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { userStats, workflow } from '@sim/db/schema'
33
import { eq, sql } from 'drizzle-orm'
44
import { type NextRequest, NextResponse } from 'next/server'
55
import OpenAI, { AzureOpenAI } from 'openai'
6+
import { getBYOKKey } from '@/lib/api-key/byok'
67
import { getSession } from '@/lib/auth'
78
import { logModelUsage } from '@/lib/billing/core/usage-log'
89
import { checkAndBillOverageThreshold } from '@/lib/billing/threshold-billing'
@@ -75,7 +76,8 @@ async function updateUserStatsForWand(
7576
completion_tokens?: number
7677
total_tokens?: number
7778
},
78-
requestId: string
79+
requestId: string,
80+
isBYOK = false
7981
): Promise<void> {
8082
if (!isBillingEnabled) {
8183
logger.debug(`[${requestId}] Billing is disabled, skipping wand usage cost update`)
@@ -93,21 +95,24 @@ async function updateUserStatsForWand(
9395
const completionTokens = usage.completion_tokens || 0
9496

9597
const modelName = useWandAzure ? wandModelName : 'gpt-4o'
96-
const pricing = getModelPricing(modelName)
97-
98-
const costMultiplier = getCostMultiplier()
99-
let modelCost = 0
98+
let costToStore = 0
99+
100+
if (!isBYOK) {
101+
const pricing = getModelPricing(modelName)
102+
const costMultiplier = getCostMultiplier()
103+
let modelCost = 0
104+
105+
if (pricing) {
106+
const inputCost = (promptTokens / 1000000) * pricing.input
107+
const outputCost = (completionTokens / 1000000) * pricing.output
108+
modelCost = inputCost + outputCost
109+
} else {
110+
modelCost = (promptTokens / 1000000) * 0.005 + (completionTokens / 1000000) * 0.015
111+
}
100112

101-
if (pricing) {
102-
const inputCost = (promptTokens / 1000000) * pricing.input
103-
const outputCost = (completionTokens / 1000000) * pricing.output
104-
modelCost = inputCost + outputCost
105-
} else {
106-
modelCost = (promptTokens / 1000000) * 0.005 + (completionTokens / 1000000) * 0.015
113+
costToStore = modelCost * costMultiplier
107114
}
108115

109-
const costToStore = modelCost * costMultiplier
110-
111116
await db
112117
.update(userStats)
113118
.set({
@@ -122,6 +127,7 @@ async function updateUserStatsForWand(
122127
userId,
123128
tokensUsed: totalTokens,
124129
costAdded: costToStore,
130+
isBYOK,
125131
})
126132

127133
await logModelUsage({
@@ -149,14 +155,6 @@ export async function POST(req: NextRequest) {
149155
return NextResponse.json({ success: false, error: 'Unauthorized' }, { status: 401 })
150156
}
151157

152-
if (!client) {
153-
logger.error(`[${requestId}] AI client not initialized. Missing API key.`)
154-
return NextResponse.json(
155-
{ success: false, error: 'Wand generation service is not configured.' },
156-
{ status: 503 }
157-
)
158-
}
159-
160158
try {
161159
const body = (await req.json()) as RequestBody
162160

@@ -170,6 +168,7 @@ export async function POST(req: NextRequest) {
170168
)
171169
}
172170

171+
let workspaceId: string | null = null
173172
if (workflowId) {
174173
const [workflowRecord] = await db
175174
.select({ workspaceId: workflow.workspaceId, userId: workflow.userId })
@@ -182,6 +181,8 @@ export async function POST(req: NextRequest) {
182181
return NextResponse.json({ success: false, error: 'Workflow not found' }, { status: 404 })
183182
}
184183

184+
workspaceId = workflowRecord.workspaceId
185+
185186
if (workflowRecord.workspaceId) {
186187
const permission = await verifyWorkspaceMembership(
187188
session.user.id,
@@ -199,6 +200,28 @@ export async function POST(req: NextRequest) {
199200
}
200201
}
201202

203+
let isBYOK = false
204+
let activeClient = client
205+
let byokApiKey: string | null = null
206+
207+
if (workspaceId && !useWandAzure) {
208+
const byokResult = await getBYOKKey(workspaceId, 'openai')
209+
if (byokResult) {
210+
isBYOK = true
211+
byokApiKey = byokResult.apiKey
212+
activeClient = new OpenAI({ apiKey: byokResult.apiKey })
213+
logger.info(`[${requestId}] Using BYOK OpenAI key for wand generation`)
214+
}
215+
}
216+
217+
if (!activeClient) {
218+
logger.error(`[${requestId}] AI client not initialized. Missing API key.`)
219+
return NextResponse.json(
220+
{ success: false, error: 'Wand generation service is not configured.' },
221+
{ status: 503 }
222+
)
223+
}
224+
202225
const finalSystemPrompt =
203226
systemPrompt ||
204227
'You are a helpful AI assistant. Generate content exactly as requested by the user.'
@@ -241,7 +264,7 @@ export async function POST(req: NextRequest) {
241264
if (useWandAzure) {
242265
headers['api-key'] = azureApiKey!
243266
} else {
244-
headers.Authorization = `Bearer ${openaiApiKey}`
267+
headers.Authorization = `Bearer ${byokApiKey || openaiApiKey}`
245268
}
246269

247270
logger.debug(`[${requestId}] Making streaming request to: ${apiUrl}`)
@@ -310,7 +333,7 @@ export async function POST(req: NextRequest) {
310333
logger.info(`[${requestId}] Received [DONE] signal`)
311334

312335
if (finalUsage) {
313-
await updateUserStatsForWand(session.user.id, finalUsage, requestId)
336+
await updateUserStatsForWand(session.user.id, finalUsage, requestId, isBYOK)
314337
}
315338

316339
controller.enqueue(
@@ -395,7 +418,7 @@ export async function POST(req: NextRequest) {
395418
}
396419
}
397420

398-
const completion = await client.chat.completions.create({
421+
const completion = await activeClient.chat.completions.create({
399422
model: useWandAzure ? wandModelName : 'gpt-4o',
400423
messages: messages,
401424
temperature: 0.3,
@@ -417,7 +440,7 @@ export async function POST(req: NextRequest) {
417440
logger.info(`[${requestId}] Wand generation successful`)
418441

419442
if (completion.usage) {
420-
await updateUserStatsForWand(session.user.id, completion.usage, requestId)
443+
await updateUserStatsForWand(session.user.id, completion.usage, requestId, isBYOK)
421444
}
422445

423446
return NextResponse.json({ success: true, content: generatedContent })

0 commit comments

Comments
 (0)