Skip to content

Commit 886e5db

Browse files
Add Bedrock cost calculations with intelligent prompt routing support
1 parent d3fbf74 commit 886e5db

File tree

3 files changed

+401
-2
lines changed

3 files changed

+401
-2
lines changed

src/api/providers/bedrock.ts

Lines changed: 102 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,6 +86,11 @@ export interface StreamEvent {
8686
latencyMs: number
8787
}
8888
}
89+
trace?: {
90+
promptRouter?: {
91+
invokedModelId?: string
92+
}
93+
}
8994
}
9095

9196
export class AwsBedrockHandler extends BaseProvider implements SingleCompletionHandler {
@@ -252,10 +257,49 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
252257

253258
// Handle metadata events first
254259
if (streamEvent.metadata?.usage) {
260+
// Check if this is a response from an intelligent prompt router
261+
const invokedModelId = streamEvent.trace?.promptRouter?.invokedModelId
262+
263+
// If invokedModelId is present, extract it from the ARN format
264+
let modelIdForCost: string | undefined
265+
if (invokedModelId) {
266+
// Extract the model name from the ARN
267+
// Example ARN: arn:aws:bedrock:us-west-2:699475926481:inference-profile/us.anthropic.claude-3-5-sonnet-20240620-v1:0
268+
const modelMatch = invokedModelId.match(/\/([^\/]+)(?::|$)/)
269+
if (modelMatch && modelMatch[1]) {
270+
const modelName = modelMatch[1]
271+
272+
// Map the model name to the format expected by the cost calculation function
273+
if (modelName.includes("claude-3-5-sonnet")) {
274+
modelIdForCost = "claude-3-5-sonnet"
275+
} else if (modelName.includes("claude-3-sonnet")) {
276+
modelIdForCost = "claude-3-sonnet"
277+
} else if (modelName.includes("claude-3-opus")) {
278+
modelIdForCost = "claude-3-opus"
279+
} else if (modelName.includes("claude-3-haiku")) {
280+
modelIdForCost = "claude-3-haiku"
281+
} else if (modelName.includes("claude-3-5-haiku")) {
282+
modelIdForCost = "claude-3-5-haiku"
283+
} else if (modelName.includes("claude-3-7-sonnet")) {
284+
modelIdForCost = "claude-3-7-sonnet"
285+
}
286+
287+
logger.debug("Extracted model ID from intelligent prompt router", {
288+
ctx: "bedrock",
289+
originalArn: invokedModelId,
290+
extractedModelId: modelIdForCost,
291+
})
292+
}
293+
}
294+
295+
const inputTokens = streamEvent.metadata.usage.inputTokens || 0
296+
const outputTokens = streamEvent.metadata.usage.outputTokens || 0
297+
255298
yield {
256299
type: "usage",
257-
inputTokens: streamEvent.metadata.usage.inputTokens || 0,
258-
outputTokens: streamEvent.metadata.usage.outputTokens || 0,
300+
inputTokens: inputTokens,
301+
outputTokens: outputTokens,
302+
invokedModelId: modelIdForCost,
259303
}
260304
continue
261305
}
@@ -491,6 +535,22 @@ Please check:
491535
supportsPromptCache: false,
492536
supportsImages: true,
493537
}
538+
} else if (arnLower.includes("llama3.3") || arnLower.includes("llama-3.3")) {
539+
// Llama 3.3 models
540+
modelInfo = {
541+
maxTokens: 8192,
542+
contextWindow: 128_000,
543+
supportsPromptCache: true,
544+
supportsImages: true,
545+
}
546+
} else if (arnLower.includes("llama3.2") || arnLower.includes("llama-3.2")) {
547+
// Llama 3.2 models
548+
modelInfo = {
549+
maxTokens: 8192,
550+
contextWindow: 128_000,
551+
supportsPromptCache: true,
552+
supportsImages: arnLower.includes("90b") || arnLower.includes("11b"),
553+
}
494554
} else if (arnLower.includes("llama3") || arnLower.includes("llama-3")) {
495555
// Llama 3 models typically have 8192 tokens in Bedrock
496556
modelInfo = {
@@ -499,6 +559,46 @@ Please check:
499559
supportsPromptCache: false,
500560
supportsImages: arnLower.includes("90b") || arnLower.includes("11b"),
501561
}
562+
} else if (arnLower.includes("titan-text-lite")) {
563+
// Amazon Titan Text Lite
564+
modelInfo = {
565+
maxTokens: 4096,
566+
contextWindow: 8_000,
567+
supportsPromptCache: false,
568+
supportsImages: false,
569+
}
570+
} else if (arnLower.includes("titan-text-express")) {
571+
// Amazon Titan Text Express
572+
modelInfo = {
573+
maxTokens: 4096,
574+
contextWindow: 8_000,
575+
supportsPromptCache: false,
576+
supportsImages: false,
577+
}
578+
} else if (arnLower.includes("titan-text-embeddings")) {
579+
// Amazon Titan Text Embeddings
580+
modelInfo = {
581+
maxTokens: 8192,
582+
contextWindow: 8_000,
583+
supportsPromptCache: false,
584+
supportsImages: false,
585+
}
586+
} else if (arnLower.includes("nova-micro")) {
587+
// Amazon Nova Micro
588+
modelInfo = {
589+
maxTokens: 4096,
590+
contextWindow: 128_000,
591+
supportsPromptCache: false,
592+
supportsImages: false,
593+
}
594+
} else if (arnLower.includes("nova-lite")) {
595+
// Amazon Nova Lite
596+
modelInfo = {
597+
maxTokens: 4096,
598+
contextWindow: 128_000,
599+
supportsPromptCache: false,
600+
supportsImages: false,
601+
}
502602
} else if (arnLower.includes("nova-pro")) {
503603
// Amazon Nova Pro
504604
modelInfo = {

0 commit comments

Comments
 (0)