@@ -86,6 +86,11 @@ export interface StreamEvent {
8686 latencyMs : number
8787 }
8888 }
// Optional trace data on a stream event. The usage-metadata handling in this file reads
// trace.promptRouter.invokedModelId and treats it as an ARN-style string.
89+ trace ?: {
90+ promptRouter ?: {
// NOTE(review): presumably identifies the model the prompt router selected for this request — confirm against the Bedrock API docs
91+ invokedModelId ?: string
92+ }
93+ }
8994}
9095
9196export class AwsBedrockHandler extends BaseProvider implements SingleCompletionHandler {
@@ -252,10 +257,49 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH
252257
253258 // Handle metadata events first: a usage event yields one "usage" chunk and then skips the rest of this iteration
254259 if ( streamEvent . metadata ?. usage ) {
260+ // Check if this is a response from an intelligent prompt router; if so, the trace carries invokedModelId
261+ const invokedModelId = streamEvent . trace ?. promptRouter ?. invokedModelId
262+
263+ // If invokedModelId is present, reduce it from its ARN form to a short model family name.
// modelIdForCost stays undefined when there is no invokedModelId, the regex does not match, or no family below matches.
264+ let modelIdForCost : string | undefined
265+ if ( invokedModelId ) {
266+ // Extract the model name from the ARN (capture group 1 is the text that follows a "/")
267+ // Example ARN: arn:aws:bedrock:us-west-2:699475926481:inference-profile/us.anthropic.claude-3-5-sonnet-20240620-v1:0
268+ const modelMatch = invokedModelId . match ( / \/ ( [ ^ \/ ] + ) (?: : | $ ) / )
269+ if ( modelMatch && modelMatch [ 1 ] ) {
270+ const modelName = modelMatch [ 1 ]
271+
272+ // Map the model name to the format expected by the cost calculation function.
// Plain substring checks, first match wins; an unrecognized name leaves modelIdForCost undefined.
273+ if ( modelName . includes ( "claude-3-5-sonnet" ) ) {
274+ modelIdForCost = "claude-3-5-sonnet"
275+ } else if ( modelName . includes ( "claude-3-sonnet" ) ) {
276+ modelIdForCost = "claude-3-sonnet"
277+ } else if ( modelName . includes ( "claude-3-opus" ) ) {
278+ modelIdForCost = "claude-3-opus"
279+ } else if ( modelName . includes ( "claude-3-haiku" ) ) {
280+ modelIdForCost = "claude-3-haiku"
281+ } else if ( modelName . includes ( "claude-3-5-haiku" ) ) {
282+ modelIdForCost = "claude-3-5-haiku"
283+ } else if ( modelName . includes ( "claude-3-7-sonnet" ) ) {
284+ modelIdForCost = "claude-3-7-sonnet"
285+ }
286+
// Logged whenever the regex matched, even if no family was recognized (extractedModelId is then undefined).
287+ logger . debug ( "Extracted model ID from intelligent prompt router" , {
288+ ctx : "bedrock" ,
289+ originalArn : invokedModelId ,
290+ extractedModelId : modelIdForCost ,
291+ } )
292+ }
293+ }
294+
// Missing (or zero) token counts are reported as 0.
295+ const inputTokens = streamEvent . metadata . usage . inputTokens || 0
296+ const outputTokens = streamEvent . metadata . usage . outputTokens || 0
297+
// Emit the usage chunk; invokedModelId is undefined unless a router model family was recognized above.
255298 yield {
256299 type : "usage" ,
257- inputTokens : streamEvent . metadata . usage . inputTokens || 0 ,
258- outputTokens : streamEvent . metadata . usage . outputTokens || 0 ,
300+ inputTokens : inputTokens ,
301+ outputTokens : outputTokens ,
302+ invokedModelId : modelIdForCost ,
259303 }
260304 continue
261305 }
@@ -491,6 +535,22 @@ Please check:
491535 supportsPromptCache : false ,
492536 supportsImages : true ,
493537 }
538+ } else if ( arnLower . includes ( "llama3.3" ) || arnLower . includes ( "llama-3.3" ) ) {
539+ // Llama 3.3 models.
// This branch has to stay ahead of the generic "llama3" / "llama-3" branch further down,
// because that branch's substring check also matches these ARNs.
// NOTE(review): supportsImages is unconditionally true here, while the 3.2 branch below gates it on "90b"/"11b" — confirm 3.3 accepts image input.
// NOTE(review): supportsPromptCache is true here but false in the generic Llama 3 branch — confirm against Bedrock prompt-caching docs.
540+ modelInfo = {
541+ maxTokens : 8192 ,
542+ contextWindow : 128_000 ,
543+ supportsPromptCache : true ,
544+ supportsImages : true ,
545+ }
546+ } else if ( arnLower . includes ( "llama3.2" ) || arnLower . includes ( "llama-3.2" ) ) {
547+ // Llama 3.2 models (also checked before the generic Llama 3 branch for the same substring reason).
// Image support is enabled only when the ARN contains "90b" or "11b".
// NOTE(review): supportsPromptCache is true here but false in the generic Llama 3 branch — confirm.
548+ modelInfo = {
549+ maxTokens : 8192 ,
550+ contextWindow : 128_000 ,
551+ supportsPromptCache : true ,
552+ supportsImages : arnLower . includes ( "90b" ) || arnLower . includes ( "11b" ) ,
553+ }
494554 } else if ( arnLower . includes ( "llama3" ) || arnLower . includes ( "llama-3" ) ) {
495555 // Llama 3 models typically have 8192 tokens in Bedrock
496556 modelInfo = {
@@ -499,6 +559,46 @@ Please check:
499559 supportsPromptCache : false ,
500560 supportsImages : arnLower . includes ( "90b" ) || arnLower . includes ( "11b" ) ,
501561 }
562+ } else if ( arnLower . includes ( "titan-text-lite" ) ) {
563+ // Amazon Titan Text Lite: text-only defaults, no prompt cache
564+ modelInfo = {
565+ maxTokens : 4096 ,
566+ contextWindow : 8_000 ,
567+ supportsPromptCache : false ,
568+ supportsImages : false ,
569+ }
570+ } else if ( arnLower . includes ( "titan-text-express" ) ) {
571+ // Amazon Titan Text Express: same limits as the Titan Text Lite branch above
572+ modelInfo = {
573+ maxTokens : 4096 ,
574+ contextWindow : 8_000 ,
575+ supportsPromptCache : false ,
576+ supportsImages : false ,
577+ }
578+ } else if ( arnLower . includes ( "titan-text-embeddings" ) ) {
579+ // Amazon Titan Text Embeddings
// NOTE(review): maxTokens (8192) is larger than contextWindow (8_000) in this branch — confirm both values are intended.
580+ modelInfo = {
581+ maxTokens : 8192 ,
582+ contextWindow : 8_000 ,
583+ supportsPromptCache : false ,
584+ supportsImages : false ,
585+ }
586+ } else if ( arnLower . includes ( "nova-micro" ) ) {
587+ // Amazon Nova Micro: no image input, no prompt cache
588+ modelInfo = {
589+ maxTokens : 4096 ,
590+ contextWindow : 128_000 ,
591+ supportsPromptCache : false ,
592+ supportsImages : false ,
593+ }
594+ } else if ( arnLower . includes ( "nova-lite" ) ) {
595+ // Amazon Nova Lite: currently given the same values as the Nova Micro branch above
// NOTE(review): confirm against the Bedrock model documentation whether Nova Lite accepts image input and what its context window is.
596+ modelInfo = {
597+ maxTokens : 4096 ,
598+ contextWindow : 128_000 ,
599+ supportsPromptCache : false ,
600+ supportsImages : false ,
601+ }
502602 } else if ( arnLower . includes ( "nova-pro" ) ) {
503603 // Amazon Nova Pro
504604 modelInfo = {
0 commit comments