@@ -529,9 +529,8 @@ export const BedrockChatCompleteResponseTransform: (
   }

   if ('output' in response) {
-    const shouldSendCacheUsage =
-      response.usage.cacheWriteInputTokens ||
-      response.usage.cacheReadInputTokens;
+    const cacheReadInputTokens = response.usage?.cacheReadInputTokens || 0;
+    const cacheWriteInputTokens = response.usage?.cacheWriteInputTokens || 0;

     let content: string = '';
     content = response.output.message.content
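A note on the `|| 0` defaulting above: Bedrock only reports the cache token fields when prompt caching is in play, so they can be `undefined`, and the later hunks add them into `prompt_tokens`. A minimal, self-contained sketch of the failure mode this avoids (the `BedrockUsage` interface and the numbers are invented for illustration; only the optional cache fields are taken from the diff):

```ts
// Hypothetical slice of Bedrock's usage object; the cache fields are
// optional because Bedrock omits them when prompt caching is not used.
interface BedrockUsage {
  inputTokens: number;
  cacheReadInputTokens?: number;
  cacheWriteInputTokens?: number;
}

const usage: BedrockUsage = { inputTokens: 120 }; // no caching on this request

// Without defaulting, the later sum is poisoned: 120 + undefined === NaN.
const broken = usage.inputTokens + usage.cacheReadInputTokens!; // NaN

// The pattern the diff adopts: default missing cache counts to 0.
const cacheReadInputTokens = usage.cacheReadInputTokens || 0;
const safe = usage.inputTokens + cacheReadInputTokens; // 120

console.log(broken, safe); // NaN 120
```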
@@ -565,12 +564,19 @@ export const BedrockChatCompleteResponseTransform: (
         },
       ],
       usage: {
-        prompt_tokens: response.usage.inputTokens,
+        prompt_tokens:
+          response.usage.inputTokens +
+          cacheReadInputTokens +
+          cacheWriteInputTokens,
         completion_tokens: response.usage.outputTokens,
         total_tokens: response.usage.totalTokens, // contains the cache usage as well
-        ...(shouldSendCacheUsage && {
-          cache_read_input_tokens: response.usage.cacheReadInputTokens,
-          cache_creation_input_tokens: response.usage.cacheWriteInputTokens,
+        prompt_tokens_details: {
+          cached_tokens: cacheReadInputTokens,
+        },
+        // we only want to be sending this for anthropic models and this is not openai compliant
+        ...((cacheReadInputTokens > 0 || cacheWriteInputTokens > 0) && {
+          cache_read_input_tokens: cacheReadInputTokens,
+          cache_creation_input_tokens: cacheWriteInputTokens,
         }),
       },
     };
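For reference, here is a sketch of what this mapping produces, with invented token counts (the input shape follows the fields the diff reads; every number is hypothetical). `cached_tokens` follows the OpenAI `prompt_tokens_details` convention, while the two `cache_*_input_tokens` fields are the Anthropic-style extras the comment flags as non-OpenAI-compliant:

```ts
// Hypothetical Bedrock usage for a cached request (values invented).
const response = {
  usage: {
    inputTokens: 100, // non-cached input tokens only
    outputTokens: 50,
    totalTokens: 250, // per the diff's comment, already contains cache usage
    cacheReadInputTokens: 80,
    cacheWriteInputTokens: 20,
  },
};

const cacheReadInputTokens = response.usage?.cacheReadInputTokens || 0;
const cacheWriteInputTokens = response.usage?.cacheWriteInputTokens || 0;

// Mirrors the mapping in the hunk above.
const usage = {
  prompt_tokens:
    response.usage.inputTokens + cacheReadInputTokens + cacheWriteInputTokens, // 200
  completion_tokens: response.usage.outputTokens, // 50
  total_tokens: response.usage.totalTokens, // 250
  prompt_tokens_details: { cached_tokens: cacheReadInputTokens }, // OpenAI-style
  ...((cacheReadInputTokens > 0 || cacheWriteInputTokens > 0) && {
    cache_read_input_tokens: cacheReadInputTokens, // Anthropic-style,
    cache_creation_input_tokens: cacheWriteInputTokens, // not OpenAI compliant
  }),
};

console.log(JSON.stringify(usage, null, 2));
```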
@@ -663,9 +669,9 @@ export const BedrockChatCompleteStreamChunkTransform: (

   // final chunk
   if (parsedChunk.usage) {
-    const shouldSendCacheUsage =
-      parsedChunk.usage.cacheWriteInputTokens ||
-      parsedChunk.usage.cacheReadInputTokens;
+    const cacheReadInputTokens = parsedChunk.usage?.cacheReadInputTokens || 0;
+    const cacheWriteInputTokens = parsedChunk.usage?.cacheWriteInputTokens || 0;
+
     return [
       `data: ${JSON.stringify({
         id: fallbackId,
@@ -684,10 +690,17 @@ export const BedrockChatCompleteStreamChunkTransform: (
         },
       ],
       usage: {
-        prompt_tokens: parsedChunk.usage.inputTokens,
+        prompt_tokens:
+          parsedChunk.usage.inputTokens +
+          cacheReadInputTokens +
+          cacheWriteInputTokens,
         completion_tokens: parsedChunk.usage.outputTokens,
         total_tokens: parsedChunk.usage.totalTokens,
-        ...(shouldSendCacheUsage && {
+        prompt_tokens_details: {
+          cached_tokens: cacheReadInputTokens,
+        },
+        // we only want to be sending this for anthropic models and this is not openai compliant
+        ...((cacheReadInputTokens > 0 || cacheWriteInputTokens > 0) && {
           cache_read_input_tokens: parsedChunk.usage.cacheReadInputTokens,
           cache_creation_input_tokens:
             parsedChunk.usage.cacheWriteInputTokens,
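The stream transform applies the same arithmetic to the final chunk. A quick sanity check of the invariant both hunks now share (all numbers invented; the invariant itself comes from the diff's own comment that Bedrock's `totalTokens` already contains the cache usage):

```ts
// Invented final-chunk usage, shaped like the fields the diff reads.
const parsedChunk = {
  usage: {
    inputTokens: 40,
    outputTokens: 10,
    totalTokens: 150,
    cacheReadInputTokens: 100,
    cacheWriteInputTokens: 0,
  },
};

const cacheReadInputTokens = parsedChunk.usage?.cacheReadInputTokens || 0;
const cacheWriteInputTokens = parsedChunk.usage?.cacheWriteInputTokens || 0;

const prompt_tokens =
  parsedChunk.usage.inputTokens + cacheReadInputTokens + cacheWriteInputTokens; // 140

// prompt + completion should equal Bedrock's cache-inclusive total.
console.assert(
  prompt_tokens + parsedChunk.usage.outputTokens ===
    parsedChunk.usage.totalTokens,
  'prompt_tokens + completion_tokens should equal totalTokens'
);
```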