@@ -40,11 +40,13 @@ import {
   transformFinishReason,
 } from '../utils';
 import { transformGenerationConfig } from './transformGenerationConfig';
-import type {
+import {
   GoogleErrorResponse,
   GoogleGenerateContentResponse,
   VertexLlamaChatCompleteStreamChunk,
   VertexLLamaChatCompleteResponse,
+  GoogleSearchRetrievalTool,
+  VERTEX_MODALITY,
 } from './types';
 import {
   getMimeType,
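
With `VERTEX_MODALITY` now referenced as a runtime value in the reducers below, the type-only `import type` had to become a plain `import`: TypeScript erases `import type` declarations at compile time, so reading a value through one would fail.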
@@ -431,7 +433,18 @@ export const GoogleChatCompleteResponseTransform: (
     candidatesTokenCount = 0,
     totalTokenCount = 0,
     thoughtsTokenCount = 0,
+    cachedContentTokenCount = 0,
+    promptTokensDetails = [],
+    candidatesTokensDetails = [],
   } = response.usageMetadata;
+  const inputAudioTokens = promptTokensDetails.reduce((acc, curr) => {
+    if (curr.modality === VERTEX_MODALITY.AUDIO) return acc + curr.tokenCount;
+    return acc;
+  }, 0);
+  const outputAudioTokens = candidatesTokensDetails.reduce((acc, curr) => {
+    if (curr.modality === VERTEX_MODALITY.AUDIO) return acc + curr.tokenCount;
+    return acc;
+  }, 0);
 
   return {
     id: 'portkey-' + crypto.randomUUID(),
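
The two reducers above differ only in which details array they walk. A minimal sketch of how the pattern could be factored out; the `sumTokensForModality` helper and the `ModalityTokenDetail` interface are hypothetical, inferred from the `{ modality, tokenCount }` shape used above:

```ts
// Hypothetical helper, not part of this diff: sums tokenCount across
// entries whose modality matches the one requested.
interface ModalityTokenDetail {
  modality: string;
  tokenCount: number;
}

const sumTokensForModality = (
  details: ModalityTokenDetail[] | undefined,
  modality: string
): number =>
  (details ?? []).reduce(
    (acc, curr) => (curr.modality === modality ? acc + curr.tokenCount : acc),
    0
  );

// Both locals above would reduce to one call each, e.g.:
// const inputAudioTokens = sumTokensForModality(promptTokensDetails, VERTEX_MODALITY.AUDIO);
```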
@@ -510,6 +523,11 @@ export const GoogleChatCompleteResponseTransform: (
       total_tokens: totalTokenCount,
       completion_tokens_details: {
         reasoning_tokens: thoughtsTokenCount,
+        audio_tokens: outputAudioTokens,
+      },
+      prompt_tokens_details: {
+        cached_tokens: cachedContentTokenCount,
+        audio_tokens: inputAudioTokens,
       },
     },
   };
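
Read back as an OpenAI-style usage block, the mapping looks roughly like the sketch below (all numbers invented; `prompt_tokens` and `completion_tokens` come from fields destructured outside this hunk):

```ts
// Illustrative only: which Vertex usageMetadata field feeds which
// OpenAI-style usage field.
const usage = {
  completion_tokens: 48, // candidatesTokenCount
  total_tokens: 168, // totalTokenCount
  completion_tokens_details: {
    reasoning_tokens: 16, // thoughtsTokenCount
    audio_tokens: 0, // summed AUDIO entries in candidatesTokensDetails
  },
  prompt_tokens_details: {
    cached_tokens: 64, // cachedContentTokenCount
    audio_tokens: 0, // summed AUDIO entries in promptTokensDetails
  },
};
```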
@@ -603,6 +621,26 @@ export const GoogleChatCompleteStreamChunkTransform: (
       total_tokens: parsedChunk.usageMetadata.totalTokenCount,
       completion_tokens_details: {
         reasoning_tokens: parsedChunk.usageMetadata.thoughtsTokenCount ?? 0,
+        audio_tokens:
+          parsedChunk.usageMetadata?.candidatesTokensDetails?.reduce(
+            (acc, curr) => {
+              if (curr.modality === VERTEX_MODALITY.AUDIO)
+                return acc + curr.tokenCount;
+              return acc;
+            },
+            0
+          ),
+      },
+      prompt_tokens_details: {
+        cached_tokens: parsedChunk.usageMetadata.cachedContentTokenCount,
+        audio_tokens: parsedChunk.usageMetadata?.promptTokensDetails?.reduce(
+          (acc, curr) => {
+            if (curr.modality === VERTEX_MODALITY.AUDIO)
+              return acc + curr.tokenCount;
+            return acc;
+          },
+          0
+        ),
       },
     };
   }
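
The streaming path inlines the same modality reducers instead of precomputing them; the hypothetical `sumTokensForModality` sketched earlier would cover these call sites too. Note also that the details arrays are optionally chained here, so when Vertex omits them the reduce never runs and `audio_tokens` comes out `undefined` rather than `0`.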
@@ -739,7 +777,22 @@ export const VertexAnthropicChatCompleteResponseTransform: (
   }
 
   if ('content' in response) {
-    const { input_tokens = 0, output_tokens = 0 } = response?.usage ?? {};
+    const {
+      input_tokens = 0,
+      output_tokens = 0,
+      cache_creation_input_tokens = 0,
+      cache_read_input_tokens = 0,
+    } = response?.usage ?? {};
+
+    const totalTokens =
+      input_tokens +
+      output_tokens +
+      cache_creation_input_tokens +
+      cache_read_input_tokens;
+
+    const shouldSendCacheUsage =
+      !strictOpenAiCompliance &&
+      (cache_creation_input_tokens || cache_read_input_tokens);
 
     let content: AnthropicContentItem[] | string = strictOpenAiCompliance
       ? ''
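
Anthropic reports cache writes and reads outside of `input_tokens`, so the OpenAI-style total has to add all four counters. A quick worked example with invented numbers:

```ts
// Invented numbers: Anthropic's input_tokens excludes cache traffic,
// so every counter contributes to the OpenAI-style total.
const usage = {
  input_tokens: 30,
  output_tokens: 50,
  cache_creation_input_tokens: 200,
  cache_read_input_tokens: 800,
};
const totalTokens =
  usage.input_tokens +
  usage.output_tokens +
  usage.cache_creation_input_tokens +
  usage.cache_read_input_tokens; // 1080
```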
@@ -794,7 +847,14 @@ export const VertexAnthropicChatCompleteResponseTransform: (
       usage: {
         prompt_tokens: input_tokens,
         completion_tokens: output_tokens,
-        total_tokens: input_tokens + output_tokens,
+        total_tokens: totalTokens,
+        prompt_tokens_details: {
+          cached_tokens: cache_read_input_tokens,
+        },
+        ...(shouldSendCacheUsage && {
+          cache_read_input_tokens: cache_read_input_tokens,
+          cache_creation_input_tokens: cache_creation_input_tokens,
+        }),
       },
     };
   }
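
Two layers here: `prompt_tokens_details.cached_tokens` is the OpenAI-compatible field and is always emitted, while the raw Anthropic counters are spread in only when `shouldSendCacheUsage` holds, i.e. outside strict OpenAI compliance and with nonzero cache activity. Spreading a falsy value into an object literal is a no-op, so the two extra keys simply vanish otherwise.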
@@ -863,10 +923,20 @@ export const VertexAnthropicChatCompleteStreamChunkTransform: (
   }
 
   if (parsedChunk.type === 'message_start' && parsedChunk.message?.usage) {
+    const shouldSendCacheUsage =
+      parsedChunk.message?.usage?.cache_read_input_tokens ||
+      parsedChunk.message?.usage?.cache_creation_input_tokens;
+
     streamState.model = parsedChunk?.message?.model ?? '';
 
     streamState.usage = {
       prompt_tokens: parsedChunk.message.usage?.input_tokens,
+      ...(shouldSendCacheUsage && {
+        cache_read_input_tokens:
+          parsedChunk.message?.usage?.cache_read_input_tokens,
+        cache_creation_input_tokens:
+          parsedChunk.message?.usage?.cache_creation_input_tokens,
+      }),
     };
     return (
       `data: ${JSON.stringify({
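
Anthropic streams usage in two pieces: `input_tokens` (and any cache counters) arrive on `message_start`, `output_tokens` on the closing `message_delta`. Since each chunk is transformed independently, the early counters are parked on `streamState.usage` for the final chunk to pick up. Unlike the non-streaming path above, the cache gate here does not consult `strictOpenAiCompliance`; it keys off the counters alone.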
@@ -893,6 +963,12 @@ export const VertexAnthropicChatCompleteStreamChunkTransform: (
   }
 
   if (parsedChunk.type === 'message_delta' && parsedChunk.usage) {
+    const totalTokens =
+      (streamState?.usage?.prompt_tokens ?? 0) +
+      (streamState?.usage?.cache_creation_input_tokens ?? 0) +
+      (streamState?.usage?.cache_read_input_tokens ?? 0) +
+      (parsedChunk.usage.output_tokens ?? 0);
+
     return (
       `data: ${JSON.stringify({
         id: fallbackId,
@@ -911,11 +987,12 @@ export const VertexAnthropicChatCompleteStreamChunkTransform: (
           },
         ],
         usage: {
+          ...streamState.usage,
           completion_tokens: parsedChunk.usage?.output_tokens,
-          prompt_tokens: streamState.usage?.prompt_tokens,
-          total_tokens:
-            (streamState.usage?.prompt_tokens || 0) +
-            (parsedChunk.usage?.output_tokens || 0),
+          total_tokens: totalTokens,
+          prompt_tokens_details: {
+            cached_tokens: streamState.usage?.cache_read_input_tokens ?? 0,
+          },
         },
       })}` + '\n\n'
     );
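
The explicit `prompt_tokens` line is gone because `...streamState.usage` now replays it, along with any cache counters captured at `message_start`; keys written after a spread win on collision, so the later `completion_tokens`, `total_tokens`, and `prompt_tokens_details` cannot be clobbered. A sketch of the merge with invented values:

```ts
// Invented values: the spread contributes prompt_tokens and cache
// counters; keys listed after it take precedence on any overlap.
const streamStateUsage = { prompt_tokens: 30, cache_read_input_tokens: 800 };
const finalUsage = {
  ...streamStateUsage,
  completion_tokens: 50,
  total_tokens: 880, // 30 + 800 + 50, matching the non-streaming total
  prompt_tokens_details: { cached_tokens: 800 },
};
```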