Commit a17cf29

Merge pull request #1211 from Portkey-AI/fix/bedrock-usage-object-prompt_tokens-include-cache-tokens
fix tokens calculation for bedrock models when cache tokens are present
2 parents fe7f143 + cdd9aac commit a17cf29
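
In short: Bedrock reports cacheReadInputTokens and cacheWriteInputTokens separately from inputTokens, so the transformed prompt_tokens undercounted whenever prompt caching was active. This commit folds both cache counts into prompt_tokens and adds the OpenAI-style prompt_tokens_details.cached_tokens field. A minimal standalone sketch of the new calculation (the BedrockUsage shape and toOpenAIUsage helper are illustrative, not part of the diff):

```ts
// Illustrative sketch of the normalization this commit applies.
interface BedrockUsage {
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  cacheReadInputTokens?: number;
  cacheWriteInputTokens?: number;
}

// Hypothetical helper mirroring the diff's logic: Bedrock's inputTokens
// excludes cache tokens, so both cache counts are added back in.
function toOpenAIUsage(usage: BedrockUsage) {
  const cacheReadInputTokens = usage.cacheReadInputTokens || 0;
  const cacheWriteInputTokens = usage.cacheWriteInputTokens || 0;
  return {
    prompt_tokens:
      usage.inputTokens + cacheReadInputTokens + cacheWriteInputTokens,
    completion_tokens: usage.outputTokens,
    total_tokens: usage.totalTokens, // Bedrock's total already includes cache usage
    prompt_tokens_details: { cached_tokens: cacheReadInputTokens },
    // Anthropic-style fields, sent only when the cache was actually used;
    // not OpenAI compliant, per the comment in the diff.
    ...((cacheReadInputTokens > 0 || cacheWriteInputTokens > 0) && {
      cache_read_input_tokens: cacheReadInputTokens,
      cache_creation_input_tokens: cacheWriteInputTokens,
    }),
  };
}
```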

File tree

2 files changed: +35 −12 lines
src/providers/bedrock/chatComplete.ts
src/providers/types.ts
src/providers/bedrock/chatComplete.ts
Lines changed: 25 additions & 12 deletions
```diff
@@ -529,9 +529,8 @@ export const BedrockChatCompleteResponseTransform: (
   }
 
   if ('output' in response) {
-    const shouldSendCacheUsage =
-      response.usage.cacheWriteInputTokens ||
-      response.usage.cacheReadInputTokens;
+    const cacheReadInputTokens = response.usage?.cacheReadInputTokens || 0;
+    const cacheWriteInputTokens = response.usage?.cacheWriteInputTokens || 0;
 
     let content: string = '';
     content = response.output.message.content
@@ -565,12 +564,19 @@ export const BedrockChatCompleteResponseTransform: (
         },
       ],
       usage: {
-        prompt_tokens: response.usage.inputTokens,
+        prompt_tokens:
+          response.usage.inputTokens +
+          cacheReadInputTokens +
+          cacheWriteInputTokens,
         completion_tokens: response.usage.outputTokens,
         total_tokens: response.usage.totalTokens, // contains the cache usage as well
-        ...(shouldSendCacheUsage && {
-          cache_read_input_tokens: response.usage.cacheReadInputTokens,
-          cache_creation_input_tokens: response.usage.cacheWriteInputTokens,
+        prompt_tokens_details: {
+          cached_tokens: cacheReadInputTokens,
+        },
+        // we only want to be sending this for anthropic models and this is not openai compliant
+        ...((cacheReadInputTokens > 0 || cacheWriteInputTokens > 0) && {
+          cache_read_input_tokens: cacheReadInputTokens,
+          cache_creation_input_tokens: cacheWriteInputTokens,
         }),
       },
     };
@@ -663,9 +669,9 @@ export const BedrockChatCompleteStreamChunkTransform: (
 
     // final chunk
     if (parsedChunk.usage) {
-      const shouldSendCacheUsage =
-        parsedChunk.usage.cacheWriteInputTokens ||
-        parsedChunk.usage.cacheReadInputTokens;
+      const cacheReadInputTokens = parsedChunk.usage?.cacheReadInputTokens || 0;
+      const cacheWriteInputTokens = parsedChunk.usage?.cacheWriteInputTokens || 0;
+
       return [
         `data: ${JSON.stringify({
           id: fallbackId,
@@ -684,10 +690,17 @@ export const BedrockChatCompleteStreamChunkTransform: (
           },
         ],
         usage: {
-          prompt_tokens: parsedChunk.usage.inputTokens,
+          prompt_tokens:
+            parsedChunk.usage.inputTokens +
+            cacheReadInputTokens +
+            cacheWriteInputTokens,
           completion_tokens: parsedChunk.usage.outputTokens,
           total_tokens: parsedChunk.usage.totalTokens,
-          ...(shouldSendCacheUsage && {
+          prompt_tokens_details: {
+            cached_tokens: cacheReadInputTokens,
+          },
+          // we only want to be sending this for anthropic models and this is not openai compliant
+          ...((cacheReadInputTokens > 0 || cacheWriteInputTokens > 0) && {
            cache_read_input_tokens: parsedChunk.usage.cacheReadInputTokens,
            cache_creation_input_tokens:
              parsedChunk.usage.cacheWriteInputTokens,
```
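
As a worked example with hypothetical numbers: if Bedrock returns inputTokens: 100, outputTokens: 50, totalTokens: 1150, cacheReadInputTokens: 800, and cacheWriteInputTokens: 200, the transform now produces:

```ts
// Hypothetical output of the transform after this fix (numbers are illustrative).
const usage = {
  prompt_tokens: 1100, // 100 + 800 + 200; previously reported as just 100
  completion_tokens: 50,
  total_tokens: 1150, // Bedrock's total, which already included cache usage
  prompt_tokens_details: { cached_tokens: 800 },
  // emitted because cache tokens are present (Anthropic-style, not OpenAI compliant)
  cache_read_input_tokens: 800,
  cache_creation_input_tokens: 200,
};
```

With this change, prompt_tokens + completion_tokens again equals total_tokens; the old calculation broke that invariant whenever cache tokens were present.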

src/providers/types.ts
Lines changed: 10 additions & 0 deletions
```diff
@@ -154,6 +154,16 @@ export interface CResponse extends BaseResponse {
   prompt_tokens: number;
   completion_tokens: number;
   total_tokens: number;
+  completion_tokens_details?: {
+    accepted_prediction_tokens?: number;
+    audio_tokens?: number;
+    reasoning_tokens?: number;
+    rejected_prediction_tokens?: number;
+  };
+  prompt_tokens_details?: {
+    audio_tokens?: number;
+    cached_tokens?: number;
+  };
   /*
    * Anthropic Prompt cache token usage
    */
```
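
These new optional fields mirror OpenAI's usage object, so downstream consumers can read cache hits without casts. A sketch of one such consumer (cacheHitRatio is a hypothetical helper, not part of this commit):

```ts
// Illustrative consumer: the fraction of the prompt served from cache,
// using only the OpenAI-compliant fields added in this commit.
function cacheHitRatio(usage: {
  prompt_tokens: number;
  prompt_tokens_details?: { cached_tokens?: number };
}): number {
  const cached = usage.prompt_tokens_details?.cached_tokens ?? 0;
  return usage.prompt_tokens > 0 ? cached / usage.prompt_tokens : 0;
}
```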
