Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions packages/__tests__/cost/usageProcessor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,81 @@ describe("AnthropicUsageProcessor", () => {
});
});

it("should extract cache_creation_input_tokens without TTL breakdown", async () => {
  // Older Anthropic API payloads report only the aggregate
  // cache_creation_input_tokens field, with no per-TTL
  // cache_creation.ephemeral_5m_input_tokens breakdown object.
  const usage = {
    input_tokens: 100,
    output_tokens: 50,
    cache_creation_input_tokens: 500,
    cache_read_input_tokens: 200,
    // Deliberately omits the cache_creation TTL breakdown.
  };
  const responseBody = JSON.stringify({
    id: "msg_test",
    type: "message",
    role: "assistant",
    content: [{ type: "text", text: "Hello" }],
    model: "claude-sonnet-4",
    stop_reason: "end_turn",
    usage,
  });

  const result = await processor.parse({
    responseBody,
    isStream: false,
    model: "claude-sonnet-4",
  });

  expect(result.error).toBeNull();
  expect(result.data).toEqual({
    input: 100,
    output: 50,
    cacheDetails: {
      cachedInput: 200,
      // The aggregate total is used as the 5m write count when no
      // breakdown is present.
      write5m: 500,
    },
  });
});

it("should prefer TTL breakdown over total cache_creation_input_tokens", async () => {
  // When a response carries both the aggregate total and the per-TTL
  // breakdown, the breakdown values take precedence over the total.
  const usage = {
    input_tokens: 100,
    output_tokens: 50,
    cache_creation_input_tokens: 600, // Total (should be 5m + 1h)
    cache_read_input_tokens: 200,
    cache_creation: {
      ephemeral_5m_input_tokens: 400,
      ephemeral_1h_input_tokens: 200,
    },
  };
  const responseBody = JSON.stringify({
    id: "msg_test",
    type: "message",
    role: "assistant",
    content: [{ type: "text", text: "Hello" }],
    model: "claude-sonnet-4",
    stop_reason: "end_turn",
    usage,
  });

  const result = await processor.parse({
    responseBody,
    isStream: false,
    model: "claude-sonnet-4",
  });

  expect(result.error).toBeNull();
  expect(result.data).toEqual({
    input: 100,
    output: 50,
    cacheDetails: {
      cachedInput: 200,
      // Breakdown fields win over the aggregate total.
      write5m: 400,
      write1h: 200,
    },
  });
});

it("usage processing snapshot", async () => {
const testCases = [
{
Expand Down
12 changes: 12 additions & 0 deletions packages/cost/usage/anthropicUsageProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ export class AnthropicUsageProcessor implements IUsageProcessor {
const outputTokens = usage.output_tokens ?? 0;
const cacheReadInputTokens = usage.cache_read_input_tokens ?? 0;

// Total cache creation tokens (always present when caching occurs)
const cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;

// TTL breakdown (may not be present in all API versions/responses)
const cacheCreation = usage.cache_creation || {};
const ephemeral5mTokens = cacheCreation.ephemeral_5m_input_tokens ?? 0;
const ephemeral1hTokens = cacheCreation.ephemeral_1h_input_tokens ?? 0;
Expand All @@ -119,13 +123,21 @@ export class AnthropicUsageProcessor implements IUsageProcessor {

if (
cacheReadInputTokens > 0 ||
cacheCreationInputTokens > 0 ||
ephemeral5mTokens > 0 ||
ephemeral1hTokens > 0
) {
modelUsage.cacheDetails = { cachedInput: cacheReadInputTokens };

// Use TTL breakdown if available, otherwise fall back to total cache creation tokens
// This handles cases where cache_creation_input_tokens is set but TTL breakdown is not
const ttlBreakdownTotal = ephemeral5mTokens + ephemeral1hTokens;

if (ephemeral5mTokens > 0) {
modelUsage.cacheDetails.write5m = ephemeral5mTokens;
} else if (cacheCreationInputTokens > 0 && ttlBreakdownTotal === 0) {
// No TTL breakdown provided, use total cache creation tokens as 5m (default TTL)
modelUsage.cacheDetails.write5m = cacheCreationInputTokens;
}

if (ephemeral1hTokens > 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ export function toOpenAI(response: AnthropicResponseBody): OpenAIResponseBody {
cache_write_details: {
write_5m_tokens:
anthropicUsage.cache_creation?.ephemeral_5m_input_tokens ??
cachedTokens ??
cacheWriteTokens ??
0,
write_1h_tokens:
anthropicUsage.cache_creation?.ephemeral_1h_input_tokens ?? 0,
Expand Down
Loading