Skip to content

Commit 449b9ef

Browse files
authored
Merge pull request #642 from websentry-ai/vs/fix-anthropic-cache-tokens
Fixes the cached token count for unbound provider models
2 parents 117c4ab + d643359 commit 449b9ef

File tree

2 files changed

+60
-15
lines changed

2 files changed

+60
-15
lines changed

src/api/providers/__tests__/unbound.test.ts

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { UnboundHandler } from "../unbound"
22
import { ApiHandlerOptions } from "../../../shared/api"
3-
import OpenAI from "openai"
43
import { Anthropic } from "@anthropic-ai/sdk"
54

65
// Mock OpenAI client
@@ -16,6 +15,7 @@ jest.mock("openai", () => {
1615
create: (...args: any[]) => {
1716
const stream = {
1817
[Symbol.asyncIterator]: async function* () {
18+
// First chunk with content
1919
yield {
2020
choices: [
2121
{
@@ -24,13 +24,25 @@ jest.mock("openai", () => {
2424
},
2525
],
2626
}
27+
// Second chunk with usage data
2728
yield {
28-
choices: [
29-
{
30-
delta: {},
31-
index: 0,
32-
},
33-
],
29+
choices: [{ delta: {}, index: 0 }],
30+
usage: {
31+
prompt_tokens: 10,
32+
completion_tokens: 5,
33+
total_tokens: 15,
34+
},
35+
}
36+
// Third chunk with cache usage data
37+
yield {
38+
choices: [{ delta: {}, index: 0 }],
39+
usage: {
40+
prompt_tokens: 8,
41+
completion_tokens: 4,
42+
total_tokens: 12,
43+
cache_creation_input_tokens: 3,
44+
cache_read_input_tokens: 2,
45+
},
3446
}
3547
},
3648
}
@@ -95,19 +107,37 @@ describe("UnboundHandler", () => {
95107
},
96108
]
97109

98-
it("should handle streaming responses", async () => {
110+
it("should handle streaming responses with text and usage data", async () => {
99111
const stream = handler.createMessage(systemPrompt, messages)
100-
const chunks: any[] = []
112+
const chunks: Array<{ type: string } & Record<string, any>> = []
101113
for await (const chunk of stream) {
102114
chunks.push(chunk)
103115
}
104116

105-
expect(chunks.length).toBe(1)
117+
expect(chunks.length).toBe(3)
118+
119+
// Verify text chunk
106120
expect(chunks[0]).toEqual({
107121
type: "text",
108122
text: "Test response",
109123
})
110124

125+
// Verify regular usage data
126+
expect(chunks[1]).toEqual({
127+
type: "usage",
128+
inputTokens: 10,
129+
outputTokens: 5,
130+
})
131+
132+
// Verify usage data with cache information
133+
expect(chunks[2]).toEqual({
134+
type: "usage",
135+
inputTokens: 8,
136+
outputTokens: 4,
137+
cacheWriteTokens: 3,
138+
cacheReadTokens: 2,
139+
})
140+
111141
expect(mockCreate).toHaveBeenCalledWith(
112142
expect.objectContaining({
113143
model: "claude-3-5-sonnet-20241022",

src/api/providers/unbound.ts

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,12 @@ import OpenAI from "openai"
33
import { ApiHandler, SingleCompletionHandler } from "../"
44
import { ApiHandlerOptions, ModelInfo, UnboundModelId, unboundDefaultModelId, unboundModels } from "../../shared/api"
55
import { convertToOpenAiMessages } from "../transform/openai-format"
6-
import { ApiStream } from "../transform/stream"
6+
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
7+
8+
interface UnboundUsage extends OpenAI.CompletionUsage {
9+
cache_creation_input_tokens?: number
10+
cache_read_input_tokens?: number
11+
}
712

813
export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
914
private options: ApiHandlerOptions
@@ -96,7 +101,7 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
96101

97102
for await (const chunk of completion) {
98103
const delta = chunk.choices[0]?.delta
99-
const usage = chunk.usage
104+
const usage = chunk.usage as UnboundUsage
100105

101106
if (delta?.content) {
102107
yield {
@@ -106,11 +111,21 @@ export class UnboundHandler implements ApiHandler, SingleCompletionHandler {
106111
}
107112

108113
if (usage) {
109-
yield {
114+
const usageData: ApiStreamUsageChunk = {
110115
type: "usage",
111-
inputTokens: usage?.prompt_tokens || 0,
112-
outputTokens: usage?.completion_tokens || 0,
116+
inputTokens: usage.prompt_tokens || 0,
117+
outputTokens: usage.completion_tokens || 0,
113118
}
119+
120+
// Only add cache tokens if they exist
121+
if (usage.cache_creation_input_tokens) {
122+
usageData.cacheWriteTokens = usage.cache_creation_input_tokens
123+
}
124+
if (usage.cache_read_input_tokens) {
125+
usageData.cacheReadTokens = usage.cache_read_input_tokens
126+
}
127+
128+
yield usageData
114129
}
115130
}
116131
}

0 commit comments

Comments (0)