Skip to content

Commit 24eca3f

Browse files
hannesrudolph authored and daniel-lxs committed
fix: address PR review comments
- Extract IMAGE_TOKEN_ESTIMATE as a named constant for clarity
- Update token counting tests to use exact counts instead of ranges for deterministic testing
- Fix test expectations to match actual tokenizer output
1 parent 15531b8 commit 24eca3f

File tree

2 files changed

+44
-36
lines changed

2 files changed

+44
-36
lines changed

src/api/providers/__tests__/claude-code-token-counting.spec.ts

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,9 @@ describe("ClaudeCodeHandler Token Counting", () => {
2727

2828
const tokenCount = await handler.countTokens(content)
2929

30-
// The text has approximately 13-15 tokens
31-
// With no fudge factor, we expect the exact token count
32-
// With the old 1.5x fudge factor, it would have been around 20-23 tokens
33-
expect(tokenCount).toBeLessThan(16)
34-
expect(tokenCount).toBeGreaterThan(12)
30+
// The exact token count for this text using o200k_base tokenizer is 13
31+
// With the old 1.5x fudge factor, it would have been 20 tokens
32+
expect(tokenCount).toBe(13)
3533
})
3634

3735
it("should handle empty content", async () => {
@@ -49,10 +47,9 @@ describe("ClaudeCodeHandler Token Counting", () => {
4947

5048
const tokenCount = await handler.countTokens(content)
5149

52-
// Each block is approximately 2-3 tokens, so 6-9 tokens total
53-
// With no fudge factor, expect exact count
54-
expect(tokenCount).toBeLessThan(10) // Would be ~15 with old 1.5x factor
55-
expect(tokenCount).toBeGreaterThan(5)
50+
// "First block" = 2 tokens, "Second block" = 2 tokens, "Third block" = 2 tokens
51+
// Total: 6 tokens (would have been 9 with old 1.5x factor)
52+
expect(tokenCount).toBe(6)
5653
})
5754

5855
it("should handle image blocks with conservative estimate", async () => {
@@ -74,44 +71,52 @@ describe("ClaudeCodeHandler Token Counting", () => {
7471
})
7572

7673
it("should provide accurate token counts for typical messages", async () => {
77-
// Simulate a typical user message with environment details
74+
// Use a simpler, predictable message for exact token counting
7875
const content: Anthropic.Messages.ContentBlockParam[] = [
7976
{
8077
type: "text",
81-
text: `Hi
78+
text: "This is a simple test message with exactly predictable token count.",
79+
},
80+
]
8281

83-
<environment_details>
84-
# VSCode Visible Files
85-
src/app.ts
86-
src/utils.ts
82+
const tokenCount = await handler.countTokens(content)
8783

88-
# VSCode Open Tabs
89-
src/app.ts
84+
// This specific text has exactly 12 tokens with o200k_base tokenizer
85+
// With old 1.5x factor, it would have been 18 tokens
86+
expect(tokenCount).toBe(12)
87+
})
9088

91-
# Current Time
92-
2024-01-01 12:00:00 PM
89+
it("should handle mixed content types", async () => {
90+
const content: Anthropic.Messages.ContentBlockParam[] = [
91+
{ type: "text", text: "Hello world" }, // 2 tokens
92+
{
93+
type: "image",
94+
source: {
95+
type: "base64",
96+
media_type: "image/jpeg",
97+
data: "base64data",
98+
},
99+
}, // 300 tokens (IMAGE_TOKEN_ESTIMATE)
100+
{ type: "text", text: "Goodbye" }, // 2 tokens
101+
]
93102

94-
# Current Context Size (Tokens)
95-
1000 (5%)
103+
const tokenCount = await handler.countTokens(content)
96104

97-
# Current Cost
98-
$0.05
105+
// Total: 2 + 300 + 2 = 304 tokens ("Goodbye" is actually 2 tokens)
106+
expect(tokenCount).toBe(304)
107+
})
99108

100-
# Current Mode
101-
<slug>code</slug>
102-
<name>Code</name>
103-
<model>claude-3-5-sonnet-20241022</model>
104-
</environment_details>`,
105-
},
109+
it("should handle empty text blocks", async () => {
110+
const content: Anthropic.Messages.ContentBlockParam[] = [
111+
{ type: "text", text: "" },
112+
{ type: "text", text: "Hello" }, // 1 token
113+
{ type: "text", text: "" },
106114
]
107115

108116
const tokenCount = await handler.countTokens(content)
109117

110-
// This content is approximately 100-120 tokens
111-
// With no fudge factor, expect exact count
112-
// With old 1.5x factor, it would have been 150-180 tokens
113-
expect(tokenCount).toBeLessThan(125)
114-
expect(tokenCount).toBeGreaterThan(95)
118+
// Only "Hello" contributes tokens
119+
expect(tokenCount).toBe(1)
115120
})
116121
})
117122
})

src/api/providers/claude-code.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ import { ApiHandlerOptions } from "../../shared/api"
1010
import { Tiktoken } from "tiktoken/lite"
1111
import o200kBase from "tiktoken/encoders/o200k_base"
1212

13+
// Conservative token estimate for images (even though Claude Code doesn't support them)
14+
// This matches the estimate used in src/utils/tiktoken.ts for consistency
15+
const IMAGE_TOKEN_ESTIMATE = 300
16+
1317
export class ClaudeCodeHandler extends BaseProvider implements ApiHandler {
1418
private options: ApiHandlerOptions
1519
private encoder: Tiktoken | null = null
@@ -176,8 +180,7 @@ export class ClaudeCodeHandler extends BaseProvider implements ApiHandler {
176180
}
177181
} else if (block.type === "image") {
178182
// Claude Code doesn't support images, but we handle them just in case
179-
// Use a conservative estimate
180-
totalTokens += 300
183+
totalTokens += IMAGE_TOKEN_ESTIMATE
181184
}
182185
}
183186

0 commit comments

Comments
 (0)