@@ -27,11 +27,9 @@ describe("ClaudeCodeHandler Token Counting", () => {
2727
2828 const tokenCount = await handler . countTokens ( content )
2929
30- // The text has approximately 13-15 tokens
31- // With no fudge factor, we expect the exact token count
32- // With the old 1.5x fudge factor, it would have been around 20-23 tokens
33- expect ( tokenCount ) . toBeLessThan ( 16 )
34- expect ( tokenCount ) . toBeGreaterThan ( 12 )
30+ // The exact token count for this text using o200k_base tokenizer is 13
31+ // With the old 1.5x fudge factor, it would have been 20 tokens
32+ expect ( tokenCount ) . toBe ( 13 )
3533 } )
3634
3735 it ( "should handle empty content" , async ( ) => {
@@ -49,10 +47,9 @@ describe("ClaudeCodeHandler Token Counting", () => {
4947
5048 const tokenCount = await handler . countTokens ( content )
5149
52- // Each block is approximately 2-3 tokens, so 6-9 tokens total
53- // With no fudge factor, expect exact count
54- expect ( tokenCount ) . toBeLessThan ( 10 ) // Would be ~15 with old 1.5x factor
55- expect ( tokenCount ) . toBeGreaterThan ( 5 )
50+ // "First block" = 2 tokens, "Second block" = 2 tokens, "Third block" = 2 tokens
51+ // Total: 6 tokens (would have been 9 with old 1.5x factor)
52+ expect ( tokenCount ) . toBe ( 6 )
5653 } )
5754
5855 it ( "should handle image blocks with conservative estimate" , async ( ) => {
@@ -74,44 +71,52 @@ describe("ClaudeCodeHandler Token Counting", () => {
7471 } )
7572
7673 it ( "should provide accurate token counts for typical messages" , async ( ) => {
77- // Simulate a typical user message with environment details
74+ // Use a simpler, predictable message for exact token counting
7875 const content : Anthropic . Messages . ContentBlockParam [ ] = [
7976 {
8077 type : "text" ,
81- text : `Hi
78+ text : "This is a simple test message with exactly predictable token count." ,
79+ } ,
80+ ]
8281
83- <environment_details>
84- # VSCode Visible Files
85- src/app.ts
86- src/utils.ts
82+ const tokenCount = await handler . countTokens ( content )
8783
88- # VSCode Open Tabs
89- src/app.ts
84+ // This specific text has exactly 12 tokens with o200k_base tokenizer
85+ // With old 1.5x factor, it would have been 18 tokens
86+ expect ( tokenCount ) . toBe ( 12 )
87+ } )
9088
91- # Current Time
92- 2024-01-01 12:00:00 PM
89+ it ( "should handle mixed content types" , async ( ) => {
90+ const content : Anthropic . Messages . ContentBlockParam [ ] = [
91+ { type : "text" , text : "Hello world" } , // 2 tokens
92+ {
93+ type : "image" ,
94+ source : {
95+ type : "base64" ,
96+ media_type : "image/jpeg" ,
97+ data : "base64data" ,
98+ } ,
99+ } , // 300 tokens (IMAGE_TOKEN_ESTIMATE)
100+ { type : "text" , text : "Goodbye" } , // 2 tokens
101+ ]
93102
94- # Current Context Size (Tokens)
95- 1000 (5%)
103+ const tokenCount = await handler . countTokens ( content )
96104
97- # Current Cost
98- $0.05
105+ // Total: 2 + 300 + 2 = 304 tokens ("Goodbye" is actually 2 tokens)
106+ expect ( tokenCount ) . toBe ( 304 )
107+ } )
99108
100- # Current Mode
101- <slug>code</slug>
102- <name>Code</name>
103- <model>claude-3-5-sonnet-20241022</model>
104- </environment_details>` ,
105- } ,
109+ it ( "should handle empty text blocks" , async ( ) => {
110+ const content : Anthropic . Messages . ContentBlockParam [ ] = [
111+ { type : "text" , text : "" } ,
112+ { type : "text" , text : "Hello" } , // 1 token
113+ { type : "text" , text : "" } ,
106114 ]
107115
108116 const tokenCount = await handler . countTokens ( content )
109117
110- // This content is approximately 100-120 tokens
111- // With no fudge factor, expect exact count
112- // With old 1.5x factor, it would have been 150-180 tokens
113- expect ( tokenCount ) . toBeLessThan ( 125 )
114- expect ( tokenCount ) . toBeGreaterThan ( 95 )
118+ // Only "Hello" contributes tokens
119+ expect ( tokenCount ) . toBe ( 1 )
115120 } )
116121 } )
117122} )
0 commit comments