
Commit c52ccc4: Count tokens worker

1 parent dcef0fc commit c52ccc4

9 files changed: 304 additions & 58 deletions

esbuild.js

Lines changed: 20 additions & 11 deletions
@@ -167,7 +167,7 @@ const extensionConfig = {
 		{
 			name: "alias-plugin",
 			setup(build) {
-				build.onResolve({ filter: /^pkce-challenge$/ }, (args) => {
+				build.onResolve({ filter: /^pkce-challenge$/ }, (_args) => {
 					return { path: require.resolve("pkce-challenge/dist/index.browser.js") }
 				})
 			},
@@ -181,22 +181,31 @@ const extensionConfig = {
 	external: ["vscode"],
 }

+const workerConfig = {
+	bundle: true,
+	minify: production,
+	sourcemap: !production,
+	logLevel: "silent",
+	entryPoints: ["src/workers/countTokens.ts"],
+	format: "cjs",
+	sourcesContent: false,
+	platform: "node",
+	outdir: "dist/workers",
+}
+
 async function main() {
-	const extensionCtx = await esbuild.context(extensionConfig)
+	const [extensionCtx, workerCtx] = await Promise.all([
+		esbuild.context(extensionConfig),
+		esbuild.context(workerConfig),
+	])

 	if (watch) {
-		// Start the esbuild watcher
-		await extensionCtx.watch()
-
-		// Copy and watch locale files
-		console.log("Copying locale files initially...")
+		await Promise.all([extensionCtx.watch(), workerCtx.watch()])
 		copyLocaleFiles()
-
-		// Set up the watcher for locale files
 		setupLocaleWatcher()
 	} else {
-		await extensionCtx.rebuild()
-		await extensionCtx.dispose()
+		await Promise.all([extensionCtx.rebuild(), workerCtx.rebuild()])
+		await Promise.all([extensionCtx.dispose(), workerCtx.dispose()])
 	}
 }

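The new workerConfig bundles src/workers/countTokens.ts into dist/workers as a standalone CommonJS file, because workerpool loads a worker by file path at runtime (here resolved as __dirname + "/workers/countTokens.js" from the bundled extension in dist/) rather than from inside the extension bundle. That worker entry is not rendered in this section; the following is only a minimal sketch of what it plausibly contains, assuming it wraps the shared tiktoken() helper and replies with a success/error envelope (both assumptions, not the committed code):

// src/workers/countTokens.ts (sketch, not the committed file)
import workerpool from "workerpool"
import { Anthropic } from "@anthropic-ai/sdk"

import { tiktoken } from "../utils/tiktoken"

async function countTokens(content: Anthropic.Messages.ContentBlockParam[]) {
	try {
		// Run the in-process tiktoken counter inside the worker.
		const count = await tiktoken(content)
		return { success: true, count }
	} catch (error) {
		return { success: false, error: error instanceof Error ? error.message : String(error) }
	}
}

// Register the method so the main side can call pool.exec("countTokens", [content]).
workerpool.worker({ countTokens })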
package-lock.json

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default.

package.json

Lines changed: 1 addition & 0 deletions
@@ -433,6 +433,7 @@
 		"turndown": "^7.2.0",
 		"vscode-material-icons": "^0.1.1",
 		"web-tree-sitter": "^0.22.6",
+		"workerpool": "^9.2.0",
 		"zod": "^3.23.8"
 	},
 	"devDependencies": {

src/api/providers/base-provider.ts

Lines changed: 11 additions & 47 deletions
@@ -1,66 +1,30 @@
 import { Anthropic } from "@anthropic-ai/sdk"
-import { ApiHandler } from ".."
+
 import { ModelInfo } from "../../shared/api"
-import { ApiStream } from "../transform/stream"
-import { Tiktoken } from "tiktoken/lite"
-import o200kBase from "tiktoken/encoders/o200k_base"

-// Reuse the fudge factor used in the original code
-const TOKEN_FUDGE_FACTOR = 1.5
+import { ApiHandler } from "../index"
+import { ApiStream } from "../transform/stream"
+import { countTokens } from "../../utils/countTokens"

 /**
- * Base class for API providers that implements common functionality
+ * Base class for API providers that implements common functionality.
  */
 export abstract class BaseProvider implements ApiHandler {
-	// Cache the Tiktoken encoder instance since it's stateless
-	private encoder: Tiktoken | null = null
 	abstract createMessage(systemPrompt: string, messages: Anthropic.Messages.MessageParam[]): ApiStream
 	abstract getModel(): { id: string; info: ModelInfo }

 	/**
-	 * Default token counting implementation using tiktoken
-	 * Providers can override this to use their native token counting endpoints
-	 *
-	 * Uses a cached Tiktoken encoder instance for performance since it's stateless.
-	 * The encoder is created lazily on first use and reused for subsequent calls.
+	 * Default token counting implementation using tiktoken.
+	 * Providers can override this to use their native token counting endpoints.
 	 *
 	 * @param content The content to count tokens for
 	 * @returns A promise resolving to the token count
 	 */
-	async countTokens(content: Array<Anthropic.Messages.ContentBlockParam>): Promise<number> {
-		if (!content || content.length === 0) return 0
-
-		let totalTokens = 0
-
-		// Lazily create and cache the encoder if it doesn't exist
-		if (!this.encoder) {
-			this.encoder = new Tiktoken(o200kBase.bpe_ranks, o200kBase.special_tokens, o200kBase.pat_str)
-		}
-
-		// Process each content block using the cached encoder
-		for (const block of content) {
-			if (block.type === "text") {
-				// Use tiktoken for text token counting
-				const text = block.text || ""
-
-				if (text.length > 0) {
-					const tokens = this.encoder.encode(text)
-					totalTokens += tokens.length
-				}
-			} else if (block.type === "image") {
-				// For images, calculate based on data size
-				const imageSource = block.source
-
-				if (imageSource && typeof imageSource === "object" && "data" in imageSource) {
-					const base64Data = imageSource.data as string
-					totalTokens += Math.ceil(Math.sqrt(base64Data.length))
-				} else {
-					totalTokens += 300 // Conservative estimate for unknown images
-				}
-			}
+	async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
+		if (content.length === 0) {
+			return 0
 		}

-		// Add a fudge factor to account for the fact that tiktoken is not always accurate
-		return Math.ceil(totalTokens * TOKEN_FUDGE_FACTOR)
+		return countTokens(content, { useWorker: true })
 	}
 }
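The docstring keeps the note that providers can override countTokens with a native token-counting endpoint. A hypothetical override might look like the sketch below; nativeCountTokens is invented for illustration, and the fallback to super.countTokens() is the part the base class actually provides.

// Sketch only: nativeCountTokens is a made-up placeholder, not a real provider API.
import { Anthropic } from "@anthropic-ai/sdk"
import { BaseProvider } from "./base-provider"

abstract class NativeCountingProvider extends BaseProvider {
	override async countTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
		try {
			// Prefer the provider's own token-counting endpoint when available.
			return await this.nativeCountTokens(content)
		} catch {
			// Fall back to the shared tiktoken/worker implementation.
			return super.countTokens(content)
		}
	}

	protected abstract nativeCountTokens(content: Anthropic.Messages.ContentBlockParam[]): Promise<number>
}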
src/utils/__tests__/tiktoken.test.ts

Lines changed: 142 additions & 0 deletions

@@ -0,0 +1,142 @@
+// npx jest src/utils/__tests__/tiktoken.test.ts
+
+import { tiktoken } from "../tiktoken"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+describe("tiktoken", () => {
+	it("should return 0 for empty content array", async () => {
+		const result = await tiktoken([])
+		expect(result).toBe(0)
+	})
+
+	it("should correctly count tokens for text content", async () => {
+		const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "Hello world" }]
+
+		const result = await tiktoken(content)
+		// "Hello world" encodes to a small, stable count; with the 1.5x fudge
+		// factor applied, the expected result is 3.
+		expect(result).toEqual(3)
+	})
+
+	it("should handle empty text content", async () => {
+		const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "" }]
+
+		const result = await tiktoken(content)
+		expect(result).toBe(0)
+	})
+
+	it("should handle missing text content", async () => {
+		// Using 'as any' to bypass TypeScript's type checking for this test case
+		// since we're specifically testing how the function handles undefined text
+		const content = [{ type: "text" }] as any as Anthropic.Messages.ContentBlockParam[]
+
+		const result = await tiktoken(content)
+		expect(result).toBe(0)
+	})
+
+	it("should correctly count tokens for image content with data", async () => {
+		const base64Data =
+			"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
+		const content: Anthropic.Messages.ContentBlockParam[] = [
+			{
+				type: "image",
+				source: {
+					type: "base64",
+					media_type: "image/png",
+					data: base64Data,
+				},
+			},
+		]
+
+		const result = await tiktoken(content)
+		// For images, the estimate is based on the square root of the data length,
+		// scaled up by the fudge factor
+		const expectedMinTokens = Math.ceil(Math.sqrt(base64Data.length))
+		expect(result).toBeGreaterThanOrEqual(expectedMinTokens)
+	})
+
+	it("should use conservative estimate for image content without data", async () => {
+		// Using 'as any' to bypass TypeScript's type checking for this test case
+		// since we're specifically testing the fallback behavior
+		const content = [
+			{
+				type: "image",
+				source: {
+					type: "base64",
+					media_type: "image/png",
+					// data is intentionally missing to test fallback
+				},
+			},
+		] as any as Anthropic.Messages.ContentBlockParam[]
+
+		const result = await tiktoken(content)
+		// The conservative estimate is 300 tokens; the fudge factor only increases it
+		const expectedMinTokens = 300
+		expect(result).toBeGreaterThanOrEqual(expectedMinTokens)
+	})
+
+	it("should correctly count tokens for mixed content", async () => {
+		const base64Data =
+			"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8BQDwAEhQGAhKmMIQAAAABJRU5ErkJggg=="
+		const content: Anthropic.Messages.ContentBlockParam[] = [
+			{ type: "text", text: "Hello world" },
+			{
+				type: "image",
+				source: {
+					type: "base64",
+					media_type: "image/png",
+					data: base64Data,
+				},
+			},
+			{ type: "text", text: "Goodbye world" },
+		]
+
+		const result = await tiktoken(content)
+		// We expect a positive token count for mixed content
+		expect(result).toBeGreaterThan(0)
+	})
+
+	it("should apply a fudge factor to the token count", async () => {
+		// The fudge factor is deterministic, so repeated calls must agree
+		const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "Test" }]
+
+		const result = await tiktoken(content)
+
+		// Run the function again with the same content to get a consistent result
+		const result2 = await tiktoken(content)
+
+		// Both calls should return the same token count
+		expect(result).toBe(result2)
+
+		// The result should be greater than 0
+		expect(result).toBeGreaterThan(0)
+	})
+
+	it("should reuse the encoder for multiple calls", async () => {
+		// We can't directly test the caching behavior without mocking,
+		// but we can test that multiple calls with the same content return the same result
+		// which indirectly verifies the encoder is working consistently
+
+		const content: Anthropic.Messages.ContentBlockParam[] = [{ type: "text", text: "Hello world" }]
+
+		// Time the first call which should create the encoder
+		const startTime1 = performance.now()
+		const result1 = await tiktoken(content)
+		const endTime1 = performance.now()
+		const duration1 = endTime1 - startTime1
+
+		// Time the second call which should reuse the encoder
+		const startTime2 = performance.now()
+		const result2 = await tiktoken(content)
+		const endTime2 = performance.now()
+		const duration2 = endTime2 - startTime2
+
+		// Both calls should return the same token count
+		expect(result1).toBe(result2)
+
+		// This is a loose test and might occasionally fail due to system load,
+		// but generally the second call should be faster or similar in speed
+		// since it reuses the encoder
+		expect(duration2).toBeLessThanOrEqual(duration1 * 1.5)
+	})
+})

src/utils/countTokens.ts

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import workerpool from "workerpool"
+
+import { countTokensResultSchema } from "../workers/types"
+import { tiktoken } from "./tiktoken"
+
+let pool: workerpool.Pool | null | undefined = undefined
+
+export type CountTokensOptions = {
+	useWorker?: boolean
+}
+
+export async function countTokens(
+	content: Anthropic.Messages.ContentBlockParam[],
+	{ useWorker = true }: CountTokensOptions = {},
+): Promise<number> {
+	// Lazily create the worker pool if it doesn't exist.
+	if (useWorker && typeof pool === "undefined") {
+		pool = workerpool.pool(__dirname + "/workers/countTokens.js", {
+			maxWorkers: 1,
+			maxQueueSize: 10,
+		})
+	}
+
+	// If the worker pool doesn't exist or the caller doesn't want to use it,
+	// then use the non-worker implementation.
+	if (!useWorker || !pool) {
+		return tiktoken(content)
+	}
+
+	try {
+		const data = await pool.exec("countTokens", [content])
+		const result = countTokensResultSchema.parse(data)
+
+		if (!result.success) {
+			throw new Error(result.error)
+		}
+
+		return result.count
+	} catch (error) {
+		pool = null
+		console.error(error)
+		return tiktoken(content)
+	}
+}

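countTokens.ts validates the worker's reply with countTokensResultSchema from src/workers/types, which is also not rendered in this section. Inferring only from how the result is consumed (result.success, result.count, result.error) and from the zod dependency already in package.json, the schema plausibly looks something like the sketch below; this is an assumption, not the committed file.

// src/workers/types.ts (sketch, inferred shape)
import { z } from "zod"

export const countTokensResultSchema = z.discriminatedUnion("success", [
	z.object({ success: z.literal(true), count: z.number() }),
	z.object({ success: z.literal(false), error: z.string() }),
])

export type CountTokensResult = z.infer<typeof countTokensResultSchema>

Parsing the reply on the main side means a crashed or misbehaving worker surfaces as a thrown error, which the catch block above turns into a reset of the pool and a fallback to the in-process path.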
src/utils/tiktoken.ts

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+import { Anthropic } from "@anthropic-ai/sdk"
+import { Tiktoken } from "tiktoken/lite"
+import o200kBase from "tiktoken/encoders/o200k_base"
+
+const TOKEN_FUDGE_FACTOR = 1.5
+
+let encoder: Tiktoken | null = null
+
+export async function tiktoken(content: Anthropic.Messages.ContentBlockParam[]): Promise<number> {
+	if (content.length === 0) {
+		return 0
+	}
+
+	let totalTokens = 0
+
+	// Lazily create and cache the encoder if it doesn't exist.
+	if (!encoder) {
+		encoder = new Tiktoken(o200kBase.bpe_ranks, o200kBase.special_tokens, o200kBase.pat_str)
+	}
+
+	// Process each content block using the cached encoder.
+	for (const block of content) {
+		if (block.type === "text") {
+			const text = block.text || ""
+
+			if (text.length > 0) {
+				const tokens = encoder.encode(text)
+				totalTokens += tokens.length
+			}
+		} else if (block.type === "image") {
+			// For images, calculate based on data size.
+			const imageSource = block.source
+
+			if (imageSource && typeof imageSource === "object" && "data" in imageSource) {
+				const base64Data = imageSource.data as string
+				totalTokens += Math.ceil(Math.sqrt(base64Data.length))
+			} else {
+				totalTokens += 300 // Conservative estimate for unknown images
+			}
+		}
+	}
+
+	// Add a fudge factor to account for the fact that tiktoken is not always
+	// accurate.
+	return Math.ceil(totalTokens * TOKEN_FUDGE_FACTOR)
+}

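To tie the pieces together, here is a short usage sketch, under the assumption that it lives somewhere under src/ so the relative imports resolve; the text literal is illustrative. countTokens() prefers the worker pool and falls back to the in-process path on failure, while { useWorker: false } or a direct tiktoken() call skips the worker entirely.

import { countTokens } from "./utils/countTokens"
import { tiktoken } from "./utils/tiktoken"

async function demo() {
	const content = [{ type: "text" as const, text: "How many tokens is this?" }]

	// Preferred path: offload encoding to the worker pool.
	const viaWorker = await countTokens(content, { useWorker: true })

	// Same arithmetic without the worker: handy in tests or if the worker bundle is unavailable.
	const inProcess = await countTokens(content, { useWorker: false })
	const direct = await tiktoken(content)

	console.log({ viaWorker, inProcess, direct }) // All three should agree.
}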