Skip to content

Commit 38d5bf6

Browse files
committed
Use a WASM-based tiktoken implementation
1 parent c6f91a3 commit 38d5bf6

File tree

5 files changed

+33
-24
lines changed

5 files changed

+33
-24
lines changed

esbuild.js

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -29,15 +29,23 @@ const copyWasmFiles = {
2929
name: "copy-wasm-files",
3030
setup(build) {
3131
build.onEnd(() => {
32-
// tree sitter
33-
const sourceDir = path.join(__dirname, "node_modules", "web-tree-sitter")
34-
const targetDir = path.join(__dirname, "dist")
35-
36-
// Copy tree-sitter.wasm
37-
fs.copyFileSync(path.join(sourceDir, "tree-sitter.wasm"), path.join(targetDir, "tree-sitter.wasm"))
38-
39-
// Copy language-specific WASM files
40-
const languageWasmDir = path.join(__dirname, "node_modules", "tree-sitter-wasms", "out")
32+
const nodeModulesDir = path.join(__dirname, "node_modules")
33+
const distDir = path.join(__dirname, "dist")
34+
35+
// tiktoken
36+
fs.copyFileSync(
37+
path.join(nodeModulesDir, "tiktoken", "tiktoken_bg.wasm"),
38+
path.join(distDir, "tiktoken_bg.wasm"),
39+
)
40+
41+
// tree-sitter WASM
42+
fs.copyFileSync(
43+
path.join(nodeModulesDir, "web-tree-sitter", "tree-sitter.wasm"),
44+
path.join(distDir, "tree-sitter.wasm"),
45+
)
46+
47+
// language-specific tree-sitter WASMs
48+
const languageWasmDir = path.join(nodeModulesDir, "tree-sitter-wasms", "out")
4149
const languages = [
4250
"typescript",
4351
"tsx",
@@ -57,7 +65,7 @@ const copyWasmFiles = {
5765

5866
languages.forEach((lang) => {
5967
const filename = `tree-sitter-${lang}.wasm`
60-
fs.copyFileSync(path.join(languageWasmDir, filename), path.join(targetDir, filename))
68+
fs.copyFileSync(path.join(languageWasmDir, filename), path.join(distDir, filename))
6169
})
6270
})
6371
},

package-lock.json

Lines changed: 7 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -429,7 +429,6 @@
429429
"get-folder-size": "^5.0.0",
430430
"i18next": "^24.2.2",
431431
"isbinaryfile": "^5.0.2",
432-
"js-tiktoken": "^1.0.19",
433432
"mammoth": "^1.8.0",
434433
"monaco-vscode-textmate-theme-converter": "^0.1.7",
435434
"node-ipc": "^12.0.0",
@@ -450,6 +449,7 @@
450449
"string-similarity": "^4.0.4",
451450
"strip-ansi": "^7.1.0",
452451
"strip-bom": "^5.0.0",
452+
"tiktoken": "^1.0.21",
453453
"tmp": "^0.2.3",
454454
"tree-sitter-wasms": "^0.1.11",
455455
"turndown": "^7.2.0",

src/api/providers/base-provider.ts

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,8 @@ import { Anthropic } from "@anthropic-ai/sdk"
22
import { ApiHandler } from ".."
33
import { ModelInfo } from "../../shared/api"
44
import { ApiStream } from "../transform/stream"
5-
import { Tiktoken } from "js-tiktoken/lite"
6-
import o200kBase from "js-tiktoken/ranks/o200k_base"
5+
import { Tiktoken } from "tiktoken/lite"
6+
import o200kBase from "tiktoken/encoders/o200k_base"
77

88
// Reuse the fudge factor used in the original code
99
const TOKEN_FUDGE_FACTOR = 1.5
@@ -34,7 +34,7 @@ export abstract class BaseProvider implements ApiHandler {
3434

3535
// Lazily create and cache the encoder if it doesn't exist
3636
if (!this.encoder) {
37-
this.encoder = new Tiktoken(o200kBase)
37+
this.encoder = new Tiktoken(o200kBase.bpe_ranks, o200kBase.special_tokens, o200kBase.pat_str)
3838
}
3939

4040
// Process each content block using the cached encoder

src/core/Cline.ts

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -853,7 +853,7 @@ export class Cline extends EventEmitter<ClineEvents> {
853853
}
854854

855855
const wasRecent = lastClineMessage?.ts && Date.now() - lastClineMessage.ts < 30_000
856-
856+
857857
newUserContent.push({
858858
type: "text",
859859
text:
@@ -1050,10 +1050,13 @@ export class Cline extends EventEmitter<ClineEvents> {
10501050
const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
10511051

10521052
const modelInfo = this.api.getModel().info
1053+
10531054
const maxTokens = modelInfo.thinking
10541055
? this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
10551056
: modelInfo.maxTokens
1057+
10561058
const contextWindow = modelInfo.contextWindow
1059+
10571060
const trimmedMessages = await truncateConversationIfNeeded({
10581061
messages: this.apiConversationHistory,
10591062
totalTokens,

0 commit comments

Comments
 (0)