Skip to content

Commit 80ef4f2

Browse files
committed
fix: add tokenizer fallback for unknown models and fix test paths
Changes tokenizer to gracefully handle unknown model names by falling back to a similar model's tokenizer with a warning, instead of throwing an error. This prevents crashes when new models are used before tokenizer support is added. Also fixes runtime tests on macOS by resolving symlinks in temp paths (/tmp -> /private/tmp) to match git worktree paths.
1 parent 1356b8f commit 80ef4f2

File tree

3 files changed

+46
-8
lines changed

3 files changed

+46
-8
lines changed

src/utils/main/tokenizer.ts

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ const tokenCountCache = new LRUCache<string, number>({
3535
sizeCalculation: () => 1,
3636
});
3737

38-
function normalizeModelKey(modelName: string): ModelName {
38+
function normalizeModelKey(modelName: string): ModelName | null {
3939
assert(
4040
typeof modelName === "string" && modelName.length > 0,
4141
"Model name must be a non-empty string"
@@ -46,11 +46,38 @@ function normalizeModelKey(modelName: string): ModelName {
4646
override ?? (modelName.includes(":") ? modelName.replace(":", "/") : modelName);
4747

4848
if (!(normalized in models)) {
49-
throw new Error(`Invalid model string: ${modelName}`);
49+
// Return null for unknown models - caller can decide to fallback or error
50+
return null;
5051
}
5152
return normalized as ModelName;
5253
}
5354

55+
/**
 * Resolves a model string to a ModelName, substituting a similar model's
 * tokenizer when the model is unknown so that token counting never throws for
 * a model that merely lacks tokenizer support.
 *
 * The provider is taken from the text before the first ":" or "/" separator,
 * because model strings may arrive in either "provider:model" or
 * "provider/model" form. Unknown Anthropic models fall back to the Claude
 * tokenizer; every other provider (including strings with no provider prefix)
 * falls back to the OpenAI tokenizer.
 *
 * A warning is always logged on fallback, since the resulting token counts are
 * only approximate.
 *
 * @param modelString - Model identifier as supplied by the caller.
 * @returns The known ModelName, or the fallback model's name when unknown.
 */
function resolveModelName(modelString: string): ModelName {
  const knownModel = normalizeModelKey(modelString);
  if (knownModel) {
    return knownModel;
  }

  // Split on either separator: "anthropic:foo" and "anthropic/foo" both yield "anthropic".
  const provider = modelString.split(/[:/]/, 1)[0];
  const fallbackModel =
    provider === "anthropic" ? "anthropic/claude-sonnet-4.5" : "openai/gpt-5";

  console.warn(
    `[tokenizer] Unknown model '${modelString}', using ${fallbackModel} tokenizer for approximate token counting`
  );

  // NOTE(review): assumes both fallback ids are registered model keys — confirm against the models table.
  return fallbackModel as ModelName;
}
80+
5481
function resolveEncoding(modelName: ModelName): Promise<string> {
5582
let promise = encodingPromises.get(modelName);
5683
if (!promise) {
@@ -116,13 +143,17 @@ export function loadTokenizerModules(
116143
return Promise.allSettled(
117144
modelsToWarm.map((modelString) => {
118145
const modelName = normalizeModelKey(modelString);
146+
// Reject unknown models; Promise.allSettled records the failure without blocking the other warmups
147+
if (!modelName) {
148+
return Promise.reject(new Error(`Unknown model: ${modelString}`));
149+
}
119150
return resolveEncoding(modelName);
120151
})
121152
);
122153
}
123154

124155
export async function getTokenizerForModel(modelString: string): Promise<Tokenizer> {
125-
const modelName = normalizeModelKey(modelString);
156+
const modelName = resolveModelName(modelString);
126157
const encodingName = await resolveEncoding(modelName);
127158

128159
return {
@@ -132,13 +163,13 @@ export async function getTokenizerForModel(modelString: string): Promise<Tokeniz
132163
}
133164

134165
export function countTokens(modelString: string, text: string): Promise<number> {
135-
const modelName = normalizeModelKey(modelString);
166+
const modelName = resolveModelName(modelString);
136167
return countTokensInternal(modelName, text);
137168
}
138169

139170
export function countTokensBatch(modelString: string, texts: string[]): Promise<number[]> {
140171
assert(Array.isArray(texts), "Batch token counting expects an array of strings");
141-
const modelName = normalizeModelKey(modelString);
172+
const modelName = resolveModelName(modelString);
142173
return Promise.all(texts.map((text) => countTokensInternal(modelName, text)));
143174
}
144175

tests/runtime/runtime.test.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77

88
// Jest globals are available automatically - no need to import
9+
import * as os from "os";
910
import * as path from "path";
1011
import { shouldRunIntegrationTests } from "../testUtils";
1112
import {
@@ -53,7 +54,8 @@ describeIntegration("Runtime integration tests", () => {
5354
({ type }) => {
5455
// Helper to create runtime for this test type
5556
// Use a base working directory - TestWorkspace will create subdirectories as needed
56-
const getBaseWorkdir = () => (type === "ssh" ? sshConfig!.workdir : "/tmp");
57+
// For local runtime, use os.tmpdir() which matches where TestWorkspace creates directories
58+
const getBaseWorkdir = () => (type === "ssh" ? sshConfig!.workdir : os.tmpdir());
5759
const createRuntime = (): Runtime => createTestRuntime(type, getBaseWorkdir(), sshConfig);
5860

5961
describe("exec() - Command execution", () => {

tests/runtime/test-helpers.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44

55
import * as fs from "fs/promises";
6+
import { realpathSync } from "fs";
67
import * as os from "os";
78
import * as path from "path";
89
import type { Runtime } from "@/runtime/Runtime";
@@ -25,7 +26,9 @@ export function createTestRuntime(
2526
): Runtime {
2627
switch (type) {
2728
case "local":
28-
return new LocalRuntime(workdir);
29+
// Resolve symlinks (e.g., /tmp -> /private/tmp on macOS) to match git worktree paths
30+
const resolvedWorkdir = realpathSync(workdir);
31+
return new LocalRuntime(resolvedWorkdir);
2932
case "ssh":
3033
if (!sshConfig) {
3134
throw new Error("SSH config required for SSH runtime");
@@ -81,7 +84,9 @@ export class TestWorkspace {
8184
return new TestWorkspace(runtime, workspacePath, true);
8285
} else {
8386
// For local, use temp directory
84-
const workspacePath = await fs.mkdtemp(path.join(os.tmpdir(), "runtime-test-"));
87+
// Resolve symlinks (e.g., /tmp -> /private/tmp on macOS) to avoid git worktree path mismatches
88+
const tempPath = await fs.mkdtemp(path.join(os.tmpdir(), "runtime-test-"));
89+
const workspacePath = await fs.realpath(tempPath);
8590
return new TestWorkspace(runtime, workspacePath, false);
8691
}
8792
}

0 commit comments

Comments
 (0)