@@ -35,7 +35,7 @@ const tokenCountCache = new LRUCache<string, number>({
   sizeCalculation: () => 1,
 });
 
-function normalizeModelKey(modelName: string): ModelName {
+function normalizeModelKey(modelName: string): ModelName | null {
   assert(
     typeof modelName === "string" && modelName.length > 0,
     "Model name must be a non-empty string"
@@ -46,11 +46,38 @@ function normalizeModelKey(modelName: string): ModelName {
     override ?? (modelName.includes(":") ? modelName.replace(":", "/") : modelName);
 
   if (!(normalized in models)) {
-    throw new Error(`Invalid model string: ${modelName}`);
+    // Return null for unknown models; the caller can decide to fall back or throw.
+    return null;
   }
   return normalized as ModelName;
 }
 
+/**
+ * Resolves a model string to a ModelName, falling back to a similar model when the
+ * string is unknown, and logs a warning whenever a fallback is applied.
+ */
+function resolveModelName(modelString: string): ModelName {
+  let modelName = normalizeModelKey(modelString);
+
+  if (!modelName) {
+    const provider = modelString.split(":")[0] || "openai";
+    const fallbackModel =
+      provider === "anthropic"
+        ? "anthropic/claude-sonnet-4.5"
+        : provider === "openai"
+          ? "openai/gpt-5"
+          : "openai/gpt-5";
+
+    console.warn(
+      `[tokenizer] Unknown model '${modelString}', using ${fallbackModel} tokenizer for approximate token counting`
+    );
+
+    modelName = fallbackModel as ModelName;
+  }
+
+  return modelName;
+}
+
 function resolveEncoding(modelName: ModelName): Promise<string> {
   let promise = encodingPromises.get(modelName);
   if (!promise) {
@@ -116,13 +143,17 @@ export function loadTokenizerModules(
   return Promise.allSettled(
     modelsToWarm.map((modelString) => {
       const modelName = normalizeModelKey(modelString);
+      // Surface unknown models as rejected results during warmup instead of throwing.
+      if (!modelName) {
+        return Promise.reject(new Error(`Unknown model: ${modelString}`));
+      }
       return resolveEncoding(modelName);
     })
   );
 }
123154
 export async function getTokenizerForModel(modelString: string): Promise<Tokenizer> {
-  const modelName = normalizeModelKey(modelString);
+  const modelName = resolveModelName(modelString);
   const encodingName = await resolveEncoding(modelName);
 
   return {
@@ -132,13 +163,13 @@ export async function getTokenizerForModel(modelString: string): Promise<Tokeniz
 }
 
 export function countTokens(modelString: string, text: string): Promise<number> {
-  const modelName = normalizeModelKey(modelString);
+  const modelName = resolveModelName(modelString);
   return countTokensInternal(modelName, text);
 }
 
 export function countTokensBatch(modelString: string, texts: string[]): Promise<number[]> {
   assert(Array.isArray(texts), "Batch token counting expects an array of strings");
-  const modelName = normalizeModelKey(modelString);
+  const modelName = resolveModelName(modelString);
   return Promise.all(texts.map((text) => countTokensInternal(modelName, text)));
 }
 
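Below is a minimal usage sketch of the fallback path this change introduces. The import path `./tokenizer`, the specific model strings, and the `demo` wrapper are illustrative assumptions, not part of the diff itself.

```ts
// Illustrative only: the import path and model strings are assumptions.
import { countTokens, countTokensBatch } from "./tokenizer";

async function demo() {
  // Known model: "anthropic:claude-sonnet-4.5" normalizes to "anthropic/claude-sonnet-4.5"
  // and uses its real tokenizer, exactly as before this change.
  const exact = await countTokens("anthropic:claude-sonnet-4.5", "Hello, world!");

  // Unknown model: previously this threw `Invalid model string: ...`; now it logs a
  // warning and falls back to a same-provider tokenizer for an approximate count.
  const approximate = await countTokens("anthropic:some-future-model", "Hello, world!");

  // Batch counting goes through the same resolution, so it inherits the fallback too.
  const counts = await countTokensBatch("openai:not-a-real-model", ["one", "two three"]);

  console.log({ exact, approximate, counts });
}

demo().catch(console.error);
```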