@@ -20,7 +20,7 @@ const LIMITS = {
2020 '32k' : 32_768 ,
2121 '64k' : 65_536 ,
2222 '128k' : 131_072 ,
23- '200k' : 200_000 , // vendor-declared decimal (OpenAI / Anthropic use 200k)
23+ '200k' : 200_000 , // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
2424 '256k' : 262_144 ,
2525 '512k' : 524_288 ,
2626 '1m' : 1_048_576 ,
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
4949 s = s . replace ( / - p r e v i e w / g, '' ) ;
5050 // Special handling for Qwen model names that include "-latest" as part of the model name
5151 if ( ! s . match ( / ^ q w e n - (?: p l u s | f l a s h | v l - m a x ) - l a t e s t $ / ) ) {
52- // \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
53- // \d+x\d+b - Match patterns like 4x8b, -7b, -70b
54- // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
55- // -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
56- // like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
57- // Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
58- // latest - Match the literal string "latest"
52+ // Regex breakdown:
53+ // -(?:...)$ - Non-capturing group for suffixes at the end of the string
54+ // The following patterns are matched within the group:
55+ // \d{4,} - Match 4 or more digits (date stamps) like -20250219 or -0528
56+ // \d+x\d+b - Match NxMb size patterns like -4x8b (plain sizes such as -7b or -70b have no 'x' and are NOT matched)
57+ // v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
58+ // (?<=-[^-]+-)\d+(?:\.\d+)+ - Match dotted version numbers like -1.1 or -2.0.1, but only when an earlier
59+ // dash-delimited segment precedes the suffix's own dash. Example: model-test-1.1 → model-test;
60+ // Note: this does NOT match 4.1 in gpt-4.1 because no other dash comes before -4.1 in that name.
61+ // latest|exp - Match the literal string "latest" or "exp"
5962 s = s . replace (
60- / - (?: \d { 6 , } | \d + x \d + b | v \d + (?: \. \d + ) * | - \d + (?: \. \d + ) + | l a t e s t ) $ / g,
63+ / - (?: \d { 4 , } | \d + x \d + b | v \d + (?: \. \d + ) * | (?< = - [ ^ - ] + - ) \d + (?: \. \d + ) + | l a t e s t | e x p ) $ / g,
6164 '' ,
6265 ) ;
6366 }
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
149152 // -------------------
150153 // Zhipu GLM
151154 // -------------------
152- [ / ^ g l m - 4 \. 5 v .* $ / , LIMITS [ '64k' ] ] ,
153- [ / ^ g l m - 4 \. 5 - a i r .* $ / , LIMITS [ '128k' ] ] ,
154- [ / ^ g l m - 4 \. 5 .* $ / , LIMITS [ '128k' ] ] ,
155+ [ / ^ g l m - 4 \. 5 v (?: - .* ) ? $ / , LIMITS [ '64k' ] ] ,
156+ [ / ^ g l m - 4 \. 5 - a i r (?: - .* ) ? $ / , LIMITS [ '128k' ] ] ,
157+ [ / ^ g l m - 4 \. 5 (?: - .* ) ? $ / , LIMITS [ '128k' ] ] ,
158+ [ / ^ g l m - 4 \. 6 (?: - .* ) ? $ / , 202_752 as unknown as TokenCount ] , // exact limit from the model config file
155159
156160 // -------------------
157- // DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
161+ // DeepSeek
162+ // -------------------
163+ [ / ^ d e e p s e e k $ / , LIMITS [ '128k' ] ] ,
164+ [ / ^ d e e p s e e k - r 1 (?: - .* ) ? $ / , LIMITS [ '128k' ] ] ,
165+ [ / ^ d e e p s e e k - v 3 (?: \. \d + ) ? (?: - .* ) ? $ / , LIMITS [ '128k' ] ] ,
166+
167+ // -------------------
168+ // GPT-OSS / Kimi / Llama & Mistral examples
158169 // -------------------
159- [ / ^ d e e p s e e k - r 1 .* $ / , LIMITS [ '128k' ] ] ,
160- [ / ^ d e e p s e e k - v 3 (?: \. 1 ) ? .* $ / , LIMITS [ '128k' ] ] ,
161170 [ / ^ k i m i - k 2 - i n s t r u c t .* $ / , LIMITS [ '128k' ] ] ,
162171 [ / ^ g p t - o s s .* $ / , LIMITS [ '128k' ] ] ,
163- [ / ^ l l a m a - 4 - s c o u t .* $ / , LIMITS [ '10m' ] as unknown as TokenCount ] , // ultra-long variants - handle carefully
172+ [ / ^ l l a m a - 4 - s c o u t .* $ / , LIMITS [ '10m' ] ] ,
164173 [ / ^ m i s t r a l - l a r g e - 2 .* $ / , LIMITS [ '128k' ] ] ,
165174] ;
166175
0 commit comments