Skip to content

Commit 9d66462

Browse files
authored
Fix and update the token limits handling (QwenLM#754)
* fix: make the token limits regex normalize e.g. `some-model-1.1` -> `some-model` while preserving e.g. `gpt-4.1` as-is. * feat: update the token limits regex for the latest models `GLM-4.6` and `deepseek-v3.2-exp`. * feat: add the exact token limit 202752 for `GLM-4.6`, per the model config file.
1 parent a779d44 commit 9d66462

File tree

2 files changed

+53
-18
lines changed

2 files changed

+53
-18
lines changed

packages/core/src/core/tokenLimits.test.ts

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,29 @@ describe('normalize', () => {
5858
expect(normalize('gemini-2.0-flash-preview')).toBe('gemini-2.0-flash');
5959
});
6060

61-
it('should remove version numbers with dots when they are at the end', () => {
62-
expect(normalize('gpt-4.1.1-latest')).toBe('gpt-4.1.1');
61+
it('should not remove "-latest" from specific Qwen model names', () => {
62+
expect(normalize('qwen-plus-latest')).toBe('qwen-plus-latest');
63+
expect(normalize('qwen-flash-latest')).toBe('qwen-flash-latest');
64+
expect(normalize('qwen-vl-max-latest')).toBe('qwen-vl-max-latest');
65+
});
66+
67+
it('should remove date like suffixes', () => {
68+
expect(normalize('deepseek-r1-0528')).toBe('deepseek-r1');
69+
});
70+
71+
it('should remove literal "-latest" "-exp" suffixes', () => {
6372
expect(normalize('gpt-4.1-latest')).toBe('gpt-4.1');
73+
expect(normalize('deepseek-v3.2-exp')).toBe('deepseek-v3.2');
74+
});
75+
76+
it('should remove suffix version numbers with "v" prefix', () => {
77+
expect(normalize('model-test-v1.1')).toBe('model-test');
78+
expect(normalize('model-v1.1')).toBe('model');
79+
});
80+
81+
it('should remove suffix version numbers w/o "v" prefix only if they are preceded by another dash', () => {
82+
expect(normalize('model-test-1.1')).toBe('model-test');
83+
expect(normalize('gpt-4.1')).toBe('gpt-4.1');
6484
});
6585
});
6686

@@ -188,6 +208,9 @@ describe('tokenLimit', () => {
188208
it('should return the correct limit for glm-4.5', () => {
189209
expect(tokenLimit('glm-4.5')).toBe(131072);
190210
});
211+
it('should return the correct limit for glm-4.6', () => {
212+
expect(tokenLimit('glm-4.6')).toBe(202752);
213+
});
191214
});
192215

193216
describe('Other models', () => {
@@ -200,6 +223,9 @@ describe('tokenLimit', () => {
200223
it('should return the correct limit for deepseek-v3.1', () => {
201224
expect(tokenLimit('deepseek-v3.1')).toBe(131072);
202225
});
226+
it('should return the correct limit for deepseek-v3.2', () => {
227+
expect(tokenLimit('deepseek-v3.2-exp')).toBe(131072);
228+
});
203229
it('should return the correct limit for kimi-k2-instruct', () => {
204230
expect(tokenLimit('kimi-k2-instruct')).toBe(131072);
205231
});

packages/core/src/core/tokenLimits.ts

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ const LIMITS = {
2020
'32k': 32_768,
2121
'64k': 65_536,
2222
'128k': 131_072,
23-
'200k': 200_000, // vendor-declared decimal (OpenAI / Anthropic use 200k)
23+
'200k': 200_000, // vendor-declared decimal, used by OpenAI, Anthropic, GLM etc.
2424
'256k': 262_144,
2525
'512k': 524_288,
2626
'1m': 1_048_576,
@@ -49,15 +49,18 @@ export function normalize(model: string): string {
4949
s = s.replace(/-preview/g, '');
5050
// Special handling for Qwen model names that include "-latest" as part of the model name
5151
if (!s.match(/^qwen-(?:plus|flash|vl-max)-latest$/)) {
52-
// \d{6,} - Match 6 or more digits (dates) like -20250219 (6+ digit dates)
53-
// \d+x\d+b - Match patterns like 4x8b, -7b, -70b
54-
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
55-
// -\d+(?:\.\d+)+ - Match version numbers with dots (that are preceded by a dash),
56-
// like -1.1, -2.0.1 but only when they're suffixes, Example: model-test-1.1 → model-test;
57-
// Note: this does NOT match 4.1 in gpt-4.1 because there's no dash before 4.1 in that context.
58-
// latest - Match the literal string "latest"
52+
// Regex breakdown:
53+
// -(?:...)$ - Non-capturing group for suffixes at the end of the string
54+
// The following patterns are matched within the group:
55+
// \d{4,} - Match date-like suffixes of 4 or more digits, e.g. -20250219 or -0528
56+
// \d+x\d+b - Match "NxMb" size patterns like -4x8b (plain sizes such as -7b or -70b have no "x" and do not fit this alternative)
57+
// v\d+(?:\.\d+)* - Match version patterns starting with 'v' like -v1, -v1.2, -v2.1.3
58+
// (?<=-[^-]+-)\d+(?:\.\d+)+ - Match version numbers with dots that are preceded by another dash,
59+
// like -1.1, -2.0.1, but only when an earlier dash-separated segment comes before them. Example: model-test-1.1 → model-test;
60+
// Note: this does NOT match 4.1 in gpt-4.1 because no other dash comes before -4.1 in that context.
61+
// latest|exp - Match the literal string "latest" or "exp"
5962
s = s.replace(
60-
/-(?:\d{6,}|\d+x\d+b|v\d+(?:\.\d+)*|-\d+(?:\.\d+)+|latest)$/g,
63+
/-(?:\d{4,}|\d+x\d+b|v\d+(?:\.\d+)*|(?<=-[^-]+-)\d+(?:\.\d+)+|latest|exp)$/g,
6164
'',
6265
);
6366
}
@@ -149,18 +152,24 @@ const PATTERNS: Array<[RegExp, TokenCount]> = [
149152
// -------------------
150153
// Zhipu GLM
151154
// -------------------
152-
[/^glm-4\.5v.*$/, LIMITS['64k']],
153-
[/^glm-4\.5-air.*$/, LIMITS['128k']],
154-
[/^glm-4\.5.*$/, LIMITS['128k']],
155+
[/^glm-4\.5v(?:-.*)?$/, LIMITS['64k']],
156+
[/^glm-4\.5-air(?:-.*)?$/, LIMITS['128k']],
157+
[/^glm-4\.5(?:-.*)?$/, LIMITS['128k']],
158+
[/^glm-4\.6(?:-.*)?$/, 202_752 as unknown as TokenCount], // exact limit from the model config file
155159

156160
// -------------------
157-
// DeepSeek / GPT-OSS / Kimi / Llama & Mistral examples
161+
// DeepSeek
162+
// -------------------
163+
[/^deepseek$/, LIMITS['128k']],
164+
[/^deepseek-r1(?:-.*)?$/, LIMITS['128k']],
165+
[/^deepseek-v3(?:\.\d+)?(?:-.*)?$/, LIMITS['128k']],
166+
167+
// -------------------
168+
// GPT-OSS / Kimi / Llama & Mistral examples
158169
// -------------------
159-
[/^deepseek-r1.*$/, LIMITS['128k']],
160-
[/^deepseek-v3(?:\.1)?.*$/, LIMITS['128k']],
161170
[/^kimi-k2-instruct.*$/, LIMITS['128k']],
162171
[/^gpt-oss.*$/, LIMITS['128k']],
163-
[/^llama-4-scout.*$/, LIMITS['10m'] as unknown as TokenCount], // ultra-long variants - handle carefully
172+
[/^llama-4-scout.*$/, LIMITS['10m']],
164173
[/^mistral-large-2.*$/, LIMITS['128k']],
165174
];
166175

0 commit comments

Comments
 (0)