Commit 8119af6

fix: calculate LM Studio maxTokens as 20% of context window
- Fix context limit crash issue by properly calculating maxTokens
- Previously maxTokens was set equal to contextWindow, causing overflow
- Now maxTokens is calculated as 20% of contextWindow to leave room for input
- Add test coverage for the new calculation logic

Fixes #7388
1 parent 0c481a3 commit 8119af6
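
In plain terms, the patch replaces maxTokens = contextWindow with a 20% slice of the window, so roughly 80% of the context stays available for input. A minimal standalone sketch of that arithmetic (the helper name here is hypothetical, not from the patch):

// Hypothetical helper mirroring the patch's arithmetic: reserve ~20% of the
// context window for output tokens, leaving the rest for input.
function maxOutputTokens(contextWindow: number): number {
    return Math.ceil(contextWindow * 0.2)
}

// Previously maxTokens === contextWindow, so input + output could overflow.
console.log(maxOutputTokens(8192)) // 1639 (8192 * 0.2 = 1638.4, rounded up)
console.log(maxOutputTokens(128000)) // 25600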

File tree: 2 files changed (+37, −2)
src/api/providers/fetchers/__tests__/lmstudio.test.ts

Lines changed: 31 additions & 1 deletion
@@ -60,7 +60,7 @@ describe("LMStudio Fetcher", () => {
                 supportsPromptCache: true,
                 supportsImages: rawModel.vision,
                 supportsComputerUse: false,
-                maxTokens: rawModel.contextLength,
+                maxTokens: Math.ceil(rawModel.contextLength * 0.2), // Should be 20% of context window
                 inputPrice: 0,
                 outputPrice: 0,
                 cacheWritesPrice: 0,
@@ -70,6 +70,36 @@ describe("LMStudio Fetcher", () => {
             const result = parseLMStudioModel(rawModel)
             expect(result).toEqual(expectedModelInfo)
         })
+
+        it("should calculate maxTokens as 20% of context window", () => {
+            const testCases = [
+                { contextLength: 8192, expectedMaxTokens: Math.ceil(8192 * 0.2) }, // 1639
+                { contextLength: 128000, expectedMaxTokens: Math.ceil(128000 * 0.2) }, // 25600
+                { contextLength: 200000, expectedMaxTokens: Math.ceil(200000 * 0.2) }, // 40000
+            ]
+
+            testCases.forEach(({ contextLength, expectedMaxTokens }) => {
+                const rawModel: LLMInstanceInfo = {
+                    type: "llm",
+                    modelKey: "test-model",
+                    format: "safetensors",
+                    displayName: "Test Model",
+                    path: "test/model",
+                    sizeBytes: 1000000,
+                    architecture: "test",
+                    identifier: "test/model",
+                    instanceReference: "TEST123",
+                    vision: false,
+                    trainedForToolUse: false,
+                    maxContextLength: contextLength,
+                    contextLength: contextLength,
+                }
+
+                const result = parseLMStudioModel(rawModel)
+                expect(result.maxTokens).toBe(expectedMaxTokens)
+                expect(result.contextWindow).toBe(contextLength)
+            })
+        })
     })

     describe("getLMStudioModels", () => {

src/api/providers/fetchers/lmstudio.ts

Lines changed: 6 additions & 1 deletion
@@ -38,13 +38,18 @@ export const parseLMStudioModel = (rawModel: LLMInstanceInfo | LLMInfo): ModelIn
     // Handle both LLMInstanceInfo (from loaded models) and LLMInfo (from downloaded models)
     const contextLength = "contextLength" in rawModel ? rawModel.contextLength : rawModel.maxContextLength

+    // Calculate maxTokens as 20% of context window to prevent context overflow
+    // This ensures there's always room for input tokens and prevents crashes
+    // when approaching the context limit
+    const maxOutputTokens = Math.ceil(contextLength * 0.2)
+
     const modelInfo: ModelInfo = Object.assign({}, lMStudioDefaultModelInfo, {
         description: `${rawModel.displayName} - ${rawModel.path}`,
         contextWindow: contextLength,
         supportsPromptCache: true,
         supportsImages: rawModel.vision,
         supportsComputerUse: false,
-        maxTokens: contextLength,
+        maxTokens: maxOutputTokens,
     })

     return modelInfo
