diff --git a/src/api/providers/fetchers/__tests__/lmstudio.test.ts b/src/api/providers/fetchers/__tests__/lmstudio.test.ts
index 8e7e36c73f..28a162d3d6 100644
--- a/src/api/providers/fetchers/__tests__/lmstudio.test.ts
+++ b/src/api/providers/fetchers/__tests__/lmstudio.test.ts
@@ -60,7 +60,7 @@ describe("LMStudio Fetcher", () => {
 				supportsPromptCache: true,
 				supportsImages: rawModel.vision,
 				supportsComputerUse: false,
-				maxTokens: rawModel.contextLength,
+				maxTokens: Math.ceil(rawModel.contextLength * 0.2), // Should be 20% of context window
 				inputPrice: 0,
 				outputPrice: 0,
 				cacheWritesPrice: 0,
@@ -70,6 +70,36 @@ describe("LMStudio Fetcher", () => {
 			const result = parseLMStudioModel(rawModel)
 			expect(result).toEqual(expectedModelInfo)
 		})
+
+		it("should calculate maxTokens as 20% of context window", () => {
+			const testCases = [
+				{ contextLength: 8192, expectedMaxTokens: Math.ceil(8192 * 0.2) }, // 1639
+				{ contextLength: 128000, expectedMaxTokens: Math.ceil(128000 * 0.2) }, // 25600
+				{ contextLength: 200000, expectedMaxTokens: Math.ceil(200000 * 0.2) }, // 40000
+			]
+
+			testCases.forEach(({ contextLength, expectedMaxTokens }) => {
+				const rawModel: LLMInstanceInfo = {
+					type: "llm",
+					modelKey: "test-model",
+					format: "safetensors",
+					displayName: "Test Model",
+					path: "test/model",
+					sizeBytes: 1000000,
+					architecture: "test",
+					identifier: "test/model",
+					instanceReference: "TEST123",
+					vision: false,
+					trainedForToolUse: false,
+					maxContextLength: contextLength,
+					contextLength: contextLength,
+				}
+
+				const result = parseLMStudioModel(rawModel)
+				expect(result.maxTokens).toBe(expectedMaxTokens)
+				expect(result.contextWindow).toBe(contextLength)
+			})
+		})
 	})

 	describe("getLMStudioModels", () => {
diff --git a/src/api/providers/fetchers/lmstudio.ts b/src/api/providers/fetchers/lmstudio.ts
index 1e2e016df2..ff3426191f 100644
--- a/src/api/providers/fetchers/lmstudio.ts
+++ b/src/api/providers/fetchers/lmstudio.ts
@@ -38,13 +38,18 @@ export const parseLMStudioModel = (rawModel: LLMInstanceInfo | LLMInfo): ModelIn
 	// Handle both LLMInstanceInfo (from loaded models) and LLMInfo (from downloaded models)
 	const contextLength = "contextLength" in rawModel ? rawModel.contextLength : rawModel.maxContextLength

+	// Calculate maxTokens as 20% of context window to prevent context overflow
+	// This ensures there's always room for input tokens and prevents crashes
+	// when approaching the context limit
+	const maxOutputTokens = Math.ceil(contextLength * 0.2)
+
 	const modelInfo: ModelInfo = Object.assign({}, lMStudioDefaultModelInfo, {
 		description: `${rawModel.displayName} - ${rawModel.path}`,
 		contextWindow: contextLength,
 		supportsPromptCache: true,
 		supportsImages: rawModel.vision,
 		supportsComputerUse: false,
-		maxTokens: contextLength,
+		maxTokens: maxOutputTokens,
 	})

 	return modelInfo
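
For reference only (not part of the patch): a minimal standalone sketch of the 20% output-token cap this change applies. `capOutputTokens` is a hypothetical helper name used just for this illustration, not an export of `lmstudio.ts`.

```ts
// Illustrative sketch of the cap introduced above: output tokens are limited
// to 20% of the model's context window, rounded up.
function capOutputTokens(contextLength: number): number {
	return Math.ceil(contextLength * 0.2)
}

// These match the values asserted in the new test cases.
console.log(capOutputTokens(8192)) // 1639
console.log(capOutputTokens(128000)) // 25600
console.log(capOutputTokens(200000)) // 40000
```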