diff --git a/src/api/providers/__tests__/native-ollama.spec.ts b/src/api/providers/__tests__/native-ollama.spec.ts
index f8792937db..a282eb5536 100644
--- a/src/api/providers/__tests__/native-ollama.spec.ts
+++ b/src/api/providers/__tests__/native-ollama.spec.ts
@@ -120,6 +120,64 @@ describe("NativeOllamaHandler", () => {
 		})
 		expect(result).toBe("This is the response")
 	})
+
+	it("should not override num_ctx in options", async () => {
+		mockChat.mockResolvedValue({
+			message: { content: "Response" },
+		})
+
+		await handler.completePrompt("Test prompt")
+
+		// Verify that num_ctx is NOT in the options
+		expect(mockChat).toHaveBeenCalledWith({
+			model: "llama2",
+			messages: [{ role: "user", content: "Test prompt" }],
+			stream: false,
+			options: {
+				temperature: 0,
+				// num_ctx should NOT be present here
+			},
+		})
+
+		// Explicitly check that num_ctx is not in the options
+		const callArgs = mockChat.mock.calls[0][0]
+		expect(callArgs.options).not.toHaveProperty("num_ctx")
+	})
+})
+
+describe("createMessage num_ctx handling", () => {
+	it("should not set num_ctx in options for createMessage", async () => {
+		// Mock the chat response
+		mockChat.mockImplementation(async function* () {
+			yield {
+				message: { content: "Test" },
+				eval_count: 1,
+				prompt_eval_count: 1,
+			}
+		})
+
+		const stream = handler.createMessage("System", [{ role: "user" as const, content: "Test" }])
+
+		// Consume the stream
+		for await (const _ of stream) {
+			// Just consume
+		}
+
+		// Verify the call was made without num_ctx
+		expect(mockChat).toHaveBeenCalledWith({
+			model: "llama2",
+			messages: expect.any(Array),
+			stream: true,
+			options: {
+				temperature: 0,
+				// num_ctx should NOT be present
+			},
+		})
+
+		// Explicitly verify num_ctx is not in options
+		const callArgs = mockChat.mock.calls[0][0]
+		expect(callArgs.options).not.toHaveProperty("num_ctx")
+	})
 })
 
 describe("error handling", () => {
diff --git a/src/api/providers/fetchers/__tests__/ollama.test.ts b/src/api/providers/fetchers/__tests__/ollama.test.ts
index bf1bf3c6b2..cf6c25afe6 100644
--- a/src/api/providers/fetchers/__tests__/ollama.test.ts
+++ b/src/api/providers/fetchers/__tests__/ollama.test.ts
@@ -32,6 +32,40 @@ describe("Ollama Fetcher", () => {
 			})
 		})
 
+		it("should parse num_ctx from parameters field when present", () => {
+			const modelDataWithNumCtx = {
+				...ollamaModelsData["qwen3-2to16:latest"],
+				parameters: "num_ctx 16384\nstop_token ",
+				model_info: {
+					"ollama.context_length": 40960,
+				},
+			}
+
+			const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)
+
+			// Should use the configured num_ctx (16384) instead of the default context_length (40960)
+			expect(parsedModel.contextWindow).toBe(16384)
+			expect(parsedModel.maxTokens).toBe(16384)
+			expect(parsedModel.description).toBe("Family: qwen3, Context: 16384, Size: 32.8B")
+		})
+
+		it("should use default context_length when num_ctx is not in parameters", () => {
+			const modelDataWithoutNumCtx = {
+				...ollamaModelsData["qwen3-2to16:latest"],
+				parameters: "stop_token ", // No num_ctx here
+				model_info: {
+					"ollama.context_length": 40960,
+				},
+			}
+
+			const parsedModel = parseOllamaModel(modelDataWithoutNumCtx as any)
+
+			// Should use the default context_length (40960)
+			expect(parsedModel.contextWindow).toBe(40960)
+			expect(parsedModel.maxTokens).toBe(40960)
+			expect(parsedModel.description).toBe("Family: qwen3, Context: 40960, Size: 32.8B")
+		})
+
 		it("should handle models with null families field", () => {
 			const modelDataWithNullFamilies = {
 				...ollamaModelsData["qwen3-2to16:latest"],
diff --git a/src/api/providers/fetchers/ollama.ts b/src/api/providers/fetchers/ollama.ts
index 8e1e3f7f07..e29c36849d 100644
--- a/src/api/providers/fetchers/ollama.ts
+++ b/src/api/providers/fetchers/ollama.ts
@@ -39,16 +39,30 @@ type OllamaModelInfoResponse = z.infer
 
 export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
 	const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
-	const contextWindow =
+	const defaultContextWindow =
 		contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined
 
+	// Parse the parameters field to check for user-configured num_ctx
+	let configuredNumCtx: number | undefined
+	if (rawModel.parameters) {
+		// The parameters field contains modelfile parameters as a string
+		// Look for num_ctx setting in the format "num_ctx <number>"
+		const numCtxMatch = rawModel.parameters.match(/num_ctx\s+(\d+)/i)
+		if (numCtxMatch && numCtxMatch[1]) {
+			configuredNumCtx = parseInt(numCtxMatch[1], 10)
+		}
+	}
+
+	// Use the configured num_ctx if available, otherwise fall back to the default
+	const actualContextWindow = configuredNumCtx || defaultContextWindow || ollamaDefaultModelInfo.contextWindow
+
 	const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
-		description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
-		contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
+		description: `Family: ${rawModel.details.family}, Context: ${actualContextWindow}, Size: ${rawModel.details.parameter_size}`,
+		contextWindow: actualContextWindow,
 		supportsPromptCache: true,
 		supportsImages: rawModel.capabilities?.includes("vision"),
 		supportsComputerUse: false,
-		maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
+		maxTokens: actualContextWindow,
 	})
 
 	return modelInfo
diff --git a/src/api/providers/native-ollama.ts b/src/api/providers/native-ollama.ts
index 8ab4ebe2e1..7658275a57 100644
--- a/src/api/providers/native-ollama.ts
+++ b/src/api/providers/native-ollama.ts
@@ -181,7 +181,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletionHandler
 			messages: ollamaMessages,
 			stream: true,
 			options: {
-				num_ctx: modelInfo.contextWindow,
+				// Don't override num_ctx - let Ollama use the model's configured value
 				temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
 			},
 		})
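
Note on the fetcher change: parseOllamaModel now prefers a num_ctx configured in the model's modelfile over the context_length the model reports, falling back to the provider default when neither is present. The sketch below is illustrative only, not code from this PR; resolveContextWindow and FALLBACK_CONTEXT_WINDOW are hypothetical stand-ins, and only the regex and the fallback chain mirror the diff.

// Illustrative sketch of the precedence introduced in parseOllamaModel.
// Hypothetical names; only the regex and the fallback chain mirror the diff.
const FALLBACK_CONTEXT_WINDOW = 4096 // stand-in for ollamaDefaultModelInfo.contextWindow

function resolveContextWindow(
	parameters: string | undefined, // modelfile parameters string, e.g. "num_ctx 16384"
	reportedContextLength: number | undefined, // model_info["ollama.context_length"]
): number {
	// A num_ctx configured in the modelfile wins ...
	const match = parameters?.match(/num_ctx\s+(\d+)/i)
	const configuredNumCtx = match ? parseInt(match[1], 10) : undefined
	// ... then the model's reported context length, then the provider default.
	return configuredNumCtx || reportedContextLength || FALLBACK_CONTEXT_WINDOW
}

console.log(resolveContextWindow("num_ctx 16384", 40960)) // 16384, configured value wins
console.log(resolveContextWindow(undefined, 40960)) // 40960, no num_ctx, reported length
console.log(resolveContextWindow(undefined, undefined)) // 4096, provider default

This also explains the native-ollama.ts change: with num_ctx no longer sent per request, a model whose modelfile sets num_ctx keeps that value at inference time, and the fetcher now reports the same number to the rest of the app.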