Skip to content

Commit 4486b2f

Browse files
committed
fix: respect Ollama Modelfile num_ctx configuration
- Remove automatic num_ctx override in NativeOllamaHandler
- Add optional ollamaNumCtx parameter to ApiHandlerOptions for explicit overrides
- Update both createMessage and completePrompt to only include num_ctx when explicitly set
- Add tests to verify num_ctx is not sent by default

This allows Ollama to use the Modelfile-defined num_ctx by default while still providing users the ability to override it when needed.

Fixes #7797
1 parent 0ce4e89 commit 4486b2f

File tree

3 files changed

+126
-7
lines changed

3 files changed

+126
-7
lines changed

src/api/providers/__tests__/native-ollama.spec.ts

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,61 @@ describe("NativeOllamaHandler", () => {
7373
expect(results[2]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 2 })
7474
})
7575

76+
it("should not include num_ctx by default", async () => {
77+
// Mock the chat response
78+
mockChat.mockImplementation(async function* () {
79+
yield { message: { content: "Response" } }
80+
})
81+
82+
const stream = handler.createMessage("System", [{ role: "user" as const, content: "Test" }])
83+
84+
// Consume the stream
85+
for await (const _ of stream) {
86+
// consume stream
87+
}
88+
89+
// Verify that num_ctx was NOT included in the options
90+
expect(mockChat).toHaveBeenCalledWith(
91+
expect.objectContaining({
92+
options: expect.not.objectContaining({
93+
num_ctx: expect.anything(),
94+
}),
95+
}),
96+
)
97+
})
98+
99+
it("should include num_ctx when explicitly set via ollamaNumCtx", async () => {
100+
const options: ApiHandlerOptions = {
101+
apiModelId: "llama2",
102+
ollamaModelId: "llama2",
103+
ollamaBaseUrl: "http://localhost:11434",
104+
ollamaNumCtx: 8192, // Explicitly set num_ctx
105+
}
106+
107+
handler = new NativeOllamaHandler(options)
108+
109+
// Mock the chat response
110+
mockChat.mockImplementation(async function* () {
111+
yield { message: { content: "Response" } }
112+
})
113+
114+
const stream = handler.createMessage("System", [{ role: "user" as const, content: "Test" }])
115+
116+
// Consume the stream
117+
for await (const _ of stream) {
118+
// consume stream
119+
}
120+
121+
// Verify that num_ctx was included with the specified value
122+
expect(mockChat).toHaveBeenCalledWith(
123+
expect.objectContaining({
124+
options: expect.objectContaining({
125+
num_ctx: 8192,
126+
}),
127+
}),
128+
)
129+
})
130+
76131
it("should handle DeepSeek R1 models with reasoning detection", async () => {
77132
const options: ApiHandlerOptions = {
78133
apiModelId: "deepseek-r1",
@@ -120,6 +175,49 @@ describe("NativeOllamaHandler", () => {
120175
})
121176
expect(result).toBe("This is the response")
122177
})
178+
179+
it("should not include num_ctx in completePrompt by default", async () => {
180+
mockChat.mockResolvedValue({
181+
message: { content: "Response" },
182+
})
183+
184+
await handler.completePrompt("Test prompt")
185+
186+
// Verify that num_ctx was NOT included in the options
187+
expect(mockChat).toHaveBeenCalledWith(
188+
expect.objectContaining({
189+
options: expect.not.objectContaining({
190+
num_ctx: expect.anything(),
191+
}),
192+
}),
193+
)
194+
})
195+
196+
it("should include num_ctx in completePrompt when explicitly set", async () => {
197+
const options: ApiHandlerOptions = {
198+
apiModelId: "llama2",
199+
ollamaModelId: "llama2",
200+
ollamaBaseUrl: "http://localhost:11434",
201+
ollamaNumCtx: 4096, // Explicitly set num_ctx
202+
}
203+
204+
handler = new NativeOllamaHandler(options)
205+
206+
mockChat.mockResolvedValue({
207+
message: { content: "Response" },
208+
})
209+
210+
await handler.completePrompt("Test prompt")
211+
212+
// Verify that num_ctx was included with the specified value
213+
expect(mockChat).toHaveBeenCalledWith(
214+
expect.objectContaining({
215+
options: expect.objectContaining({
216+
num_ctx: 4096,
217+
}),
218+
}),
219+
)
220+
})
123221
})
124222

125223
describe("error handling", () => {

src/api/providers/native-ollama.ts

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,15 +184,22 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
184184
)
185185

186186
try {
187+
// Build options object conditionally
188+
const chatOptions: any = {
189+
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
190+
}
191+
192+
// Only include num_ctx if explicitly set via ollamaNumCtx
193+
if (this.options.ollamaNumCtx !== undefined) {
194+
chatOptions.num_ctx = this.options.ollamaNumCtx
195+
}
196+
187197
// Create the actual API request promise
188198
const stream = await client.chat({
189199
model: modelId,
190200
messages: ollamaMessages,
191201
stream: true,
192-
options: {
193-
num_ctx: modelInfo.contextWindow,
194-
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
195-
},
202+
options: chatOptions,
196203
})
197204

198205
let totalInputTokens = 0
@@ -274,13 +281,21 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
274281
const { id: modelId } = await this.fetchModel()
275282
const useR1Format = modelId.toLowerCase().includes("deepseek-r1")
276283

284+
// Build options object conditionally
285+
const chatOptions: any = {
286+
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
287+
}
288+
289+
// Only include num_ctx if explicitly set via ollamaNumCtx
290+
if (this.options.ollamaNumCtx !== undefined) {
291+
chatOptions.num_ctx = this.options.ollamaNumCtx
292+
}
293+
277294
const response = await client.chat({
278295
model: modelId,
279296
messages: [{ role: "user", content: prompt }],
280297
stream: false,
281-
options: {
282-
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
283-
},
298+
options: chatOptions,
284299
})
285300

286301
return response.message?.content || ""

src/shared/api.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider"> & {
1414
* Defaults to true; set to false to disable summaries.
1515
*/
1616
enableGpt5ReasoningSummary?: boolean
17+
/**
18+
* Optional override for Ollama's num_ctx parameter.
19+
* When set, this value will be used in Ollama chat requests.
20+
* When undefined, Ollama will use the model's default num_ctx from the Modelfile.
21+
*/
22+
ollamaNumCtx?: number
1723
}
1824

1925
// RouterName

0 commit comments

Comments (0)