32 changes: 2 additions & 30 deletions src/api/providers/__tests__/native-ollama.spec.ts
@@ -72,34 +72,6 @@ describe("NativeOllamaHandler", () => {
expect(results[1]).toEqual({ type: "text", text: " world" })
expect(results[2]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 2 })
})

it("should handle DeepSeek R1 models with reasoning detection", async () => {
const options: ApiHandlerOptions = {
apiModelId: "deepseek-r1",
ollamaModelId: "deepseek-r1",
ollamaBaseUrl: "http://localhost:11434",
}

handler = new NativeOllamaHandler(options)

// Mock response with thinking tags
mockChat.mockImplementation(async function* () {
yield { message: { content: "<think>Let me think" } }
yield { message: { content: " about this</think>" } }
yield { message: { content: "The answer is 42" } }
})

const stream = handler.createMessage("System", [{ role: "user" as const, content: "Question?" }])
const results = []

for await (const chunk of stream) {
results.push(chunk)
}

// Should detect reasoning vs regular text
expect(results.some((r) => r.type === "reasoning")).toBe(true)
expect(results.some((r) => r.type === "text")).toBe(true)
})
})

describe("completePrompt", () => {
@@ -134,7 +106,7 @@ describe("NativeOllamaHandler", () => {
for await (const _ of stream) {
// consume stream
}
}).rejects.toThrow("Ollama service is not running")
}).rejects.toThrow("errors.ollama.serviceNotRunning")
})

it("should handle model not found errors", async () => {
@@ -148,7 +120,7 @@ describe("NativeOllamaHandler", () => {
for await (const _ of stream) {
// consume stream
}
}).rejects.toThrow("Model llama2 not found in Ollama")
}).rejects.toThrow("errors.ollama.modelNotFound")
})
})
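These assertions now expect the i18n keys rather than the English strings, because the provider raises its errors through t() (see src/api/providers/native-ollama.ts below). Note that toThrow with a string argument does substring matching, so the namespaced key thrown by the source ("common:errors.ollama.serviceNotRunning") still satisfies expect(...).rejects.toThrow("errors.ollama.serviceNotRunning"). For this to hold, the test environment has to resolve t() to the key itself, either because i18next falls back to the key when no resources are loaded or because the i18n module is mocked. A minimal sketch of such a mock, assuming Vitest; the module path is illustrative, not taken from this repository:

```ts
import { vi } from "vitest"

// Hypothetical mock: make t() return its key so the assertions above can match on key names.
vi.mock("../../../i18n", () => ({
	t: (key: string, _params?: Record<string, unknown>) => key,
}))
```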

48 changes: 42 additions & 6 deletions src/api/providers/fetchers/__tests__/ollama.test.ts
@@ -18,16 +18,16 @@ describe("Ollama Fetcher", () => {
const parsedModel = parseOllamaModel(modelData)

expect(parsedModel).toEqual({
maxTokens: 40960,
contextWindow: 40960,
maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
supportsImages: false,
supportsComputerUse: false,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Family: qwen3, Context: 40960, Size: 32.8B",
description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
})
})

@@ -43,18 +43,54 @@ describe("Ollama Fetcher", () => {
const parsedModel = parseOllamaModel(modelDataWithNullFamilies as any)

expect(parsedModel).toEqual({
maxTokens: 40960,
contextWindow: 40960,
maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
supportsImages: false,
supportsComputerUse: false,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Family: qwen3, Context: 40960, Size: 32.8B",
description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
})
})

it("should use num_ctx from parameters when available", () => {
const modelDataWithNumCtx = {
...ollamaModelsData["qwen3-2to16:latest"],
parameters: "num_ctx 8192\nstop_token <eos>",
}

const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)

expect(parsedModel.contextWindow).toBe(8192)
expect(parsedModel.maxTokens).toBe(8192)
expect(parsedModel.description).toContain("Context: 8192")
})

it("should use OLLAMA_NUM_CTX environment variable as fallback", () => {
const originalEnv = process.env.OLLAMA_NUM_CTX
process.env.OLLAMA_NUM_CTX = "16384"

const modelDataWithoutContext = {
...ollamaModelsData["qwen3-2to16:latest"],
model_info: {}, // No context_length in model_info
parameters: undefined, // No parameters
}

const parsedModel = parseOllamaModel(modelDataWithoutContext as any)

expect(parsedModel.contextWindow).toBe(16384)
expect(parsedModel.maxTokens).toBe(16384)

// Restore original env
if (originalEnv !== undefined) {
process.env.OLLAMA_NUM_CTX = originalEnv
} else {
delete process.env.OLLAMA_NUM_CTX
}
})
})

describe("getOllamaModels", () => {
22 changes: 19 additions & 3 deletions src/api/providers/fetchers/ollama.ts
@@ -38,17 +38,33 @@ type OllamaModelsResponse = z.infer<typeof OllamaModelsResponseSchema>
type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>

export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
// Check for context window in model parameters first
const contextLengthFromModelParameters = rawModel.parameters
? parseInt(rawModel.parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
: undefined

// Check for context window in model_info
const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
const contextWindow =
const contextLengthFromModelInfo =
contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined

// Use environment variable as fallback
const contextLengthFromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)

let contextWindow = contextLengthFromModelParameters ?? contextLengthFromModelInfo ?? contextLengthFromEnvironment

// Handle Ollama's quirk of returning 40960 for undefined context
if (contextWindow === 40960 && !contextLengthFromModelParameters) {
contextWindow = 4096 // For some unknown reason, Ollama returns an undefined context as "40960" rather than 4096, which is what it actually enforces.
}

const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
contextWindow: contextWindow,
supportsPromptCache: true,
supportsImages: rawModel.capabilities?.includes("vision"),
supportsComputerUse: false,
maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
maxTokens: contextWindow,
})

return modelInfo
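Taken together, the new resolution order is: an explicit num_ctx in the model's parameters, then any *context_length entry in model_info, then the OLLAMA_NUM_CTX environment variable (defaulting to 4096), with a reported 40960 treated as Ollama's placeholder for an unset context. A minimal standalone sketch of that precedence, using simplified types rather than the real zod-inferred schema (RawOllamaModelLike is illustrative only):

```ts
// Illustrative shapes, not the real types from fetchers/ollama.ts.
interface RawOllamaModelLike {
	parameters?: string // e.g. "num_ctx 8192\nstop_token <eos>"
	model_info: Record<string, unknown> // may contain a "*context_length" key
}

function resolveContextWindow(raw: RawOllamaModelLike): number {
	// 1. An explicit num_ctx in the model's parameters wins.
	const fromParameters = raw.parameters
		? parseInt(raw.parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
		: undefined

	// 2. Otherwise fall back to any "*context_length" entry in model_info.
	const contextKey = Object.keys(raw.model_info).find((k) => k.includes("context_length"))
	const fromModelInfo =
		contextKey && typeof raw.model_info[contextKey] === "number"
			? (raw.model_info[contextKey] as number)
			: undefined

	// 3. Finally use OLLAMA_NUM_CTX from the environment, defaulting to 4096.
	const fromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)

	let contextWindow = fromParameters ?? fromModelInfo ?? fromEnvironment

	// A reported 40960 with no explicit num_ctx is treated as the undefined-context quirk.
	if (contextWindow === 40960 && !fromParameters) {
		contextWindow = 4096
	}
	return contextWindow
}

// Example: matches the "num_ctx 8192" test case above.
console.log(resolveContextWindow({ parameters: "num_ctx 8192", model_info: {} })) // 8192
```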
75 changes: 63 additions & 12 deletions src/api/providers/native-ollama.ts
@@ -1,12 +1,20 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { Message, Ollama, type Config as OllamaOptions } from "ollama"
import { ModelInfo, openAiModelInfoSaneDefaults, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
import { ModelInfo, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
import { ApiStream } from "../transform/stream"
import { BaseProvider } from "./base-provider"
import type { ApiHandlerOptions } from "../../shared/api"
import { getOllamaModels } from "./fetchers/ollama"
import { XmlMatcher } from "../../utils/xml-matcher"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { t } from "../../i18n"

const TOKEN_ESTIMATION_FACTOR = 4 // Industry standard technique for estimating token counts without actually implementing a parser/tokenizer

function estimateOllamaTokenCount(messages: Message[]): number {
const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
}

function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessageParam[]): Message[] {
const ollamaMessages: Message[] = []
Expand Down Expand Up @@ -131,10 +139,20 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
protected options: ApiHandlerOptions
private client: Ollama | undefined
protected models: Record<string, ModelInfo> = {}
private isInitialized = false

constructor(options: ApiHandlerOptions) {
super()
this.options = options
this.initialize()
}

private async initialize(): Promise<void> {
if (this.isInitialized) {
return
}
await this.fetchModel()
this.isInitialized = true
}

private ensureClient(): Ollama {
@@ -154,7 +172,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

this.client = new Ollama(clientOptions)
} catch (error: any) {
throw new Error(`Error creating Ollama client: ${error.message}`)
throw new Error(t("common:errors.ollama.clientCreationError", { error: error.message }))
}
}
return this.client
@@ -165,15 +183,27 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
if (!this.isInitialized) {
await this.initialize()
}

const client = this.ensureClient()
const { id: modelId, info: modelInfo } = await this.fetchModel()
const { id: modelId, info: modelInfo } = this.getModel()
const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

const ollamaMessages: Message[] = [
{ role: "system", content: systemPrompt },
...convertToOllamaMessages(messages),
]

// Check if the estimated token count exceeds the model's limit
const estimatedTokenCount = estimateOllamaTokenCount(ollamaMessages)
if (modelInfo.maxTokens && estimatedTokenCount > modelInfo.maxTokens) {
throw new Error(
t("common:errors.ollama.inputTooLong", { estimatedTokenCount, maxTokens: modelInfo.maxTokens }),
)
}

const matcher = new XmlMatcher(
"think",
(chunk) =>
@@ -190,7 +220,6 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
messages: ollamaMessages,
stream: true,
options: {
num_ctx: modelInfo.contextWindow,
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
},
})
@@ -233,7 +262,11 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
}
} catch (streamError: any) {
console.error("Error processing Ollama stream:", streamError)
throw new Error(`Ollama stream processing error: ${streamError.message || "Unknown error"}`)
throw new Error(
t("common:errors.ollama.streamProcessingError", {
error: streamError.message || t("common:errors.ollama.unknownError"),
}),
)
}
} catch (error: any) {
// Enhance error reporting
@@ -242,12 +275,12 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

if (error.code === "ECONNREFUSED") {
throw new Error(
`Ollama service is not running at ${this.options.ollamaBaseUrl || "http://localhost:11434"}. Please start Ollama first.`,
t("common:errors.ollama.serviceNotRunning", {
baseUrl: this.options.ollamaBaseUrl || "http://localhost:11434",
}),
)
} else if (statusCode === 404) {
throw new Error(
`Model ${this.getModel().id} not found in Ollama. Please pull the model first with: ollama pull ${this.getModel().id}`,
)
throw new Error(t("common:errors.ollama.modelNotFound", { modelId: this.getModel().id }))
}

console.error(`Ollama API error (${statusCode || "unknown"}): ${errorMessage}`)
@@ -262,16 +295,34 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

override getModel(): { id: string; info: ModelInfo } {
const modelId = this.options.ollamaModelId || ""

const modelInfo = this.models[modelId]
if (!modelInfo) {
const availableModels = Object.keys(this.models)
const errorMessage =
availableModels.length > 0
? t("common:errors.ollama.modelNotFoundWithAvailable", {
modelId,
availableModels: availableModels.join(", "),
})
: t("common:errors.ollama.modelNotFoundNoModels", { modelId })
throw new Error(errorMessage)
}

return {
id: modelId,
info: this.models[modelId] || openAiModelInfoSaneDefaults,
info: modelInfo,
}
}

async completePrompt(prompt: string): Promise<string> {
try {
if (!this.isInitialized) {
await this.initialize()
}

const client = this.ensureClient()
const { id: modelId } = await this.fetchModel()
const { id: modelId } = this.getModel()
const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

const response = await client.chat({
@@ -286,7 +337,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
return response.message?.content || ""
} catch (error) {
if (error instanceof Error) {
throw new Error(`Ollama completion error: ${error.message}`)
throw new Error(t("common:errors.ollama.completionError", { error: error.message }))
}
throw error
}
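The pre-flight length check added in createMessage relies on the rough 4-characters-per-token heuristic above rather than a real tokenizer. A minimal sketch of that guard in isolation, with a simplified message type (SimpleMessage stands in for the Message type from the ollama package):

```ts
// Sketch only: simplified types, same heuristic as estimateOllamaTokenCount above.
interface SimpleMessage {
	role: string
	content?: string
}

const TOKEN_ESTIMATION_FACTOR = 4 // roughly 4 characters per token

function estimateTokenCount(messages: SimpleMessage[]): number {
	const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
	return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
}

function assertWithinContext(messages: SimpleMessage[], maxTokens: number): void {
	const estimated = estimateTokenCount(messages)
	if (estimated > maxTokens) {
		// The handler raises a translated error here; a plain Error keeps the sketch self-contained.
		throw new Error(`Estimated ${estimated} tokens exceeds the model limit of ${maxTokens}`)
	}
}

// Example: a short prompt fits comfortably in a 4096-token window.
assertWithinContext([{ role: "user", content: "Hello, Ollama!" }], 4096)
```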
11 changes: 11 additions & 0 deletions src/i18n/locales/ca/common.json


23 changes: 17 additions & 6 deletions src/i18n/locales/de/common.json
