Commit 33da2bd

feat: enhance Ollama provider with comprehensive improvements
- Add token estimation to prevent exceeding model limits
- Implement async model initialization with proper error handling
- Fix context window handling with environment variable support
- Improve error messages with internationalization
- Update tests to reflect context window fixes
- Add changeset documentation

Matches and exceeds improvements from Kilo-Org PR #2170
1 parent af32353 commit 33da2bd

File tree

22 files changed: +264 -14 lines changed


src/api/providers/__tests__/native-ollama.spec.ts

Lines changed: 6 additions & 0 deletions
@@ -23,6 +23,12 @@ vitest.mock("../fetchers/ollama", () => ({
             supportsImages: false,
             supportsPromptCache: false,
         },
+        "deepseek-r1": {
+            contextWindow: 32768,
+            maxTokens: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+        },
     }),
 }))
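The new "deepseek-r1" entry gives the mocked model list an id that matches the handler's reasoning-model check (see the native-ollama.ts diff below). A minimal sketch of that check, using only logic that appears in this commit:

// Sketch: the handler switches message handling when the model id contains "deepseek-r1".
const modelId = "deepseek-r1"
const useR1Format = modelId.toLowerCase().includes("deepseek-r1") // true → R1-specific message format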

src/api/providers/fetchers/__tests__/ollama.test.ts

Lines changed: 42 additions & 6 deletions
@@ -18,16 +18,16 @@ describe("Ollama Fetcher", () => {
         const parsedModel = parseOllamaModel(modelData)

         expect(parsedModel).toEqual({
-            maxTokens: 40960,
-            contextWindow: 40960,
+            maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
+            contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
             supportsImages: false,
             supportsComputerUse: false,
             supportsPromptCache: true,
             inputPrice: 0,
             outputPrice: 0,
             cacheWritesPrice: 0,
             cacheReadsPrice: 0,
-            description: "Family: qwen3, Context: 40960, Size: 32.8B",
+            description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
         })
     })

@@ -43,18 +43,54 @@ describe("Ollama Fetcher", () => {
         const parsedModel = parseOllamaModel(modelDataWithNullFamilies as any)

         expect(parsedModel).toEqual({
-            maxTokens: 40960,
-            contextWindow: 40960,
+            maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
+            contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
             supportsImages: false,
             supportsComputerUse: false,
             supportsPromptCache: true,
             inputPrice: 0,
             outputPrice: 0,
             cacheWritesPrice: 0,
             cacheReadsPrice: 0,
-            description: "Family: qwen3, Context: 40960, Size: 32.8B",
+            description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
         })
     })
+
+    it("should use num_ctx from parameters when available", () => {
+        const modelDataWithNumCtx = {
+            ...ollamaModelsData["qwen3-2to16:latest"],
+            parameters: "num_ctx 8192\nstop_token <eos>",
+        }
+
+        const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)
+
+        expect(parsedModel.contextWindow).toBe(8192)
+        expect(parsedModel.maxTokens).toBe(8192)
+        expect(parsedModel.description).toContain("Context: 8192")
+    })
+
+    it("should use OLLAMA_NUM_CTX environment variable as fallback", () => {
+        const originalEnv = process.env.OLLAMA_NUM_CTX
+        process.env.OLLAMA_NUM_CTX = "16384"
+
+        const modelDataWithoutContext = {
+            ...ollamaModelsData["qwen3-2to16:latest"],
+            model_info: {}, // No context_length in model_info
+            parameters: undefined, // No parameters
+        }
+
+        const parsedModel = parseOllamaModel(modelDataWithoutContext as any)
+
+        expect(parsedModel.contextWindow).toBe(16384)
+        expect(parsedModel.maxTokens).toBe(16384)
+
+        // Restore original env
+        if (originalEnv !== undefined) {
+            process.env.OLLAMA_NUM_CTX = originalEnv
+        } else {
+            delete process.env.OLLAMA_NUM_CTX
+        }
+    })
 })

 describe("getOllamaModels", () => {

src/api/providers/fetchers/ollama.ts

Lines changed: 19 additions & 3 deletions
@@ -38,17 +38,33 @@ type OllamaModelsResponse = z.infer<typeof OllamaModelsResponseSchema>
 type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>

 export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
+    // Check for context window in model parameters first
+    const contextLengthFromModelParameters = rawModel.parameters
+        ? parseInt(rawModel.parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
+        : undefined
+
+    // Check for context window in model_info
     const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
-    const contextWindow =
+    const contextLengthFromModelInfo =
         contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined

+    // Use environment variable as fallback
+    const contextLengthFromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)
+
+    let contextWindow = contextLengthFromModelParameters ?? contextLengthFromModelInfo ?? contextLengthFromEnvironment
+
+    // Handle Ollama's quirk of returning 40960 for undefined context
+    if (contextWindow === 40960 && !contextLengthFromModelParameters) {
+        contextWindow = 4096 // For some unknown reason, Ollama returns an undefined context as "40960" rather than 4096, which is what it actually enforces.
+    }
+
     const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
         description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
-        contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
+        contextWindow: contextWindow,
         supportsPromptCache: true,
         supportsImages: rawModel.capabilities?.includes("vision"),
         supportsComputerUse: false,
-        maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
+        maxTokens: contextWindow,
     })

     return modelInfo
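Read end to end, the fetcher now resolves the context window in a fixed order: an explicit num_ctx in the model's parameters, then a context_length key in model_info, then the OLLAMA_NUM_CTX environment variable, then 4096 (also substituted when Ollama reports the spurious 40960). A self-contained sketch of that order — it restates the logic above for illustration and is not the exported function:

// Illustrative restatement of the precedence in parseOllamaModel (not the exported function itself).
function resolveContextWindow(parameters?: string, modelInfo: Record<string, unknown> = {}): number {
    // 1. Explicit "num_ctx <n>" line in the model's parameters string
    const fromParameters = parameters
        ? parseInt(parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
        : undefined

    // 2. Any *context_length key reported in model_info
    const contextKey = Object.keys(modelInfo).find((k) => k.includes("context_length"))
    const fromModelInfo =
        contextKey && typeof modelInfo[contextKey] === "number" ? (modelInfo[contextKey] as number) : undefined

    // 3. OLLAMA_NUM_CTX environment variable, else 4. the 4096 default
    const fromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)

    let contextWindow = fromParameters ?? fromModelInfo ?? fromEnvironment
    // Ollama reports an unset context as 40960; treat it as the 4096 it actually enforces.
    if (contextWindow === 40960 && !fromParameters) {
        contextWindow = 4096
    }
    return contextWindow
}

resolveContextWindow("num_ctx 8192\nstop_token <eos>") // 8192 — explicit num_ctx wins
resolveContextWindow(undefined, { "qwen3.context_length": 40960 }) // 4096 — quirk corrected
resolveContextWindow() // OLLAMA_NUM_CTX if set, otherwise 4096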

src/api/providers/native-ollama.ts

Lines changed: 53 additions & 5 deletions
@@ -1,12 +1,20 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Message, Ollama, type Config as OllamaOptions } from "ollama"
-import { ModelInfo, openAiModelInfoSaneDefaults, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
+import { ModelInfo, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
 import { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 import type { ApiHandlerOptions } from "../../shared/api"
 import { getOllamaModels } from "./fetchers/ollama"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+import { t } from "../../i18n"
+
+const TOKEN_ESTIMATION_FACTOR = 4 // Industry standard technique for estimating token counts without actually implementing a parser/tokenizer
+
+function estimateOllamaTokenCount(messages: Message[]): number {
+    const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
+    return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
+}

 function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessageParam[]): Message[] {
     const ollamaMessages: Message[] = []

@@ -131,10 +139,20 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
     protected options: ApiHandlerOptions
     private client: Ollama | undefined
     protected models: Record<string, ModelInfo> = {}
+    private isInitialized = false

     constructor(options: ApiHandlerOptions) {
         super()
         this.options = options
+        this.initialize()
+    }
+
+    private async initialize(): Promise<void> {
+        if (this.isInitialized) {
+            return
+        }
+        await this.fetchModel()
+        this.isInitialized = true
     }

     private ensureClient(): Ollama {

@@ -165,15 +183,27 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
         messages: Anthropic.Messages.MessageParam[],
         metadata?: ApiHandlerCreateMessageMetadata,
     ): ApiStream {
+        if (!this.isInitialized) {
+            await this.initialize()
+        }
+
         const client = this.ensureClient()
-        const { id: modelId, info: modelInfo } = await this.fetchModel()
+        const { id: modelId, info: modelInfo } = this.getModel()
         const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

         const ollamaMessages: Message[] = [
             { role: "system", content: systemPrompt },
             ...convertToOllamaMessages(messages),
         ]

+        // Check if the estimated token count exceeds the model's limit
+        const estimatedTokenCount = estimateOllamaTokenCount(ollamaMessages)
+        if (modelInfo.maxTokens && estimatedTokenCount > modelInfo.maxTokens) {
+            throw new Error(
+                `Input message is too long for the selected model. Estimated tokens: ${estimatedTokenCount}, Max tokens: ${modelInfo.maxTokens}. To increase the context window size, please set the OLLAMA_NUM_CTX environment variable or see Ollama documentation.`,
+            )
+        }
+
         const matcher = new XmlMatcher(
             "think",
             (chunk) =>

@@ -256,21 +286,39 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

     async fetchModel() {
         this.models = await getOllamaModels(this.options.ollamaBaseUrl)
-        return this.getModel()
+        return this.models
     }

     override getModel(): { id: string; info: ModelInfo } {
         const modelId = this.options.ollamaModelId || ""
+
+        const modelInfo = this.models[modelId]
+        if (!modelInfo) {
+            const availableModels = Object.keys(this.models)
+            const errorMessage =
+                availableModels.length > 0
+                    ? t("common:errors.ollama.modelNotFoundWithAvailable", {
+                          modelId,
+                          availableModels: availableModels.join(", "),
+                      })
+                    : t("common:errors.ollama.modelNotFoundNoModels", { modelId })
+            throw new Error(errorMessage)
+        }
+
         return {
             id: modelId,
-            info: this.models[modelId] || openAiModelInfoSaneDefaults,
+            info: modelInfo,
         }
     }

     async completePrompt(prompt: string): Promise<string> {
         try {
+            if (!this.isInitialized) {
+                await this.initialize()
+            }
+
             const client = this.ensureClient()
-            const { id: modelId } = await this.fetchModel()
+            const { id: modelId } = this.getModel()
             const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

             const response = await client.chat({
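The guard added to createMessage is a pre-flight check built on the chars-per-token heuristic defined at the top of the file. A minimal sketch of the same check in isolation (the messages and limit are invented for the example; the handler itself uses the resolved model info and throws before any request is sent):

// Same heuristic as estimateOllamaTokenCount above: roughly 4 characters per token.
const TOKEN_ESTIMATION_FACTOR = 4

type ChatMessage = { role: string; content?: string }

function estimateTokens(messages: ChatMessage[]): number {
    const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
    return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
}

// Example values, not taken from the commit:
const messages: ChatMessage[] = [
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Summarize this repository." },
]
const maxTokens = 4096 // in the handler this is getModel().info.maxTokens

if (estimateTokens(messages) > maxTokens) {
    // The handler throws here; raising OLLAMA_NUM_CTX (or setting num_ctx on the model) lifts the limit.
    throw new Error("Input message is too long for the selected model.")
}

Note also the lazy-initialization pattern: the constructor fires initialize() without awaiting it, and both createMessage and completePrompt re-check isInitialized and await initialize() themselves, so the model map is populated before getModel() is consulted.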

src/i18n/locales/ca/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/de/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/en/common.json

Lines changed: 8 additions & 0 deletions
@@ -103,6 +103,14 @@
         },
         "roo": {
             "authenticationRequired": "Roo provider requires cloud authentication. Please sign in to Roo Code Cloud."
+        },
+        "ollama": {
+            "inputTooLong": "Input message is too long for the selected model. Estimated tokens: {{estimatedTokenCount}}, Max tokens: {{maxTokens}}. To increase the context window size, please set the OLLAMA_NUM_CTX environment variable or see Ollama documentation.",
+            "serviceNotRunning": "Ollama service is not running at {{baseUrl}}. Please start Ollama first.",
+            "modelNotFound": "Model {{modelId}} not found in Ollama. Please pull the model first with: ollama pull {{modelId}}",
+            "modelNotFoundWithAvailable": "Model {{modelId}} not found. Available models: {{availableModels}}",
+            "modelNotFoundNoModels": "Model {{modelId}} not found. No models available. Please pull the model first with: ollama pull {{modelId}}",
+            "completionError": "Ollama completion error: {{error}}"
         }
     },
     "warnings": {

src/i18n/locales/es/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/fr/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/hi/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.
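The keys added to en/common.json above (and mirrored in the other locale files) are consumed through the i18n t() helper, exactly as the native-ollama.ts diff does for the model-not-found cases. A short sketch of how the remaining keys would typically be interpolated (the import path and the surrounding values are placeholders for the example):

import { t } from "../../i18n" // relative path as used from src/api/providers in this commit

// Placeholder values for the example; in the handler they come from the request and the resolved model info.
const estimatedTokenCount = 9000
const maxTokens = 4096

throw new Error(t("common:errors.ollama.inputTooLong", { estimatedTokenCount, maxTokens }))
// The other keys follow the same pattern, e.g.:
// t("common:errors.ollama.serviceNotRunning", { baseUrl: "http://localhost:11434" })
// t("common:errors.ollama.completionError", { error: "connection refused" })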
