Commit 33da2bd

feat: enhance Ollama provider with comprehensive improvements
- Add token estimation to prevent exceeding model limits
- Implement async model initialization with proper error handling
- Fix context window handling with environment variable support
- Improve error messages with internationalization
- Update tests to reflect context window fixes
- Add changeset documentation

Matches and exceeds improvements from Kilo-Org PR #2170
1 parent af32353 commit 33da2bd

File tree

22 files changed: +264 -14 lines changed


src/api/providers/__tests__/native-ollama.spec.ts

Lines changed: 6 additions & 0 deletions
@@ -23,6 +23,12 @@ vitest.mock("../fetchers/ollama", () => ({
             supportsImages: false,
             supportsPromptCache: false,
         },
+        "deepseek-r1": {
+            contextWindow: 32768,
+            maxTokens: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+        },
     }),
 }))
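The new "deepseek-r1" entry gives the mocked model list an id that matches the handler's reasoning-model check (see the native-ollama.ts diff below). A minimal sketch of that check, using only logic that appears in this commit:

// Sketch: the handler switches message handling when the model id contains "deepseek-r1".
const modelId = "deepseek-r1"
const useR1Format = modelId.toLowerCase().includes("deepseek-r1") // true → R1-specific message format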

src/api/providers/fetchers/__tests__/ollama.test.ts

Lines changed: 42 additions & 6 deletions
@@ -18,16 +18,16 @@ describe("Ollama Fetcher", () => {
         const parsedModel = parseOllamaModel(modelData)

         expect(parsedModel).toEqual({
-            maxTokens: 40960,
-            contextWindow: 40960,
+            maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
+            contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
             supportsImages: false,
             supportsComputerUse: false,
             supportsPromptCache: true,
             inputPrice: 0,
             outputPrice: 0,
             cacheWritesPrice: 0,
             cacheReadsPrice: 0,
-            description: "Family: qwen3, Context: 40960, Size: 32.8B",
+            description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
         })
     })

@@ -43,18 +43,54 @@ describe("Ollama Fetcher", () => {
         const parsedModel = parseOllamaModel(modelDataWithNullFamilies as any)

         expect(parsedModel).toEqual({
-            maxTokens: 40960,
-            contextWindow: 40960,
+            maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
+            contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
             supportsImages: false,
             supportsComputerUse: false,
             supportsPromptCache: true,
             inputPrice: 0,
             outputPrice: 0,
             cacheWritesPrice: 0,
             cacheReadsPrice: 0,
-            description: "Family: qwen3, Context: 40960, Size: 32.8B",
+            description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
         })
     })
+
+    it("should use num_ctx from parameters when available", () => {
+        const modelDataWithNumCtx = {
+            ...ollamaModelsData["qwen3-2to16:latest"],
+            parameters: "num_ctx 8192\nstop_token <eos>",
+        }
+
+        const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)
+
+        expect(parsedModel.contextWindow).toBe(8192)
+        expect(parsedModel.maxTokens).toBe(8192)
+        expect(parsedModel.description).toContain("Context: 8192")
+    })
+
+    it("should use OLLAMA_NUM_CTX environment variable as fallback", () => {
+        const originalEnv = process.env.OLLAMA_NUM_CTX
+        process.env.OLLAMA_NUM_CTX = "16384"
+
+        const modelDataWithoutContext = {
+            ...ollamaModelsData["qwen3-2to16:latest"],
+            model_info: {}, // No context_length in model_info
+            parameters: undefined, // No parameters
+        }
+
+        const parsedModel = parseOllamaModel(modelDataWithoutContext as any)
+
+        expect(parsedModel.contextWindow).toBe(16384)
+        expect(parsedModel.maxTokens).toBe(16384)
+
+        // Restore original env
+        if (originalEnv !== undefined) {
+            process.env.OLLAMA_NUM_CTX = originalEnv
+        } else {
+            delete process.env.OLLAMA_NUM_CTX
+        }
+    })
 })

 describe("getOllamaModels", () => {

src/api/providers/fetchers/ollama.ts

Lines changed: 19 additions & 3 deletions
@@ -38,17 +38,33 @@ type OllamaModelsResponse = z.infer<typeof OllamaModelsResponseSchema>
 type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>

 export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
+    // Check for context window in model parameters first
+    const contextLengthFromModelParameters = rawModel.parameters
+        ? parseInt(rawModel.parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
+        : undefined
+
+    // Check for context window in model_info
     const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
-    const contextWindow =
+    const contextLengthFromModelInfo =
         contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined

+    // Use environment variable as fallback
+    const contextLengthFromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)
+
+    let contextWindow = contextLengthFromModelParameters ?? contextLengthFromModelInfo ?? contextLengthFromEnvironment
+
+    // Handle Ollama's quirk of returning 40960 for undefined context
+    if (contextWindow === 40960 && !contextLengthFromModelParameters) {
+        contextWindow = 4096 // For some unknown reason, Ollama returns an undefined context as "40960" rather than 4096, which is what it actually enforces.
+    }
+
     const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
         description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
-        contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
+        contextWindow: contextWindow,
         supportsPromptCache: true,
         supportsImages: rawModel.capabilities?.includes("vision"),
         supportsComputerUse: false,
-        maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
+        maxTokens: contextWindow,
     })

     return modelInfo
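Read end to end, the fetcher now resolves the context window in a fixed order: an explicit num_ctx in the model's parameters, then a context_length key in model_info, then the OLLAMA_NUM_CTX environment variable, then 4096 (also substituted when Ollama reports the spurious 40960). A self-contained sketch of that order — it restates the logic above for illustration and is not the exported function:

// Illustrative restatement of the precedence in parseOllamaModel (not the exported function itself).
function resolveContextWindow(parameters?: string, modelInfo: Record<string, unknown> = {}): number {
    // 1. Explicit "num_ctx <n>" line in the model's parameters string
    const fromParameters = parameters
        ? parseInt(parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
        : undefined

    // 2. Any *context_length key reported in model_info
    const contextKey = Object.keys(modelInfo).find((k) => k.includes("context_length"))
    const fromModelInfo =
        contextKey && typeof modelInfo[contextKey] === "number" ? (modelInfo[contextKey] as number) : undefined

    // 3. OLLAMA_NUM_CTX environment variable, else 4. the 4096 default
    const fromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)

    let contextWindow = fromParameters ?? fromModelInfo ?? fromEnvironment
    // Ollama reports an unset context as 40960; treat it as the 4096 it actually enforces.
    if (contextWindow === 40960 && !fromParameters) {
        contextWindow = 4096
    }
    return contextWindow
}

resolveContextWindow("num_ctx 8192\nstop_token <eos>") // 8192 — explicit num_ctx wins
resolveContextWindow(undefined, { "qwen3.context_length": 40960 }) // 4096 — quirk corrected
resolveContextWindow() // OLLAMA_NUM_CTX if set, otherwise 4096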

src/api/providers/native-ollama.ts

Lines changed: 53 additions & 5 deletions
@@ -1,12 +1,20 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import { Message, Ollama, type Config as OllamaOptions } from "ollama"
-import { ModelInfo, openAiModelInfoSaneDefaults, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
+import { ModelInfo, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
 import { ApiStream } from "../transform/stream"
 import { BaseProvider } from "./base-provider"
 import type { ApiHandlerOptions } from "../../shared/api"
 import { getOllamaModels } from "./fetchers/ollama"
 import { XmlMatcher } from "../../utils/xml-matcher"
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+import { t } from "../../i18n"
+
+const TOKEN_ESTIMATION_FACTOR = 4 // Industry standard technique for estimating token counts without actually implementing a parser/tokenizer
+
+function estimateOllamaTokenCount(messages: Message[]): number {
+    const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
+    return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
+}

 function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessageParam[]): Message[] {
     const ollamaMessages: Message[] = []

@@ -131,10 +139,20 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
     protected options: ApiHandlerOptions
     private client: Ollama | undefined
     protected models: Record<string, ModelInfo> = {}
+    private isInitialized = false

     constructor(options: ApiHandlerOptions) {
         super()
         this.options = options
+        this.initialize()
+    }
+
+    private async initialize(): Promise<void> {
+        if (this.isInitialized) {
+            return
+        }
+        await this.fetchModel()
+        this.isInitialized = true
     }

     private ensureClient(): Ollama {

@@ -165,15 +183,27 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
         messages: Anthropic.Messages.MessageParam[],
         metadata?: ApiHandlerCreateMessageMetadata,
     ): ApiStream {
+        if (!this.isInitialized) {
+            await this.initialize()
+        }
+
         const client = this.ensureClient()
-        const { id: modelId, info: modelInfo } = await this.fetchModel()
+        const { id: modelId, info: modelInfo } = this.getModel()
         const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

         const ollamaMessages: Message[] = [
             { role: "system", content: systemPrompt },
             ...convertToOllamaMessages(messages),
         ]

+        // Check if the estimated token count exceeds the model's limit
+        const estimatedTokenCount = estimateOllamaTokenCount(ollamaMessages)
+        if (modelInfo.maxTokens && estimatedTokenCount > modelInfo.maxTokens) {
+            throw new Error(
+                `Input message is too long for the selected model. Estimated tokens: ${estimatedTokenCount}, Max tokens: ${modelInfo.maxTokens}. To increase the context window size, please set the OLLAMA_NUM_CTX environment variable or see Ollama documentation.`,
+            )
+        }
+
         const matcher = new XmlMatcher(
             "think",
             (chunk) =>

@@ -256,21 +286,39 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

     async fetchModel() {
         this.models = await getOllamaModels(this.options.ollamaBaseUrl)
-        return this.getModel()
+        return this.models
     }

     override getModel(): { id: string; info: ModelInfo } {
         const modelId = this.options.ollamaModelId || ""
+
+        const modelInfo = this.models[modelId]
+        if (!modelInfo) {
+            const availableModels = Object.keys(this.models)
+            const errorMessage =
+                availableModels.length > 0
+                    ? t("common:errors.ollama.modelNotFoundWithAvailable", {
+                          modelId,
+                          availableModels: availableModels.join(", "),
+                      })
+                    : t("common:errors.ollama.modelNotFoundNoModels", { modelId })
+            throw new Error(errorMessage)
+        }
+
         return {
             id: modelId,
-            info: this.models[modelId] || openAiModelInfoSaneDefaults,
+            info: modelInfo,
         }
     }

     async completePrompt(prompt: string): Promise<string> {
         try {
+            if (!this.isInitialized) {
+                await this.initialize()
+            }
+
             const client = this.ensureClient()
-            const { id: modelId } = await this.fetchModel()
+            const { id: modelId } = this.getModel()
             const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

             const response = await client.chat({
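The guard added to createMessage is a pre-flight check built on the chars-per-token heuristic defined at the top of the file. A minimal sketch of the same check in isolation (the messages and limit are invented for the example; the handler itself uses the resolved model info and throws before any request is sent):

// Same heuristic as estimateOllamaTokenCount above: roughly 4 characters per token.
const TOKEN_ESTIMATION_FACTOR = 4

type ChatMessage = { role: string; content?: string }

function estimateTokens(messages: ChatMessage[]): number {
    const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
    return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
}

// Example values, not taken from the commit:
const messages: ChatMessage[] = [
    { role: "system", content: "You are a helpful assistant." },
    { role: "user", content: "Summarize this repository." },
]
const maxTokens = 4096 // in the handler this is getModel().info.maxTokens

if (estimateTokens(messages) > maxTokens) {
    // The handler throws here; raising OLLAMA_NUM_CTX (or setting num_ctx on the model) lifts the limit.
    throw new Error("Input message is too long for the selected model.")
}

Note also the lazy-initialization pattern: the constructor fires initialize() without awaiting it, and both createMessage and completePrompt re-check isInitialized and await initialize() themselves, so the model map is populated before getModel() is consulted.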

src/i18n/locales/ca/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/de/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/en/common.json

Lines changed: 8 additions & 0 deletions
@@ -103,6 +103,14 @@
         },
         "roo": {
             "authenticationRequired": "Roo provider requires cloud authentication. Please sign in to Roo Code Cloud."
+        },
+        "ollama": {
+            "inputTooLong": "Input message is too long for the selected model. Estimated tokens: {{estimatedTokenCount}}, Max tokens: {{maxTokens}}. To increase the context window size, please set the OLLAMA_NUM_CTX environment variable or see Ollama documentation.",
+            "serviceNotRunning": "Ollama service is not running at {{baseUrl}}. Please start Ollama first.",
+            "modelNotFound": "Model {{modelId}} not found in Ollama. Please pull the model first with: ollama pull {{modelId}}",
+            "modelNotFoundWithAvailable": "Model {{modelId}} not found. Available models: {{availableModels}}",
+            "modelNotFoundNoModels": "Model {{modelId}} not found. No models available. Please pull the model first with: ollama pull {{modelId}}",
+            "completionError": "Ollama completion error: {{error}}"
         }
     },
     "warnings": {

src/i18n/locales/es/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/fr/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.

src/i18n/locales/hi/common.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default.
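The keys added to en/common.json above (and mirrored in the other locale files) are consumed through the i18n t() helper, exactly as the native-ollama.ts diff does for the model-not-found cases. A short sketch of how the remaining keys would typically be interpolated (the import path and the surrounding values are placeholders for the example):

import { t } from "../../i18n" // relative path as used from src/api/providers in this commit

// Placeholder values for the example; in the handler they come from the request and the resolved model info.
const estimatedTokenCount = 9000
const maxTokens = 4096

throw new Error(t("common:errors.ollama.inputTooLong", { estimatedTokenCount, maxTokens }))
// The other keys follow the same pattern, e.g.:
// t("common:errors.ollama.serviceNotRunning", { baseUrl: "http://localhost:11434" })
// t("common:errors.ollama.completionError", { error: "connection refused" })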
