32 changes: 2 additions & 30 deletions src/api/providers/__tests__/native-ollama.spec.ts
@@ -72,34 +72,6 @@ describe("NativeOllamaHandler", () => {
expect(results[1]).toEqual({ type: "text", text: " world" })
expect(results[2]).toEqual({ type: "usage", inputTokens: 10, outputTokens: 2 })
})

it("should handle DeepSeek R1 models with reasoning detection", async () => {
const options: ApiHandlerOptions = {
apiModelId: "deepseek-r1",
ollamaModelId: "deepseek-r1",
ollamaBaseUrl: "http://localhost:11434",
}

handler = new NativeOllamaHandler(options)

// Mock response with thinking tags
mockChat.mockImplementation(async function* () {
yield { message: { content: "<think>Let me think" } }
yield { message: { content: " about this</think>" } }
yield { message: { content: "The answer is 42" } }
})

const stream = handler.createMessage("System", [{ role: "user" as const, content: "Question?" }])
const results = []

for await (const chunk of stream) {
results.push(chunk)
}

// Should detect reasoning vs regular text
expect(results.some((r) => r.type === "reasoning")).toBe(true)
expect(results.some((r) => r.type === "text")).toBe(true)
})
})

describe("completePrompt", () => {
@@ -134,7 +106,7 @@ describe("NativeOllamaHandler", () => {
for await (const _ of stream) {
// consume stream
}
}).rejects.toThrow("Ollama service is not running")
}).rejects.toThrow("errors.ollama.serviceNotRunning")
})

it("should handle model not found errors", async () => {
@@ -148,7 +120,7 @@ describe("NativeOllamaHandler", () => {
for await (const _ of stream) {
// consume stream
}
}).rejects.toThrow("Model llama2 not found in Ollama")
}).rejects.toThrow("errors.ollama.modelNotFound")
})
})
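These assertions now expect the i18n keys rather than the English strings, because the provider raises its errors through t() (see src/api/providers/native-ollama.ts below). Note that toThrow with a string argument does substring matching, so the namespaced key thrown by the source ("common:errors.ollama.serviceNotRunning") still satisfies expect(...).rejects.toThrow("errors.ollama.serviceNotRunning"). For this to hold, the test environment has to resolve t() to the key itself, either because i18next falls back to the key when no resources are loaded or because the i18n module is mocked. A minimal sketch of such a mock, assuming Vitest; the module path is illustrative, not taken from this repository:

```ts
import { vi } from "vitest"

// Hypothetical mock: make t() return its key so the assertions above can match on key names.
vi.mock("../../../i18n", () => ({
	t: (key: string, _params?: Record<string, unknown>) => key,
}))
```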

48 changes: 42 additions & 6 deletions src/api/providers/fetchers/__tests__/ollama.test.ts
@@ -18,16 +18,16 @@ describe("Ollama Fetcher", () => {
const parsedModel = parseOllamaModel(modelData)

expect(parsedModel).toEqual({
maxTokens: 40960,
contextWindow: 40960,
maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
supportsImages: false,
supportsComputerUse: false,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Family: qwen3, Context: 40960, Size: 32.8B",
description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
})
})

@@ -43,18 +43,54 @@ describe("Ollama Fetcher", () => {
const parsedModel = parseOllamaModel(modelDataWithNullFamilies as any)

expect(parsedModel).toEqual({
maxTokens: 40960,
contextWindow: 40960,
maxTokens: 4096, // Changed from 40960 - Ollama's quirk fix
contextWindow: 4096, // Changed from 40960 - Ollama's quirk fix
supportsImages: false,
supportsComputerUse: false,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
cacheWritesPrice: 0,
cacheReadsPrice: 0,
description: "Family: qwen3, Context: 40960, Size: 32.8B",
description: "Family: qwen3, Context: 4096, Size: 32.8B", // Changed from 40960
})
})

it("should use num_ctx from parameters when available", () => {
const modelDataWithNumCtx = {
...ollamaModelsData["qwen3-2to16:latest"],
parameters: "num_ctx 8192\nstop_token <eos>",
}

const parsedModel = parseOllamaModel(modelDataWithNumCtx as any)

expect(parsedModel.contextWindow).toBe(8192)
expect(parsedModel.maxTokens).toBe(8192)
expect(parsedModel.description).toContain("Context: 8192")
})

it("should use OLLAMA_NUM_CTX environment variable as fallback", () => {
const originalEnv = process.env.OLLAMA_NUM_CTX
process.env.OLLAMA_NUM_CTX = "16384"

const modelDataWithoutContext = {
...ollamaModelsData["qwen3-2to16:latest"],
model_info: {}, // No context_length in model_info
parameters: undefined, // No parameters
}

const parsedModel = parseOllamaModel(modelDataWithoutContext as any)

expect(parsedModel.contextWindow).toBe(16384)
expect(parsedModel.maxTokens).toBe(16384)

// Restore original env
if (originalEnv !== undefined) {
process.env.OLLAMA_NUM_CTX = originalEnv
} else {
delete process.env.OLLAMA_NUM_CTX
}
})
})

describe("getOllamaModels", () => {
22 changes: 19 additions & 3 deletions src/api/providers/fetchers/ollama.ts
@@ -38,17 +38,33 @@ type OllamaModelsResponse = z.infer<typeof OllamaModelsResponseSchema>
type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>

export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
// Check for context window in model parameters first
const contextLengthFromModelParameters = rawModel.parameters
? parseInt(rawModel.parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
: undefined

// Check for context window in model_info
const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
const contextWindow =
const contextLengthFromModelInfo =
contextKey && typeof rawModel.model_info[contextKey] === "number" ? rawModel.model_info[contextKey] : undefined

// Use environment variable as fallback
const contextLengthFromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)

let contextWindow = contextLengthFromModelParameters ?? contextLengthFromModelInfo ?? contextLengthFromEnvironment

// Handle Ollama's quirk of returning 40960 for undefined context
if (contextWindow === 40960 && !contextLengthFromModelParameters) {
contextWindow = 4096 // For some unknown reason, Ollama returns an undefined context as "40960" rather than 4096, which is what it actually enforces.
}

const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
contextWindow: contextWindow,
supportsPromptCache: true,
supportsImages: rawModel.capabilities?.includes("vision"),
supportsComputerUse: false,
maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
maxTokens: contextWindow,
})

return modelInfo
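Taken together, the new resolution order is: an explicit num_ctx in the model's parameters, then any *context_length entry in model_info, then the OLLAMA_NUM_CTX environment variable (defaulting to 4096), with a reported 40960 treated as Ollama's placeholder for an unset context. A minimal standalone sketch of that precedence, using simplified types rather than the real zod-inferred schema (RawOllamaModelLike is illustrative only):

```ts
// Illustrative shapes, not the real types from fetchers/ollama.ts.
interface RawOllamaModelLike {
	parameters?: string // e.g. "num_ctx 8192\nstop_token <eos>"
	model_info: Record<string, unknown> // may contain a "*context_length" key
}

function resolveContextWindow(raw: RawOllamaModelLike): number {
	// 1. An explicit num_ctx in the model's parameters wins.
	const fromParameters = raw.parameters
		? parseInt(raw.parameters.match(/^num_ctx\s+(\d+)/m)?.[1] ?? "", 10) || undefined
		: undefined

	// 2. Otherwise fall back to any "*context_length" entry in model_info.
	const contextKey = Object.keys(raw.model_info).find((k) => k.includes("context_length"))
	const fromModelInfo =
		contextKey && typeof raw.model_info[contextKey] === "number"
			? (raw.model_info[contextKey] as number)
			: undefined

	// 3. Finally use OLLAMA_NUM_CTX from the environment, defaulting to 4096.
	const fromEnvironment = parseInt(process.env.OLLAMA_NUM_CTX || "4096", 10)

	let contextWindow = fromParameters ?? fromModelInfo ?? fromEnvironment

	// A reported 40960 with no explicit num_ctx is treated as the undefined-context quirk.
	if (contextWindow === 40960 && !fromParameters) {
		contextWindow = 4096
	}
	return contextWindow
}

// Example: matches the "num_ctx 8192" test case above.
console.log(resolveContextWindow({ parameters: "num_ctx 8192", model_info: {} })) // 8192
```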
75 changes: 63 additions & 12 deletions src/api/providers/native-ollama.ts
@@ -1,12 +1,20 @@
import { Anthropic } from "@anthropic-ai/sdk"
import { Message, Ollama, type Config as OllamaOptions } from "ollama"
import { ModelInfo, openAiModelInfoSaneDefaults, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
import { ModelInfo, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"
import { ApiStream } from "../transform/stream"
import { BaseProvider } from "./base-provider"
import type { ApiHandlerOptions } from "../../shared/api"
import { getOllamaModels } from "./fetchers/ollama"
import { XmlMatcher } from "../../utils/xml-matcher"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { t } from "../../i18n"

const TOKEN_ESTIMATION_FACTOR = 4 // Industry standard technique for estimating token counts without actually implementing a parser/tokenizer

function estimateOllamaTokenCount(messages: Message[]): number {
const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
}

function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessageParam[]): Message[] {
const ollamaMessages: Message[] = []
Expand Down Expand Up @@ -131,10 +139,20 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
protected options: ApiHandlerOptions
private client: Ollama | undefined
protected models: Record<string, ModelInfo> = {}
private isInitialized = false

constructor(options: ApiHandlerOptions) {
super()
this.options = options
this.initialize()
}

private async initialize(): Promise<void> {
if (this.isInitialized) {
return
}
await this.fetchModel()
this.isInitialized = true
}

private ensureClient(): Ollama {
@@ -154,7 +172,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

this.client = new Ollama(clientOptions)
} catch (error: any) {
throw new Error(`Error creating Ollama client: ${error.message}`)
throw new Error(t("common:errors.ollama.clientCreationError", { error: error.message }))
}
}
return this.client
@@ -165,15 +183,27 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
if (!this.isInitialized) {
await this.initialize()
}

const client = this.ensureClient()
const { id: modelId, info: modelInfo } = await this.fetchModel()
const { id: modelId, info: modelInfo } = this.getModel()
const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

const ollamaMessages: Message[] = [
{ role: "system", content: systemPrompt },
...convertToOllamaMessages(messages),
]

// Check if the estimated token count exceeds the model's limit
const estimatedTokenCount = estimateOllamaTokenCount(ollamaMessages)
if (modelInfo.maxTokens && estimatedTokenCount > modelInfo.maxTokens) {
throw new Error(
t("common:errors.ollama.inputTooLong", { estimatedTokenCount, maxTokens: modelInfo.maxTokens }),
)
}

const matcher = new XmlMatcher(
"think",
(chunk) =>
@@ -190,7 +220,6 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
messages: ollamaMessages,
stream: true,
options: {
num_ctx: modelInfo.contextWindow,
temperature: this.options.modelTemperature ?? (useR1Format ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0),
},
})
@@ -233,7 +262,11 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
}
} catch (streamError: any) {
console.error("Error processing Ollama stream:", streamError)
throw new Error(`Ollama stream processing error: ${streamError.message || "Unknown error"}`)
throw new Error(
t("common:errors.ollama.streamProcessingError", {
error: streamError.message || t("common:errors.ollama.unknownError"),
}),
)
}
} catch (error: any) {
// Enhance error reporting
@@ -242,12 +275,12 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

if (error.code === "ECONNREFUSED") {
throw new Error(
`Ollama service is not running at ${this.options.ollamaBaseUrl || "http://localhost:11434"}. Please start Ollama first.`,
t("common:errors.ollama.serviceNotRunning", {
baseUrl: this.options.ollamaBaseUrl || "http://localhost:11434",
}),
)
} else if (statusCode === 404) {
throw new Error(
`Model ${this.getModel().id} not found in Ollama. Please pull the model first with: ollama pull ${this.getModel().id}`,
)
throw new Error(t("common:errors.ollama.modelNotFound", { modelId: this.getModel().id }))
}

console.error(`Ollama API error (${statusCode || "unknown"}): ${errorMessage}`)
@@ -262,16 +295,34 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio

override getModel(): { id: string; info: ModelInfo } {
const modelId = this.options.ollamaModelId || ""

const modelInfo = this.models[modelId]
if (!modelInfo) {
const availableModels = Object.keys(this.models)
const errorMessage =
availableModels.length > 0
? t("common:errors.ollama.modelNotFoundWithAvailable", {
modelId,
availableModels: availableModels.join(", "),
})
: t("common:errors.ollama.modelNotFoundNoModels", { modelId })
throw new Error(errorMessage)
}

return {
id: modelId,
info: this.models[modelId] || openAiModelInfoSaneDefaults,
info: modelInfo,
}
}

async completePrompt(prompt: string): Promise<string> {
try {
if (!this.isInitialized) {
await this.initialize()
}

const client = this.ensureClient()
const { id: modelId } = await this.fetchModel()
const { id: modelId } = this.getModel()
const useR1Format = modelId.toLowerCase().includes("deepseek-r1")

const response = await client.chat({
@@ -286,7 +337,7 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
return response.message?.content || ""
} catch (error) {
if (error instanceof Error) {
throw new Error(`Ollama completion error: ${error.message}`)
throw new Error(t("common:errors.ollama.completionError", { error: error.message }))
}
throw error
}
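The pre-flight length check added in createMessage relies on the rough 4-characters-per-token heuristic above rather than a real tokenizer. A minimal sketch of that guard in isolation, with a simplified message type (SimpleMessage stands in for the Message type from the ollama package):

```ts
// Sketch only: simplified types, same heuristic as estimateOllamaTokenCount above.
interface SimpleMessage {
	role: string
	content?: string
}

const TOKEN_ESTIMATION_FACTOR = 4 // roughly 4 characters per token

function estimateTokenCount(messages: SimpleMessage[]): number {
	const totalChars = messages.reduce((acc, msg) => acc + (msg.content?.length || 0), 0)
	return Math.ceil(totalChars / TOKEN_ESTIMATION_FACTOR)
}

function assertWithinContext(messages: SimpleMessage[], maxTokens: number): void {
	const estimated = estimateTokenCount(messages)
	if (estimated > maxTokens) {
		// The handler raises a translated error here; a plain Error keeps the sketch self-contained.
		throw new Error(`Estimated ${estimated} tokens exceeds the model limit of ${maxTokens}`)
	}
}

// Example: a short prompt fits comfortably in a 4096-token window.
assertWithinContext([{ role: "user", content: "Hello, Ollama!" }], 4096)
```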
11 changes: 11 additions & 0 deletions src/i18n/locales/ca/common.json


23 changes: 17 additions & 6 deletions src/i18n/locales/de/common.json
