
Commit fc3a003

thecolorblue authored and daniel-lxs committed
fix context length for lmstudio and ollama (#2462)
1 parent 347a292 commit fc3a003

File tree

14 files changed: +254 −32 lines changed

packages/types/src/providers/index.ts

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ export * from "./groq.js"
 export * from "./lite-llm.js"
 export * from "./lm-studio.js"
 export * from "./mistral.js"
+export * from "./ollama.js"
 export * from "./openai.js"
 export * from "./openrouter.js"
 export * from "./requesty.js"
packages/types/src/providers/lm-studio.ts

Lines changed: 18 additions & 0 deletions

@@ -1 +1,19 @@
+import type { ModelInfo } from "../model.js"
+
 export const LMSTUDIO_DEFAULT_TEMPERATURE = 0
+
+// LM Studio
+// https://lmstudio.ai/docs/cli/ls
+export const lMStudioDefaultModelId = "mistralai/devstral-small-2505"
+export const lMStudioDefaultModelInfo: ModelInfo = {
+	maxTokens: 8192,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsComputerUse: true,
+	supportsPromptCache: true,
+	inputPrice: 0,
+	outputPrice: 0,
+	cacheWritesPrice: 0,
+	cacheReadsPrice: 0,
+	description: "LM Studio hosted models",
+}
packages/types/src/providers/ollama.ts

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+import type { ModelInfo } from "../model.js"
+
+// Ollama
+// https://ollama.com/models
+export const ollamaDefaultModelId = "devstral:24b"
+export const ollamaDefaultModelInfo: ModelInfo = {
+	maxTokens: 4096,
+	contextWindow: 200_000,
+	supportsImages: true,
+	supportsComputerUse: true,
+	supportsPromptCache: true,
+	inputPrice: 0,
+	outputPrice: 0,
+	cacheWritesPrice: 0,
+	cacheReadsPrice: 0,
+	description: "Ollama hosted models",
+}

pnpm-lock.yaml

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default.
src/api/providers/fetchers/lmstudio.ts

Lines changed: 52 additions & 0 deletions

@@ -0,0 +1,52 @@
+import { ModelInfo, lMStudioDefaultModelInfo } from "@roo-code/types"
+import { LLMInfo, LMStudioClient } from "@lmstudio/sdk"
+import axios from "axios"
+
+export const parseLMStudioModel = (rawModel: LLMInfo): ModelInfo => {
+	const modelInfo: ModelInfo = Object.assign({}, lMStudioDefaultModelInfo, {
+		description: `${rawModel.displayName} - ${rawModel.paramsString} - ${rawModel.path}`,
+		contextWindow: rawModel.maxContextLength,
+		supportsPromptCache: true,
+		supportsImages: rawModel.vision,
+		supportsComputerUse: false,
+		maxTokens: rawModel.maxContextLength,
+	})
+
+	return modelInfo
+}
+
+export async function getLMStudioModels(baseUrl = "http://localhost:1234"): Promise<Record<string, ModelInfo>> {
+	// clearing the input can leave an empty string; use the default in that case
+	baseUrl = baseUrl === "" ? "http://localhost:1234" : baseUrl
+
+	const models: Record<string, ModelInfo> = {}
+	// ws is required to connect using the LMStudio library
+	const lmsUrl = baseUrl.replace(/^http:\/\//, "ws://").replace(/^https:\/\//, "wss://")
+
+	try {
+		if (!URL.canParse(lmsUrl)) {
+			return models
+		}
+
+		// test the connection to LM Studio first
+		// errors will be caught further down
+		await axios.get(`${baseUrl}/v1/models`)
+
+		const client = new LMStudioClient({ baseUrl: lmsUrl })
+		const response = (await client.system.listDownloadedModels()) as Array<LLMInfo>
+
+		for (const lmstudioModel of response) {
+			models[lmstudioModel.modelKey] = parseLMStudioModel(lmstudioModel)
+		}
+	} catch (error) {
+		if (error.code === "ECONNREFUSED") {
+			console.error(`Error connecting to LMStudio at ${baseUrl}`)
+		} else {
+			console.error(
+				`Error fetching LMStudio models: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`,
+			)
+		}
+	}
+
+	return models
+}
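
For context, a minimal usage sketch of the new fetcher; the base URL is the same default assumed above, and the logging helper is illustrative, not part of this change:

import { getLMStudioModels } from "./lmstudio"

// Hypothetical helper: list downloaded models with the context window LM Studio reports for each.
async function logLMStudioContextWindows() {
	const models = await getLMStudioModels("http://localhost:1234")
	for (const [key, info] of Object.entries(models)) {
		console.log(`${key}: contextWindow=${info.contextWindow}, maxTokens=${info.maxTokens}`)
	}
}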

src/api/providers/fetchers/modelCache.ts

Lines changed: 9 additions & 0 deletions
@@ -14,6 +14,9 @@ import { getGlamaModels } from "./glama"
 import { getUnboundModels } from "./unbound"
 import { getLiteLLMModels } from "./litellm"
 import { GetModelsOptions } from "../../../shared/api"
+import { getOllamaModels } from "./ollama"
+import { getLMStudioModels } from "./lmstudio"
+
 const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })

 async function writeModels(router: RouterName, data: ModelRecord) {
@@ -68,6 +71,12 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord>
 			// Type safety ensures apiKey and baseUrl are always provided for litellm
 			models = await getLiteLLMModels(options.apiKey, options.baseUrl)
 			break
+		case "ollama":
+			models = await getOllamaModels(options.baseUrl)
+			break
+		case "lmstudio":
+			models = await getLMStudioModels(options.baseUrl)
+			break
 		default: {
 			// Ensures router is exhaustively checked if RouterName is a strict union
 			const exhaustiveCheck: never = provider
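
A hedged sketch of how the new switch cases might be reached; the exact shape of GetModelsOptions is not shown in this diff, so the provider and baseUrl fields below are inferred from the case bodies above:

// Hypothetical call sites; option names are inferred from the hunk above, not confirmed by this diff.
async function refreshLocalProviderModels() {
	const ollama = await getModels({ provider: "ollama", baseUrl: "http://localhost:11434" })
	const lmStudio = await getModels({ provider: "lmstudio", baseUrl: "http://localhost:1234" })
	return { ollama, lmStudio }
}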
src/api/providers/fetchers/ollama.ts

Lines changed: 97 additions & 0 deletions

@@ -0,0 +1,97 @@
+import axios from "axios"
+import { ModelInfo, ollamaDefaultModelInfo } from "@roo-code/types"
+import { z } from "zod"
+
+const OllamaModelDetailsSchema = z.object({
+	family: z.string(),
+	families: z.array(z.string()),
+	format: z.string(),
+	parameter_size: z.string(),
+	parent_model: z.string(),
+	quantization_level: z.string(),
+})
+
+const OllamaModelSchema = z.object({
+	details: OllamaModelDetailsSchema,
+	digest: z.string(),
+	model: z.string(),
+	modified_at: z.string(),
+	name: z.string(),
+	size: z.number(),
+})
+
+const OllamaModelInfoResponseSchema = z.object({
+	modelfile: z.string(),
+	parameters: z.string(),
+	template: z.string(),
+	details: OllamaModelDetailsSchema,
+	model_info: z.record(z.string(), z.any()),
+	capabilities: z.array(z.string()).optional(),
+})
+
+const OllamaModelsResponseSchema = z.object({
+	models: z.array(OllamaModelSchema),
+})
+
+type OllamaModelsResponse = z.infer<typeof OllamaModelsResponseSchema>
+
+type OllamaModelInfoResponse = z.infer<typeof OllamaModelInfoResponseSchema>
+
+export const parseOllamaModel = (rawModel: OllamaModelInfoResponse): ModelInfo => {
+	const contextKey = Object.keys(rawModel.model_info).find((k) => k.includes("context_length"))
+	const contextWindow = contextKey ? rawModel.model_info[contextKey] : undefined
+
+	const modelInfo: ModelInfo = Object.assign({}, ollamaDefaultModelInfo, {
+		description: `Family: ${rawModel.details.family}, Context: ${contextWindow}, Size: ${rawModel.details.parameter_size}`,
+		contextWindow: contextWindow || ollamaDefaultModelInfo.contextWindow,
+		supportsPromptCache: true,
+		supportsImages: rawModel.capabilities?.includes("vision"),
+		supportsComputerUse: false,
+		maxTokens: contextWindow || ollamaDefaultModelInfo.contextWindow,
+	})
+
+	return modelInfo
+}
+
+export async function getOllamaModels(baseUrl = "http://localhost:11434"): Promise<Record<string, ModelInfo>> {
+	const models: Record<string, ModelInfo> = {}
+
+	// clearing the input can leave an empty string; use the default in that case
+	baseUrl = baseUrl === "" ? "http://localhost:11434" : baseUrl
+
+	try {
+		if (!URL.canParse(baseUrl)) {
+			return models
+		}
+
+		const response = await axios.get<OllamaModelsResponse>(`${baseUrl}/api/tags`)
+		const parsedResponse = OllamaModelsResponseSchema.safeParse(response.data)
+		let modelInfoPromises = []
+
+		if (parsedResponse.success) {
+			for (const ollamaModel of parsedResponse.data.models) {
+				modelInfoPromises.push(
+					axios
+						.post<OllamaModelInfoResponse>(`${baseUrl}/api/show`, {
+							model: ollamaModel.model,
+						})
+						.then((ollamaModelInfo) => {
+							models[ollamaModel.name] = parseOllamaModel(ollamaModelInfo.data)
+						}),
+				)
+			}
+
+			await Promise.all(modelInfoPromises)
+		} else {
+			console.error(`Error parsing Ollama models response: ${JSON.stringify(parsedResponse.error, null, 2)}`)
+		}
+	} catch (error) {
+		if (error.code === "ECONNREFUSED") {
+			console.info(`Failed connecting to Ollama at ${baseUrl}`)
+		} else {
+			console.warn(`Error fetching Ollama models: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`)
+		}
+	}
+
+	return models
+}
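
To illustrate the context-length lookup in parseOllamaModel, a sketch with a made-up /api/show payload; the "llama.context_length" key and all field values are assumptions for illustration only:

// Hypothetical Ollama /api/show response fragment (values invented for illustration).
const sample = {
	modelfile: "",
	parameters: "",
	template: "",
	details: {
		family: "llama",
		families: ["llama"],
		format: "gguf",
		parameter_size: "24B",
		parent_model: "",
		quantization_level: "Q4_K_M",
	},
	model_info: { "llama.context_length": 131072 },
	capabilities: ["completion", "vision"],
}

const info = parseOllamaModel(sample)
// info.contextWindow and info.maxTokens would both be 131072; supportsImages is true because "vision" is listed.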

src/api/providers/ollama.ts

Lines changed: 0 additions & 15 deletions
@@ -1,6 +1,5 @@
 import { Anthropic } from "@anthropic-ai/sdk"
 import OpenAI from "openai"
-import axios from "axios"

 import { type ModelInfo, openAiModelInfoSaneDefaults, DEEP_SEEK_DEFAULT_TEMPERATURE } from "@roo-code/types"

@@ -111,17 +110,3 @@ export class OllamaHandler extends BaseProvider implements SingleCompletionHandl
 		}
 	}
 }
-
-export async function getOllamaModels(baseUrl = "http://localhost:11434") {
-	try {
-		if (!URL.canParse(baseUrl)) {
-			return []
-		}
-
-		const response = await axios.get(`${baseUrl}/api/tags`)
-		const modelsArray = response.data?.models?.map((model: any) => model.name) || []
-		return [...new Set<string>(modelsArray)]
-	} catch (error) {
-		return []
-	}
-}

src/core/webview/webviewMessageHandler.ts

Lines changed: 14 additions & 12 deletions
@@ -29,9 +29,7 @@ import { singleCompletionHandler } from "../../utils/single-completion-handler"
 import { searchCommits } from "../../utils/git"
 import { exportSettings, importSettings } from "../config/importExport"
 import { getOpenAiModels } from "../../api/providers/openai"
-import { getOllamaModels } from "../../api/providers/ollama"
 import { getVsCodeLmModels } from "../../api/providers/vscode-lm"
-import { getLmStudioModels } from "../../api/providers/lm-studio"
 import { openMention } from "../mentions"
 import { TelemetrySetting } from "../../shared/TelemetrySetting"
 import { getWorkspacePath } from "../../utils/path"
@@ -379,6 +377,19 @@ export const webviewMessageHandler = async (

 				if (result.status === "fulfilled") {
 					fetchedRouterModels[routerName] = result.value.models
+
+					// Ollama and LM Studio settings pages still need these events
+					if (routerName === "ollama" && Object.keys(result.value.models).length > 0) {
+						provider.postMessageToWebview({
+							type: "ollamaModels",
+							ollamaModels: Object.keys(result.value.models),
+						})
+					} else if (routerName === "lmstudio" && Object.keys(result.value.models).length > 0) {
+						provider.postMessageToWebview({
+							type: "lmStudioModels",
+							lmStudioModels: Object.keys(result.value.models),
+						})
+					}
 				} else {
 					// Handle rejection: Post a specific error message for this provider
 					const errorMessage = result.reason instanceof Error ? result.reason.message : String(result.reason)
@@ -399,6 +410,7 @@
 				type: "routerModels",
 				routerModels: fetchedRouterModels as Record<RouterName, ModelRecord>,
 			})
+
 			break
 		case "requestOpenAiModels":
 			if (message?.values?.baseUrl && message?.values?.apiKey) {
@@ -411,16 +423,6 @@
 				provider.postMessageToWebview({ type: "openAiModels", openAiModels })
 			}

-			break
-		case "requestOllamaModels":
-			const ollamaModels = await getOllamaModels(message.text)
-			// TODO: Cache like we do for OpenRouter, etc?
-			provider.postMessageToWebview({ type: "ollamaModels", ollamaModels })
-			break
-		case "requestLmStudioModels":
-			const lmStudioModels = await getLmStudioModels(message.text)
-			// TODO: Cache like we do for OpenRouter, etc?
-			provider.postMessageToWebview({ type: "lmStudioModels", lmStudioModels })
 			break
 		case "requestVsCodeLmModels":
 			const vsCodeLmModels = await getVsCodeLmModels()
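
The two messages kept alive above carry plain lists of model names; a sketch of the payload shapes implied by the postMessageToWebview calls (the type aliases themselves are illustrative):

// Shapes implied by the handler above; the alias names are not from the codebase.
type OllamaModelsMessage = { type: "ollamaModels"; ollamaModels: string[] }
type LmStudioModelsMessage = { type: "lmStudioModels"; lmStudioModels: string[] }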

src/package.json

Lines changed: 1 addition & 0 deletions
@@ -369,6 +369,7 @@
 		"@aws-sdk/client-bedrock-runtime": "^3.779.0",
 		"@aws-sdk/credential-providers": "^3.806.0",
 		"@google/genai": "^1.0.0",
+		"@lmstudio/sdk": "^1.1.1",
 		"@mistralai/mistralai": "^1.3.6",
 		"@modelcontextprotocol/sdk": "^1.9.0",
 		"@qdrant/js-client-rest": "^1.14.0",
