From d6b8b4bebceab76a61b491f021e8c86f7a6470f0 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 23 Jun 2025 11:27:07 -0500 Subject: [PATCH 1/4] fix: improve LM Studio model detection to show all downloaded models - Change from listLoaded() to listDownloadedModels() to fetch all available models - Add fallback to listLoaded() for backward compatibility - Fix context length detection for both loaded and downloaded model types - Update tests to cover new functionality This fixes the issue where only the currently loaded model was visible and users had to restart VSCode to see newly downloaded models. --- .../fetchers/__tests__/lmstudio.test.ts | 42 ++++++++++++++++++- src/api/providers/fetchers/lmstudio.ts | 32 ++++++++++---- 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/src/api/providers/fetchers/__tests__/lmstudio.test.ts b/src/api/providers/fetchers/__tests__/lmstudio.test.ts index 59b4388785..98fe5db32e 100644 --- a/src/api/providers/fetchers/__tests__/lmstudio.test.ts +++ b/src/api/providers/fetchers/__tests__/lmstudio.test.ts @@ -1,6 +1,6 @@ import axios from "axios" import { vi, describe, it, expect, beforeEach } from "vitest" -import { LMStudioClient, LLM, LLMInstanceInfo } from "@lmstudio/sdk" // LLMInfo is a type +import { LMStudioClient, LLM, LLMInstanceInfo, LLMInfo } from "@lmstudio/sdk" import { getLMStudioModels, parseLMStudioModel } from "../lmstudio" import { ModelInfo, lMStudioDefaultModelInfo } from "@roo-code/types" // ModelInfo is a type @@ -11,12 +11,16 @@ const mockedAxios = axios as any // Mock @lmstudio/sdk const mockGetModelInfo = vi.fn() const mockListLoaded = vi.fn() +const mockListDownloadedModels = vi.fn() vi.mock("@lmstudio/sdk", () => { return { LMStudioClient: vi.fn().mockImplementation(() => ({ llm: { listLoaded: mockListLoaded, }, + system: { + listDownloadedModels: mockListDownloadedModels, + }, })), } }) @@ -28,6 +32,7 @@ describe("LMStudio Fetcher", () => { MockedLMStudioClientConstructor.mockClear() 
mockListLoaded.mockClear() mockGetModelInfo.mockClear() + mockListDownloadedModels.mockClear() }) describe("parseLMStudioModel", () => { @@ -88,8 +93,40 @@ describe("LMStudio Fetcher", () => { trainedForToolUse: false, // Added } - it("should fetch and parse models successfully", async () => { + it("should fetch downloaded models using system.listDownloadedModels", async () => { + const mockLLMInfo: LLMInfo = { + type: "llm" as const, + modelKey: "mistralai/devstral-small-2505", + format: "safetensors", + displayName: "Devstral Small 2505", + path: "mistralai/devstral-small-2505", + sizeBytes: 13277565112, + architecture: "mistral", + vision: false, + trainedForToolUse: false, + maxContextLength: 131072, + } + + mockedAxios.get.mockResolvedValueOnce({ data: { status: "ok" } }) + mockListDownloadedModels.mockResolvedValueOnce([mockLLMInfo]) + + const result = await getLMStudioModels(baseUrl) + + expect(mockedAxios.get).toHaveBeenCalledTimes(1) + expect(mockedAxios.get).toHaveBeenCalledWith(`${baseUrl}/v1/models`) + expect(MockedLMStudioClientConstructor).toHaveBeenCalledTimes(1) + expect(MockedLMStudioClientConstructor).toHaveBeenCalledWith({ baseUrl: lmsUrl }) + expect(mockListDownloadedModels).toHaveBeenCalledTimes(1) + expect(mockListDownloadedModels).toHaveBeenCalledWith("llm") + expect(mockListLoaded).not.toHaveBeenCalled() + + const expectedParsedModel = parseLMStudioModel(mockLLMInfo) + expect(result).toEqual({ [mockLLMInfo.path]: expectedParsedModel }) + }) + + it("should fall back to listLoaded when listDownloadedModels fails", async () => { mockedAxios.get.mockResolvedValueOnce({ data: { status: "ok" } }) + mockListDownloadedModels.mockRejectedValueOnce(new Error("Method not available")) mockListLoaded.mockResolvedValueOnce([{ getModelInfo: mockGetModelInfo }]) mockGetModelInfo.mockResolvedValueOnce(mockRawModel) @@ -99,6 +136,7 @@ describe("LMStudio Fetcher", () => { expect(mockedAxios.get).toHaveBeenCalledWith(`${baseUrl}/v1/models`) 
expect(MockedLMStudioClientConstructor).toHaveBeenCalledTimes(1) expect(MockedLMStudioClientConstructor).toHaveBeenCalledWith({ baseUrl: lmsUrl }) + expect(mockListDownloadedModels).toHaveBeenCalledTimes(1) expect(mockListLoaded).toHaveBeenCalledTimes(1) const expectedParsedModel = parseLMStudioModel(mockRawModel) diff --git a/src/api/providers/fetchers/lmstudio.ts b/src/api/providers/fetchers/lmstudio.ts index ea1a590f1e..4b7ece71ea 100644 --- a/src/api/providers/fetchers/lmstudio.ts +++ b/src/api/providers/fetchers/lmstudio.ts @@ -2,14 +2,17 @@ import { ModelInfo, lMStudioDefaultModelInfo } from "@roo-code/types" import { LLM, LLMInfo, LLMInstanceInfo, LMStudioClient } from "@lmstudio/sdk" import axios from "axios" -export const parseLMStudioModel = (rawModel: LLMInstanceInfo): ModelInfo => { +export const parseLMStudioModel = (rawModel: LLMInstanceInfo | LLMInfo): ModelInfo => { + // Handle both LLMInstanceInfo (from loaded models) and LLMInfo (from downloaded models) + const contextLength = "contextLength" in rawModel ? 
rawModel.contextLength : rawModel.maxContextLength + const modelInfo: ModelInfo = Object.assign({}, lMStudioDefaultModelInfo, { description: `${rawModel.displayName} - ${rawModel.path}`, - contextWindow: rawModel.contextLength, + contextWindow: contextLength, supportsPromptCache: true, supportsImages: rawModel.vision, supportsComputerUse: false, - maxTokens: rawModel.contextLength, + maxTokens: contextLength, }) return modelInfo @@ -33,12 +36,25 @@ export async function getLMStudioModels(baseUrl = "http://localhost:1234"): Prom await axios.get(`${baseUrl}/v1/models`) const client = new LMStudioClient({ baseUrl: lmsUrl }) - const response = (await client.llm.listLoaded().then((models: LLM[]) => { - return Promise.all(models.map((m) => m.getModelInfo())) - })) as Array<LLMInstanceInfo> - for (const lmstudioModel of response) { - models[lmstudioModel.modelKey] = parseLMStudioModel(lmstudioModel) + // First, try to get all downloaded models + try { + const downloadedModels = await client.system.listDownloadedModels("llm") + for (const model of downloadedModels) { + // Use the model path as the key since that's what users select + models[model.path] = parseLMStudioModel(model) + } + } catch (error) { + console.warn("Failed to list downloaded models, falling back to loaded models only") + + // Fall back to listing only loaded models + const loadedModels = (await client.llm.listLoaded().then((models: LLM[]) => { + return Promise.all(models.map((m) => m.getModelInfo())) + })) as Array<LLMInstanceInfo> + + for (const lmstudioModel of loadedModels) { + models[lmstudioModel.modelKey] = parseLMStudioModel(lmstudioModel) + } } } catch (error) { if (error.code === "ECONNREFUSED") { From d3b6a932222a11bd8a845b3ef3c23ad3739a8ad8 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 23 Jun 2025 11:43:49 -0500 Subject: [PATCH 2/4] feat: add model refresh functionality on mount for LMStudio and Ollama components --- .../components/settings/providers/LMStudio.tsx | 18 +++++++++++++++++- 
.../components/settings/providers/Ollama.tsx | 18 +++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/webview-ui/src/components/settings/providers/LMStudio.tsx b/webview-ui/src/components/settings/providers/LMStudio.tsx index 17af44871b..50bb9edf8c 100644 --- a/webview-ui/src/components/settings/providers/LMStudio.tsx +++ b/webview-ui/src/components/settings/providers/LMStudio.tsx @@ -1,4 +1,4 @@ -import { useCallback, useState, useMemo } from "react" +import { useCallback, useState, useMemo, useEffect } from "react" import { useEvent } from "react-use" import { Trans } from "react-i18next" import { Checkbox } from "vscrui" @@ -9,6 +9,7 @@ import type { ProviderSettings } from "@roo-code/types" import { useAppTranslation } from "@src/i18n/TranslationContext" import { ExtensionMessage } from "@roo/ExtensionMessage" import { useRouterModels } from "@src/components/ui/hooks/useRouterModels" +import { vscode } from "@src/utils/vscode" import { inputEventTransform } from "../transforms" @@ -49,6 +50,21 @@ export const LMStudio = ({ apiConfiguration, setApiConfigurationField }: LMStudi useEvent("message", onMessage) + // Refresh models on mount + useEffect(() => { + // Request fresh models without flushing first + // This ensures cached models remain visible while new ones load + vscode.postMessage({ type: "requestRouterModels" }) + + // Optionally flush cache after a delay to ensure fresh data on next load + // This won't affect the current session since models are already being fetched + const timer = setTimeout(() => { + vscode.postMessage({ type: "flushRouterModels", text: "lmstudio" }) + }, 1000) + + return () => clearTimeout(timer) + }, []) + // Check if the selected model exists in the fetched models const modelNotAvailable = useMemo(() => { const selectedModel = apiConfiguration?.lmStudioModelId diff --git a/webview-ui/src/components/settings/providers/Ollama.tsx b/webview-ui/src/components/settings/providers/Ollama.tsx index 
e118f68b46..9056a5b001 100644 --- a/webview-ui/src/components/settings/providers/Ollama.tsx +++ b/webview-ui/src/components/settings/providers/Ollama.tsx @@ -1,4 +1,4 @@ -import { useState, useCallback, useMemo } from "react" +import { useState, useCallback, useMemo, useEffect } from "react" import { useEvent } from "react-use" import { VSCodeTextField, VSCodeRadioGroup, VSCodeRadio } from "@vscode/webview-ui-toolkit/react" @@ -8,6 +8,7 @@ import { ExtensionMessage } from "@roo/ExtensionMessage" import { useAppTranslation } from "@src/i18n/TranslationContext" import { useRouterModels } from "@src/components/ui/hooks/useRouterModels" +import { vscode } from "@src/utils/vscode" import { inputEventTransform } from "../transforms" @@ -48,6 +49,21 @@ export const Ollama = ({ apiConfiguration, setApiConfigurationField }: OllamaPro useEvent("message", onMessage) + // Refresh models on mount + useEffect(() => { + // Request fresh models without flushing first + // This ensures cached models remain visible while new ones load + vscode.postMessage({ type: "requestRouterModels" }) + + // Optionally flush cache after a delay to ensure fresh data on next load + // This won't affect the current session since models are already being fetched + const timer = setTimeout(() => { + vscode.postMessage({ type: "flushRouterModels", text: "ollama" }) + }, 1000) + + return () => clearTimeout(timer) + }, []) + // Check if the selected model exists in the fetched models const modelNotAvailable = useMemo(() => { const selectedModel = apiConfiguration?.ollamaModelId From 1747b29086e17dc0b58ea3dba0597e3a7199782e Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 23 Jun 2025 13:34:25 -0500 Subject: [PATCH 3/4] fix: update model refresh logic to flush cache before requesting fresh models for LMStudio and Ollama components --- .../src/components/settings/providers/LMStudio.tsx | 12 +++++------- .../src/components/settings/providers/Ollama.tsx | 12 +++++------- 2 files changed, 10 
insertions(+), 14 deletions(-) diff --git a/webview-ui/src/components/settings/providers/LMStudio.tsx b/webview-ui/src/components/settings/providers/LMStudio.tsx index 50bb9edf8c..653940adc3 100644 --- a/webview-ui/src/components/settings/providers/LMStudio.tsx +++ b/webview-ui/src/components/settings/providers/LMStudio.tsx @@ -52,15 +52,13 @@ export const LMStudio = ({ apiConfiguration, setApiConfigurationField }: LMStudi // Refresh models on mount useEffect(() => { - // Request fresh models without flushing first - // This ensures cached models remain visible while new ones load - vscode.postMessage({ type: "requestRouterModels" }) + // Flush cache first to ensure we get fresh models + vscode.postMessage({ type: "flushRouterModels", text: "lmstudio" }) - // Optionally flush cache after a delay to ensure fresh data on next load - // This won't affect the current session since models are already being fetched + // Request fresh LM Studio models after a small delay to ensure cache is flushed const timer = setTimeout(() => { - vscode.postMessage({ type: "flushRouterModels", text: "lmstudio" }) - }, 1000) + vscode.postMessage({ type: "requestLmStudioModels" }) + }, 100) return () => clearTimeout(timer) }, []) diff --git a/webview-ui/src/components/settings/providers/Ollama.tsx b/webview-ui/src/components/settings/providers/Ollama.tsx index 9056a5b001..1639497ca9 100644 --- a/webview-ui/src/components/settings/providers/Ollama.tsx +++ b/webview-ui/src/components/settings/providers/Ollama.tsx @@ -51,15 +51,13 @@ export const Ollama = ({ apiConfiguration, setApiConfigurationField }: OllamaPro // Refresh models on mount useEffect(() => { - // Request fresh models without flushing first - // This ensures cached models remain visible while new ones load - vscode.postMessage({ type: "requestRouterModels" }) + // Flush cache first to ensure we get fresh models + vscode.postMessage({ type: "flushRouterModels", text: "ollama" }) - // Optionally flush cache after a delay to 
ensure fresh data on next load - // This won't affect the current session since models are already being fetched + // Request fresh Ollama models after a small delay to ensure cache is flushed const timer = setTimeout(() => { - vscode.postMessage({ type: "flushRouterModels", text: "ollama" }) - }, 1000) + vscode.postMessage({ type: "requestOllamaModels" }) + }, 100) return () => clearTimeout(timer) }, []) From b5ae4d3cf6e5bd1a5d511adc87cd163e2dadf5f1 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 23 Jun 2025 13:41:45 -0500 Subject: [PATCH 4/4] fix: streamline model refresh by automatically flushing cache in handlers for LMStudio and Ollama components --- src/core/webview/webviewMessageHandler.ts | 6 ++++++ .../src/components/settings/providers/LMStudio.tsx | 11 ++--------- .../src/components/settings/providers/Ollama.tsx | 11 ++--------- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index f689196d79..8bf2f6b95a 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -448,6 +448,9 @@ export const webviewMessageHandler = async ( // Specific handler for Ollama models only const { apiConfiguration: ollamaApiConfig } = await provider.getState() try { + // Flush cache first to ensure fresh models + await flushModels("ollama") + const ollamaModels = await getModels({ provider: "ollama", baseUrl: ollamaApiConfig.ollamaBaseUrl, @@ -469,6 +472,9 @@ export const webviewMessageHandler = async ( // Specific handler for LM Studio models only const { apiConfiguration: lmStudioApiConfig } = await provider.getState() try { + // Flush cache first to ensure fresh models + await flushModels("lmstudio") + const lmStudioModels = await getModels({ provider: "lmstudio", baseUrl: lmStudioApiConfig.lmStudioBaseUrl, diff --git a/webview-ui/src/components/settings/providers/LMStudio.tsx 
b/webview-ui/src/components/settings/providers/LMStudio.tsx index 653940adc3..a907e43e1b 100644 --- a/webview-ui/src/components/settings/providers/LMStudio.tsx +++ b/webview-ui/src/components/settings/providers/LMStudio.tsx @@ -52,15 +52,8 @@ export const LMStudio = ({ apiConfiguration, setApiConfigurationField }: LMStudi // Refresh models on mount useEffect(() => { - // Flush cache first to ensure we get fresh models - vscode.postMessage({ type: "flushRouterModels", text: "lmstudio" }) - - // Request fresh LM Studio models after a small delay to ensure cache is flushed - const timer = setTimeout(() => { - vscode.postMessage({ type: "requestLmStudioModels" }) - }, 100) - - return () => clearTimeout(timer) + // Request fresh models - the handler now flushes cache automatically + vscode.postMessage({ type: "requestLmStudioModels" }) }, []) // Check if the selected model exists in the fetched models diff --git a/webview-ui/src/components/settings/providers/Ollama.tsx b/webview-ui/src/components/settings/providers/Ollama.tsx index 1639497ca9..263c3892f2 100644 --- a/webview-ui/src/components/settings/providers/Ollama.tsx +++ b/webview-ui/src/components/settings/providers/Ollama.tsx @@ -51,15 +51,8 @@ export const Ollama = ({ apiConfiguration, setApiConfigurationField }: OllamaPro // Refresh models on mount useEffect(() => { - // Flush cache first to ensure we get fresh models - vscode.postMessage({ type: "flushRouterModels", text: "ollama" }) - - // Request fresh Ollama models after a small delay to ensure cache is flushed - const timer = setTimeout(() => { - vscode.postMessage({ type: "requestOllamaModels" }) - }, 100) - - return () => clearTimeout(timer) + // Request fresh models - the handler now flushes cache automatically + vscode.postMessage({ type: "requestOllamaModels" }) }, []) // Check if the selected model exists in the fetched models