
Commit e7e827a

fix: prevent duplicate LM Studio models with case-insensitive deduplication (#7185)
* fix: prevent duplicate LM Studio models with case-insensitive deduplication

  - Keep both the listDownloadedModels and listLoaded APIs to support JIT loading
  - Implement case-insensitive deduplication to prevent duplicates
  - When duplicates are found, prefer the loaded model's data for accurate runtime info
  - Add test coverage for the deduplication logic
  - Addresses feedback about LM Studio's JIT Model Loading feature (v0.3.5+)

  Fixes #6954

* fix: correct deduplication logic to prefer loaded models

  - When a loaded model's ID is found in any downloaded model key (case-insensitive),
    remove the downloaded model and replace it with the loaded one
  - This ensures loaded models, which carry runtime info, take precedence
  - Updated tests to verify the corrected deduplication behavior

* fix: improve deduplication logic and add comprehensive test coverage

  - Enhanced deduplication to use path-segment matching instead of simple substring matching,
    preventing false positives like "llama" matching "codellama"
  - Added comprehensive test cases covering edge cases and multiple-model scenarios
  - Maintains support for the JIT Model Loading feature
1 parent a8aea14 commit e7e827a
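The heart of the fix is that a loaded model's key must line up with a whole path segment of a downloaded model's key, not just any substring. A minimal standalone sketch of that rule, in TypeScript (the isSameModel name and the example calls are illustrative only; in the actual change the predicate is inlined in src/api/providers/fetchers/lmstudio.ts, shown below):

function isSameModel(downloadedKey: string, loadedModelKey: string): boolean {
	const keyLower = downloadedKey.toLowerCase()
	const loadedModelId = loadedModelKey.toLowerCase()
	// Match only at path-segment boundaries, so "llama" cannot match inside "codellama"
	return (
		keyLower.includes(`/${loadedModelId}/`) ||
		keyLower.includes(`/${loadedModelId}`) ||
		keyLower.startsWith(`${loadedModelId}/`) ||
		keyLower === loadedModelId
	)
}

isSameModel("Meta/Llama-3.1/8B-Instruct", "Llama-3.1") // true: whole segment, case-insensitive
isSameModel("meta/codellama/7b", "llama") // false: "llama" is only part of "codellama"
isSameModel("mistralai/devstral-small-2505", "devstral-small-2505") // true: trailing segment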

File tree

2 files changed: +249 −1 lines changed

src/api/providers/fetchers/__tests__/lmstudio.test.ts

Lines changed: 222 additions & 0 deletions
@@ -143,6 +143,228 @@ describe("LMStudio Fetcher", () => {
 		expect(result).toEqual({ [mockRawModel.modelKey]: expectedParsedModel })
 	})
 
+	it("should deduplicate models when both downloaded and loaded", async () => {
+		const mockDownloadedModel: LLMInfo = {
+			type: "llm" as const,
+			modelKey: "mistralai/devstral-small-2505",
+			format: "safetensors",
+			displayName: "Devstral Small 2505",
+			path: "mistralai/devstral-small-2505",
+			sizeBytes: 13277565112,
+			architecture: "mistral",
+			vision: false,
+			trainedForToolUse: false,
+			maxContextLength: 131072,
+		}
+
+		const mockLoadedModel: LLMInstanceInfo = {
+			type: "llm",
+			modelKey: "devstral-small-2505", // Different key but should match case-insensitively
+			format: "safetensors",
+			displayName: "Devstral Small 2505",
+			path: "mistralai/devstral-small-2505",
+			sizeBytes: 13277565112,
+			architecture: "mistral",
+			identifier: "mistralai/devstral-small-2505",
+			instanceReference: "RAP5qbeHVjJgBiGFQ6STCuTJ",
+			vision: false,
+			trainedForToolUse: false,
+			maxContextLength: 131072,
+			contextLength: 7161, // Runtime context info
+		}
+
+		mockedAxios.get.mockResolvedValueOnce({ data: { status: "ok" } })
+		mockListDownloadedModels.mockResolvedValueOnce([mockDownloadedModel])
+		mockListLoaded.mockResolvedValueOnce([{ getModelInfo: vi.fn().mockResolvedValueOnce(mockLoadedModel) }])
+
+		const result = await getLMStudioModels(baseUrl)
+
+		// Should only have one model, with the loaded model replacing the downloaded one
+		expect(Object.keys(result)).toHaveLength(1)
+
+		// The loaded model's key should be used, with the loaded model's data
+		const expectedParsedModel = parseLMStudioModel(mockLoadedModel)
+		expect(result[mockLoadedModel.modelKey]).toEqual(expectedParsedModel)
+
+		// The downloaded model should have been removed
+		expect(result[mockDownloadedModel.path]).toBeUndefined()
+	})
+
+	it("should handle deduplication with path-based matching", async () => {
+		const mockDownloadedModel: LLMInfo = {
+			type: "llm" as const,
+			modelKey: "Meta/Llama-3.1/8B-Instruct",
+			format: "gguf",
+			displayName: "Llama 3.1 8B Instruct",
+			path: "Meta/Llama-3.1/8B-Instruct",
+			sizeBytes: 8000000000,
+			architecture: "llama",
+			vision: false,
+			trainedForToolUse: false,
+			maxContextLength: 8192,
+		}
+
+		const mockLoadedModel: LLMInstanceInfo = {
+			type: "llm",
+			modelKey: "Llama-3.1", // Should match the path segment
+			format: "gguf",
+			displayName: "Llama 3.1",
+			path: "Meta/Llama-3.1/8B-Instruct",
+			sizeBytes: 8000000000,
+			architecture: "llama",
+			identifier: "Meta/Llama-3.1/8B-Instruct",
+			instanceReference: "ABC123",
+			vision: false,
+			trainedForToolUse: false,
+			maxContextLength: 8192,
+			contextLength: 4096,
+		}
+
+		mockedAxios.get.mockResolvedValueOnce({ data: { status: "ok" } })
+		mockListDownloadedModels.mockResolvedValueOnce([mockDownloadedModel])
+		mockListLoaded.mockResolvedValueOnce([{ getModelInfo: vi.fn().mockResolvedValueOnce(mockLoadedModel) }])
+
+		const result = await getLMStudioModels(baseUrl)
+
+		expect(Object.keys(result)).toHaveLength(1)
+		expect(result[mockLoadedModel.modelKey]).toBeDefined()
+		expect(result[mockDownloadedModel.path]).toBeUndefined()
+	})
+
+	it("should not deduplicate models with similar but distinct names", async () => {
+		const mockDownloadedModels: LLMInfo[] = [
+			{
+				type: "llm" as const,
+				modelKey: "mistral-7b",
+				format: "gguf",
+				displayName: "Mistral 7B",
+				path: "mistralai/mistral-7b-instruct",
+				sizeBytes: 7000000000,
+				architecture: "mistral",
+				vision: false,
+				trainedForToolUse: false,
+				maxContextLength: 4096,
+			},
+			{
+				type: "llm" as const,
+				modelKey: "codellama",
+				format: "gguf",
+				displayName: "Code Llama",
+				path: "meta/codellama/7b",
+				sizeBytes: 7000000000,
+				architecture: "llama",
+				vision: false,
+				trainedForToolUse: false,
+				maxContextLength: 4096,
+			},
+		]
+
+		const mockLoadedModel: LLMInstanceInfo = {
+			type: "llm",
+			modelKey: "llama", // Should not match "codellama" or "mistral-7b"
+			format: "gguf",
+			displayName: "Llama",
+			path: "meta/llama/7b",
+			sizeBytes: 7000000000,
+			architecture: "llama",
+			identifier: "meta/llama/7b",
+			instanceReference: "XYZ789",
+			vision: false,
+			trainedForToolUse: false,
+			maxContextLength: 4096,
+			contextLength: 2048,
+		}
+
+		mockedAxios.get.mockResolvedValueOnce({ data: { status: "ok" } })
+		mockListDownloadedModels.mockResolvedValueOnce(mockDownloadedModels)
+		mockListLoaded.mockResolvedValueOnce([{ getModelInfo: vi.fn().mockResolvedValueOnce(mockLoadedModel) }])
+
+		const result = await getLMStudioModels(baseUrl)
+
+		// Should have 3 models: mistral-7b (not deduped), codellama (not deduped), and llama (loaded)
+		expect(Object.keys(result)).toHaveLength(3)
+		expect(result["mistralai/mistral-7b-instruct"]).toBeDefined() // Should NOT be removed
+		expect(result["meta/codellama/7b"]).toBeDefined() // Should NOT be removed (codellama != llama)
+		expect(result[mockLoadedModel.modelKey]).toBeDefined()
+	})
+
+	it("should handle multiple loaded models with various duplicate scenarios", async () => {
+		const mockDownloadedModels: LLMInfo[] = [
+			{
+				type: "llm" as const,
+				modelKey: "mistral-7b",
+				format: "gguf",
+				displayName: "Mistral 7B",
+				path: "mistralai/mistral-7b/instruct",
+				sizeBytes: 7000000000,
+				architecture: "mistral",
+				vision: false,
+				trainedForToolUse: false,
+				maxContextLength: 8192,
+			},
+			{
+				type: "llm" as const,
+				modelKey: "llama-3.1",
+				format: "gguf",
+				displayName: "Llama 3.1",
+				path: "meta/llama-3.1/8b",
+				sizeBytes: 8000000000,
+				architecture: "llama",
+				vision: false,
+				trainedForToolUse: false,
+				maxContextLength: 8192,
+			},
+		]
+
+		const mockLoadedModels: LLMInstanceInfo[] = [
+			{
+				type: "llm",
+				modelKey: "mistral-7b", // Exact match with path segment
+				format: "gguf",
+				displayName: "Mistral 7B",
+				path: "mistralai/mistral-7b/instruct",
+				sizeBytes: 7000000000,
+				architecture: "mistral",
+				identifier: "mistralai/mistral-7b/instruct",
+				instanceReference: "REF1",
+				vision: false,
+				trainedForToolUse: false,
+				maxContextLength: 8192,
+				contextLength: 4096,
+			},
+			{
+				type: "llm",
+				modelKey: "gpt-4", // No match, new model
+				format: "gguf",
+				displayName: "GPT-4",
+				path: "openai/gpt-4",
+				sizeBytes: 10000000000,
+				architecture: "gpt",
+				identifier: "openai/gpt-4",
+				instanceReference: "REF2",
+				vision: true,
+				trainedForToolUse: true,
+				maxContextLength: 32768,
+				contextLength: 16384,
+			},
+		]
+
+		mockedAxios.get.mockResolvedValueOnce({ data: { status: "ok" } })
+		mockListDownloadedModels.mockResolvedValueOnce(mockDownloadedModels)
+		mockListLoaded.mockResolvedValueOnce(
+			mockLoadedModels.map((model) => ({ getModelInfo: vi.fn().mockResolvedValueOnce(model) })),
+		)
+
+		const result = await getLMStudioModels(baseUrl)
+
+		// Should have 3 models: llama-3.1 (downloaded), mistral-7b (loaded, replaced), gpt-4 (loaded, new)
+		expect(Object.keys(result)).toHaveLength(3)
+		expect(result["meta/llama-3.1/8b"]).toBeDefined() // Downloaded, not replaced
+		expect(result["mistralai/mistral-7b/instruct"]).toBeUndefined() // Downloaded, replaced
+		expect(result["mistral-7b"]).toBeDefined() // Loaded, replaced downloaded
+		expect(result["gpt-4"]).toBeDefined() // Loaded, new
+	})
+
 	it("should use default baseUrl if an empty string is provided", async () => {
 		const defaultBaseUrl = "http://localhost:1234"
 		const defaultLmsUrl = "ws://localhost:1234"

src/api/providers/fetchers/lmstudio.ts

Lines changed: 27 additions & 1 deletion
@@ -81,12 +81,38 @@ export async function getLMStudioModels(baseUrl = "http://localhost:1234"): Prom
 	} catch (error) {
 		console.warn("Failed to list downloaded models, falling back to loaded models only")
 	}
-	// We want to list loaded models *anyway* since they provide valuable extra info (context size)
+
+	// Get loaded models for their runtime info (context size)
 	const loadedModels = (await client.llm.listLoaded().then((models: LLM[]) => {
 		return Promise.all(models.map((m) => m.getModelInfo()))
 	})) as Array<LLMInstanceInfo>
 
+	// Deduplicate: For each loaded model, check if any downloaded model path contains the loaded model's key
+	// This handles cases like loaded "llama-3.1" matching downloaded "Meta/Llama-3.1/Something"
+	// If found, remove the downloaded version and add the loaded model (prefer loaded over downloaded for accurate runtime info)
 	for (const lmstudioModel of loadedModels) {
+		const loadedModelId = lmstudioModel.modelKey.toLowerCase()
+
+		// Find if any downloaded model path contains the loaded model's key as a path segment
+		// Use word boundaries or path separators to avoid false matches like "llama" matching "codellama"
+		const existingKey = Object.keys(models).find((key) => {
+			const keyLower = key.toLowerCase()
+			// Check if the loaded model ID appears as a distinct segment in the path
+			// This matches "llama-3.1" in "Meta/Llama-3.1/Something" but not "llama" in "codellama"
+			return (
+				keyLower.includes(`/${loadedModelId}/`) ||
+				keyLower.includes(`/${loadedModelId}`) ||
+				keyLower.startsWith(`${loadedModelId}/`) ||
+				keyLower === loadedModelId
+			)
+		})
+
+		if (existingKey) {
+			// Remove the downloaded version
+			delete models[existingKey]
+		}
+
+		// Add the loaded model (either as replacement or new entry)
 		models[lmstudioModel.modelKey] = parseLMStudioModel(lmstudioModel)
 		modelsWithLoadedDetails.add(lmstudioModel.modelKey)
 	}
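To make the loop's behavior concrete, here is a worked trace in comments; it simply replays the data from the final test case above, not additional repo code:

// models, keyed by downloaded path:
//   { "mistralai/mistral-7b/instruct": …, "meta/llama-3.1/8b": … }
// loadedModels: [{ modelKey: "mistral-7b", … }, { modelKey: "gpt-4", … }]
//
// "mistral-7b" → keyLower.includes("/mistral-7b/") is true for "mistralai/mistral-7b/instruct",
//                so that entry is deleted and re-added under the key "mistral-7b" (runtime info wins)
// "gpt-4"      → matches no downloaded key, so it is added as a new entry
//
// Object.keys(models) → ["meta/llama-3.1/8b", "mistral-7b", "gpt-4"]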
