Skip to content

Commit 632bbe7

Browse files
committed
Router models: coalesce fetches, file-cache pre-read, active-only scope + debounce
Implements Phase 1/2/3 from temp plan: 1) Coalesce in-flight per-provider fetches with timeouts in modelCache and modelEndpointCache; 2) Read file cache on memory miss (Option A) with background refresh; 3) Scope router-models to active provider by default and add requestRouterModelsAll for activation/settings; 4) Debounce requestRouterModels to reduce duplicates. Also removes immediate re-read after write and adds small logging for OpenRouter fetch counts. Test adjustments ensure deterministic behavior in CI by disabling debounce in NODE_ENV=test and fetching all providers in unit test paths. Key changes: - src/api/providers/fetchers/modelCache.ts: add inFlightModelFetches and withTimeout; consult file cache on miss; remove immediate re-read after write; telemetry-style console logs - src/api/providers/fetchers/modelEndpointCache.ts: add inFlightEndpointFetches and withTimeout; consult file cache on miss - src/core/webview/webviewMessageHandler.ts: add requestRouterModelsAll; default requestRouterModels to active provider; debounce; warm caches on activation; NODE_ENV=test disables debounce and runs allFetches so tests remain stable - src/shared/WebviewMessage.ts: add 'requestRouterModelsAll' message type - src/shared/ExtensionMessage.ts: move includeCurrentTime/includeCurrentCost to optional fields - src/api/providers/openrouter.ts: log models/endpoints count after fetch - tests: update webviewMessageHandler.spec to use requestRouterModelsAll where full sweep is expected Working directory summary: M src/api/providers/fetchers/modelCache.ts, M src/api/providers/fetchers/modelEndpointCache.ts, M src/api/providers/openrouter.ts, M src/core/webview/webviewMessageHandler.ts, M src/shared/ExtensionMessage.ts, M src/shared/WebviewMessage.ts, M src/core/webview/__tests__/webviewMessageHandler.spec.ts. Excluded: temp_plan.md (not committed).
1 parent a3101aa commit 632bbe7

File tree

7 files changed

+478
-123
lines changed

7 files changed

+478
-123
lines changed

src/api/providers/fetchers/modelCache.ts

Lines changed: 180 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,22 @@ import { getRooModels } from "./roo"
2828

2929
const memoryCache = new NodeCache({ stdTTL: 5 * 60, checkperiod: 5 * 60 })
3030

31+
// Coalesce concurrent fetches per provider within this extension host
32+
const inFlightModelFetches = new Map<RouterName, Promise<ModelRecord>>()
33+
34+
function withTimeout<T>(p: Promise<T>, ms: number, label = "getModels"): Promise<T> {
35+
return new Promise<T>((resolve, reject) => {
36+
const t = setTimeout(() => reject(new Error(`${label} timeout after ${ms}ms`)), ms)
37+
p.then((v) => {
38+
clearTimeout(t)
39+
resolve(v)
40+
}).catch((e) => {
41+
clearTimeout(t)
42+
reject(e)
43+
})
44+
})
45+
}
46+
3147
async function writeModels(router: RouterName, data: ModelRecord) {
3248
const filename = `${router}_models.json`
3349
const cacheDir = await getCacheDirectoryPath(ContextProxy.instance.globalStorageUri.fsPath)
@@ -55,83 +71,181 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined>
5571
*/
5672
export const getModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
5773
const { provider } = options
74+
const providerStr = String(provider)
5875

59-
let models = getModelsFromCache(provider)
60-
61-
if (models) {
62-
return models
76+
// 1) Try memory cache
77+
const cached = getModelsFromCache(provider)
78+
if (cached) {
79+
console.log(`[modelCache] cache_hit: ${providerStr} (${Object.keys(cached).length} models)`)
80+
return cached
6381
}
6482

83+
// 2) Try file cache snapshot (Option A), then kick off background refresh
6584
try {
66-
switch (provider) {
67-
case "openrouter":
68-
models = await getOpenRouterModels()
69-
break
70-
case "requesty":
71-
// Requesty models endpoint requires an API key for per-user custom policies.
72-
models = await getRequestyModels(options.baseUrl, options.apiKey)
73-
break
74-
case "glama":
75-
models = await getGlamaModels()
76-
break
77-
case "unbound":
78-
// Unbound models endpoint requires an API key to fetch application specific models.
79-
models = await getUnboundModels(options.apiKey)
80-
break
81-
case "litellm":
82-
// Type safety ensures apiKey and baseUrl are always provided for LiteLLM.
83-
models = await getLiteLLMModels(options.apiKey, options.baseUrl)
84-
break
85-
case "ollama":
86-
models = await getOllamaModels(options.baseUrl, options.apiKey)
87-
break
88-
case "lmstudio":
89-
models = await getLMStudioModels(options.baseUrl)
90-
break
91-
case "deepinfra":
92-
models = await getDeepInfraModels(options.apiKey, options.baseUrl)
93-
break
94-
case "io-intelligence":
95-
models = await getIOIntelligenceModels(options.apiKey)
96-
break
97-
case "vercel-ai-gateway":
98-
models = await getVercelAiGatewayModels()
99-
break
100-
case "huggingface":
101-
models = await getHuggingFaceModels()
102-
break
103-
case "roo": {
104-
// Roo Code Cloud provider requires baseUrl and optional apiKey
105-
const rooBaseUrl =
106-
options.baseUrl ?? process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy"
107-
models = await getRooModels(rooBaseUrl, options.apiKey)
108-
break
109-
}
110-
default: {
111-
// Ensures router is exhaustively checked if RouterName is a strict union.
112-
const exhaustiveCheck: never = provider
113-
throw new Error(`Unknown provider: ${exhaustiveCheck}`)
85+
const file = await readModels(provider)
86+
if (file && Object.keys(file).length > 0) {
87+
console.log(`[modelCache] file_hit: ${providerStr} (${Object.keys(file).length} models, bg_refresh queued)`)
88+
// Populate memory cache immediately so follow-up callers are instant
89+
memoryCache.set(provider, file)
90+
91+
// Start background refresh if not already in-flight (do not await)
92+
if (!inFlightModelFetches.has(provider)) {
93+
const bgPromise = (async (): Promise<ModelRecord> => {
94+
let models: ModelRecord = {}
95+
try {
96+
switch (providerStr) {
97+
case "openrouter":
98+
models = await getOpenRouterModels()
99+
break
100+
case "requesty":
101+
models = await getRequestyModels(options.baseUrl, options.apiKey)
102+
break
103+
case "glama":
104+
models = await getGlamaModels()
105+
break
106+
case "unbound":
107+
models = await getUnboundModels(options.apiKey)
108+
break
109+
case "litellm":
110+
models = await getLiteLLMModels(options.apiKey as string, options.baseUrl as string)
111+
break
112+
case "ollama":
113+
models = await getOllamaModels(options.baseUrl, options.apiKey)
114+
break
115+
case "lmstudio":
116+
models = await getLMStudioModels(options.baseUrl)
117+
break
118+
case "deepinfra":
119+
models = await getDeepInfraModels(options.apiKey, options.baseUrl)
120+
break
121+
case "io-intelligence":
122+
models = await getIOIntelligenceModels(options.apiKey)
123+
break
124+
case "vercel-ai-gateway":
125+
models = await getVercelAiGatewayModels()
126+
break
127+
case "huggingface":
128+
models = await getHuggingFaceModels()
129+
break
130+
case "roo": {
131+
const rooBaseUrl =
132+
options.baseUrl ??
133+
process.env.ROO_CODE_PROVIDER_URL ??
134+
"https://api.roocode.com/proxy"
135+
models = await getRooModels(rooBaseUrl, options.apiKey)
136+
break
137+
}
138+
default:
139+
throw new Error(`Unknown provider: ${providerStr}`)
140+
}
141+
142+
console.log(
143+
`[modelCache] bg_refresh_done: ${providerStr} (${Object.keys(models || {}).length} models)`,
144+
)
145+
memoryCache.set(provider, models)
146+
await writeModels(provider, models).catch((err) =>
147+
console.error(`[modelCache] Error writing ${providerStr} to file cache:`, err),
148+
)
149+
return models || {}
150+
} catch (e) {
151+
console.error(`[modelCache] bg_refresh_failed: ${providerStr}`, e)
152+
throw e
153+
}
154+
})()
155+
156+
const timedBg = withTimeout(bgPromise, 30_000, `getModels(background:${providerStr})`)
157+
inFlightModelFetches.set(provider, timedBg)
158+
Promise.resolve(timedBg).finally(() => inFlightModelFetches.delete(provider))
114159
}
115-
}
116160

117-
// Cache the fetched models (even if empty, to signify a successful fetch with no models).
118-
memoryCache.set(provider, models)
161+
// Return the file snapshot immediately
162+
return file
163+
}
164+
} catch {
165+
// ignore file read errors; fall through to network/coalesce path
166+
}
119167

120-
await writeModels(provider, models).catch((err) =>
121-
console.error(`[getModels] Error writing ${provider} models to file cache:`, err),
122-
)
168+
// 3) Coalesce concurrent fetches
169+
const existing = inFlightModelFetches.get(provider)
170+
if (existing) {
171+
console.log(`[modelCache] coalesced_wait: ${providerStr}`)
172+
return existing
173+
}
123174

175+
// 4) Network fetch wrapped as a single in-flight promise for this provider
176+
const fetchPromise = (async (): Promise<ModelRecord> => {
177+
let models: ModelRecord = {}
124178
try {
125-
models = await readModels(provider)
179+
switch (providerStr) {
180+
case "openrouter":
181+
models = await getOpenRouterModels()
182+
break
183+
case "requesty":
184+
models = await getRequestyModels(options.baseUrl, options.apiKey)
185+
break
186+
case "glama":
187+
models = await getGlamaModels()
188+
break
189+
case "unbound":
190+
models = await getUnboundModels(options.apiKey)
191+
break
192+
case "litellm":
193+
models = await getLiteLLMModels(options.apiKey as string, options.baseUrl as string)
194+
break
195+
case "ollama":
196+
models = await getOllamaModels(options.baseUrl, options.apiKey)
197+
break
198+
case "lmstudio":
199+
models = await getLMStudioModels(options.baseUrl)
200+
break
201+
case "deepinfra":
202+
models = await getDeepInfraModels(options.apiKey, options.baseUrl)
203+
break
204+
case "io-intelligence":
205+
models = await getIOIntelligenceModels(options.apiKey)
206+
break
207+
case "vercel-ai-gateway":
208+
models = await getVercelAiGatewayModels()
209+
break
210+
case "huggingface":
211+
models = await getHuggingFaceModels()
212+
break
213+
case "roo": {
214+
const rooBaseUrl =
215+
options.baseUrl ?? process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy"
216+
models = await getRooModels(rooBaseUrl, options.apiKey)
217+
break
218+
}
219+
default: {
220+
throw new Error(`Unknown provider: ${providerStr}`)
221+
}
222+
}
223+
224+
console.log(`[modelCache] network_fetch_done: ${providerStr} (${Object.keys(models || {}).length} models)`)
225+
226+
// Update memory cache first so waiters get immediate hits
227+
memoryCache.set(provider, models)
228+
229+
// Persist to file cache (best-effort)
230+
await writeModels(provider, models).catch((err) =>
231+
console.error(`[modelCache] Error writing ${providerStr} to file cache:`, err),
232+
)
233+
234+
// Return models as-is (skip immediate re-read)
235+
return models || {}
126236
} catch (error) {
127-
console.error(`[getModels] error reading ${provider} models from file cache`, error)
237+
console.error(`[modelCache] network_fetch_failed: ${providerStr}`, error)
238+
throw error
128239
}
129-
return models || {}
130-
} catch (error) {
131-
// Log the error and re-throw it so the caller can handle it (e.g., show a UI message).
132-
console.error(`[getModels] Failed to fetch models in modelCache for ${provider}:`, error)
240+
})()
133241

134-
throw error // Re-throw the original error to be handled by the caller.
242+
// Register and await with timeout; ensure cleanup
243+
const timed = withTimeout(fetchPromise, 30_000, `getModels(${providerStr})`)
244+
inFlightModelFetches.set(provider, timed)
245+
try {
246+
return await timed
247+
} finally {
248+
inFlightModelFetches.delete(provider)
135249
}
136250
}
137251

@@ -144,6 +258,6 @@ export const flushModels = async (router: RouterName) => {
144258
memoryCache.del(router)
145259
}
146260

147-
export function getModelsFromCache(provider: ProviderName) {
261+
export function getModelsFromCache(provider: RouterName) {
148262
return memoryCache.get<ModelRecord>(provider)
149263
}

0 commit comments

Comments
 (0)