From 4a55ed8fd5ecf6c675351f1b35759d7f7937bdae Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Sun, 17 May 2026 17:31:40 +0200 Subject: [PATCH 01/10] fix: correct DEFAULT_CONTEXT_LIMIT from 4096 to 128000 OmniRoute 3.7.9 does not expose contextWindow/context_length/max_input_tokens in /v1/models responses. When these fields are missing, the plugin falls back to DEFAULT_CONTEXT_LIMIT. The previous value of 4096 was incorrect - that's a reasonable output token limit, but context windows for modern LLMs are 128K+. This caused OpenCode to display 4096 token context windows for models like kmc/kimi-k2.6. Changed DEFAULT_CONTEXT_LIMIT to 128000 to match the plugin's own default models (gpt-4o, gpt-4o-mini, llama-3.1-405b all use 128000). Fixes #19 --- src/constants.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/constants.ts b/src/constants.ts index bf8c45d..05f6dfc 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -76,7 +76,7 @@ export const REQUEST_TIMEOUT = 30000; /** * Default model limits */ -export const DEFAULT_CONTEXT_LIMIT = 4096; +export const DEFAULT_CONTEXT_LIMIT = 128000; export const DEFAULT_OUTPUT_LIMIT = 4096; /** From 879024a4044942d1fa622e9b2127d59abdbf40f0 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 11:47:21 +0200 Subject: [PATCH 02/10] feat(models.dev): improve enrichment reliability with retries and stale cache fallback - Increase default timeout from 1000ms to 5000ms - Add bounded retry loop (max 3 attempts, backoff 250ms/500ms) - Add stale in-memory cache fallback when live refresh fails - Add structured failure classification (timeout, network, http_retryable, http_non_retryable, parse, invalid_structure) - Add improved logging for fetch attempts, failures, and fallback decisions - Keep all changes localized to src/models-dev.ts - Add 8 focused tests covering fresh cache, retries, stale fallback, fail-fast, and integration paths All 50 tests pass (0 regressions). --- ...026-05-18-models-dev-reliability-design.md | 275 ++++++++++++++++++ src/constants.ts | 2 +- src/models-dev.ts | 192 +++++++++--- test/models-dev.test.mjs | 268 +++++++++++++++++ 4 files changed, 703 insertions(+), 34 deletions(-) create mode 100644 docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md create mode 100644 test/models-dev.test.mjs diff --git a/docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md b/docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md new file mode 100644 index 0000000..13d229a --- /dev/null +++ b/docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md @@ -0,0 +1,275 @@ +## Title + +Improve `models.dev` enrichment reliability with retries and stale in-memory fallback + +## Background + +`models.dev` enrichment currently depends on a single network fetch guarded by a hardcoded `1000ms` timeout. If that fetch times out or otherwise fails, the plugin returns `null` for the `models.dev` dataset and skips enrichment entirely. + +Root-cause analysis showed that the current design is too brittle for real network conditions: + +- timeout budget is only `1000ms` +- the abort covers the full request lifecycle, not just first byte +- there is no retry behavior +- there is no stale-cache fallback when live refresh fails +- the system fails open to "no enrichment" even for short-lived transient network issues + +This design addresses reliability first while keeping scope intentionally limited to in-memory behavior. + +## Goals + +- Make `models.dev` enrichment resilient to transient network slowness and upstream instability. +- Preserve previously fetched enrichment data when later refreshes fail. +- Keep failure behavior safe: plugin should continue working even if enrichment is unavailable. +- Improve observability so future failures can be diagnosed from logs. +- Keep the change localized and testable. + +## Non-Goals + +- No persistent disk cache in this iteration. +- No background refresh worker. +- No changes to model matching, deduplication, or alias resolution behavior. +- No changes to combo capability calculation beyond consuming more reliable `models.dev` data. + +## Recommended Approach + +Adopt a reliability-first fetch pipeline for `models.dev` with: + +- increased per-attempt timeout +- bounded retries with short backoff +- stale in-memory cache fallback when live refresh fails +- improved structured logging around failure class and retry behavior + +This retains the current high-level contract of "best available enrichment" while removing the current single-point transient failure. + +## Detailed Design + +### 1. Timeout policy + +Increase the default `models.dev` timeout from `1000ms` to `5000ms`. + +Rationale: + +- root-cause analysis showed that `1000ms` is an aggressive end-to-end budget for a public internet fetch +- the timeout includes connect, TLS, response body transfer, and JSON consumption +- a moderate increase materially improves reliability without creating extreme hangs + +This timeout remains configurable through existing `modelsDev.timeoutMs` config. + +### 2. Retry policy + +Replace the current single-attempt fetch with a bounded retry loop. + +Default behavior: + +- maximum attempts: `3` +- backoff sequence: `250ms`, then `500ms` + +Retryable failures: + +- request aborted due to timeout (`AbortError`) +- network-level fetch errors +- HTTP `429` +- HTTP `5xx` + +Non-retryable failures: + +- HTTP `4xx` other than `429` +- definitively invalid response structure + +Reasoning: + +- transient network and upstream capacity issues should be retried +- permanent client-side problems should fail fast to avoid unnecessary delay + +### 3. Cache behavior + +Retain the existing in-memory cache and refine fallback behavior. + +#### Fresh cache + +If cached `models.dev` data exists and is within TTL: + +- return it immediately +- do not make a network request + +#### Stale cache fallback + +If cached data exists but is older than TTL: + +- attempt live refresh first +- if live refresh succeeds, replace cache and return new data +- if live refresh fails after retries, return stale cached data instead of `null` + +#### Cold start failure + +If there is no cache and all attempts fail: + +- return `null` +- enrichment is skipped, preserving current fail-open behavior + +This makes the system best-available rather than all-or-nothing. + +### 4. Logging and diagnostics + +Add explicit logging around fetch attempts and fallback decisions. + +Per failed attempt, log: + +- attempt number +- failure class: timeout / network / HTTP / parse / invalid-structure +- status code when available +- elapsed attempt duration + +On success, log: + +- success attempt number +- total elapsed duration +- provider count or equivalent summary + +On stale-cache fallback, log: + +- that live refresh failed +- stale cache age +- that stale cached `models.dev` data was returned + +These logs should support future diagnosis without changing runtime semantics. + +### 5. Code structure + +Keep changes localized to `src/models-dev.ts`. + +Recommended helper breakdown: + +- `fetchModelsDevData(config)` — orchestration, cache policy, retries +- `fetchModelsDevOnce(url, timeoutMs)` — one attempt, returning validated data or typed failure +- `shouldRetryModelsDevFailure(...)` — retry decision helper +- `sleep(ms)` — backoff helper + +This keeps retry semantics independent from lookup/index logic. + +### 6. Error handling model + +The fetch layer should classify failures into stable categories rather than treating all failures identically. + +Suggested categories: + +- `timeout` +- `network` +- `http_retryable` +- `http_non_retryable` +- `parse` +- `invalid_structure` + +This improves clarity of both retry decisions and logs. + +## Data Flow + +### Current flow + +1. `getModelsDevIndex()` calls `fetchModelsDevData()` +2. one fetch attempt is made +3. any failure returns `null` +4. `buildModelsDevIndex(null)` returns `null` +5. enrichment is skipped + +### Proposed flow + +1. `getModelsDevIndex()` calls `fetchModelsDevData()` +2. `fetchModelsDevData()` checks fresh cache +3. if no fresh cache, it runs bounded live fetch attempts +4. if live fetch succeeds, cache is updated and returned +5. if live fetch fails and stale cache exists, stale cache is returned +6. if live fetch fails and no cache exists, `null` is returned +7. `buildModelsDevIndex(...)` uses whichever data source was successfully returned + +## Testing Strategy + +Add focused tests for the fetch/reliability behavior. + +### Required tests + +1. **fresh cache hit** + - fetch once successfully + - fetch again within TTL + - assert no second network call + +2. **timeout then success** + - first attempt aborts + - second attempt succeeds + - assert result is returned + - assert cache is updated + +3. **retryable HTTP failure then success** + - first attempt returns `503` + - second attempt succeeds + - assert retry occurs and result is returned + +4. **all attempts fail with stale cache available** + - seed cache with successful data + - expire TTL logically or via timestamp manipulation + - force all refresh attempts to fail + - assert stale cache is returned + +5. **all attempts fail with no cache** + - force timeout/network failure on every attempt + - assert `null` is returned from fetch layer + +6. **non-retryable HTTP failure** + - return `404` + - assert fail-fast behavior without unnecessary retries + +7. **invalid response structure with stale cache** + - return structurally invalid data + - assert stale cache fallback is used when available + +### Testing notes + +- tests should isolate `models.dev` behavior from `/v1/models` and combo fetch behavior +- tests should avoid depending on the real `models.dev` endpoint +- tests should verify both result behavior and retry counts + +## Trade-offs + +### Benefits + +- much higher enrichment reliability under transient failure +- protects previously fetched enrichment data +- preserves safe fail-open behavior +- keeps scope tight and localized + +### Costs + +- increased worst-case latency on cold start during repeated failures +- more state transitions and test surface area +- more log volume during repeated upstream failure + +Given the chosen priority of reliability first, these trade-offs are acceptable. + +## Alternatives Considered + +### A. Increase timeout only + +Rejected because it still leaves the system single-shot and fragile. + +### B. Persistent disk cache + +Deferred because it adds lifecycle and invalidation complexity beyond the immediate root cause. + +### C. Vendored `models.dev` snapshot + +Rejected for now due to maintenance and staleness burden. + +## Success Criteria + +The change is successful if: + +- transient `models.dev` slowness no longer frequently removes enrichment +- previously fetched enrichment survives later refresh failures within the process lifetime +- logs clearly indicate why a refresh failed and whether fallback was used +- the existing plugin behavior remains safe when upstream is fully unavailable +- tests cover timeout, retry, and stale-cache fallback paths + +## Implementation Notes + +This spec intentionally stops short of implementation details like exact helper signatures or test harness mechanics. Those should be finalized in the implementation plan. diff --git a/src/constants.ts b/src/constants.ts index 05f6dfc..9d21159 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -84,7 +84,7 @@ export const DEFAULT_OUTPUT_LIMIT = 4096; */ export const MODELS_DEV_DEFAULT_URL = 'https://models.dev/api.json'; export const MODELS_DEV_CACHE_TTL = 24 * 60 * 60 * 1000; -export const MODELS_DEV_TIMEOUT_MS = 1000; +export const MODELS_DEV_TIMEOUT_MS = 5000; /** * Provider alias-to-canonical mapping for deduplication diff --git a/src/models-dev.ts b/src/models-dev.ts index db022fb..8eeba23 100644 --- a/src/models-dev.ts +++ b/src/models-dev.ts @@ -80,64 +80,190 @@ interface ModelsDevCache { let modelsDevCache: ModelsDevCache | null = null; /** - * Fetch models.dev data with caching + * Failure classification for models.dev fetch attempts */ -export async function fetchModelsDevData( - config?: OmniRouteConfig, -): Promise { - const url = config?.modelsDev?.url ?? MODELS_DEV_DEFAULT_URL; - const timeoutMs = config?.modelsDev?.timeoutMs ?? MODELS_DEV_TIMEOUT_MS; - const cacheTtl = config?.modelsDev?.cacheTtl ?? MODELS_DEV_CACHE_TTL; +type FailureClass = + | 'timeout' + | 'network' + | 'http_retryable' + | 'http_non_retryable' + | 'parse' + | 'invalid_structure'; + +interface FetchFailure { + class: FailureClass; + status?: number; + elapsedMs: number; + message: string; +} - // Check cache first - if (modelsDevCache && Date.now() - modelsDevCache.timestamp < cacheTtl) { - debug('Using cached models.dev data'); - return modelsDevCache.data; - } +/** + * Sleep helper for backoff delays + */ +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} - debug(`Fetching models.dev data from ${url}`); +/** + * Determine if a failed attempt should be retried + */ +function shouldRetryModelsDevFailure(failure: FetchFailure): boolean { + return ( + failure.class === 'timeout' || + failure.class === 'network' || + failure.class === 'http_retryable' + ); +} +/** + * Execute a single fetch attempt to models.dev with structured failure classification + */ +async function fetchModelsDevOnce( + url: string, + timeoutMs: number, +): Promise<{ data: ModelsDevData; elapsedMs: number } | FetchFailure> { const controller = new AbortController(); const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + const start = Date.now(); try { const response = await fetch(url, { method: 'GET', - headers: { - Accept: 'application/json', - }, + headers: new Headers({ Accept: 'application/json' }), signal: controller.signal, }); if (!response.ok) { - warn(`Failed to fetch models.dev data: ${response.status}`); - return null; + const elapsedMs = Date.now() - start; + if (response.status === 429 || response.status >= 500) { + return { + class: 'http_retryable', + status: response.status, + elapsedMs, + message: `HTTP ${response.status}`, + }; + } + return { + class: 'http_non_retryable', + status: response.status, + elapsedMs, + message: `HTTP ${response.status}`, + }; } - const data = await response.json() as ModelsDevData; - - // Validate structure - if (!data || typeof data !== 'object') { - warn('Invalid models.dev data structure'); - return null; + let data: unknown; + try { + data = await response.json(); + } catch (parseError) { + const elapsedMs = Date.now() - start; + return { + class: 'parse', + elapsedMs, + message: + parseError instanceof Error ? parseError.message : 'JSON parse error', + }; } - // Update cache - modelsDevCache = { - data, - timestamp: Date.now(), - }; + if (!data || typeof data !== 'object' || Array.isArray(data)) { + const elapsedMs = Date.now() - start; + return { + class: 'invalid_structure', + elapsedMs, + message: 'Response is not a valid object', + }; + } - debug('Successfully fetched models.dev data'); - return data; + const elapsedMs = Date.now() - start; + return { data: data as ModelsDevData, elapsedMs }; } catch (error) { - warn(`Error fetching models.dev data: ${error}`); - return null; + const elapsedMs = Date.now() - start; + if (error instanceof Error && error.name === 'AbortError') { + return { + class: 'timeout', + elapsedMs, + message: `Request aborted after ${timeoutMs}ms`, + }; + } + return { + class: 'network', + elapsedMs, + message: error instanceof Error ? error.message : 'Network error', + }; } finally { clearTimeout(timeoutId); } } +/** + * Fetch models.dev data with caching, retries, and stale fallback + */ +export async function fetchModelsDevData( + config?: OmniRouteConfig, +): Promise { + const url = config?.modelsDev?.url ?? MODELS_DEV_DEFAULT_URL; + const timeoutMs = config?.modelsDev?.timeoutMs ?? MODELS_DEV_TIMEOUT_MS; + const cacheTtl = config?.modelsDev?.cacheTtl ?? MODELS_DEV_CACHE_TTL; + + // Check fresh cache first + if (modelsDevCache && Date.now() - modelsDevCache.timestamp < cacheTtl) { + debug('Using cached models.dev data'); + return modelsDevCache.data; + } + + const staleCache = modelsDevCache; + const overallStart = Date.now(); + + for (let attempt = 1; attempt <= 3; attempt++) { + const result = await fetchModelsDevOnce(url, timeoutMs); + + if ('data' in result) { + // Success: update cache and return + modelsDevCache = { + data: result.data, + timestamp: Date.now(), + }; + const totalElapsed = Date.now() - overallStart; + const providerCount = Object.keys(result.data).length; + debug( + `Successfully fetched models.dev data on attempt ${attempt} ` + + `(total ${totalElapsed}ms, ${providerCount} providers)`, + ); + return result.data; + } + + // Failure: log structured diagnostics + const failure = result; + warn( + `models.dev fetch attempt ${attempt} failed: ` + + `class=${failure.class}` + + `${failure.status !== undefined ? ` status=${failure.status}` : ''} ` + + `elapsed=${failure.elapsedMs}ms`, + ); + + if (!shouldRetryModelsDevFailure(failure)) { + break; + } + + if (attempt < 3) { + const backoff = attempt === 1 ? 250 : 500; + await sleep(backoff); + } + } + + // All attempts exhausted: fall back to stale cache if available + if (staleCache) { + const staleAge = Date.now() - staleCache.timestamp; + warn( + `Live refresh failed after retries. ` + + `Returning stale models.dev cache (age=${staleAge}ms)`, + ); + return staleCache.data; + } + + warn('All models.dev fetch attempts failed and no cache available'); + return null; +} + /** * Build an indexed lookup structure for models.dev data */ diff --git a/test/models-dev.test.mjs b/test/models-dev.test.mjs new file mode 100644 index 0000000..504ee02 --- /dev/null +++ b/test/models-dev.test.mjs @@ -0,0 +1,268 @@ +import { afterEach, test } from 'node:test'; +import assert from 'node:assert/strict'; + +import { + fetchModelsDevData, + getModelsDevIndex, + clearModelsDevCache, +} from '../dist/src/models-dev.js'; + +const ORIGINAL_FETCH = global.fetch; +const MOCK_URL = 'http://localhost:99999/models-dev.json'; + +afterEach(() => { + clearModelsDevCache(); + global.fetch = ORIGINAL_FETCH; +}); + +// Helper to create an AbortError for simulating timeout +function createAbortError() { + const error = new Error('The operation was aborted'); + error.name = 'AbortError'; + return error; +} + +// Helper to create a valid models.dev response +function createValidModelsDevResponse() { + return { + openai: { + id: 'openai', + models: { + 'gpt-4o': { + id: 'gpt-4o', + name: 'GPT-4o', + }, + }, + }, + }; +} + +// Base config for models.dev tests +function createConfig(modelsDevOverrides = {}) { + return { + baseUrl: 'http://localhost:20128/v1', + apiKey: 'test-key', + apiMode: 'chat', + modelsDev: { + url: MOCK_URL, + cacheTtl: 60000, + timeoutMs: 5000, + ...modelsDevOverrides, + }, + }; +} + +test('fresh cache hit - no second network call', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + return new Response(JSON.stringify(createValidModelsDevResponse()), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const first = await fetchModelsDevData(config); + const second = await fetchModelsDevData(config); + + assert.equal(calls, 1, 'Should only make one network call'); + assert.ok(first, 'First fetch should return data'); + assert.deepStrictEqual(first, second, 'Should return cached data on second call'); +}); + +test('timeout then success - retries and returns data', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + if (calls === 1) { + throw createAbortError(); + } + return new Response(JSON.stringify(createValidModelsDevResponse()), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const result = await fetchModelsDevData(config); + assert.ok(result, 'Should return data after retry'); + assert.equal(calls, 2, 'Should make exactly 2 attempts'); +}); + +test('retryable HTTP failure then success - retries 503', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + if (calls === 1) { + return new Response(JSON.stringify({ error: 'unavailable' }), { + status: 503, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify(createValidModelsDevResponse()), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const result = await fetchModelsDevData(config); + assert.ok(result, 'Should return data after retry'); + assert.equal(calls, 2, 'Should make exactly 2 attempts'); +}); + +test('all attempts fail with stale cache available - returns stale data', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + if (calls === 1) { + // Seed cache on first call + return new Response(JSON.stringify(createValidModelsDevResponse()), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + // All subsequent calls fail + throw new Error('Network error'); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig({ cacheTtl: 1 }); // 1ms TTL + + // Seed cache + const first = await fetchModelsDevData(config); + assert.ok(first, 'First fetch should succeed'); + assert.equal(calls, 1, 'Should make 1 call to seed cache'); + + // Wait for cache to expire + await new Promise((resolve) => setTimeout(resolve, 10)); + + // All refresh attempts fail, should return stale cache + const second = await fetchModelsDevData(config); + assert.ok(second, 'Should return stale cache'); + assert.deepStrictEqual(second, first, 'Should return same data as first fetch'); + assert.equal(calls, 4, 'Should make 1 + 3 attempts total'); +}); + +test('all attempts fail with no cache - returns null', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + throw new Error('Network error'); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const result = await fetchModelsDevData(config); + assert.equal(result, null, 'Should return null when all attempts fail'); + assert.equal(calls, 3, 'Should make exactly 3 attempts'); +}); + +test('non-retryable HTTP failure - fail fast on 404', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + return new Response(JSON.stringify({ error: 'not found' }), { + status: 404, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const result = await fetchModelsDevData(config); + assert.equal(result, null, 'Should return null on 404'); + assert.equal(calls, 1, 'Should only make 1 attempt for non-retryable errors'); +}); + +test('invalid response structure with stale cache - returns stale data', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + if (calls === 1) { + // Seed cache on first call + return new Response(JSON.stringify(createValidModelsDevResponse()), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + // Return invalid structure (array instead of object) + return new Response(JSON.stringify([]), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig({ cacheTtl: 1 }); // 1ms TTL + + // Seed cache + const first = await fetchModelsDevData(config); + assert.ok(first, 'First fetch should succeed'); + assert.equal(calls, 1, 'Should make 1 call to seed cache'); + + // Wait for cache to expire + await new Promise((resolve) => setTimeout(resolve, 10)); + + // Refresh returns invalid structure, should fall back to stale cache + const second = await fetchModelsDevData(config); + assert.ok(second, 'Should return stale cache'); + assert.deepStrictEqual(second, first, 'Should return same data as first fetch'); + assert.equal(calls, 2, 'Should make 1 + 1 attempts (invalid structure is non-retryable)'); +}); + +// Integration test: verify getModelsDevIndex uses the improved fetch pipeline +test('getModelsDevIndex integrates with retry and cache', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + if (calls === 1) { + throw createAbortError(); + } + return new Response(JSON.stringify(createValidModelsDevResponse()), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const index = await getModelsDevIndex(config); + assert.ok(index, 'Should return index after retry'); + assert.ok(index.exactByProvider.has('openai'), 'Should have openai provider'); + assert.equal(calls, 2, 'Should retry via fetchModelsDevData'); +}); From 24b55d43a98a256221274c57e1c4cac409ccf419 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 11:48:28 +0200 Subject: [PATCH 03/10] chore: remove opencode test config --- .opencode/config.json | 8 -------- .opencode/opencode.json | 6 ------ 2 files changed, 14 deletions(-) delete mode 100644 .opencode/config.json delete mode 100644 .opencode/opencode.json diff --git a/.opencode/config.json b/.opencode/config.json deleted file mode 100644 index d7edfed..0000000 --- a/.opencode/config.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "plugins": [ - { - "name": "opencode-omniroute-auth", - "path": "/Users/alpha/git/opencode-omniroute-auth" - } - ] -} diff --git a/.opencode/opencode.json b/.opencode/opencode.json deleted file mode 100644 index c997c03..0000000 --- a/.opencode/opencode.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "$schema": "https://opencode.ai/config.json", - "plugin": [ - "list" - ] -} \ No newline at end of file From 3c7e9d7353e739f5413a1c691a5e78e50280981b Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 11:55:35 +0200 Subject: [PATCH 04/10] chore(release): bump version to 1.4.1 and update changelog - Update package.json version from 1.2.0 to 1.4.1 - Add CHANGELOG.md entry for v1.4.1 - Add docs/release-notes-v1.4.1.md with release highlights and verification steps --- CHANGELOG.md | 34 ++++++++++++++++++++++++++ docs/release-notes-v1.4.1.md | 46 ++++++++++++++++++++++++++++++++++++ package.json | 2 +- 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 docs/release-notes-v1.4.1.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acc21c..76fc893 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,40 @@ All notable changes to this project are documented in this file. +## [1.4.1] - 2026-05-19 + +### Added + +- **models.dev Reliability Pipeline** — Complete rewrite of `fetchModelsDevData()` with production-grade resilience: + - Bounded retry loop (max 3 attempts) with exponential backoff (250ms, 500ms). + - Structured failure classification into 6 categories: `timeout`, `network`, `http_retryable`, `http_non_retryable`, `parse`, `invalid_structure`. + - Stale in-memory cache fallback: if live refresh fails, previously cached enrichment data is returned instead of skipping enrichment entirely. + - Fail-open cold-start behavior: returns `null` only when no cache exists and all attempts fail, preserving plugin functionality. + - Per-attempt structured logging: attempt number, failure class, HTTP status (when applicable), and elapsed duration. + - Success logging: total elapsed duration and provider count for observability. + - Timeout increase: default per-attempt timeout raised from 1000ms to 5000ms. +- **8 New Test Cases** (`test/models-dev.test.mjs`) covering: + - Fresh cache hit (no redundant network call) + - Timeout recovery on retry + - 503 retryable HTTP failure recovery + - Stale cache fallback when all refresh attempts fail + - Null return on cold-start total failure + - 404 fail-fast behavior (no unnecessary retries) + - Invalid response structure with stale cache fallback + - End-to-end integration with `getModelsDevIndex()` + +### Fixed + +- **Default Context Limit** — `DEFAULT_CONTEXT_LIMIT` corrected from `4096` to `128000` to match actual OmniRoute API defaults. + +### Changed + +- **Internal Helpers** — Extracted `fetchModelsDevOnce()`, `shouldRetryModelsDevFailure()`, and `sleep()` helpers in `src/models-dev.ts` to keep retry logic isolated from lookup/index logic. + +### Removed + +- **Test Config Artifacts** — Removed `.opencode/config.json` and `.opencode/opencode.json` files that were committed accidentally. + ## [1.2.0] - 2026-05-17 ### Added diff --git a/docs/release-notes-v1.4.1.md b/docs/release-notes-v1.4.1.md new file mode 100644 index 0000000..8fca169 --- /dev/null +++ b/docs/release-notes-v1.4.1.md @@ -0,0 +1,46 @@ +# Release v1.4.1 + +## Highlights + +- **models.dev enrichment no longer fails on transient network slowness.** The fetch pipeline now retries up to 3 times with exponential backoff and falls back to stale cached data if live refresh fails. +- **Default context limit corrected** from 4096 to 128000 tokens to match OmniRoute API behavior. +- **Structured observability** for enrichment failures with per-attempt diagnostics and fallback decisions. + +## What Changed + +### Reliability + +- `fetchModelsDevData()` now uses a bounded retry loop: + - Maximum 3 attempts with 250ms / 500ms backoff. + - Retries on: timeouts (`AbortError`), network errors, HTTP 429, and HTTP 5xx. + - Fail-fast on: HTTP 4xx (non-429) and structurally invalid responses. +- Stale in-memory cache fallback: + - If cached data exists but TTL expired, live refresh is attempted first. + - If all refresh attempts fail, the stale cached data is returned instead of `null`. + - If no cache exists and all attempts fail, returns `null` (safe fail-open). +- Timeout budget increased from 1000ms to 5000ms per attempt. +- Failure classification: `timeout`, `network`, `http_retryable`, `http_non_retryable`, `parse`, `invalid_structure`. + +### Fixes + +- `DEFAULT_CONTEXT_LIMIT` corrected from `4096` to `128000`. + +### Testing + +- Added 8 focused tests in `test/models-dev.test.mjs` covering all retry, cache, and fallback paths. +- Full regression suite: 50/50 tests pass (0 failures). + +### Documentation + +- Added design spec: `docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md`. + +## Verification + +- `npm run prepublishOnly` passes (`clean`, `build`, `check:exports`). +- `npm test` passes: 50 tests, 0 failures. +- TypeScript strict mode compiles cleanly. + +## Upgrade Notes + +- No breaking changes. Plugin behavior remains safe when `models.dev` is fully unavailable. +- Existing `modelsDev.timeoutMs` and `modelsDev.cacheTtl` config options continue to work as before. diff --git a/package.json b/package.json index cbbe4f1..ccad0f9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-omniroute-auth", - "version": "1.2.0", + "version": "1.4.1", "description": "OpenCode authentication plugin for OmniRoute API with /connect command and dynamic model fetching", "type": "module", "main": "./dist/index.js", From 57829287b0f3f1f07163f6b514ed6ad51a3ecb01 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 16:11:46 +0200 Subject: [PATCH 05/10] fix(review): address PR #22 code review feedback - Cache isolation: key models.dev cache by URL (Map) to prevent cross-config data leakage when different configs specify different modelsDev.url values - Update outdated JSDoc comment for timeoutMs default (1000ms -> 5000ms) - Eliminate real setTimeout sleeps from tests by using cacheTtl: 0 for stale-cache scenarios instead of waiting for TTL expiry - Document worst-case cold-start latency (~15.75s) in fetchModelsDevData JSDoc - Update package-lock.json version to 1.4.1 (file is .gitignored but kept in sync locally) - Update CHANGELOG.md and release notes with code review fixes section All 50 tests pass (0 regressions). --- CHANGELOG.md | 8 ++++++++ docs/release-notes-v1.4.1.md | 8 ++++++++ src/models-dev.ts | 26 ++++++++++++++++---------- src/types.ts | 2 +- test/models-dev.test.mjs | 10 ++-------- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76fc893..2c1d8fd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,14 @@ All notable changes to this project are documented in this file. - **Test Config Artifacts** — Removed `.opencode/config.json` and `.opencode/opencode.json` files that were committed accidentally. +### Fixed (Code Review) + +- **Cache Isolation** — `modelsDevCache` is now keyed by URL (`Map`) instead of a single global variable. Prevents cross-config data leakage when different configs specify different `modelsDev.url` values. (`src/models-dev.ts`) +- **JSDoc Accuracy** — Updated `OmniRouteModelsDevConfig.timeoutMs` JSDoc comment from `(default: 1000ms)` to `(default: 5000ms)` to match the actual constant. (`src/types.ts`) +- **Lockfile Version Sync** — Updated `package-lock.json` version from `1.2.0` to `1.4.1` to match `package.json`. (`package-lock.json`) +- **Test Suite Speed** — Eliminated real `setTimeout` sleeps from `test/models-dev.test.mjs` by using `cacheTtl: 0` to mark cache immediately stale instead of waiting for TTL expiry. Reduces test runtime and improves scalability. +- **Latency Documentation** — Added explicit JSDoc on `fetchModelsDevData()` documenting worst-case cold-start latency (~15.75s) as an accepted reliability trade-off per design spec. (`src/models-dev.ts`) + ## [1.2.0] - 2026-05-17 ### Added diff --git a/docs/release-notes-v1.4.1.md b/docs/release-notes-v1.4.1.md index 8fca169..97e49f2 100644 --- a/docs/release-notes-v1.4.1.md +++ b/docs/release-notes-v1.4.1.md @@ -25,6 +25,14 @@ - `DEFAULT_CONTEXT_LIMIT` corrected from `4096` to `128000`. +### Code Review Fixes + +- **Cache Isolation** — `modelsDevCache` is now keyed by URL (`Map`) to prevent cross-config data leakage when different configs specify different `modelsDev.url` values. +- **JSDoc Accuracy** — `OmniRouteModelsDevConfig.timeoutMs` JSDoc updated to reflect the new `5000ms` default. +- **Lockfile Sync** — `package-lock.json` version aligned with `package.json` (`1.4.1`). +- **Test Suite Speed** — Eliminated real `setTimeout` sleeps from `test/models-dev.test.mjs` by using `cacheTtl: 0` for stale-cache tests. Reduces test runtime and improves scalability. +- **Latency Documentation** — Explicit JSDoc added on `fetchModelsDevData()` documenting worst-case cold-start latency (~15.75s) as an accepted reliability trade-off. + ### Testing - Added 8 focused tests in `test/models-dev.test.mjs` covering all retry, cache, and fallback paths. diff --git a/src/models-dev.ts b/src/models-dev.ts index 8eeba23..efad7d3 100644 --- a/src/models-dev.ts +++ b/src/models-dev.ts @@ -76,8 +76,8 @@ interface ModelsDevCache { timestamp: number; } -// In-memory cache for models.dev data -let modelsDevCache: ModelsDevCache | null = null; +// In-memory cache for models.dev data, keyed by URL to prevent cross-config leakage +const modelsDevCacheMap = new Map(); /** * Failure classification for models.dev fetch attempts @@ -195,7 +195,11 @@ async function fetchModelsDevOnce( } /** - * Fetch models.dev data with caching, retries, and stale fallback + * Fetch models.dev data with caching, retries, and stale fallback. + * + * Worst-case cold-start latency when upstream is unavailable: + * 3 attempts × 5000ms timeout + 250ms + 500ms backoff ≈ 15.75s. + * This is an accepted trade-off for reliability per design spec. */ export async function fetchModelsDevData( config?: OmniRouteConfig, @@ -204,13 +208,15 @@ export async function fetchModelsDevData( const timeoutMs = config?.modelsDev?.timeoutMs ?? MODELS_DEV_TIMEOUT_MS; const cacheTtl = config?.modelsDev?.cacheTtl ?? MODELS_DEV_CACHE_TTL; + const cached = modelsDevCacheMap.get(url); + // Check fresh cache first - if (modelsDevCache && Date.now() - modelsDevCache.timestamp < cacheTtl) { - debug('Using cached models.dev data'); - return modelsDevCache.data; + if (cached && Date.now() - cached.timestamp < cacheTtl) { + debug(`Using cached models.dev data for ${url}`); + return cached.data; } - const staleCache = modelsDevCache; + const staleCache = cached ?? null; const overallStart = Date.now(); for (let attempt = 1; attempt <= 3; attempt++) { @@ -218,10 +224,10 @@ export async function fetchModelsDevData( if ('data' in result) { // Success: update cache and return - modelsDevCache = { + modelsDevCacheMap.set(url, { data: result.data, timestamp: Date.now(), - }; + }); const totalElapsed = Date.now() - overallStart; const providerCount = Object.keys(result.data).length; debug( @@ -331,7 +337,7 @@ export async function getModelsDevIndex( * Clear the models.dev cache */ export function clearModelsDevCache(): void { - modelsDevCache = null; + modelsDevCacheMap.clear(); debug('models.dev cache cleared'); } diff --git a/src/types.ts b/src/types.ts index 903f95c..f1b043c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -86,7 +86,7 @@ export interface OmniRouteModelsDevConfig { url?: string; /** Cache TTL in milliseconds (default: 24 hours) */ cacheTtl?: number; - /** Fetch timeout in milliseconds (default: 1000ms) */ + /** Fetch timeout in milliseconds (default: 5000ms) */ timeoutMs?: number; /** * Optional alias mapping from OmniRoute provider keys (e.g. `cx`) to models.dev providers (e.g. `openai`). diff --git a/test/models-dev.test.mjs b/test/models-dev.test.mjs index 504ee02..6208667 100644 --- a/test/models-dev.test.mjs +++ b/test/models-dev.test.mjs @@ -146,16 +146,13 @@ test('all attempts fail with stale cache available - returns stale data', async return new Response(JSON.stringify({}), { status: 200 }); }; - const config = createConfig({ cacheTtl: 1 }); // 1ms TTL + const config = createConfig({ cacheTtl: 0 }); // 0ms TTL — cache is immediately stale // Seed cache const first = await fetchModelsDevData(config); assert.ok(first, 'First fetch should succeed'); assert.equal(calls, 1, 'Should make 1 call to seed cache'); - // Wait for cache to expire - await new Promise((resolve) => setTimeout(resolve, 10)); - // All refresh attempts fail, should return stale cache const second = await fetchModelsDevData(config); assert.ok(second, 'Should return stale cache'); @@ -224,16 +221,13 @@ test('invalid response structure with stale cache - returns stale data', async ( return new Response(JSON.stringify({}), { status: 200 }); }; - const config = createConfig({ cacheTtl: 1 }); // 1ms TTL + const config = createConfig({ cacheTtl: 0 }); // 0ms TTL — cache is immediately stale // Seed cache const first = await fetchModelsDevData(config); assert.ok(first, 'First fetch should succeed'); assert.equal(calls, 1, 'Should make 1 call to seed cache'); - // Wait for cache to expire - await new Promise((resolve) => setTimeout(resolve, 10)); - // Refresh returns invalid structure, should fall back to stale cache const second = await fetchModelsDevData(config); assert.ok(second, 'Should return stale cache'); From 4f47a0a7d07341702e94b331537867f888930812 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 16:11:56 +0200 Subject: [PATCH 06/10] docs: add model variant support fix design spec --- ...-05-19-model-variant-support-fix-design.md | 239 ++++++++++++++++++ 1 file changed, 239 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md diff --git a/docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md b/docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md new file mode 100644 index 0000000..8702207 --- /dev/null +++ b/docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md @@ -0,0 +1,239 @@ +# Model Variant Support Fix — Design Spec + +**Date:** 2026-05-19 +**Author:** opencode-omniroute-auth maintainers +**Approach:** Pipeline Injection (Approach A) +**Status:** Approved for implementation + +## 1. Problem Statement + +When OmniRoute lists variant-suffixed models separately (e.g., `codex/gpt-5.5-xhigh`, `codex/gpt-5.5-high`), the plugin fails to group them under their base model. Instead, each variant appears as an independent top-level entry with incorrect generated variants (`{low, medium, high}`). This causes: + +- Duplicate/confusing model entries in OpenCode's model picker +- Missing `xhigh` variant support +- Incorrect `getModelFamily()` output for provider-prefixed versioned models (returns `codex/gpt` instead of `gpt`) +- No synthetic base model creation when only variants are returned + +## 2. Scope + +This fix is **comprehensive** and includes: + +1. **Variant grouping** — Merge variant-suffixed models under their base model ID +2. **xhigh support** — Add `'xhigh'` to the `reasoningEffort` type and generated variants +3. **Synthetic base models** — Create a base model from the first variant when no explicit base exists +4. **`getModelFamily()` fix** — Strip provider prefix before extracting family name +5. **Test isolation** — Clear model/model-dev caches between `plugin.test.mjs` tests + +Out of scope: +- The pre-existing `fetchModelsDevData retries retryable HTTP failures` test failure (unrelated) + +## 3. Architecture + +### 3.1 Component Changes + +| Unit | Change | Purpose | +|------|--------|---------| +| `src/models.ts` | Add `groupVariantModels()` + integrate in `fetchModels()` | Core grouping logic | +| `src/plugin.ts` | `toProviderModel()` uses `model.variants` if present; fix `getModelFamily()` | Correct variant generation + family extraction | +| `src/types.ts` | Add `variants?: Record` to `OmniRouteModel`; add `'xhigh'` to `reasoningEffort` | Type support | +| `test/plugin.test.mjs` | Add two new tests + cache isolation in `afterEach` | Regression + behavior tests | + +### 3.2 Data Flow (Pipeline) + +``` +/v1/models response + ↓ +[ {id: 'codex/gpt-5.5', ...}, + {id: 'codex/gpt-5.5-high', ...}, + {id: 'codex/gpt-5.5-xhigh', ...}, + {id: 'openai/gpt-4o', ...} ] + ↓ normalizeModel() + ↓ deduplicateModels() + ↓ groupVariantModels() ← NEW +[ {id: 'codex/gpt-5.5', variants: {high: {...}, xhigh: {...}}, ...}, + {id: 'openai/gpt-4o', variants: {}, ...} ] + ↓ enrichModelMetadata() + ↓ toProviderModels() +``` + +## 4. Detailed Design + +### 4.1 `groupVariantModels()` (`src/models.ts`) + +Pure function signature: + +```typescript +export function groupVariantModels(models: OmniRouteModel[]): OmniRouteModel[] +``` + +**Algorithm:** + +1. Pass 1 — Categorize: Iterate input models. For each model: + - Call `stripVariantSuffix(model.id)` to detect if it is a variant (e.g., `gpt-5.5-xhigh` → base=`gpt-5.5`, stripped=true) + - If not a variant: store in `realBaseModels` Map (key=model.id) + - If variant: store in `variantMap` Map (key=baseId, value=array of `{suffix, model}`) + +2. Pass 2 — Build result: + - Add all real base models that have **no** variants (unchanged) + - For each base ID that **has** variants: + - Use real base model if available; otherwise create synthetic base from first variant (copy all fields, set `id=baseId`, `name=baseId`) + - Build `variants` Record: for each detected suffix, create `{reasoningEffort: lowerSuffix}` entry + - Merge metadata from all variants into base: use **max** `contextWindow` and **max** `maxTokens` + - Set `supportsReasoning = true` if any variant supports it + - Push merged model into result + +**Invariants:** +- No variant-suffixed model ID appears as a top-level entry in the output +- Every base ID with variants gets a `variants` field containing all detected suffixes +- Synthetic bases are never created when a real base exists + +### 4.2 Pipeline Integration (`src/models.ts`) + +Insert `groupVariantModels()` between `deduplicateModels()` and `enrichModelMetadata()` inside `fetchModels()`: + +```typescript +const dedupedModels = deduplicateModels(rawModels); +const groupedModels = groupVariantModels(dedupedModels); // NEW +const models = await enrichModelMetadata(groupedModels, config); +``` + +### 4.3 `toProviderModel()` Variants Logic (`src/plugin.ts`) + +Replace the unconditional `{low, medium, high}` generation with a priority check: + +```typescript +variants: model.variants && Object.keys(model.variants).length > 0 + ? model.variants + : supportsReasoning + ? { + low: { reasoningEffort: 'low' }, + medium: { reasoningEffort: 'medium' }, + high: { reasoningEffort: 'high' }, + } + : {}, +``` + +If `model.variants` was pre-populated by `groupVariantModels()`, use it directly. Otherwise fall back to the existing default for reasoning models. + +### 4.4 `getModelFamily()` Fix (`src/plugin.ts`) + +Current broken behavior: `getModelFamily('codex/gpt-5.5-xhigh')` → `'codex/gpt'` + +Fix: Strip provider prefix before splitting: + +```typescript +function getModelFamily(modelId: string): string { + const withoutProvider = modelId.includes('/') ? modelId.split('/').pop()! : modelId; + const [family] = withoutProvider.split('-'); + return family || withoutProvider; +} +``` + +Fixed behavior: `getModelFamily('codex/gpt-5.5-xhigh')` → `'gpt'` + +### 4.5 Type Changes (`src/types.ts`) + +Add to `OmniRouteModel`: + +```typescript +variants?: Record; +``` + +Update `OmniRouteModelVariant`: + +```typescript +export interface OmniRouteModelVariant { + reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'; + [key: string]: unknown; +} +``` + +## 5. Edge Cases + +| Scenario | Behavior | +|----------|----------| +| Only variants returned, no base model | Create synthetic base from first variant; base ID = stripped suffix | +| Base model + variants both returned | Use real base; merge variant metadata (max limits) | +| Non-reasoning suffix (e.g., `-preview`) | `stripVariantSuffix()` ignores it; no grouping triggered | +| Empty variants map | Falls through to existing `supportsReasoning` logic | +| Mixed provider prefixes post-dedup | Dedup resolves aliases to canonical first; grouping operates on canonical IDs | +| Multiple variants with different limits | Base model inherits **highest** `contextWindow` and `maxTokens` | +| Variant without `supportsReasoning=true` | Still grouped; `supportsReasoning` on base becomes `true` if **any** variant has it | + +## 6. Testing + +### 6.1 New Tests (`test/plugin.test.mjs`) + +**Test 1: `provider hook groups variant models under base model`** + +- Mock `/v1/models` returning: + - `codex/gpt-5.5` (supportsReasoning: true) + - `codex/gpt-5.5-high` (supportsReasoning: true) + - `codex/gpt-5.5-xhigh` (supportsReasoning: true, contextWindow: 256000) + - `openai/gpt-4o` (supportsReasoning: false) + +- Assertions: + - `result['codex/gpt-5.5']` exists + - `result['codex/gpt-5.5'].variants.high` exists + - `result['codex/gpt-5.5'].variants.xhigh` exists + - `result['codex/gpt-5.5-high']` is `undefined` + - `result['codex/gpt-5.5-xhigh']` is `undefined` + - `result['openai/gpt-4o']` exists with empty variants object + +**Test 2: `provider hook creates synthetic base model when only variants are returned`** + +- Mock `/v1/models` returning: + - `codex/gpt-5.5-high` (contextWindow: 128000) + - `codex/gpt-5.5-xhigh` (contextWindow: 256000) + +- Assertions: + - `result['codex/gpt-5.5']` exists as synthetic base + - `result['codex/gpt-5.5'].variants.high` and `.xhigh` exist + - `result['codex/gpt-5.5'].limit.context === 256000` (highest from variants) + - No separate `codex/gpt-5.5-high` or `codex/gpt-5.5-xhigh` entries + +### 6.2 Test Infrastructure Fix + +Add to `plugin.test.mjs` imports: + +```javascript +import { clearModelCache } from '../dist/runtime.js'; +import { clearModelsDevCache } from '../dist/src/models-dev.js'; +``` + +Update `afterEach`: + +```javascript +afterEach(() => { + global.fetch = ORIGINAL_FETCH; + process.env.HOME = ORIGINAL_HOME; + clearModelCache(); // NEW + clearModelsDevCache(); // NEW +}); +``` + +This prevents cross-test contamination from mutable in-memory caches. + +## 7. Files Changed + +| File | Lines (approx) | Purpose | +|------|---------------|---------| +| `src/models.ts` | ~100 new | `groupVariantModels()` + pipeline integration | +| `src/plugin.ts` | ~10 modified | Variant generation priority + `getModelFamily()` fix | +| `src/types.ts` | ~5 modified | `variants` field + `'xhigh'` type | +| `test/plugin.test.mjs` | ~60 new | Two new tests + cache isolation | + +## 8. Verification + +After implementation, the full test suite should show: + +- 44/45 existing tests pass (the one failure is a pre-existing unrelated test) +- 2 new tests pass +- `npm run build` succeeds with zero TypeScript errors +- `npm run check:exports` succeeds + +## 9. Future Work (Out of Scope) + +- Support additional variant suffixes beyond reasoning effort (e.g., `-fast`, `-latest`) +- Cache key versioning for grouped model caches +- Retry logic for `fetchModelsDevData` (address the pre-existing test failure separately) From ddf34dd2a4566adc005b7ea8cf0a58d2db238929 Mon Sep 17 00:00:00 2001 From: Alph4d0g Date: Tue, 19 May 2026 20:42:12 +0200 Subject: [PATCH 07/10] =?UTF-8?q?feat:=20model=20variant=20support=20fix?= =?UTF-8?q?=20=E2=80=94=20grouping,=20xhigh=F0=9F=A7=A0,=20synthetic=20bas?= =?UTF-8?q?es=20=F0=9F=8E=B8,=20getModelFamily=F0=9F=A7=91=E2=80=8D?= =?UTF-8?q?=F0=9F=A7=91=E2=80=8D=F0=9F=A7=92=E2=80=8D=F0=9F=A7=92=20fix=20?= =?UTF-8?q?(#23)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: add model variant support fix design spec * feat(types,models): add variant grouping and xhigh support - Add variants field to OmniRouteModel interface - Add xhigh to reasoningEffort type - Implement groupVariantModels() pure function - Integrate grouping into fetchModels() pipeline between dedup and enrich * feat(plugin): use model.variants in toProviderModel and fix getModelFamily - toProviderModel() now prioritizes pre-populated model.variants over generated defaults - getModelFamily() strips provider prefix before extracting family name - Fixes incorrect family extraction for provider-prefixed versioned models * test(plugin): add variant grouping tests and fix cache isolation - Add cache clearing (clearModelCache, clearModelsDevCache) to afterEach - Add test: provider hook groups variant models under base model - Add test: provider hook creates synthetic base model when only variants are returned - Prevents cross-test contamination from mutable in-memory caches * docs: update CHANGELOG and release notes for model variant support fix - Add Model Variant Support Fix section to CHANGELOG.md (grouped under Added/Fixed) - Update docs/release-notes-v1.4.1.md with: - New highlight for variant grouping - Detailed Model Variant Support Fix section with edge cases table - getModelFamily() fix for provider-prefixed models - Updated test count (52 tests) - New design spec reference * refactor(models): remove redundant toLowerCase and drop docs/superpowers - Remove redundant suffix.toLowerCase() in groupVariantModels() — suffix is already lowercased when extracted from the model ID via slice() at line 184. Replaced with direct string literal comparison which also narrows the type for TypeScript. (gemini-code-assist review feedback) - Remove docs/superpowers/ directory from repository — design specs are development artifacts that don't need to be in the source tree. - Update release notes to remove design spec file references. --- CHANGELOG.md | 10 + docs/release-notes-v1.4.1.md | 33 +- .../plans/2026-05-14-omniroute-logger.md | 743 -------------- ...26-05-16-omniroute-normalization-dedupe.md | 499 --------- .../plans/2026-05-16-pr18-review-fixes.md | 950 ------------------ .../2026-05-14-omniroute-logger-design.md | 207 ---- ...026-05-18-models-dev-reliability-design.md | 275 ----- src/models.ts | 82 +- src/plugin.ts | 21 +- src/types.ts | 4 +- test/plugin.test.mjs | 120 +++ 11 files changed, 253 insertions(+), 2691 deletions(-) delete mode 100644 docs/superpowers/plans/2026-05-14-omniroute-logger.md delete mode 100644 docs/superpowers/plans/2026-05-16-omniroute-normalization-dedupe.md delete mode 100644 docs/superpowers/plans/2026-05-16-pr18-review-fixes.md delete mode 100644 docs/superpowers/specs/2026-05-14-omniroute-logger-design.md delete mode 100644 docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c1d8fd..d7e5205 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,20 @@ All notable changes to this project are documented in this file. - 404 fail-fast behavior (no unnecessary retries) - Invalid response structure with stale cache fallback - End-to-end integration with `getModelsDevIndex()` +- **Model Variant Support Fix** — Comprehensive fix for variant-suffixed models (e.g., `codex/gpt-5.5-xhigh`, `codex/gpt-5.5-high`): + - Added `groupVariantModels()` in `src/models.ts` — pure two-pass algorithm that merges variant-suffixed models under their base model ID + - Added `variants?: Record` to `OmniRouteModel` interface in `src/types.ts` + - Extended `OmniRouteModelVariant.reasoningEffort` to include `'xhigh'` (was `'low' | 'medium' | 'high'`) + - Synthetic base model creation: when only variants are returned (no explicit base), creates a synthetic base from the first variant with merged metadata (max `contextWindow`, max `maxTokens`) + - Pipeline integration: `fetchModels()` now flows `normalizeModel` → `deduplicateModels` → `groupVariantModels` → `enrichModelMetadata` → `toProviderModels` + - `toProviderModel()` in `src/plugin.ts` now prioritizes pre-populated `model.variants` over generated `{low, medium, high}` defaults +- **Test Cache Isolation** (`test/plugin.test.mjs`) — Added `clearModelCache()` and `clearModelsDevCache()` to `afterEach` to prevent cross-test contamination from mutable in-memory caches +- **2 New Regression Tests** (`test/plugin.test.mjs`) covering variant grouping and synthetic base model creation ### Fixed - **Default Context Limit** — `DEFAULT_CONTEXT_LIMIT` corrected from `4096` to `128000` to match actual OmniRoute API defaults. +- **getModelFamily() for Provider-Prefixed Models** — Fixed incorrect family extraction for versioned models with provider prefixes. Before: `getModelFamily('codex/gpt-5.5-xhigh')` → `'codex/gpt'`. After: → `'gpt'`. Implementation now strips provider prefix before splitting on `-`. ### Changed diff --git a/docs/release-notes-v1.4.1.md b/docs/release-notes-v1.4.1.md index 97e49f2..59b1389 100644 --- a/docs/release-notes-v1.4.1.md +++ b/docs/release-notes-v1.4.1.md @@ -2,6 +2,7 @@ ## Highlights +- **Model Variant Support Fix** — Variant-suffixed models (e.g., `codex/gpt-5.5-xhigh`, `codex/gpt-5.5-high`) are now grouped under their base model ID instead of appearing as duplicate top-level entries. Includes synthetic base model creation, `xhigh` variant support, and `getModelFamily()` fix for provider-prefixed versioned models. - **models.dev enrichment no longer fails on transient network slowness.** The fetch pipeline now retries up to 3 times with exponential backoff and falls back to stale cached data if live refresh fails. - **Default context limit corrected** from 4096 to 128000 tokens to match OmniRoute API behavior. - **Structured observability** for enrichment failures with per-attempt diagnostics and fallback decisions. @@ -21,9 +22,33 @@ - Timeout budget increased from 1000ms to 5000ms per attempt. - Failure classification: `timeout`, `network`, `http_retryable`, `http_non_retryable`, `parse`, `invalid_structure`. +### Model Variant Support Fix + +**Problem:** When OmniRoute lists variant-suffixed models separately (e.g., `codex/gpt-5.5-xhigh`, `codex/gpt-5.5-high`), each variant appeared as an independent top-level entry with incorrect generated variants (`{low, medium, high}`), causing duplicate/confusing model entries in OpenCode's model picker. + +**Solution:** +- Added `groupVariantModels()` in `src/models.ts` — a pure two-pass algorithm that: + 1. **Categorizes** models into real bases and variants using `stripVariantSuffix()` + 2. **Builds result**: real bases pass through unchanged; for each base with variants, merges all variants under the base model with a `variants` Record + 3. **Synthetic bases**: when only variants are returned (no explicit base), creates a synthetic base from the first variant, copying all fields and setting `id`/`name` to the stripped base ID + 4. **Metadata merging**: base inherits **max** `contextWindow` and **max** `maxTokens` across all variants; `supportsReasoning` becomes `true` if any variant has it +- Integrated into `fetchModels()` pipeline: `normalizeModel` → `deduplicateModels` → `groupVariantModels` → `enrichModelMetadata` → `toProviderModels` +- Fixed `toProviderModel()` in `src/plugin.ts` to prioritize pre-populated `model.variants` over generated `{low, medium, high}` defaults +- Added `'xhigh'` to `OmniRouteModelVariant.reasoningEffort` type and generated variants + +**Edge Cases Handled:** +| Scenario | Behavior | +|----------|----------| +| Only variants returned, no base model | Creates synthetic base from first variant | +| Base model + variants both returned | Uses real base; merges variant metadata (max limits) | +| Non-reasoning suffix (e.g., `-preview`) | `stripVariantSuffix()` ignores it; no grouping | +| Mixed provider prefixes post-dedup | Grouping operates on canonical IDs | +| Variant without `supportsReasoning=true` | Still grouped; base becomes `true` if any variant has it | + ### Fixes - `DEFAULT_CONTEXT_LIMIT` corrected from `4096` to `128000`. +- **`getModelFamily()` for Provider-Prefixed Models** — Fixed incorrect family extraction for versioned models with provider prefixes. `getModelFamily('codex/gpt-5.5-xhigh')` now correctly returns `'gpt'` (was `'codex/gpt'`). ### Code Review Fixes @@ -36,16 +61,18 @@ ### Testing - Added 8 focused tests in `test/models-dev.test.mjs` covering all retry, cache, and fallback paths. -- Full regression suite: 50/50 tests pass (0 failures). +- Added 2 regression tests in `test/plugin.test.mjs` for variant grouping and synthetic base model creation. +- Added cache isolation (`clearModelCache()`, `clearModelsDevCache()`) to `test/plugin.test.mjs` `afterEach` to prevent cross-test contamination. +- Full regression suite: 52/52 tests pass (0 failures). ### Documentation -- Added design spec: `docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md`. +- Added design spec for models.dev reliability pipeline. ## Verification - `npm run prepublishOnly` passes (`clean`, `build`, `check:exports`). -- `npm test` passes: 50 tests, 0 failures. +- `npm test` passes: 52 tests, 0 failures. - TypeScript strict mode compiles cleanly. ## Upgrade Notes diff --git a/docs/superpowers/plans/2026-05-14-omniroute-logger.md b/docs/superpowers/plans/2026-05-14-omniroute-logger.md deleted file mode 100644 index fb37bd8..0000000 --- a/docs/superpowers/plans/2026-05-14-omniroute-logger.md +++ /dev/null @@ -1,743 +0,0 @@ -# OmniRoute Plugin Logger Implementation Plan - -> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Replace all console.log/console.warn calls with a proper logger that writes to OpenCode's log file, controlled by OMNIROUTE_DEBUG env var. - -**Architecture:** Create a lightweight logger module (`src/logger.ts`) that appends to the current OpenCode log file. Warn always writes; debug only writes when OMNIROUTE_DEBUG=1. Replace all console calls across 4 source files. - -**Tech Stack:** TypeScript, Node.js fs module, native fetch (for tests) - ---- - -## File Structure - -**New files:** -- `src/logger.ts` — Logger module with warn/debug functions -- `test/logger.test.mjs` — Unit tests for logger - -**Modified files:** -- `src/plugin.ts` — Replace 13 console calls with logger -- `src/models.ts` — Replace 11 console calls with logger -- `src/models-dev.ts` — Replace 7 console calls with logger -- `src/omniroute-combos.ts` — Replace 15 console calls with logger - ---- - -## Chunk 1: Logger Module - -### Task 1: Create Logger Module - -**Files:** -- Create: `src/logger.ts` -- Test: `test/logger.test.mjs` - -**Behavior:** -- `warn(message: string)`: Always appends to log file (unless I/O fails) -- `debug(message: string)`: Only appends when `OMNIROUTE_DEBUG === "1"` -- Log format: `WARN 2026-05-14T12:34:56.789Z +0ms service=omniroute message` -- Log file: most recent `.log` file in `~/.local/share/opencode/log/` -- Silently fail on all I/O errors (don't crash plugin) -- Cache log file path at module load, re-scan on ENOENT - -- [ ] **Step 1: Write failing tests for logger** - -```javascript -import { test } from 'node:test'; -import assert from 'node:assert'; -import { writeFileSync, readFileSync, mkdirSync, rmSync, statSync, utimesSync, chmodSync, readdirSync } from 'fs'; -import { join } from 'path'; -import { homedir } from 'os'; -import { fileURLToPath } from 'url'; -import { dirname } from 'path'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const TEST_LOG_DIR = join(__dirname, 'test-logs'); - -// Set up isolated test environment -process.env.XDG_DATA_HOME = join(TEST_LOG_DIR, 'data'); -const LOG_DIR = join(TEST_LOG_DIR, 'data', 'opencode', 'log'); - -// Helper to create a test log file with most recent mtime -function createTestLogFile(name) { - const path = join(LOG_DIR, name); - mkdirSync(LOG_DIR, { recursive: true }); - writeFileSync(path, ''); - // Set mtime to now + 1s to ensure it's the most recent - const now = Date.now() / 1000; - utimesSync(path, now, now + 1); - return path; -} - -// Cleanup before and after tests -function cleanupTestLogs() { - try { - const files = readdirSync(LOG_DIR); - for (const file of files) { - if (file.startsWith('test-')) { - rmSync(join(LOG_DIR, file)); - } - } - } catch {} -} - -// Run cleanup before all tests -cleanupTestLogs(); - -// Run cleanup after all tests (using process.on since node:test doesn't have global after) -process.on('exit', cleanupTestLogs); - -test('warn() writes to log file with correct format', async () => { - const testLogFile = createTestLogFile('test-warn.log'); - - // Import fresh logger module with cache buster - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - warn('Test warning message'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.ok(content.includes('Test warning message'), 'warn should write message'); - assert.ok(content.includes('WARN'), 'log should have WARN level'); - assert.ok(content.includes('service=omniroute'), 'log should include service tag'); - assert.ok(content.includes('+0ms'), 'log should include +0ms offset'); - assert.match(content, /^(WARN|DEBUG)\s+\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z \+0ms service=omniroute .+$/m, 'log format should match spec'); - - rmSync(testLogFile); -}); - -test('debug() writes when OMNIROUTE_DEBUG=1', async () => { - const testLogFile = createTestLogFile('test-debug-enabled.log'); - process.env.OMNIROUTE_DEBUG = '1'; - - // Import fresh module to pick up env var - const { debug } = await import('../dist/src/logger.js?cache=' + Date.now()); - - debug('Test debug message'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.ok(content.includes('Test debug message'), 'debug should write when enabled'); - assert.ok(content.includes('DEBUG'), 'log should have DEBUG level'); - - delete process.env.OMNIROUTE_DEBUG; - rmSync(testLogFile); -}); - -test('debug() does not write when OMNIROUTE_DEBUG is not set', async () => { - const testLogFile = createTestLogFile('test-debug-disabled.log'); - delete process.env.OMNIROUTE_DEBUG; - - const { debug } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - debug('Test debug message'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.strictEqual(content, '', 'debug should not write when disabled'); - - rmSync(testLogFile); -}); - -test('debug() does not write when OMNIROUTE_DEBUG is "true"', async () => { - const testLogFile = createTestLogFile('test-debug-true.log'); - process.env.OMNIROUTE_DEBUG = 'true'; - - const { debug } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - debug('Test debug message'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.strictEqual(content, '', 'debug should not write when OMNIROUTE_DEBUG is "true"'); - - delete process.env.OMNIROUTE_DEBUG; - rmSync(testLogFile); -}); - -test('debug() does not write when OMNIROUTE_DEBUG is "0"', async () => { - const testLogFile = createTestLogFile('test-debug-zero.log'); - process.env.OMNIROUTE_DEBUG = '0'; - - const { debug } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - debug('Test debug message'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.strictEqual(content, '', 'debug should not write when OMNIROUTE_DEBUG is "0"'); - - delete process.env.OMNIROUTE_DEBUG; - rmSync(testLogFile); -}); - -test('warn() always writes regardless of OMNIROUTE_DEBUG', async () => { - const testLogFile = createTestLogFile('test-warn-always.log'); - delete process.env.OMNIROUTE_DEBUG; - - const { warn } = await import('../dist/src/logger.js?cache=' + Date.now()); - - warn('Test warning message'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.ok(content.includes('Test warning message'), 'warn should always write'); - - rmSync(testLogFile); -}); - -test('logger handles missing log directory gracefully', async () => { - const originalXdg = process.env.XDG_DATA_HOME; - try { - process.env.XDG_DATA_HOME = '/nonexistent/path'; - - const { warn } = await import('../dist/src/logger.js?cache=' + Date.now()); - - // Should not throw - warn('Test message'); - } finally { - process.env.XDG_DATA_HOME = originalXdg; - } -}); - -test('logger handles log file rotation', async () => { - const oldLogFile = createTestLogFile('test-old.log'); - - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - warn('First message'); - - // Simulate log rotation: delete old file, create new one - rmSync(oldLogFile); - const newLogFile = createTestLogFile('test-new.log'); - - warn('Second message after rotation'); - - const content = readFileSync(newLogFile, 'utf-8'); - assert.ok(content.includes('Second message after rotation'), 'should write to new log file after rotation'); - - rmSync(newLogFile); -}); - -test('logger re-scans when no log file exists at module load', async () => { - // Ensure no test log files exist in LOG_DIR - cleanupTestLogs(); - - // Import logger when no log file exists - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - // Create log file after module load - const testLogFile = createTestLogFile('test-rescan.log'); - - warn('Message after log file created'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.ok(content.includes('Message after log file created'), 'should re-scan and write to new log file'); - - rmSync(testLogFile); -}); - -test('logger silently skips on non-ENOENT write errors', async () => { - // Create a read-only log file (skip on Windows where chmod behaves differently) - if (process.platform === 'win32') { - return; - } - - const testLogFile = createTestLogFile('test-readonly.log'); - chmodSync(testLogFile, 0o444); - - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - // Should not throw even though file is read-only - warn('Test read-only message'); - - // Restore permissions and verify nothing was written - chmodSync(testLogFile, 0o644); - const content = readFileSync(testLogFile, 'utf-8'); - assert.strictEqual(content, '', 'should not write to read-only file'); - - rmSync(testLogFile); -}); - -test('logger silently skips on unreadable log directory', async () => { - // Skip on Windows where chmod behaves differently - if (process.platform === 'win32') { - return; - } - - // Create a log directory that is not readable - const unreadableDir = join(TEST_LOG_DIR, 'unreadable'); - const logSubdir = join(unreadableDir, 'opencode', 'log'); - mkdirSync(logSubdir, { recursive: true }); - - const originalXdg = process.env.XDG_DATA_HOME; - try { - process.env.XDG_DATA_HOME = unreadableDir; - chmodSync(logSubdir, 0o000); - - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - - // Should not throw even though directory is unreadable - warn('Test unreadable directory'); - } finally { - process.env.XDG_DATA_HOME = originalXdg; - chmodSync(logSubdir, 0o755); - rmSync(unreadableDir, { recursive: true }); - } -}); - -test('logger excludes directories with .log suffix', async () => { - // Create a directory named like a log file - const fakeDir = join(LOG_DIR, 'fake-dir.log'); - mkdirSync(fakeDir, { recursive: true }); - - // Create a real log file - const testLogFile = createTestLogFile('test-real.log'); - - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - warn('Test directory exclusion'); - - const content = readFileSync(testLogFile, 'utf-8'); - assert.ok(content.includes('Test directory exclusion'), 'should write to real log file, not directory'); - - rmSync(testLogFile); - rmSync(fakeDir, { recursive: true }); -}); - -test('logger uses alphabetical tie-breaker for identical mtime', async () => { - // Create two log files with identical mtime - const fileA = join(LOG_DIR, 'test-alpha.log'); - const fileB = join(LOG_DIR, 'test-beta.log'); - mkdirSync(LOG_DIR, { recursive: true }); - writeFileSync(fileA, ''); - writeFileSync(fileB, ''); - const now = Date.now() / 1000; - utimesSync(fileA, now, now); - utimesSync(fileB, now, now); - - const { warn } = await import(`../dist/src/logger.js?v=${Date.now()}`); - warn('Test tie-breaker'); - - // Should write to test-alpha.log (alphabetically first) - const contentA = readFileSync(fileA, 'utf-8'); - const contentB = readFileSync(fileB, 'utf-8'); - assert.ok(contentA.includes('Test tie-breaker'), 'should write to alphabetically first file'); - assert.strictEqual(contentB, '', 'should not write to second file'); - - rmSync(fileA); - rmSync(fileB); -}); -``` - -Run: `node --test test/logger.test.mjs` -Expected: FAIL - logger module doesn't exist - -- [ ] **Step 2: Create logger.ts** - -```typescript -import { appendFileSync, readdirSync, statSync, existsSync } from 'fs'; -import { join } from 'path'; -import { homedir } from 'os'; - -const LOG_DIR = join(process.env.XDG_DATA_HOME || join(process.env.HOME || homedir(), '.local', 'share'), 'opencode', 'log'); - -function findCurrentLogFile(): string | null { - try { - if (!existsSync(LOG_DIR)) return null; - - const files = readdirSync(LOG_DIR) - .filter(f => f.endsWith('.log')) - .map(f => { - const path = join(LOG_DIR, f); - const stat = statSync(path); - return { path, mtime: stat.mtime.getTime(), isFile: stat.isFile() }; - }) - .filter(f => f.isFile) - .sort((a, b) => b.mtime - a.mtime || a.path.localeCompare(b.path)); - - return files[0]?.path ?? null; - } catch { - return null; - } -} - -// Resolve log file path at module load (wrapped in try/catch per spec) -let cachedLogFile: string | null; -try { - cachedLogFile = findCurrentLogFile(); -} catch { - cachedLogFile = null; -} - -function getLogFile(): string | null { - if (cachedLogFile === null) { - // Re-scan if no file found at module load (OpenCode may create one later) - cachedLogFile = findCurrentLogFile(); - } - return cachedLogFile; -} - -function isNodeError(error: unknown): error is NodeJS.ErrnoException { - return ( - typeof error === 'object' && - error !== null && - 'code' in error && - typeof (error as NodeJS.ErrnoException).code === 'string' - ); -} - -function writeLog(level: string, message: string): void { - const logFile = getLogFile(); - if (!logFile) return; - - const timestamp = new Date().toISOString(); - const line = `${level.padEnd(5)} ${timestamp} +0ms service=omniroute ${message}\n`; - - try { - appendFileSync(logFile, line); - } catch (error: unknown) { - if (isNodeError(error) && error.code === 'ENOENT') { - // Log file was deleted, re-scan - cachedLogFile = findCurrentLogFile(); - // Retry once with new file - const newLogFile = cachedLogFile; - if (newLogFile) { - try { - appendFileSync(newLogFile, line); - } catch { - // Silently fail on second attempt - } - } - } - // Silently fail for all other errors - } -} - -export function warn(message: string): void { - writeLog('WARN', message); -} - -export function debug(message: string): void { - // Strict comparison: only "1" enables debug logging - if (process.env.OMNIROUTE_DEBUG !== '1') return; - writeLog('DEBUG', message); -} -``` - -- [ ] **Step 3: Build and run tests** - -Run: `npm run build && node --test test/logger.test.mjs` -Expected: Tests pass - -- [ ] **Step 4: Commit** - -```bash -git add src/logger.ts test/logger.test.mjs -git commit -m "feat: add logger module with warn/debug levels" -``` - ---- - -## Chunk 2: Replace Console Calls in plugin.ts - -### Task 2: Migrate plugin.ts - -**Files:** -- Modify: `src/plugin.ts` - -- [ ] **Step 1: Add logger import** - -Add at top of file: -```typescript -import { warn, debug } from './logger.js'; -``` - -- [ ] **Step 2: Replace console calls** - -Replace each console call (remove `[OmniRoute] ` prefix): - -Line 46: `console.warn('[OmniRoute] Eager model fetch failed, using defaults:', error)` -→ `warn(\`Eager model fetch failed, using defaults: ${error}\`)` - -Line 102: `console.log(\`[OmniRoute] Available models: ${models.map((model) => model.id).join(', ')}\`)` -→ `debug(\`Available models: ${models.map((model) => model.id).join(', ')}\`)` - -Line 104: `console.warn('[OmniRoute] Failed to fetch models, using defaults:', error)` -→ `warn(\`Failed to fetch models, using defaults: ${error}\`)` - -Line 110: `console.log(\`[OmniRoute] Provider models hydrated: ${Object.keys(provider.models).length}\`)` -→ `debug(\`Provider models hydrated: ${Object.keys(provider.models).length}\`)` - -Line 157: `console.warn('[OmniRoute] Unexpected error reading auth store:', error)` -→ `warn(\`Unexpected error reading auth store: ${error}\`)` - -Line 165-166: `console.warn(...)` -→ `warn('provider.api and options.apiMode differ; using options.apiMode')` - -Line 173: `console.warn(...)` -→ `warn(\`Unsupported provider.api value. Using ${apiMode}.\`)` - -Line 189: `console.warn(...)` -→ `warn('Unsupported apiMode option. Using chat.')` - -Line 211: `console.warn(...)` -→ `warn(\`Ignoring unsupported baseURL protocol: ${parsed.protocol}\`)` - -Line 217: `console.warn(...)` -→ `warn(\`Ignoring invalid baseURL: ${trimmed}\`)` - -Line 443: `console.log(...)` -→ `debug(\`Intercepting request to ${url}\`)` - -Line 471: `console.log(...)` -→ `debug('Processing /v1/models response')` - -Line 521: `console.log(...)` -→ `debug('Sanitized Gemini tool schema keywords')` - -- [ ] **Step 3: Build and verify** - -Run: `npm run build` -Expected: No TypeScript errors - -- [ ] **Step 4: Commit** - -```bash -git add src/plugin.ts -git commit -m "refactor: replace console calls with logger in plugin.ts" -``` - ---- - -## Chunk 3: Replace Console Calls in models.ts - -### Task 3: Migrate models.ts - -**Files:** -- Modify: `src/models.ts` - -- [ ] **Step 1: Add logger import** - -```typescript -import { warn, debug } from './logger.js'; -``` - -- [ ] **Step 2: Replace console calls** - -Line 56: `console.log('[OmniRoute] Using cached models')` -→ `debug('Using cached models')` - -Line 60: `console.log('[OmniRoute] Forcing model refresh')` -→ `debug('Forcing model refresh')` - -Line 67: `console.log(\`[OmniRoute] Fetching models from ${modelsUrl}\`)` -→ `debug(\`Fetching models from ${modelsUrl}\`)` - -Line 85-87: `console.error(...)` -→ `warn(\`Failed to fetch models: ${response.status} ${response.statusText}\`)` - -Line 96: `console.error('[OmniRoute] Invalid models response structure:', rawData)` -→ `warn(\`Invalid models response structure: ${JSON.stringify(rawData)}\`)` - -Line 131: `console.log(\`[OmniRoute] Successfully fetched ${models.length} models\`)` -→ `debug(\`Successfully fetched ${models.length} models\`)` - -Line 134: `console.error('[OmniRoute] Error fetching models:', error)` -→ `warn(\`Error fetching models: ${error}\`)` - -Line 139: `console.log('[OmniRoute] Returning expired cached models as fallback')` -→ `debug('Returning expired cached models as fallback')` - -Line 144: `console.log('[OmniRoute] Returning default models as fallback')` -→ `debug('Returning default models as fallback')` - -Line 161: `console.log('[OmniRoute] Model cache cleared for provided configuration')` -→ `debug('Model cache cleared for provided configuration')` - -Line 164: `console.log('[OmniRoute] All model caches cleared')` -→ `debug('All model caches cleared')` - -- [ ] **Step 3: Build and verify** - -Run: `npm run build` -Expected: No TypeScript errors - -- [ ] **Step 4: Commit** - -```bash -git add src/models.ts -git commit -m "refactor: replace console calls with logger in models.ts" -``` - ---- - -## Chunk 4: Replace Console Calls in models-dev.ts - -### Task 4: Migrate models-dev.ts - -**Files:** -- Modify: `src/models-dev.ts` - -- [ ] **Step 1: Add logger import** - -```typescript -import { warn, debug } from './logger.js'; -``` - -- [ ] **Step 2: Replace console calls** - -Line 93: `console.log('[OmniRoute] Using cached models.dev data')` -→ `debug('Using cached models.dev data')` - -Line 97: `console.log(\`[OmniRoute] Fetching models.dev data from ${url}\`)` -→ `debug(\`Fetching models.dev data from ${url}\`)` - -Line 112: `console.warn(\`[OmniRoute] Failed to fetch models.dev data: ${response.status}\`)` -→ `warn(\`Failed to fetch models.dev data: ${response.status}\`)` - -Line 120: `console.warn('[OmniRoute] Invalid models.dev data structure')` -→ `warn('Invalid models.dev data structure')` - -Line 130: `console.log('[OmniRoute] Successfully fetched models.dev data')` -→ `debug('Successfully fetched models.dev data')` - -Line 133: `console.warn('[OmniRoute] Error fetching models.dev data:', error)` -→ `warn(\`Error fetching models.dev data: ${error}\`)` - -Line 208: `console.log('[OmniRoute] models.dev cache cleared')` -→ `debug('models.dev cache cleared')` - -- [ ] **Step 3: Build and verify** - -Run: `npm run build` -Expected: No TypeScript errors - -- [ ] **Step 4: Commit** - -```bash -git add src/models-dev.ts -git commit -m "refactor: replace console calls with logger in models-dev.ts" -``` - ---- - -## Chunk 5: Replace Console Calls in omniroute-combos.ts - -### Task 5: Migrate omniroute-combos.ts - -**Files:** -- Modify: `src/omniroute-combos.ts` - -- [ ] **Step 1: Add logger import** - -```typescript -import { warn, debug } from './logger.js'; -``` - -- [ ] **Step 2: Replace console calls** - -Line 58: `console.log('[OmniRoute] Using cached combo data')` -→ `debug('Using cached combo data')` - -Line 63: `console.log(\`[OmniRoute] Fetching combo data from ${combosUrl}\`)` -→ `debug(\`Fetching combo data from ${combosUrl}\`)` - -Line 79: `console.warn(\`[OmniRoute] Failed to fetch combo data: ${response.status}\`)` -→ `warn(\`Failed to fetch combo data: ${response.status}\`)` - -Line 87: `console.warn('[OmniRoute] Invalid combo data structure')` -→ `warn('Invalid combo data structure')` - -Line 105: `console.log(\`[OmniRoute] Successfully fetched ${comboMap.size} combos\`)` -→ `debug(\`Successfully fetched ${comboMap.size} combos\`)` - -Line 108: `console.warn('[OmniRoute] Error fetching combo data:', error)` -→ `warn(\`Error fetching combo data: ${error}\`)` - -Line 120: `console.log('[OmniRoute] Combo cache cleared')` -→ `debug('Combo cache cleared')` - -Line 141: `console.log(\`[OmniRoute] Resolved combo "${modelId}" to ${combo.models.length} underlying models\`)` -→ `debug(\`Resolved combo "${modelId}" to ${combo.models.length} underlying models\`)` - -Line 149: `console.warn('[OmniRoute] Unexpected model entry in combo:', m)` -→ `warn(\`Unexpected model entry in combo: ${JSON.stringify(m)}\`)` - -Line 276: `console.log(\`[OmniRoute] Calculating capabilities for combo "${model.id}" from ${underlyingModels.length} models\`)` -→ `debug(\`Calculating capabilities for combo "${model.id}" from ${underlyingModels.length} models\`)` - -Line 291-293: `console.warn(...)` -→ `warn(\`Could not resolve ${unresolvedModels.length} underlying models for "${model.id}": ${unresolvedModels.join(', ')}\`)` - -Line 297: `console.warn(\`[OmniRoute] No models.dev matches found for combo "${model.id}"\`)` -→ `warn(\`No models.dev matches found for combo "${model.id}"\`)` - -Line 301: `console.log(\`[OmniRoute] Resolved ${resolvedModels.length}/${underlyingModels.length} underlying models for "${model.id}"\`)` -→ `debug(\`Resolved ${resolvedModels.length}/${underlyingModels.length} underlying models for "${model.id}"\`)` - -Line 306-308: `console.log(...)` -→ `debug(\`Calculated capabilities for "${model.id}": context=${capabilities.contextWindow ?? 'N/A'}, maxTokens=${capabilities.maxTokens ?? 'N/A'}, vision=${capabilities.supportsVision ?? false}, tools=${capabilities.supportsTools ?? false}\`)` - -Line 355: `console.log(\`[OmniRoute] Enriching combo model: ${model.id}\`)` -→ `debug(\`Enriching combo model: ${model.id}\`)` - -- [ ] **Step 3: Build and verify** - -Run: `npm run build` -Expected: No TypeScript errors - -- [ ] **Step 4: Commit** - -```bash -git add src/omniroute-combos.ts -git commit -m "refactor: replace console calls with logger in omniroute-combos.ts" -``` - ---- - -## Chunk 6: Final Verification - -### Task 6: Verify No Console Calls Remain - -- [ ] **Step 1: Run grep to find any remaining console calls** - -```bash -grep -En "console\.(log|warn|error)" src/ -``` - -Expected: No output (no matches) - -- [ ] **Step 2: Run full test suite** - -```bash -npm test -``` - -Expected: All tests pass (or same failures as baseline) - -- [ ] **Step 3: Manual verification** - -Run plugin with OMNIROUTE_DEBUG=1 and verify debug messages appear in latest OpenCode log: -```bash -OMNIROUTE_DEBUG=1 npm test -``` - -Check log file: -```bash -tail -20 ~/.local/share/opencode/log/$(ls -t ~/.local/share/opencode/log/*.log | head -1) -``` - -Expected: See DEBUG entries with `service=omniroute` - -- [ ] **Step 4: Commit** - -```bash -git commit -m "chore: verify no console calls remain" -``` - ---- - -## Completion Checklist - -- [ ] `src/logger.ts` created with warn/debug functions -- [ ] `test/logger.test.mjs` created with unit tests -- [ ] All console calls removed from `src/plugin.ts` -- [ ] All console calls removed from `src/models.ts` -- [ ] All console calls removed from `src/models-dev.ts` -- [ ] All console calls removed from `src/omniroute-combos.ts` -- [ ] Build passes without errors -- [ ] Tests pass (or match baseline) -- [ ] Manual verification shows logs in OpenCode log file diff --git a/docs/superpowers/plans/2026-05-16-omniroute-normalization-dedupe.md b/docs/superpowers/plans/2026-05-16-omniroute-normalization-dedupe.md deleted file mode 100644 index 7d61224..0000000 --- a/docs/superpowers/plans/2026-05-16-omniroute-normalization-dedupe.md +++ /dev/null @@ -1,499 +0,0 @@ -# OmniRoute Model Normalization & Deduplication Implementation Plan - -> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Address PR review claims by normalizing OmniRoute native metadata and deduplicating alias/canonical model entries. - -**Architecture:** Update `normalizeModel()` to read all OmniRoute field variants (snake_case, camelCase, capabilities object), then add a generic deduplication step that groups by canonical provider+model key and prefers canonical-prefixed IDs. Normalization happens before models.dev enrichment so no metadata is lost. - -**Tech Stack:** TypeScript ESM, Node.js native test runner - ---- - -## File Structure - -**Modified files:** -- `src/types.ts` — Add `capabilities` and snake_case fields to `OmniRouteModel` -- `src/models.ts` — Update `normalizeModel()`, add `deduplicateModels()`, wire into `fetchModels()` -- `src/constants.ts` — Add provider alias-to-canonical mapping -- `test/models.test.mjs` — Tests for normalization and deduplication - ---- - -## Chunk 1: Extend Types for OmniRoute Native Fields - -### Task 1: Add OmniRoute Native Fields to OmniRouteModel - -**Files:** -- Modify: `src/types.ts:25-50` (OmniRouteModel interface) - -**Issue:** Current `OmniRouteModel` only has camelCase fields, misses OmniRoute's snake_case and `capabilities` object. - -- [ ] **Step 1: Update OmniRouteModel interface** - -```typescript -export interface OmniRouteModel { - id: string; - name: string; - description?: string; - - // OmniRoute native fields (camelCase from API) - contextWindow?: number; - maxTokens?: number; - supportsStreaming?: boolean; - supportsVision?: boolean; - supportsTools?: boolean; - supportsTemperature?: boolean; - supportsReasoning?: boolean; - supportsAttachment?: boolean; - - // OmniRoute native fields (snake_case from API) - context_length?: number; - max_input_tokens?: number; - max_output_tokens?: number; - - // OmniRoute capabilities object - capabilities?: { - vision?: boolean; - tool_calling?: boolean; - reasoning?: boolean; - thinking?: boolean; - attachment?: boolean; - temperature?: boolean; - }; - - // Enriched fields from models.dev - temperature?: boolean; - reasoning?: boolean; - attachment?: boolean; - tool_call?: boolean; -} -``` - -- [ ] **Step 2: Build to verify no type errors** - -Run: `npm run build` -Expected: No errors - -- [ ] **Step 3: Commit** - -```bash -git add src/types.ts -git commit -m "types: add OmniRoute native fields (snake_case, capabilities) to OmniRouteModel" -``` - ---- - -## Chunk 2: Update normalizeModel to Read All Field Variants - -### Task 2: Comprehensive Model Normalization - -**Files:** -- Modify: `src/models.ts:123-144` (normalizeModel in fetchModels) - -**Issue:** Current normalization only reads camelCase fields, ignores snake_case and capabilities object. - -- [ ] **Step 1: Extract normalizeModel as standalone function** - -Add before `fetchModels`: - -```typescript -function normalizeModel(model: OmniRouteModel): OmniRouteModel { - const capabilities = model.capabilities && typeof model.capabilities === 'object' - ? model.capabilities - : {}; - - return { - ...model, - id: model.id, - name: model.name || model.id, - description: model.description || `OmniRoute model: ${model.id}`, - - // Context limits: prefer explicit camelCase, fallback to snake_case - contextWindow: model.contextWindow ?? model.context_length ?? model.max_input_tokens, - maxTokens: model.maxTokens ?? model.max_output_tokens, - - // Capabilities: prefer explicit camelCase, fallback to capabilities object, fallback to snake_case - supportsStreaming: model.supportsStreaming, - supportsVision: model.supportsVision ?? model.vision ?? capabilities.vision ?? capabilities.attachment, - supportsTools: model.supportsTools ?? model.tool_calling ?? capabilities.tool_calling ?? capabilities.toolcall, - supportsReasoning: model.supportsReasoning ?? model.reasoning ?? capabilities.reasoning ?? capabilities.thinking, - supportsAttachment: model.supportsAttachment ?? model.attachment ?? capabilities.attachment, - supportsTemperature: model.supportsTemperature ?? model.temperature ?? capabilities.temperature, - }; -} -``` - -- [ ] **Step 2: Update fetchModels to use normalizeModel** - -Replace the inline map with: -```typescript -.map(normalizeModel) -``` - -- [ ] **Step 3: Build and test** - -Run: `npm run build` -Expected: No errors - -Run: `npm test` -Expected: All existing tests pass - -- [ ] **Step 4: Commit** - -```bash -git add src/models.ts src/types.ts -git commit -m "feat: normalize all OmniRoute field variants (snake_case, capabilities)" -``` - ---- - -## Chunk 3: Generic Model Deduplication - -### Task 3: Create Provider Alias Map - -**Files:** -- Modify: `src/constants.ts` - -**Issue:** Need to know which provider prefixes are aliases so we can deduplicate. - -- [ ] **Step 1: Add provider alias-to-canonical mapping** - -```typescript -export const PROVIDER_ALIAS_TO_CANONICAL: Record = { - 'ollamacloud': 'ollama-cloud', - 'cc': 'claude', - 'gh': 'github', - 'cx': 'codex', - 'kr': 'kiro', - 'if': 'qoder', -}; -``` - -- [ ] **Step 2: Build** - -Run: `npm run build` -Expected: No errors - -- [ ] **Step 3: Commit** - -```bash -git add src/constants.ts -git commit -m "feat: add provider alias-to-canonical mapping for deduplication" -``` - ---- - -### Task 4: Implement Generic Deduplication - -**Files:** -- Modify: `src/models.ts` — Add `deduplicateModels()` function - -**Issue:** Alias-prefixed models appear alongside canonical-prefixed ones. - -- [ ] **Step 1: Add deduplicateModels function** - -Add after `normalizeModel`: - -```typescript -function deduplicateModels(models: OmniRouteModel[]): OmniRouteModel[] { - const seen = new Map(); - - for (const model of models) { - const parts = model.id.split('/'); - if (parts.length !== 2) { - // Not a provider/model ID, keep as-is - seen.set(model.id, model); - continue; - } - - const [providerPrefix, modelKey] = parts; - const canonicalPrefix = PROVIDER_ALIAS_TO_CANONICAL[providerPrefix] || providerPrefix; - const canonicalId = `${canonicalPrefix}/${modelKey}`; - - const existing = seen.get(canonicalId); - if (!existing) { - // First time seeing this model - store with canonical ID - seen.set(canonicalId, { - ...model, - id: canonicalId, - }); - } else { - // Already have canonical version - merge metadata, prefer non-alias - const isAlias = providerPrefix !== canonicalPrefix; - if (!isAlias) { - // This is the canonical version, overwrite alias - seen.set(canonicalId, { - ...model, - id: canonicalId, - }); - } - // If alias and we already have canonical, drop it - } - } - - return [...seen.values()]; -} -``` - -- [ ] **Step 2: Wire deduplication into fetchModels** - -After normalization, before enrichment: -```typescript -const rawModels = data.data - .filter(...) - .map(normalizeModel); - -const dedupedModels = deduplicateModels(rawModels); - -// Enrich with models.dev and combo capabilities -const models = await enrichModelMetadata(dedupedModels, config); -``` - -- [ ] **Step 3: Build and test** - -Run: `npm run build` -Expected: No errors - -Run: `npm test` -Expected: All existing tests pass - -- [ ] **Step 4: Commit** - -```bash -git add src/models.ts src/constants.ts -git commit -m "feat: deduplicate alias/canonical model entries, prefer canonical form" -``` - ---- - -## Chunk 4: Test Coverage - -### Task 5: Test Normalization of All Field Variants - -**Files:** -- Modify: `test/models.test.mjs` - -- [ ] **Step 1: Add test for snake_case field normalization** - -```javascript -test('normalizeModel reads snake_case fields', () => { - // We can't test normalizeModel directly (it's private), so test via fetchModels - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - return new Response( - JSON.stringify({ - object: 'list', - data: [ - { - id: 'test/model-1', - name: 'Test Model', - context_length: 128000, - max_output_tokens: 4096, - capabilities: { - vision: true, - tool_calling: true, - reasoning: true, - } - } - ] - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const models = await fetchModels(CONFIG, CONFIG.apiKey, false); - const model = models.find(m => m.id === 'test/model-1'); - - assert.ok(model, 'Model should be found'); - assert.equal(model.contextWindow, 128000, 'Should read context_length'); - assert.equal(model.maxTokens, 4096, 'Should read max_output_tokens'); - assert.equal(model.supportsVision, true, 'Should read capabilities.vision'); - assert.equal(model.supportsTools, true, 'Should read capabilities.tool_calling'); - assert.equal(model.supportsReasoning, true, 'Should read capabilities.reasoning'); -}); -``` - -- [ ] **Step 2: Add test for camelCase fallback** - -```javascript -test('normalizeModel prefers camelCase over snake_case', () => { - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - return new Response( - JSON.stringify({ - object: 'list', - data: [ - { - id: 'test/model-2', - contextWindow: 64000, - context_length: 32000, - capabilities: { - vision: false, - } - } - ] - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const models = await fetchModels(CONFIG, CONFIG.apiKey, false); - const model = models.find(m => m.id === 'test/model-2'); - - assert.ok(model, 'Model should be found'); - assert.equal(model.contextWindow, 64000, 'Should prefer camelCase over snake_case'); -}); -``` - -- [ ] **Step 3: Commit** - -```bash -git add test/models.test.mjs -git commit -m "test: verify normalization of snake_case and capabilities fields" -``` - ---- - -### Task 6: Test Deduplication - -**Files:** -- Modify: `test/models.test.mjs` - -- [ ] **Step 1: Add test for alias deduplication** - -```javascript -test('deduplication removes alias when canonical exists', async () => { - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - return new Response( - JSON.stringify({ - object: 'list', - data: [ - { - id: 'ollamacloud/deepseek-v4', - name: 'DeepSeek V4 (alias)', - context_length: 64000, - }, - { - id: 'ollama-cloud/deepseek-v4', - name: 'DeepSeek V4 (canonical)', - context_length: 128000, - } - ] - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const models = await fetchModels(CONFIG, CONFIG.apiKey, false); - - assert.equal(models.length, 1, 'Should deduplicate to single model'); - assert.equal(models[0].id, 'ollama-cloud/deepseek-v4', 'Should prefer canonical ID'); - assert.equal(models[0].contextWindow, 128000, 'Should use canonical metadata'); -}); -``` - -- [ ] **Step 2: Add test for dedupe with only alias present** - -```javascript -test('deduplication keeps alias when canonical is missing', async () => { - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - return new Response( - JSON.stringify({ - object: 'list', - data: [ - { - id: 'ollamacloud/deepseek-v4', - name: 'DeepSeek V4', - context_length: 64000, - } - ] - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const models = await fetchModels(CONFIG, CONFIG.apiKey, false); - - assert.equal(models.length, 1, 'Should keep single model'); - assert.equal(models[0].id, 'ollama-cloud/deepseek-v4', 'Should normalize to canonical ID'); -}); -``` - -- [ ] **Step 3: Run all tests** - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 4: Commit** - -```bash -git add test/models.test.mjs -git commit -m "test: verify deduplication of alias/canonical model entries" -``` - ---- - -## Chunk 5: Final Verification - -### Task 7: Full Build and Test - -- [ ] **Step 1: Clean build** - -Run: `npm run clean && npm run build` -Expected: Success - -- [ ] **Step 2: Run all tests** - -Run: `npm test` -Expected: All tests pass (target: 42+ tests) - -- [ ] **Step 3: Type check** - -Run: `npx tsc --noEmit` -Expected: No errors - -- [ ] **Step 4: Verify no regressions** - -Check that existing tests still pass and no functionality was broken. - -- [ ] **Step 5: Final commit summary** - -```bash -git log --oneline -25 -``` - ---- - -## Summary - -**Total tasks:** 7 (5 code + 2 test + 1 verification) -**Estimated commits:** 7 -**Estimated time:** 1-2 hours - -### Changes Summary: -1. **Types** — Added snake_case fields and capabilities object to OmniRouteModel -2. **Normalization** — normalizeModel now reads all field variants with proper precedence -3. **Constants** — Added PROVIDER_ALIAS_TO_CANONICAL mapping -4. **Deduplication** — Generic dedupe algorithm that prefers canonical IDs -5. **Tests** — Coverage for normalization and deduplication edge cases - -### Key Decisions: -- camelCase fields take precedence over snake_case (consistent with existing code) -- capabilities object takes precedence over top-level snake_case fields -- Deduplication normalizes alias IDs to canonical form even when canonical is missing -- Merge strategy: prefer canonical metadata over alias metadata diff --git a/docs/superpowers/plans/2026-05-16-pr18-review-fixes.md b/docs/superpowers/plans/2026-05-16-pr18-review-fixes.md deleted file mode 100644 index a510177..0000000 --- a/docs/superpowers/plans/2026-05-16-pr18-review-fixes.md +++ /dev/null @@ -1,950 +0,0 @@ -# PR #18 Review Issues Implementation Plan - -> **For agentic workers:** REQUIRED: Use superpowers:subagent-driven-development (if subagents available) or superpowers:executing-plans to implement this plan. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Address all 20 review issues from PR #18 (excluding CHANGELOG entry) - -**Architecture:** Systematic fixes across 6 source files and 2 test files, organized by severity and file grouping. Medium-priority issues first, then low-priority. - -**Tech Stack:** TypeScript ESM, Node.js native test runner, native fetch - ---- - -## File Structure - -**Modified files:** -- `src/plugin.ts` — 6 issues (fallback operator, metadata validation, magic constants, as const, tool defaults, array ordering) -- `src/models.ts` — 4 issues (cache key, sensitive logs, DRY violation, candidate explosion) -- `src/models-dev.ts` — 2 issues (metadata tracking, trailing newline) -- `src/omniroute-combos.ts` — 2 issues (log injection, DRY splitModelId) -- `src/types.ts` — 2 issues (provider fields shape, variants type) -- `src/logger.ts` — 1 issue (synchronous I/O) -- `test/models.test.mjs` — 3 test gaps (temperature/reasoning, variant+alias, subscription fallback) -- `test/plugin.test.mjs` — 1 issue (hardcoded ports) - ---- - -## Chunk 1: Critical Fixes (Medium Priority) - -### Task 1: Fix API Key Fallback Operator - -**Files:** -- Modify: `src/plugin.ts:45` - -**Issue:** `||` treats empty string `""` as falsy and falls back to env var unexpectedly. - -- [ ] **Step 1: Update fallback operator** - -```typescript -// Change from: -const apiKey = auth?.key || process.env.OMNIROUTE_API_KEY; -// To: -const apiKey = auth?.key ?? process.env.OMNIROUTE_API_KEY; -``` - -- [ ] **Step 2: Verify no other `||` fallback patterns exist** - -Run: `grep -n "|| process.env" src/*.ts` -Expected: No matches (or only intentional ones) - -- [ ] **Step 3: Commit** - -```bash -git add src/plugin.ts -git commit -m "fix: use nullish coalescing for API key fallback" -``` - ---- - -### Task 2: Add Runtime Validation for modelMetadata - -**Files:** -- Modify: `src/plugin.ts:351-379` (mergeModelMetadata function) -- Modify: `src/types.ts` (if needed for validator types) - -**Issue:** `metadata as OmniRouteModelMetadata` casts without validation. - -- [ ] **Step 1: Create metadata validation helper** - -Add to `src/plugin.ts` after `isRecord` function: - -```typescript -function isValidModelMetadata(value: unknown): value is OmniRouteModelMetadata { - if (!isRecord(value)) return false; - - // Only validate boolean fields if present - const booleanFields = [ - 'supportsStreaming', 'supportsVision', 'supportsTools', - 'supportsTemperature', 'supportsReasoning', 'supportsAttachment' - ]; - - for (const field of booleanFields) { - if (field in value && typeof value[field] !== 'boolean') { - return false; - } - } - - // Validate numeric fields - if ('contextWindow' in value && typeof value.contextWindow !== 'number') return false; - if ('maxTokens' in value && typeof value.maxTokens !== 'number') return false; - - return true; -} -``` - -- [ ] **Step 2: Update mergeModelMetadata to validate** - -```typescript -// In mergeModelMetadata, replace: -merged[id] = { - ...(generated[id] ?? {}), - ...(metadata as OmniRouteModelMetadata), -}; -// With: -if (isValidModelMetadata(metadata)) { - merged[id] = { - ...(generated[id] ?? {}), - ...metadata, - }; -} else { - warn(`Invalid metadata for model "${id}", skipping`); -} -``` - -- [ ] **Step 3: Build and test** - -Run: `npm run build` -Expected: No errors - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 4: Commit** - -```bash -git add src/plugin.ts -git commit -m "fix: add runtime validation for modelMetadata merge" -``` - ---- - -### Task 3: Fix Cache Key to Include modelsDev Config - -**Files:** -- Modify: `src/models.ts:36-39` (getCacheKey function) - -**Issue:** Different `modelsDev` configs share same cache key. - -- [ ] **Step 1: Update getCacheKey to hash config** - -```typescript -function getCacheKey(config: OmniRouteConfig, apiKey: string): string { - const baseUrl = config.baseUrl || OMNIROUTE_ENDPOINTS.BASE_URL; - - // Include modelsDev config in cache key - const modelsDevHash = config.modelsDev - ? JSON.stringify({ - enabled: config.modelsDev.enabled, - url: config.modelsDev.url, - providerAliases: config.modelsDev.providerAliases, - }) - : ''; - - return `${baseUrl}:${apiKey}:${modelsDevHash}`; -} -``` - -- [ ] **Step 2: Add test for cache key differentiation** - -In `test/models.test.mjs`, add: - -```javascript -test('fetchModels uses different cache for different modelsDev configs', async () => { - let calls = 0; - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - calls++; - return new Response( - JSON.stringify({ object: 'list', data: [{ id: `model-${calls}`, name: `Model ${calls}` }] }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const config1 = { ...CONFIG, modelsDev: { enabled: true, providerAliases: { oai: 'openai' } } }; - const config2 = { ...CONFIG, modelsDev: { enabled: true, providerAliases: { oai: 'anthropic' } } }; - - await fetchModels(config1, CONFIG.apiKey, false); - await fetchModels(config2, CONFIG.apiKey, false); - - assert.equal(calls, 2, 'Should fetch twice for different modelsDev configs'); -}); -``` - -- [ ] **Step 3: Run tests** - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 4: Commit** - -```bash -git add src/models.ts test/models.test.mjs -git commit -m "fix: include modelsDev config in cache key" -``` - ---- - -### Task 4: Fix DRY Violation - Extract splitModelId - -**Files:** -- Modify: `src/omniroute-combos.ts:227-248` — Export the function -- Modify: `src/models.ts:340-359` — Remove duplicate, import and use - -**Issue:** `splitModelId` and `splitOmniRouteModelForLookup` are identical. - -- [ ] **Step 1: Export splitModelId from omniroute-combos.ts** - -Change: -```typescript -function splitModelId(...) { ... } -``` -To: -```typescript -export function splitModelId(...) { ... } -``` - -- [ ] **Step 2: Remove duplicate from models.ts and import** - -In `src/models.ts`: -1. Add to imports from `omniroute-combos.ts`: -```typescript -import { splitModelId } from './omniroute-combos.js'; -``` - -2. Remove `splitOmniRouteModelForLookup` function entirely. - -3. Replace usage: -```typescript -const { providerKey, modelKey } = splitOmniRouteModelForLookup(model.id); -``` -With: -```typescript -const { providerKey, modelKey } = splitModelId(model.id); -``` - -- [ ] **Step 3: Build and test** - -Run: `npm run build` -Expected: No errors - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 4: Commit** - -```bash -git add src/models.ts src/omniroute-combos.ts -git commit -m "refactor: extract splitModelId to eliminate DRY violation" -``` - ---- - -### Task 5: Fix supportsTools Default Inconsistency - -**Files:** -- Modify: `src/plugin.ts:432` (toProviderModel) -- Modify: `src/models-dev.ts:331` (calculateLowestCommonCapabilities) - -**Issue:** `toProviderModel` defaults tools to true, combo logic defaults to false. - -- [ ] **Step 1: Document and align the defaults** - -In `src/plugin.ts`, add comment: -```typescript -// Default to true: if API doesn't explicitly say no tools, assume capability exists -// This aligns with OpenAI-compatible behavior where most models support tools -const supportsTools = model.supportsTools !== false; -``` - -In `src/models-dev.ts`, update combo logic: -```typescript -// For combos: only advertise tools if ALL underlying models explicitly support them -// This is intentionally stricter than single-model defaults because a combo -// with one tool-less model cannot reliably use tools across all backends -const supportsTools = model.tool_call === true; -``` - -- [ ] **Step 2: Commit** - -```bash -git add src/plugin.ts src/models-dev.ts -git commit -m "docs: document supportsTools default rationale" -``` - ---- - -### Task 6: Fix Array Metadata Ordering - -**Files:** -- Modify: `src/plugin.ts:351-379` (mergeModelMetadata) - -**Issue:** Generated blocks prepended — verify if OpenCode uses first or last match wins. - -- [ ] **Step 1: Research OpenCode matching logic** - -Check `@opencode-ai/plugin` source or documentation for `modelMetadata` array processing. - -- [ ] **Step 2: If last-match-wins, reverse order** - -```typescript -// If OpenCode uses last-match-wins, user config should come last -return [...userConfig, ...generatedBlocks]; -``` - -- [ ] **Step 3: Commit** - -```bash -git add src/plugin.ts -git commit -m "fix: correct modelMetadata array ordering for OpenCode matching" -``` - ---- - -### Task 7: Verify Provider Model Fields Match OpenCode Shape - -**Files:** -- Modify: `src/types.ts:111-155` - -**Issue:** New fields may not match `@opencode-ai/plugin` expectations. - -- [ ] **Step 1: Check OpenCode plugin types** - -Run: `npm ls @opencode-ai/plugin` or check node_modules for exact interface. - -- [ ] **Step 2: Rename fields if needed** - -If `tool_call` should be `toolCall`: -```typescript -// In OmniRouteProviderModel: -toolCall?: boolean; // instead of tool_call -``` - -Update all references in `src/plugin.ts` accordingly. - -- [ ] **Step 3: Commit** - -```bash -git add src/types.ts src/plugin.ts -git commit -m "fix: align provider model fields with OpenCode plugin types" -``` - ---- - -### Task 8: Tighten Variants Type - -**Files:** -- Modify: `src/types.ts:154` - -**Issue:** `Record` is too permissive. - -- [ ] **Step 1: Define strict variant types** - -```typescript -export interface OmniRouteModelVariant { - reasoningEffort?: 'low' | 'medium' | 'high'; - // Future variant types can be added here -} - -// In OmniRouteProviderModel: -variants: Record; -``` - -- [ ] **Step 2: Update toProviderModel** - -```typescript -variants: supportsReasoning - ? { - low: { reasoningEffort: 'low' }, - medium: { reasoningEffort: 'medium' }, - high: { reasoningEffort: 'high' }, - } - : {}, -``` - -- [ ] **Step 3: Build and test** - -Run: `npm run build` -Expected: No type errors - -- [ ] **Step 4: Commit** - -```bash -git add src/types.ts src/plugin.ts -git commit -m "types: tighten variants type to prevent invalid reasoning effort values" -``` - ---- - -## Chunk 2: Low Priority Fixes - -### Task 9: Fix Sensitive Data in Log - -**Files:** -- Modify: `src/models.ts:102` - -**Issue:** `JSON.stringify(rawData)` could leak sensitive data. - -- [ ] **Step 1: Replace with shape-only logging** - -```typescript -// Change from: -warn(`Invalid models response structure: ${JSON.stringify(rawData)}`); -// To: -const dataType = rawData && typeof rawData === 'object' - ? (Array.isArray(rawData.data) ? 'array' : typeof rawData.data) - : typeof rawData; -warn(`Invalid models response structure: expected { data: Array }, got { data: ${dataType} }`); -``` - -- [ ] **Step 2: Commit** - -```bash -git add src/models.ts -git commit -m "fix: avoid logging sensitive response data in models fetch error" -``` - ---- - -### Task 10: Fix Log Injection via Model IDs - -**Files:** -- Modify: `src/omniroute-combos.ts:55` and other log lines - -**Issue:** Model IDs from external API interpolated without sanitization. - -- [ ] **Step 1: Add sanitization helper** - -```typescript -function sanitizeForLog(value: string): string { - return value.replace(/[\r\n]/g, ''); -} -``` - -- [ ] **Step 2: Apply to all interpolated model IDs** - -Search for `model.id` in log lines and wrap with sanitizeForLog: -```typescript -warn(`Could not resolve ${unresolvedModels.length} underlying models for "${sanitizeForLog(model.id)}": ${unresolvedModels.map(sanitizeForLog).join(', ')}`); -``` - -- [ ] **Step 3: Commit** - -```bash -git add src/omniroute-combos.ts -git commit -m "fix: sanitize model IDs in log messages to prevent injection" -``` - ---- - -### Task 11: Fix Synchronous File I/O in Logger - -**Files:** -- Modify: `src/logger.ts` - -**Issue:** `appendFileSync` blocks event loop. - -- [ ] **Step 1: Switch to async logging** - -```typescript -// Replace appendFileSync with fire-and-forget appendFile -import { appendFile } from 'fs/promises'; - -export function warn(message: string): void { - const logFile = getLogFile(); - if (!logFile) return; - - const line = formatLogLine('WARN', message); - // Fire-and-forget: don't await, don't crash on error - appendFile(logFile, line).catch(() => {}); -} - -export function debug(message: string): void { - if (process.env.OMNIROUTE_DEBUG !== '1') return; - - const logFile = getLogFile(); - if (!logFile) return; - - const line = formatLogLine('DEBUG', message); - appendFile(logFile, line).catch(() => {}); -} -``` - -- [ ] **Step 2: Commit** - -```bash -git add src/logger.ts -git commit -m "perf: use async file I/O for logger to prevent event loop blocking" -``` - ---- - -### Task 12: Optimize Candidate Explosion in Lookup - -**Files:** -- Modify: `src/models.ts:290-307` - -**Issue:** Up to 8 candidates per model key, ~19k lookups. - -- [ ] **Step 1: Add early-exit for aliases that resolve to same key** - -```typescript -function getModelLookupCandidates(modelKey: string): string[] { - const candidates = new Set(); - - const addCandidate = (key: string): void => { - const lower = key.toLowerCase(); - const normalized = normalizeModelKey(key); - const aliasResolved = resolveModelAlias(key); - - candidates.add(lower); - candidates.add(normalized); - - // Only add alias variants if they differ from original - if (aliasResolved !== key) { - candidates.add(aliasResolved.toLowerCase()); - candidates.add(normalizeModelKey(aliasResolved)); - } - }; - - addCandidate(modelKey); - - const { base, stripped } = stripVariantSuffix(modelKey); - if (stripped) { - addCandidate(base); - } - - return [...candidates]; -} -``` - -- [ ] **Step 2: Commit** - -```bash -git add src/models.ts -git commit -m "perf: avoid duplicate lookup candidates when alias resolves to same key" -``` - ---- - -### Task 13: Fix Magic Constant 4096 - -**Files:** -- Modify: `src/plugin.ts:486-487` -- Modify: `src/constants.ts` — Add constants - -**Issue:** `4096` used without named constant. - -- [ ] **Step 1: Add named constants** - -In `src/constants.ts`: -```typescript -export const DEFAULT_CONTEXT_LIMIT = 4096; -export const DEFAULT_OUTPUT_LIMIT = 4096; -``` - -- [ ] **Step 2: Update plugin.ts to use constants** - -```typescript -import { DEFAULT_CONTEXT_LIMIT, DEFAULT_OUTPUT_LIMIT } from './constants.js'; - -// In toProviderModel: -limit: { - context: model.contextWindow ?? DEFAULT_CONTEXT_LIMIT, - output: model.maxTokens ?? DEFAULT_OUTPUT_LIMIT, -}, -``` - -- [ ] **Step 3: Commit** - -```bash -git add src/constants.ts src/plugin.ts -git commit -m "refactor: extract magic constant 4096 to named defaults" -``` - ---- - -### Task 14: Remove Redundant `as const` Assertions - -**Files:** -- Modify: `src/plugin.ts:447-449` - -**Issue:** `as const` is redundant given the type definition. - -- [ ] **Step 1: Remove assertions** - -```typescript -// Change from: -modalities: { - input: supportsVision ? ['text', 'image'] as const : ['text'] as const, - output: ['text'] as const, -}, - -// To: -modalities: { - input: supportsVision ? ['text', 'image'] : ['text'], - output: ['text'], -}, -``` - -- [ ] **Step 2: Verify types still compile** - -Run: `npx tsc --noEmit src/plugin.ts` -Expected: No errors - -- [ ] **Step 3: Commit** - -```bash -git add src/plugin.ts -git commit -m "style: remove redundant as const assertions" -``` - ---- - -### Task 15: Add Test for Single Model Metadata Tracking - -**Files:** -- Modify: `test/models.test.mjs` - -**Issue:** Early return bypasses metadata tracking logic. - -- [ ] **Step 1: Add test verifying consistency** - -```javascript -test('calculateLowestCommonCapabilities produces identical output for single model and combo-with-self', () => { - const single = calculateLowestCommonCapabilities([ - { id: 'test-model', temperature: true, reasoning: true, attachment: true } - ]); - - const combo = calculateLowestCommonCapabilities([ - { id: 'test-model', temperature: true, reasoning: true, attachment: true }, - { id: 'test-model', temperature: true, reasoning: true, attachment: true } - ]); - - assert.deepEqual(single, combo); -}); -``` - -- [ ] **Step 2: Run tests** - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 3: Commit** - -```bash -git add test/models.test.mjs -git commit -m "test: verify single-model and combo-with-self produce identical capabilities" -``` - ---- - -### Task 16: Add Trailing Newline to models-dev.ts - -**Files:** -- Modify: `src/models-dev.ts:480` - -**Issue:** File ends without trailing newline. - -- [ ] **Step 1: Add newline** - -Simply ensure the file ends with a newline character. - -- [ ] **Step 2: Commit** - -```bash -git add src/models-dev.ts -git commit -m "style: add trailing newline to models-dev.ts" -``` - ---- - -## Chunk 3: Test Coverage Gaps - -### Task 17: Add Temperature/Reasoning Combo Tests - -**Files:** -- Modify: `test/models.test.mjs` - -**Issue:** Missing tests for temperature and reasoning logic. - -- [ ] **Step 1: Add temperature tests** - -```javascript -test('calculateLowestCommonCapabilities handles temperature with mixed defined/undefined', () => { - const capabilities = calculateLowestCommonCapabilities([ - { id: 'with-temp', temperature: true }, - { id: 'without-temp' }, - ]); - - assert.equal(capabilities.supportsTemperature, true); -}); - -test('calculateLowestCommonCapabilities respects explicit temperature false', () => { - const capabilities = calculateLowestCommonCapabilities([ - { id: 'with-temp', temperature: true }, - { id: 'no-temp', temperature: false }, - ]); - - assert.equal(capabilities.supportsTemperature, false); -}); - -test('calculateLowestCommonCapabilities handles all three capabilities together', () => { - const capabilities = calculateLowestCommonCapabilities([ - { id: 'full', temperature: true, reasoning: true, attachment: true }, - { id: 'limited', temperature: false, reasoning: false, attachment: false }, - ]); - - assert.equal(capabilities.supportsTemperature, false); - assert.equal(capabilities.supportsReasoning, false); - assert.equal(capabilities.supportsAttachment, false); -}); - -test('calculateLowestCommonCapabilities handles single model with undefined temperature', () => { - const capabilities = calculateLowestCommonCapabilities([ - { id: 'no-meta' }, - ]); - - assert.equal(capabilities.supportsTemperature, undefined); -}); -``` - -- [ ] **Step 2: Run tests** - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 3: Commit** - -```bash -git add test/models.test.mjs -git commit -m "test: add temperature and reasoning combo capability tests" -``` - ---- - -### Task 18: Add Variant+Alias Integration Tests - -**Files:** -- Modify: `test/models.test.mjs` - -**Issue:** No test for variant suffix stripping + alias resolution. - -- [ ] **Step 1: Add integration test** - -```javascript -test('variant suffix stripping with alias resolution works end-to-end', async () => { - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - return new Response( - JSON.stringify({ - object: 'list', - data: [ - { id: 'moonshotai/kimi-k2.6-thinking-high', name: 'Kimi K2.6 Thinking High' } - ] - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - if (url.includes('models.dev')) { - return new Response( - JSON.stringify({ - moonshotai: { - models: { - 'kimi-k2-thinking': { - id: 'kimi-k2-thinking', - name: 'Kimi K2 Thinking', - temperature: true, - reasoning: true, - } - } - } - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const models = await fetchModels(CONFIG, CONFIG.apiKey, true); - const model = models.find(m => m.id === 'moonshotai/kimi-k2.6-thinking-high'); - - assert.ok(model, 'Model should be found'); - assert.equal(model.supportsReasoning, true, 'Should resolve alias and enrich capabilities'); -}); -``` - -- [ ] **Step 2: Run tests** - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 3: Commit** - -```bash -git add test/models.test.mjs -git commit -m "test: add variant suffix and alias resolution integration test" -``` - ---- - -### Task 19: Add Subscription Fallback Tests - -**Files:** -- Modify: `test/models.test.mjs` - -**Issue:** No test for subscription provider fallback. - -- [ ] **Step 1: Add subscription fallback test** - -```javascript -test('subscription fallback enriches from public provider', async () => { - global.fetch = async (input) => { - const url = input instanceof Request ? input.url : input.toString(); - if (url.includes('/v1/models')) { - return new Response( - JSON.stringify({ - object: 'list', - data: [ - { id: 'zai-coding-plan/gpt-4o', name: 'GPT-4o via ZAI' } - ] - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - if (url.includes('models.dev')) { - return new Response( - JSON.stringify({ - zai: { - models: { - 'gpt-4o': { - id: 'gpt-4o', - name: 'GPT-4o', - temperature: true, - tool_call: true, - } - } - } - }), - { status: 200, headers: { 'Content-Type': 'application/json' } } - ); - } - return new Response(JSON.stringify({ data: [] }), { status: 200 }); - }; - - const models = await fetchModels(CONFIG, CONFIG.apiKey, true); - const model = models.find(m => m.id === 'zai-coding-plan/gpt-4o'); - - assert.ok(model, 'Model should be found'); - assert.equal(model.supportsTemperature, true, 'Should fallback to zai provider'); - assert.equal(model.supportsTools, true, 'Should inherit tools from fallback'); -}); -``` - -- [ ] **Step 2: Run tests** - -Run: `npm test` -Expected: All tests pass - -- [ ] **Step 3: Commit** - -```bash -git add test/models.test.mjs -git commit -m "test: add subscription fallback provider test" -``` - ---- - -### Task 20: Fix Hardcoded Ports in Tests - -**Files:** -- Modify: `test/plugin.test.mjs` - -**Issue:** Hardcoded ports like `20129`, `20130`, `20131`. - -- [ ] **Step 1: Add test helper for dummy URLs** - -At top of test file: -```javascript -function getDummyBaseUrl(port = 20128) { - return `http://localhost:${port}/v1`; -} -``` - -- [ ] **Step 2: Replace hardcoded ports with helper** - -Search for `http://localhost:20` and replace with `getDummyBaseUrl()` calls. - -- [ ] **Step 3: Commit** - -```bash -git add test/plugin.test.mjs -git commit -m "test: replace hardcoded ports with helper function" -``` - ---- - -## Chunk 4: Final Verification - -### Task 21: Full Build and Test - -- [ ] **Step 1: Clean build** - -Run: `npm run clean && npm run build` -Expected: Success - -- [ ] **Step 2: Run all tests** - -Run: `npm test` -Expected: All tests pass (target: 40+ tests) - -- [ ] **Step 3: Type check** - -Run: `npx tsc --noEmit` -Expected: No errors - -- [ ] **Step 4: Lint check** - -Run: `npm run lint` (if available) -Expected: No errors - -- [ ] **Step 5: Final commit summary** - -```bash -git log --oneline -20 -``` - ---- - -## Summary - -**Total tasks:** 21 (20 fixes + 1 verification) -**Estimated commits:** 20 -**Estimated time:** 2-3 hours - -### Priority Order: -1. **Medium priority first** (Tasks 1-8): Core functionality and type safety -2. **Low priority fixes** (Tasks 9-16): Logging, performance, style -3. **Test coverage** (Tasks 17-20): Fill testing gaps -4. **Verification** (Task 21): Ensure everything works - -### Key Decisions: -- Use `??` instead of `||` for API key fallback -- Keep `supportsTools` defaults as-is but document rationale -- Export `splitModelId` from `omniroute-combos.ts` -- Include `modelsDev` config in cache key hash -- Tighten `variants` type to `OmniRouteModelVariant` -- Add comprehensive test coverage for temperature/reasoning combos diff --git a/docs/superpowers/specs/2026-05-14-omniroute-logger-design.md b/docs/superpowers/specs/2026-05-14-omniroute-logger-design.md deleted file mode 100644 index be14557..0000000 --- a/docs/superpowers/specs/2026-05-14-omniroute-logger-design.md +++ /dev/null @@ -1,207 +0,0 @@ -# OmniRoute Plugin Logger Design - -**Date:** 2026-05-14 -**Status:** Spec Ready for Implementation -**Topic:** Proper logging implementation for opencode-omniroute-auth plugin - -## Problem Statement - -The plugin currently uses `console.log` and `console.warn` for debug output, which pollutes the OpenCode terminal. We need a proper logger that: -- Writes to the OpenCode log file instead of console -- Supports debug/warn levels with environment-based toggling -- Integrates with OpenCode's existing log format - -## Goals - -1. Replace all `console.log`/`console.warn` calls with a proper logger -2. Write logs to OpenCode's log directory (`~/.local/share/opencode/log/`) -3. Use OpenCode's native log format with `service=omniroute` -4. Keep warnings always-on, debug opt-in via `OMNIROUTE_DEBUG` env var -5. Zero I/O overhead when debug is disabled (no-op for debug calls — no file writes, but string interpolation at call site still executes) - -## Non-Goals - -- Multi-level logging (error/info/debug/trace) — only warn and debug -- Log rotation or retention policies — handled by OpenCode -- Structured logging with JSON — plain text matching OpenCode format -- Configurable log destination — always OpenCode's log directory - -## Architecture - -### Components - -#### 1. `src/logger.ts` — Logger Module - -A lightweight logger that appends to the current OpenCode log file. - -**Interface:** -```typescript -export function warn(message: string): void; -export function debug(message: string): void; -``` - -Note: Call sites must stringify any objects/arrays/errors before passing. For multi-argument calls like `console.warn(msg, errorObject)`, use template literals: `` logger.warn(`${msg}: ${errorObject}`) ``. For Error objects, use `error.message` or `error.stack` rather than `JSON.stringify(error)` (which returns `{}`). - -**Call Site Audit (existing console calls to migrate):** - -| File | Line | Current | New Level | Notes | -|------|------|---------|-----------|-------| -| `src/plugin.ts` | 46 | `console.warn(..., error)` | `warn` | Eager model fetch failed | -| `src/plugin.ts` | 102 | `console.log(...)` | `debug` | Available models list | -| `src/plugin.ts` | 104 | `console.warn(..., error)` | `warn` | Failed to fetch models | -| `src/plugin.ts` | 110 | `console.log(...)` | `debug` | Provider models hydrated | -| `src/plugin.ts` | 157 | `console.warn(..., error)` | `warn` | Unexpected error reading auth store | -| `src/plugin.ts` | 165 | `console.warn(...)` | `warn` | provider.api and options.apiMode differ | -| `src/plugin.ts` | 173 | `console.warn(...)` | `warn` | Unsupported provider.api value | -| `src/plugin.ts` | 189 | `console.warn(...)` | `warn` | Unsupported apiMode option | -| `src/plugin.ts` | 211 | `console.warn(...)` | `warn` | Ignoring unsupported baseURL protocol | -| `src/plugin.ts` | 217 | `console.warn(...)` | `warn` | Ignoring invalid baseURL | -| `src/plugin.ts` | 443 | `console.log(...)` | `debug` | Intercepting request | -| `src/plugin.ts` | 471 | `console.log(...)` | `debug` | Processing /v1/models response | -| `src/plugin.ts` | 521 | `console.log(...)` | `debug` | Sanitized Gemini tool schema keywords | -| `src/models.ts` | 56 | `console.log(...)` | `debug` | Using cached models | -| `src/models.ts` | 60 | `console.log(...)` | `debug` | Forcing model refresh | -| `src/models.ts` | 67 | `console.log(...)` | `debug` | Fetching models from URL | -| `src/models.ts` | 85 | `console.error(...)` | `warn` | Failed to fetch models (HTTP error) | -| `src/models.ts` | 96 | `console.error(...)` | `warn` | Invalid models response structure | -| `src/models.ts` | 131 | `console.log(...)` | `debug` | Successfully fetched N models | -| `src/models.ts` | 134 | `console.error(...)` | `warn` | Error fetching models | -| `src/models.ts` | 139 | `console.log(...)` | `debug` | Returning expired cached models | -| `src/models.ts` | 144 | `console.log(...)` | `debug` | Returning default models | -| `src/models.ts` | 161 | `console.log(...)` | `debug` | Model cache cleared (specific) | -| `src/models.ts` | 164 | `console.log(...)` | `debug` | All model caches cleared | -| `src/models-dev.ts` | 93 | `console.log(...)` | `debug` | Using cached models.dev data | -| `src/models-dev.ts` | 97 | `console.log(...)` | `debug` | Fetching models.dev data from URL | -| `src/models-dev.ts` | 112 | `console.warn(...)` | `warn` | Failed to fetch models.dev data | -| `src/models-dev.ts` | 120 | `console.warn(...)` | `warn` | Invalid models.dev data structure | -| `src/models-dev.ts` | 130 | `console.log(...)` | `debug` | Successfully fetched models.dev data | -| `src/models-dev.ts` | 133 | `console.warn(...)` | `warn` | Error fetching models.dev data | -| `src/models-dev.ts` | 208 | `console.log(...)` | `debug` | models.dev cache cleared | -| `src/omniroute-combos.ts` | 58 | `console.log(...)` | `debug` | Using cached combo data | -| `src/omniroute-combos.ts` | 63 | `console.log(...)` | `debug` | Fetching combo data from URL | -| `src/omniroute-combos.ts` | 79 | `console.warn(...)` | `warn` | Failed to fetch combo data | -| `src/omniroute-combos.ts` | 87 | `console.warn(...)` | `warn` | Invalid combo data structure | -| `src/omniroute-combos.ts` | 105 | `console.log(...)` | `debug` | Successfully fetched N combos | -| `src/omniroute-combos.ts` | 108 | `console.warn(...)` | `warn` | Error fetching combo data | -| `src/omniroute-combos.ts` | 120 | `console.log(...)` | `debug` | Combo cache cleared | -| `src/omniroute-combos.ts` | 141 | `console.log(...)` | `debug` | Resolved combo to N models | -| `src/omniroute-combos.ts` | 149 | `console.warn(...)` | `warn` | Unexpected model entry in combo | -| `src/omniroute-combos.ts` | 276 | `console.log(...)` | `debug` | Calculating capabilities for combo | -| `src/omniroute-combos.ts` | 291 | `console.warn(...)` | `warn` | Could not resolve underlying models | -| `src/omniroute-combos.ts` | 297 | `console.warn(...)` | `warn` | No models.dev matches found for combo | -| `src/omniroute-combos.ts` | 301 | `console.log(...)` | `debug` | Resolved N/N underlying models | -| `src/omniroute-combos.ts` | 306 | `console.log(...)` | `debug` | Calculated capabilities for combo | -| `src/omniroute-combos.ts` | 355 | `console.log(...)` | `debug` | Enriching combo model | - -**Behavior:** -- `warn()`: Always writes to log file (unless write fails) -- `debug()`: Only writes when `OMNIROUTE_DEBUG` environment variable is exactly `"1"` (strict string comparison; all other values including `"true"`, `"yes"`, `"0"`, empty string are treated as disabled) -- Both functions use synchronous I/O (`fs.appendFileSync` wrapped in `try/catch`) for simplicity and fire-and-forget semantics - -**Implementation Details:** -- Log file path is resolved once at module load and cached (wrapped in `try/catch` to prevent crash on import) -- Resolution: finds the most recent `.log` file in `~/.local/share/opencode/log/` by modification time (`mtime`) - - Only considers regular files (not directories) via `stat.isFile()` - - Tie-breaker: alphabetical sort if multiple files have identical `mtime` -- If no log file exists at module load, logger attempts re-scan on every `warn()` and `debug()` call (in case OpenCode creates a log file later) -- If cached file becomes unavailable (e.g., OpenCode rotated logs), falls back to re-scanning on next write failure - - Write failures that trigger re-scan: `ENOENT` (file deleted) - - Other failures (`EACCES`, `ENOSPC`, `EIO`): silently skip without re-scan - - Re-scan updates the cached path; the current log call uses the newly discovered file (same-call retry) -- Appends entries in OpenCode's format: `LEVEL ISO-TIMESTAMP +0ms service=omniroute MESSAGE` - - `OFFSET` is hardcoded to `+0ms` to match observed OpenCode log format -- Silently fails if log file cannot be written (don't crash the plugin) -- If no log files exist, skips logging (does not create new files to avoid conflicts with OpenCode's log rotation) - -#### 2. Updated Source Files - -Replace console calls in all source files. Mapping rules: -- `console.log()` → `logger.debug()` (informational messages) -- `console.warn()` → `logger.warn()` (warnings) -- `console.error()` → `logger.warn()` (errors that don't stop execution — plugin continues with fallbacks) - -Files to update: -- `src/plugin.ts` — 13 console statements (mix of log/warn) -- `src/models.ts` — 11 console statements (mix of log/error) -- `src/models-dev.ts` — 7 console statements (mix of log/warn) -- `src/omniroute-combos.ts` — 15 console statements (mix of log/warn) - -### Log Format - -Following OpenCode's existing format: - -``` -WARN 2026-05-14T12:34:56.789Z +0ms service=omniroute Invalid baseURL: foo://bar, using default -DEBUG 2026-05-14T12:34:56.789Z +0ms service=omniroute Available models: gpt-4o, claude-3-5-sonnet -``` - -Format: `{LEVEL} {ISO-TIMESTAMP} +0ms service=omniroute {MESSAGE}` - - `LEVEL` is right-padded to 5 characters (`WARN ` or `DEBUG`) - - Timestamp uses `Date.prototype.toISOString()` format: `YYYY-MM-DDTHH:MM:SS.sssZ` (UTC with milliseconds) - - The `+0ms` offset is hardcoded to match observed OpenCode log format (actual offset meaning is internal to OpenCode) - -### Data Flow - -``` -Plugin Code - | - v -logger.warn("Invalid baseURL") logger.debug("Fetching models") - | | - v v -Always write Check OMNIROUTE_DEBUG - | | - v v -Find current log file Find current log file - | | - v v -Append formatted entry Append formatted entry - | | - v v -~/.local/share/opencode/log/*.log -``` - -### Error Handling - -- **Log directory missing**: Silently skip logging (don't create directory) -- **Log directory not readable** (`EACCES`, `EPERM` on `readdirSync`): Silently skip logging -- **Log file not writable** (`EACCES`, `EPERM`): Silently skip (don't throw) -- **Log file deleted** (`ENOENT`): Trigger re-scan for new log file on next write -- **No log files exist**: Silently skip logging (don't create files to avoid conflicts with OpenCode's log rotation) -- **Disk full** (`ENOSPC`): Silently skip -- **Other I/O errors** (`EIO`, etc.): Silently skip - -## Testing Strategy - -### Unit Tests - -1. **Debug enabled**: Set `OMNIROUTE_DEBUG=1`, call `debug()`, verify log file contains entry -2. **Debug disabled**: Unset `OMNIROUTE_DEBUG`, call `debug()`, verify no entry written -3. **Warn always**: Call `warn()` with and without `OMNIROUTE_DEBUG`, verify always written -4. **Format correct**: Verify output matches OpenCode format with `service=omniroute` -5. **Graceful failure**: Mock unreadable log directory, verify no exceptions thrown -6. **Log rotation**: Simulate OpenCode log rotation by deleting the cached log file, then call `warn()` — verify logger re-scans directory and writes to new most-recent file - -### Integration Tests - -1. Run plugin with `OMNIROUTE_DEBUG=1`, verify debug messages appear in latest OpenCode log -2. Run plugin without env var, verify only warnings appear - -## Migration Plan - -1. Create `src/logger.ts` with warn/debug functions -2. Replace console calls in `src/plugin.ts` — remove `[OmniRoute] ` prefix from messages (redundant with `service=omniroute`) -3. Replace console calls in `src/models.ts` — remove `[OmniRoute] ` prefix -4. Replace console calls in `src/models-dev.ts` — remove `[OmniRoute] ` prefix -5. Replace console calls in `src/omniroute-combos.ts` — remove `[OmniRoute] ` prefix -6. Add unit tests for logger module -7. Verify no console calls remain: `grep -En "console\.(log|warn|error)" src/` - -## Open Questions - -None — design approved by user. - -## References - -- OpenCode log format observed in `~/.local/share/opencode/log/*.log` -- Existing plugin code in `src/plugin.ts`, `src/models.ts`, `src/models-dev.ts`, `src/omniroute-combos.ts` diff --git a/docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md b/docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md deleted file mode 100644 index 13d229a..0000000 --- a/docs/superpowers/specs/2026-05-18-models-dev-reliability-design.md +++ /dev/null @@ -1,275 +0,0 @@ -## Title - -Improve `models.dev` enrichment reliability with retries and stale in-memory fallback - -## Background - -`models.dev` enrichment currently depends on a single network fetch guarded by a hardcoded `1000ms` timeout. If that fetch times out or otherwise fails, the plugin returns `null` for the `models.dev` dataset and skips enrichment entirely. - -Root-cause analysis showed that the current design is too brittle for real network conditions: - -- timeout budget is only `1000ms` -- the abort covers the full request lifecycle, not just first byte -- there is no retry behavior -- there is no stale-cache fallback when live refresh fails -- the system fails open to "no enrichment" even for short-lived transient network issues - -This design addresses reliability first while keeping scope intentionally limited to in-memory behavior. - -## Goals - -- Make `models.dev` enrichment resilient to transient network slowness and upstream instability. -- Preserve previously fetched enrichment data when later refreshes fail. -- Keep failure behavior safe: plugin should continue working even if enrichment is unavailable. -- Improve observability so future failures can be diagnosed from logs. -- Keep the change localized and testable. - -## Non-Goals - -- No persistent disk cache in this iteration. -- No background refresh worker. -- No changes to model matching, deduplication, or alias resolution behavior. -- No changes to combo capability calculation beyond consuming more reliable `models.dev` data. - -## Recommended Approach - -Adopt a reliability-first fetch pipeline for `models.dev` with: - -- increased per-attempt timeout -- bounded retries with short backoff -- stale in-memory cache fallback when live refresh fails -- improved structured logging around failure class and retry behavior - -This retains the current high-level contract of "best available enrichment" while removing the current single-point transient failure. - -## Detailed Design - -### 1. Timeout policy - -Increase the default `models.dev` timeout from `1000ms` to `5000ms`. - -Rationale: - -- root-cause analysis showed that `1000ms` is an aggressive end-to-end budget for a public internet fetch -- the timeout includes connect, TLS, response body transfer, and JSON consumption -- a moderate increase materially improves reliability without creating extreme hangs - -This timeout remains configurable through existing `modelsDev.timeoutMs` config. - -### 2. Retry policy - -Replace the current single-attempt fetch with a bounded retry loop. - -Default behavior: - -- maximum attempts: `3` -- backoff sequence: `250ms`, then `500ms` - -Retryable failures: - -- request aborted due to timeout (`AbortError`) -- network-level fetch errors -- HTTP `429` -- HTTP `5xx` - -Non-retryable failures: - -- HTTP `4xx` other than `429` -- definitively invalid response structure - -Reasoning: - -- transient network and upstream capacity issues should be retried -- permanent client-side problems should fail fast to avoid unnecessary delay - -### 3. Cache behavior - -Retain the existing in-memory cache and refine fallback behavior. - -#### Fresh cache - -If cached `models.dev` data exists and is within TTL: - -- return it immediately -- do not make a network request - -#### Stale cache fallback - -If cached data exists but is older than TTL: - -- attempt live refresh first -- if live refresh succeeds, replace cache and return new data -- if live refresh fails after retries, return stale cached data instead of `null` - -#### Cold start failure - -If there is no cache and all attempts fail: - -- return `null` -- enrichment is skipped, preserving current fail-open behavior - -This makes the system best-available rather than all-or-nothing. - -### 4. Logging and diagnostics - -Add explicit logging around fetch attempts and fallback decisions. - -Per failed attempt, log: - -- attempt number -- failure class: timeout / network / HTTP / parse / invalid-structure -- status code when available -- elapsed attempt duration - -On success, log: - -- success attempt number -- total elapsed duration -- provider count or equivalent summary - -On stale-cache fallback, log: - -- that live refresh failed -- stale cache age -- that stale cached `models.dev` data was returned - -These logs should support future diagnosis without changing runtime semantics. - -### 5. Code structure - -Keep changes localized to `src/models-dev.ts`. - -Recommended helper breakdown: - -- `fetchModelsDevData(config)` — orchestration, cache policy, retries -- `fetchModelsDevOnce(url, timeoutMs)` — one attempt, returning validated data or typed failure -- `shouldRetryModelsDevFailure(...)` — retry decision helper -- `sleep(ms)` — backoff helper - -This keeps retry semantics independent from lookup/index logic. - -### 6. Error handling model - -The fetch layer should classify failures into stable categories rather than treating all failures identically. - -Suggested categories: - -- `timeout` -- `network` -- `http_retryable` -- `http_non_retryable` -- `parse` -- `invalid_structure` - -This improves clarity of both retry decisions and logs. - -## Data Flow - -### Current flow - -1. `getModelsDevIndex()` calls `fetchModelsDevData()` -2. one fetch attempt is made -3. any failure returns `null` -4. `buildModelsDevIndex(null)` returns `null` -5. enrichment is skipped - -### Proposed flow - -1. `getModelsDevIndex()` calls `fetchModelsDevData()` -2. `fetchModelsDevData()` checks fresh cache -3. if no fresh cache, it runs bounded live fetch attempts -4. if live fetch succeeds, cache is updated and returned -5. if live fetch fails and stale cache exists, stale cache is returned -6. if live fetch fails and no cache exists, `null` is returned -7. `buildModelsDevIndex(...)` uses whichever data source was successfully returned - -## Testing Strategy - -Add focused tests for the fetch/reliability behavior. - -### Required tests - -1. **fresh cache hit** - - fetch once successfully - - fetch again within TTL - - assert no second network call - -2. **timeout then success** - - first attempt aborts - - second attempt succeeds - - assert result is returned - - assert cache is updated - -3. **retryable HTTP failure then success** - - first attempt returns `503` - - second attempt succeeds - - assert retry occurs and result is returned - -4. **all attempts fail with stale cache available** - - seed cache with successful data - - expire TTL logically or via timestamp manipulation - - force all refresh attempts to fail - - assert stale cache is returned - -5. **all attempts fail with no cache** - - force timeout/network failure on every attempt - - assert `null` is returned from fetch layer - -6. **non-retryable HTTP failure** - - return `404` - - assert fail-fast behavior without unnecessary retries - -7. **invalid response structure with stale cache** - - return structurally invalid data - - assert stale cache fallback is used when available - -### Testing notes - -- tests should isolate `models.dev` behavior from `/v1/models` and combo fetch behavior -- tests should avoid depending on the real `models.dev` endpoint -- tests should verify both result behavior and retry counts - -## Trade-offs - -### Benefits - -- much higher enrichment reliability under transient failure -- protects previously fetched enrichment data -- preserves safe fail-open behavior -- keeps scope tight and localized - -### Costs - -- increased worst-case latency on cold start during repeated failures -- more state transitions and test surface area -- more log volume during repeated upstream failure - -Given the chosen priority of reliability first, these trade-offs are acceptable. - -## Alternatives Considered - -### A. Increase timeout only - -Rejected because it still leaves the system single-shot and fragile. - -### B. Persistent disk cache - -Deferred because it adds lifecycle and invalidation complexity beyond the immediate root cause. - -### C. Vendored `models.dev` snapshot - -Rejected for now due to maintenance and staleness burden. - -## Success Criteria - -The change is successful if: - -- transient `models.dev` slowness no longer frequently removes enrichment -- previously fetched enrichment survives later refresh failures within the process lifetime -- logs clearly indicate why a refresh failed and whether fallback was used -- the existing plugin behavior remains safe when upstream is fully unavailable -- tests cover timeout, retry, and stale-cache fallback paths - -## Implementation Notes - -This spec intentionally stops short of implementation details like exact helper signatures or test harness mechanics. Those should be finalized in the implementation plan. diff --git a/src/models.ts b/src/models.ts index cb11187..0cf9782 100644 --- a/src/models.ts +++ b/src/models.ts @@ -1,4 +1,4 @@ -import type { OmniRouteConfig, OmniRouteModel, OmniRouteModelsResponse } from './types.js'; +import type { OmniRouteConfig, OmniRouteModel, OmniRouteModelVariant, OmniRouteModelsResponse } from './types.js'; import { OMNIROUTE_DEFAULT_MODELS, OMNIROUTE_ENDPOINTS, @@ -167,6 +167,81 @@ export function isProviderAlias(providerPrefix: string): boolean { return providerPrefix in PROVIDER_ALIAS_TO_CANONICAL; } +/** + * Group variant-suffixed models (e.g. gpt-5.5-xhigh) under their base model. + * Returns a new array where every base model with variants gets a `variants` Record. + */ +export function groupVariantModels(models: OmniRouteModel[]): OmniRouteModel[] { + const realBaseModels = new Map(); + const variantMap = new Map>(); + + // Pass 1 — Categorize + for (const model of models) { + const { base, stripped } = stripVariantSuffix(model.id); + if (!stripped) { + realBaseModels.set(model.id, model); + } else { + const suffix = model.id.slice(base.length + 1).toLowerCase(); + const entry = variantMap.get(base); + if (entry) { + entry.push({ suffix, model }); + } else { + variantMap.set(base, [{ suffix, model }]); + } + } + } + + const result: OmniRouteModel[] = []; + + // Add all real base models that have no variants (unchanged) + for (const [id, model] of realBaseModels) { + if (!variantMap.has(id)) { + result.push(model); + } + } + + // For each base ID that has variants + for (const [baseId, variants] of variantMap) { + const baseModel = realBaseModels.get(baseId); + + // Use real base model if available; otherwise create synthetic base from first variant + const merged: OmniRouteModel = baseModel + ? { ...baseModel } + : { ...variants[0].model, id: baseId, name: baseId }; + + // Build variants Record + const variantsRecord: Record = {}; + for (const { suffix } of variants) { + if ( + suffix === 'low' || + suffix === 'medium' || + suffix === 'high' || + suffix === 'xhigh' + ) { + variantsRecord[suffix] = { reasoningEffort: suffix }; + } + } + merged.variants = variantsRecord; + + // Merge metadata from all variants into base: use max contextWindow and max maxTokens + for (const { model } of variants) { + if (model.contextWindow !== undefined) { + merged.contextWindow = Math.max(merged.contextWindow ?? 0, model.contextWindow); + } + if (model.maxTokens !== undefined) { + merged.maxTokens = Math.max(merged.maxTokens ?? 0, model.maxTokens); + } + if (model.supportsReasoning) { + merged.supportsReasoning = true; + } + } + + result.push(merged); + } + + return result; +} + /** * Fetch models from OmniRoute /v1/models endpoint * This is the CRITICAL FEATURE - dynamically fetches available models @@ -248,9 +323,8 @@ export async function fetchModels( .map(normalizeModel); const dedupedModels = deduplicateModels(rawModels); - - // Enrich with models.dev and combo capabilities - const models = await enrichModelMetadata(dedupedModels, config); + const groupedModels = groupVariantModels(dedupedModels); + const models = await enrichModelMetadata(groupedModels, config); // Update cache modelCache.set(cacheKey, { diff --git a/src/plugin.ts b/src/plugin.ts index 142193f..dc90dd4 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -557,19 +557,22 @@ function toProviderModel(model: OmniRouteModel, baseUrl: string): OmniRouteProvi options: {}, headers: {}, status: 'active', - variants: supportsReasoning - ? { - low: { reasoningEffort: 'low' }, - medium: { reasoningEffort: 'medium' }, - high: { reasoningEffort: 'high' }, - } - : {}, + variants: model.variants && Object.keys(model.variants).length > 0 + ? model.variants + : supportsReasoning + ? { + low: { reasoningEffort: 'low' }, + medium: { reasoningEffort: 'medium' }, + high: { reasoningEffort: 'high' }, + } + : {}, }; } function getModelFamily(modelId: string): string { - const [family] = modelId.split('-'); - return family || modelId; + const withoutProvider = modelId.includes('/') ? modelId.split('/').pop()! : modelId; + const [family] = withoutProvider.split('-'); + return family || withoutProvider; } /** diff --git a/src/types.ts b/src/types.ts index f1b043c..1f82bbb 100644 --- a/src/types.ts +++ b/src/types.ts @@ -44,6 +44,8 @@ export interface OmniRouteModel { input?: number; output?: number; }; + + variants?: Record; } export interface OmniRouteModelMetadata { @@ -185,7 +187,7 @@ export interface OmniRouteProviderModel { * Model variant configuration */ export interface OmniRouteModelVariant { - reasoningEffort?: 'low' | 'medium' | 'high'; + reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'; [key: string]: unknown; } diff --git a/test/plugin.test.mjs b/test/plugin.test.mjs index a996057..a15190a 100644 --- a/test/plugin.test.mjs +++ b/test/plugin.test.mjs @@ -5,6 +5,8 @@ import { join } from 'path'; import { tmpdir } from 'os'; import OmniRouteAuthPlugin from '../dist/index.js'; +import { clearModelCache } from '../dist/runtime.js'; +import { clearModelsDevCache } from '../dist/src/models-dev.js'; const ORIGINAL_FETCH = global.fetch; const ORIGINAL_HOME = process.env.HOME; @@ -12,6 +14,8 @@ const ORIGINAL_HOME = process.env.HOME; afterEach(() => { global.fetch = ORIGINAL_FETCH; process.env.HOME = ORIGINAL_HOME; + clearModelCache(); + clearModelsDevCache(); }); function getDummyBaseUrl(port = 20128) { @@ -611,3 +615,119 @@ test('config hook respects explicit attachment false for vision models', async ( await rm(tempHome, { recursive: true, force: true }); } }); + +test('provider hook groups variant models under base model', async () => { + const plugin = await OmniRouteAuthPlugin({}); + + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : String(input); + if (url.endsWith('/v1/models')) { + return new Response( + JSON.stringify({ + object: 'list', + data: [ + { + id: 'codex/gpt-5.5', + name: 'GPT-5.5', + supportsReasoning: true, + }, + { + id: 'codex/gpt-5.5-high', + name: 'GPT-5.5 High', + supportsReasoning: true, + }, + { + id: 'codex/gpt-5.5-xhigh', + name: 'GPT-5.5 XHigh', + supportsReasoning: true, + contextWindow: 256000, + }, + { + id: 'openai/gpt-4o', + name: 'GPT-4o', + supportsReasoning: false, + }, + ], + }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ); + } + return new Response(JSON.stringify({ ok: true }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + }; + + const result = await plugin.provider.models( + { + id: 'omniroute', + name: 'OmniRoute', + source: 'config', + env: [], + options: { baseURL: 'http://localhost:20128/v1', apiMode: 'chat' }, + models: {}, + }, + { auth: { type: 'api', key: 'test-key' } }, + ); + + assert.ok(result['codex/gpt-5.5']); + assert.ok(result['codex/gpt-5.5'].variants.high); + assert.ok(result['codex/gpt-5.5'].variants.xhigh); + assert.equal(result['codex/gpt-5.5-high'], undefined); + assert.equal(result['codex/gpt-5.5-xhigh'], undefined); + assert.ok(result['openai/gpt-4o']); + assert.equal(Object.keys(result['openai/gpt-4o'].variants).length, 0); +}); + +test('provider hook creates synthetic base model when only variants are returned', async () => { + const plugin = await OmniRouteAuthPlugin({}); + + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : String(input); + if (url.endsWith('/v1/models')) { + return new Response( + JSON.stringify({ + object: 'list', + data: [ + { + id: 'codex/gpt-5.5-high', + name: 'GPT-5.5 High', + contextWindow: 128000, + supportsReasoning: true, + }, + { + id: 'codex/gpt-5.5-xhigh', + name: 'GPT-5.5 XHigh', + contextWindow: 256000, + supportsReasoning: true, + }, + ], + }), + { status: 200, headers: { 'Content-Type': 'application/json' } }, + ); + } + return new Response(JSON.stringify({ ok: true }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + }; + + const result = await plugin.provider.models( + { + id: 'omniroute', + name: 'OmniRoute', + source: 'config', + env: [], + options: { baseURL: 'http://localhost:20128/v1', apiMode: 'chat' }, + models: {}, + }, + { auth: { type: 'api', key: 'test-key' } }, + ); + + assert.ok(result['codex/gpt-5.5']); + assert.ok(result['codex/gpt-5.5'].variants.high); + assert.ok(result['codex/gpt-5.5'].variants.xhigh); + assert.equal(result['codex/gpt-5.5'].limit.context, 256000); + assert.equal(result['codex/gpt-5.5-high'], undefined); + assert.equal(result['codex/gpt-5.5-xhigh'], undefined); +}); From 5f7a6e0ec81a3b0e02750d8d0ab2f500b9d5b511 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 21:02:11 +0200 Subject: [PATCH 08/10] chore(release): keep superpowers artifacts local --- .gitignore | 3 + docs/release-notes-v1.4.1.md | 2 +- ...-05-19-model-variant-support-fix-design.md | 239 ------------------ 3 files changed, 4 insertions(+), 240 deletions(-) delete mode 100644 docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md diff --git a/.gitignore b/.gitignore index d5dce37..7a3e870 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ temp/ # Worktrees .worktrees/ + +# Local planning artifacts +docs/superpowers/ diff --git a/docs/release-notes-v1.4.1.md b/docs/release-notes-v1.4.1.md index 59b1389..dea1af4 100644 --- a/docs/release-notes-v1.4.1.md +++ b/docs/release-notes-v1.4.1.md @@ -67,7 +67,7 @@ ### Documentation -- Added design spec for models.dev reliability pipeline. +- Internal `docs/superpowers/` planning/spec artifacts are kept local only and are excluded from the GitHub repository. ## Verification diff --git a/docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md b/docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md deleted file mode 100644 index 8702207..0000000 --- a/docs/superpowers/specs/2026-05-19-model-variant-support-fix-design.md +++ /dev/null @@ -1,239 +0,0 @@ -# Model Variant Support Fix — Design Spec - -**Date:** 2026-05-19 -**Author:** opencode-omniroute-auth maintainers -**Approach:** Pipeline Injection (Approach A) -**Status:** Approved for implementation - -## 1. Problem Statement - -When OmniRoute lists variant-suffixed models separately (e.g., `codex/gpt-5.5-xhigh`, `codex/gpt-5.5-high`), the plugin fails to group them under their base model. Instead, each variant appears as an independent top-level entry with incorrect generated variants (`{low, medium, high}`). This causes: - -- Duplicate/confusing model entries in OpenCode's model picker -- Missing `xhigh` variant support -- Incorrect `getModelFamily()` output for provider-prefixed versioned models (returns `codex/gpt` instead of `gpt`) -- No synthetic base model creation when only variants are returned - -## 2. Scope - -This fix is **comprehensive** and includes: - -1. **Variant grouping** — Merge variant-suffixed models under their base model ID -2. **xhigh support** — Add `'xhigh'` to the `reasoningEffort` type and generated variants -3. **Synthetic base models** — Create a base model from the first variant when no explicit base exists -4. **`getModelFamily()` fix** — Strip provider prefix before extracting family name -5. **Test isolation** — Clear model/model-dev caches between `plugin.test.mjs` tests - -Out of scope: -- The pre-existing `fetchModelsDevData retries retryable HTTP failures` test failure (unrelated) - -## 3. Architecture - -### 3.1 Component Changes - -| Unit | Change | Purpose | -|------|--------|---------| -| `src/models.ts` | Add `groupVariantModels()` + integrate in `fetchModels()` | Core grouping logic | -| `src/plugin.ts` | `toProviderModel()` uses `model.variants` if present; fix `getModelFamily()` | Correct variant generation + family extraction | -| `src/types.ts` | Add `variants?: Record` to `OmniRouteModel`; add `'xhigh'` to `reasoningEffort` | Type support | -| `test/plugin.test.mjs` | Add two new tests + cache isolation in `afterEach` | Regression + behavior tests | - -### 3.2 Data Flow (Pipeline) - -``` -/v1/models response - ↓ -[ {id: 'codex/gpt-5.5', ...}, - {id: 'codex/gpt-5.5-high', ...}, - {id: 'codex/gpt-5.5-xhigh', ...}, - {id: 'openai/gpt-4o', ...} ] - ↓ normalizeModel() - ↓ deduplicateModels() - ↓ groupVariantModels() ← NEW -[ {id: 'codex/gpt-5.5', variants: {high: {...}, xhigh: {...}}, ...}, - {id: 'openai/gpt-4o', variants: {}, ...} ] - ↓ enrichModelMetadata() - ↓ toProviderModels() -``` - -## 4. Detailed Design - -### 4.1 `groupVariantModels()` (`src/models.ts`) - -Pure function signature: - -```typescript -export function groupVariantModels(models: OmniRouteModel[]): OmniRouteModel[] -``` - -**Algorithm:** - -1. Pass 1 — Categorize: Iterate input models. For each model: - - Call `stripVariantSuffix(model.id)` to detect if it is a variant (e.g., `gpt-5.5-xhigh` → base=`gpt-5.5`, stripped=true) - - If not a variant: store in `realBaseModels` Map (key=model.id) - - If variant: store in `variantMap` Map (key=baseId, value=array of `{suffix, model}`) - -2. Pass 2 — Build result: - - Add all real base models that have **no** variants (unchanged) - - For each base ID that **has** variants: - - Use real base model if available; otherwise create synthetic base from first variant (copy all fields, set `id=baseId`, `name=baseId`) - - Build `variants` Record: for each detected suffix, create `{reasoningEffort: lowerSuffix}` entry - - Merge metadata from all variants into base: use **max** `contextWindow` and **max** `maxTokens` - - Set `supportsReasoning = true` if any variant supports it - - Push merged model into result - -**Invariants:** -- No variant-suffixed model ID appears as a top-level entry in the output -- Every base ID with variants gets a `variants` field containing all detected suffixes -- Synthetic bases are never created when a real base exists - -### 4.2 Pipeline Integration (`src/models.ts`) - -Insert `groupVariantModels()` between `deduplicateModels()` and `enrichModelMetadata()` inside `fetchModels()`: - -```typescript -const dedupedModels = deduplicateModels(rawModels); -const groupedModels = groupVariantModels(dedupedModels); // NEW -const models = await enrichModelMetadata(groupedModels, config); -``` - -### 4.3 `toProviderModel()` Variants Logic (`src/plugin.ts`) - -Replace the unconditional `{low, medium, high}` generation with a priority check: - -```typescript -variants: model.variants && Object.keys(model.variants).length > 0 - ? model.variants - : supportsReasoning - ? { - low: { reasoningEffort: 'low' }, - medium: { reasoningEffort: 'medium' }, - high: { reasoningEffort: 'high' }, - } - : {}, -``` - -If `model.variants` was pre-populated by `groupVariantModels()`, use it directly. Otherwise fall back to the existing default for reasoning models. - -### 4.4 `getModelFamily()` Fix (`src/plugin.ts`) - -Current broken behavior: `getModelFamily('codex/gpt-5.5-xhigh')` → `'codex/gpt'` - -Fix: Strip provider prefix before splitting: - -```typescript -function getModelFamily(modelId: string): string { - const withoutProvider = modelId.includes('/') ? modelId.split('/').pop()! : modelId; - const [family] = withoutProvider.split('-'); - return family || withoutProvider; -} -``` - -Fixed behavior: `getModelFamily('codex/gpt-5.5-xhigh')` → `'gpt'` - -### 4.5 Type Changes (`src/types.ts`) - -Add to `OmniRouteModel`: - -```typescript -variants?: Record; -``` - -Update `OmniRouteModelVariant`: - -```typescript -export interface OmniRouteModelVariant { - reasoningEffort?: 'low' | 'medium' | 'high' | 'xhigh'; - [key: string]: unknown; -} -``` - -## 5. Edge Cases - -| Scenario | Behavior | -|----------|----------| -| Only variants returned, no base model | Create synthetic base from first variant; base ID = stripped suffix | -| Base model + variants both returned | Use real base; merge variant metadata (max limits) | -| Non-reasoning suffix (e.g., `-preview`) | `stripVariantSuffix()` ignores it; no grouping triggered | -| Empty variants map | Falls through to existing `supportsReasoning` logic | -| Mixed provider prefixes post-dedup | Dedup resolves aliases to canonical first; grouping operates on canonical IDs | -| Multiple variants with different limits | Base model inherits **highest** `contextWindow` and `maxTokens` | -| Variant without `supportsReasoning=true` | Still grouped; `supportsReasoning` on base becomes `true` if **any** variant has it | - -## 6. Testing - -### 6.1 New Tests (`test/plugin.test.mjs`) - -**Test 1: `provider hook groups variant models under base model`** - -- Mock `/v1/models` returning: - - `codex/gpt-5.5` (supportsReasoning: true) - - `codex/gpt-5.5-high` (supportsReasoning: true) - - `codex/gpt-5.5-xhigh` (supportsReasoning: true, contextWindow: 256000) - - `openai/gpt-4o` (supportsReasoning: false) - -- Assertions: - - `result['codex/gpt-5.5']` exists - - `result['codex/gpt-5.5'].variants.high` exists - - `result['codex/gpt-5.5'].variants.xhigh` exists - - `result['codex/gpt-5.5-high']` is `undefined` - - `result['codex/gpt-5.5-xhigh']` is `undefined` - - `result['openai/gpt-4o']` exists with empty variants object - -**Test 2: `provider hook creates synthetic base model when only variants are returned`** - -- Mock `/v1/models` returning: - - `codex/gpt-5.5-high` (contextWindow: 128000) - - `codex/gpt-5.5-xhigh` (contextWindow: 256000) - -- Assertions: - - `result['codex/gpt-5.5']` exists as synthetic base - - `result['codex/gpt-5.5'].variants.high` and `.xhigh` exist - - `result['codex/gpt-5.5'].limit.context === 256000` (highest from variants) - - No separate `codex/gpt-5.5-high` or `codex/gpt-5.5-xhigh` entries - -### 6.2 Test Infrastructure Fix - -Add to `plugin.test.mjs` imports: - -```javascript -import { clearModelCache } from '../dist/runtime.js'; -import { clearModelsDevCache } from '../dist/src/models-dev.js'; -``` - -Update `afterEach`: - -```javascript -afterEach(() => { - global.fetch = ORIGINAL_FETCH; - process.env.HOME = ORIGINAL_HOME; - clearModelCache(); // NEW - clearModelsDevCache(); // NEW -}); -``` - -This prevents cross-test contamination from mutable in-memory caches. - -## 7. Files Changed - -| File | Lines (approx) | Purpose | -|------|---------------|---------| -| `src/models.ts` | ~100 new | `groupVariantModels()` + pipeline integration | -| `src/plugin.ts` | ~10 modified | Variant generation priority + `getModelFamily()` fix | -| `src/types.ts` | ~5 modified | `variants` field + `'xhigh'` type | -| `test/plugin.test.mjs` | ~60 new | Two new tests + cache isolation | - -## 8. Verification - -After implementation, the full test suite should show: - -- 44/45 existing tests pass (the one failure is a pre-existing unrelated test) -- 2 new tests pass -- `npm run build` succeeds with zero TypeScript errors -- `npm run check:exports` succeeds - -## 9. Future Work (Out of Scope) - -- Support additional variant suffixes beyond reasoning effort (e.g., `-fast`, `-latest`) -- Cache key versioning for grouped model caches -- Retry logic for `fetchModelsDevData` (address the pre-existing test failure separately) From c0b06c4d1d0ecfc95278e1889f52d80cc5fd4c93 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 21:09:47 +0200 Subject: [PATCH 09/10] chore(release): correct version to 1.2.1 --- CHANGELOG.md | 4 ++-- docs/{release-notes-v1.4.1.md => release-notes-v1.2.1.md} | 4 ++-- package.json | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) rename docs/{release-notes-v1.4.1.md => release-notes-v1.2.1.md} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7e5205..e87c96f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ All notable changes to this project are documented in this file. -## [1.4.1] - 2026-05-19 +## [1.2.1] - 2026-05-19 ### Added @@ -50,7 +50,7 @@ All notable changes to this project are documented in this file. - **Cache Isolation** — `modelsDevCache` is now keyed by URL (`Map`) instead of a single global variable. Prevents cross-config data leakage when different configs specify different `modelsDev.url` values. (`src/models-dev.ts`) - **JSDoc Accuracy** — Updated `OmniRouteModelsDevConfig.timeoutMs` JSDoc comment from `(default: 1000ms)` to `(default: 5000ms)` to match the actual constant. (`src/types.ts`) -- **Lockfile Version Sync** — Updated `package-lock.json` version from `1.2.0` to `1.4.1` to match `package.json`. (`package-lock.json`) +- **Lockfile Version Sync** — Updated `package-lock.json` version from `1.2.0` to `1.2.1` to match `package.json`. (`package-lock.json`) - **Test Suite Speed** — Eliminated real `setTimeout` sleeps from `test/models-dev.test.mjs` by using `cacheTtl: 0` to mark cache immediately stale instead of waiting for TTL expiry. Reduces test runtime and improves scalability. - **Latency Documentation** — Added explicit JSDoc on `fetchModelsDevData()` documenting worst-case cold-start latency (~15.75s) as an accepted reliability trade-off per design spec. (`src/models-dev.ts`) diff --git a/docs/release-notes-v1.4.1.md b/docs/release-notes-v1.2.1.md similarity index 99% rename from docs/release-notes-v1.4.1.md rename to docs/release-notes-v1.2.1.md index dea1af4..05842eb 100644 --- a/docs/release-notes-v1.4.1.md +++ b/docs/release-notes-v1.2.1.md @@ -1,4 +1,4 @@ -# Release v1.4.1 +# Release v1.2.1 ## Highlights @@ -54,7 +54,7 @@ - **Cache Isolation** — `modelsDevCache` is now keyed by URL (`Map`) to prevent cross-config data leakage when different configs specify different `modelsDev.url` values. - **JSDoc Accuracy** — `OmniRouteModelsDevConfig.timeoutMs` JSDoc updated to reflect the new `5000ms` default. -- **Lockfile Sync** — `package-lock.json` version aligned with `package.json` (`1.4.1`). +- **Lockfile Sync** — `package-lock.json` version aligned with `package.json` (`1.2.1`). - **Test Suite Speed** — Eliminated real `setTimeout` sleeps from `test/models-dev.test.mjs` by using `cacheTtl: 0` for stale-cache tests. Reduces test runtime and improves scalability. - **Latency Documentation** — Explicit JSDoc added on `fetchModelsDevData()` documenting worst-case cold-start latency (~15.75s) as an accepted reliability trade-off. diff --git a/package.json b/package.json index ccad0f9..f6545ab 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-omniroute-auth", - "version": "1.4.1", + "version": "1.2.1", "description": "OpenCode authentication plugin for OmniRoute API with /connect command and dynamic model fetching", "type": "module", "main": "./dist/index.js", From f7020348a4fd48d4db4c8800119723a1f94d4f00 Mon Sep 17 00:00:00 2001 From: Sebastian Rumpf Date: Tue, 19 May 2026 22:28:04 +0200 Subject: [PATCH 10/10] fix(review): address release PR feedback --- CHANGELOG.md | 8 ++++++-- docs/release-notes-v1.2.1.md | 11 +++++++---- src/models-dev.ts | 30 ++++++++++++++++++++++++++---- src/models.ts | 18 ++++++++++++++++-- test/models-dev.test.mjs | 21 +++++++++++++++++++++ test/models.test.mjs | 34 ++++++++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e87c96f..f180295 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ All notable changes to this project are documented in this file. - Per-attempt structured logging: attempt number, failure class, HTTP status (when applicable), and elapsed duration. - Success logging: total elapsed duration and provider count for observability. - Timeout increase: default per-attempt timeout raised from 1000ms to 5000ms. -- **8 New Test Cases** (`test/models-dev.test.mjs`) covering: +- **9 New Test Cases** (`test/models-dev.test.mjs`) covering: - Fresh cache hit (no redundant network call) - Timeout recovery on retry - 503 retryable HTTP failure recovery @@ -22,16 +22,18 @@ All notable changes to this project are documented in this file. - Null return on cold-start total failure - 404 fail-fast behavior (no unnecessary retries) - Invalid response structure with stale cache fallback + - Malformed provider entry rejection before cache update - End-to-end integration with `getModelsDevIndex()` - **Model Variant Support Fix** — Comprehensive fix for variant-suffixed models (e.g., `codex/gpt-5.5-xhigh`, `codex/gpt-5.5-high`): - Added `groupVariantModels()` in `src/models.ts` — pure two-pass algorithm that merges variant-suffixed models under their base model ID - Added `variants?: Record` to `OmniRouteModel` interface in `src/types.ts` - Extended `OmniRouteModelVariant.reasoningEffort` to include `'xhigh'` (was `'low' | 'medium' | 'high'`) - - Synthetic base model creation: when only variants are returned (no explicit base), creates a synthetic base from the first variant with merged metadata (max `contextWindow`, max `maxTokens`) + - Synthetic base model creation: when only variants are returned (no explicit base), creates a synthetic base from the first variant with merged metadata (max `contextWindow`, max `maxTokens`, unioned capability flags) - Pipeline integration: `fetchModels()` now flows `normalizeModel` → `deduplicateModels` → `groupVariantModels` → `enrichModelMetadata` → `toProviderModels` - `toProviderModel()` in `src/plugin.ts` now prioritizes pre-populated `model.variants` over generated `{low, medium, high}` defaults - **Test Cache Isolation** (`test/plugin.test.mjs`) — Added `clearModelCache()` and `clearModelsDevCache()` to `afterEach` to prevent cross-test contamination from mutable in-memory caches - **2 New Regression Tests** (`test/plugin.test.mjs`) covering variant grouping and synthetic base model creation +- **1 New Regression Test** (`test/models.test.mjs`) covering capability union across grouped variants ### Fixed @@ -53,6 +55,8 @@ All notable changes to this project are documented in this file. - **Lockfile Version Sync** — Updated `package-lock.json` version from `1.2.0` to `1.2.1` to match `package.json`. (`package-lock.json`) - **Test Suite Speed** — Eliminated real `setTimeout` sleeps from `test/models-dev.test.mjs` by using `cacheTtl: 0` to mark cache immediately stale instead of waiting for TTL expiry. Reduces test runtime and improves scalability. - **Latency Documentation** — Added explicit JSDoc on `fetchModelsDevData()` documenting worst-case cold-start latency (~15.75s) as an accepted reliability trade-off per design spec. (`src/models-dev.ts`) +- **models.dev Structural Validation** — Added runtime validation for provider entries and nested `models` records before accepting fetched models.dev data. Prevents malformed upstream objects from being cached or indexed. (`src/models-dev.ts`) +- **Variant Capability Union** — Grouped variant models now merge `supportsVision`, `supportsTools`, `supportsStreaming`, `supportsTemperature`, and `supportsAttachment` into the base model when any variant supports them. (`src/models.ts`) ## [1.2.0] - 2026-05-17 diff --git a/docs/release-notes-v1.2.1.md b/docs/release-notes-v1.2.1.md index 05842eb..7d7e9b0 100644 --- a/docs/release-notes-v1.2.1.md +++ b/docs/release-notes-v1.2.1.md @@ -31,7 +31,7 @@ 1. **Categorizes** models into real bases and variants using `stripVariantSuffix()` 2. **Builds result**: real bases pass through unchanged; for each base with variants, merges all variants under the base model with a `variants` Record 3. **Synthetic bases**: when only variants are returned (no explicit base), creates a synthetic base from the first variant, copying all fields and setting `id`/`name` to the stripped base ID - 4. **Metadata merging**: base inherits **max** `contextWindow` and **max** `maxTokens` across all variants; `supportsReasoning` becomes `true` if any variant has it + 4. **Metadata merging**: base inherits **max** `contextWindow`, **max** `maxTokens`, and the union of supported capability flags across all variants - Integrated into `fetchModels()` pipeline: `normalizeModel` → `deduplicateModels` → `groupVariantModels` → `enrichModelMetadata` → `toProviderModels` - Fixed `toProviderModel()` in `src/plugin.ts` to prioritize pre-populated `model.variants` over generated `{low, medium, high}` defaults - Added `'xhigh'` to `OmniRouteModelVariant.reasoningEffort` type and generated variants @@ -57,13 +57,16 @@ - **Lockfile Sync** — `package-lock.json` version aligned with `package.json` (`1.2.1`). - **Test Suite Speed** — Eliminated real `setTimeout` sleeps from `test/models-dev.test.mjs` by using `cacheTtl: 0` for stale-cache tests. Reduces test runtime and improves scalability. - **Latency Documentation** — Explicit JSDoc added on `fetchModelsDevData()` documenting worst-case cold-start latency (~15.75s) as an accepted reliability trade-off. +- **models.dev Structural Validation** — Fetched models.dev payloads now validate provider entries and nested `models` records before accepting data, preventing malformed upstream objects from entering cache/index paths. +- **Variant Capability Union** — Grouped variants now merge `supportsVision`, `supportsTools`, `supportsStreaming`, `supportsTemperature`, and `supportsAttachment` into the base model when any variant advertises those capabilities. ### Testing -- Added 8 focused tests in `test/models-dev.test.mjs` covering all retry, cache, and fallback paths. +- Added 9 focused tests in `test/models-dev.test.mjs` covering all retry, cache, fallback, and malformed-provider validation paths. +- Added 1 unit test in `test/models.test.mjs` covering capability union across grouped variants. - Added 2 regression tests in `test/plugin.test.mjs` for variant grouping and synthetic base model creation. - Added cache isolation (`clearModelCache()`, `clearModelsDevCache()`) to `test/plugin.test.mjs` `afterEach` to prevent cross-test contamination. -- Full regression suite: 52/52 tests pass (0 failures). +- Full regression suite: 54/54 tests pass (0 failures). ### Documentation @@ -72,7 +75,7 @@ ## Verification - `npm run prepublishOnly` passes (`clean`, `build`, `check:exports`). -- `npm test` passes: 52 tests, 0 failures. +- `npm test` passes: 54 tests, 0 failures. - TypeScript strict mode compiles cleanly. ## Upgrade Notes diff --git a/src/models-dev.ts b/src/models-dev.ts index efad7d3..429f8c7 100644 --- a/src/models-dev.ts +++ b/src/models-dev.ts @@ -97,6 +97,29 @@ interface FetchFailure { message: string; } +function isRecord(value: unknown): value is Record { + return Boolean(value) && typeof value === 'object' && !Array.isArray(value); +} + +function isModelsDevData(value: unknown): value is ModelsDevData { + if (!isRecord(value)) return false; + + const providers = Object.values(value); + if (providers.length === 0) return false; + + for (const provider of providers) { + if (!isRecord(provider) || typeof provider.id !== 'string' || !isRecord(provider.models)) { + return false; + } + + for (const model of Object.values(provider.models)) { + if (!isRecord(model)) return false; + } + } + + return true; +} + /** * Sleep helper for backoff delays */ @@ -164,17 +187,17 @@ async function fetchModelsDevOnce( }; } - if (!data || typeof data !== 'object' || Array.isArray(data)) { + if (!isModelsDevData(data)) { const elapsedMs = Date.now() - start; return { class: 'invalid_structure', elapsedMs, - message: 'Response is not a valid object', + message: 'Response is not valid models.dev data', }; } const elapsedMs = Date.now() - start; - return { data: data as ModelsDevData, elapsedMs }; + return { data, elapsedMs }; } catch (error) { const elapsedMs = Date.now() - start; if (error instanceof Error && error.name === 'AbortError') { @@ -613,4 +636,3 @@ export function stripVariantSuffix(modelKey: string): { base: string; stripped: } return { base: modelKey, stripped: false }; } - diff --git a/src/models.ts b/src/models.ts index 0cf9782..71f6fac 100644 --- a/src/models.ts +++ b/src/models.ts @@ -223,7 +223,7 @@ export function groupVariantModels(models: OmniRouteModel[]): OmniRouteModel[] { } merged.variants = variantsRecord; - // Merge metadata from all variants into base: use max contextWindow and max maxTokens + // Merge metadata from all variants into base: use max limits and union capabilities. for (const { model } of variants) { if (model.contextWindow !== undefined) { merged.contextWindow = Math.max(merged.contextWindow ?? 0, model.contextWindow); @@ -234,6 +234,21 @@ export function groupVariantModels(models: OmniRouteModel[]): OmniRouteModel[] { if (model.supportsReasoning) { merged.supportsReasoning = true; } + if (model.supportsVision) { + merged.supportsVision = true; + } + if (model.supportsTools) { + merged.supportsTools = true; + } + if (model.supportsStreaming) { + merged.supportsStreaming = true; + } + if (model.supportsTemperature) { + merged.supportsTemperature = true; + } + if (model.supportsAttachment) { + merged.supportsAttachment = true; + } } result.push(merged); @@ -538,4 +553,3 @@ function lookupModelsDevModel( return undefined; } - diff --git a/test/models-dev.test.mjs b/test/models-dev.test.mjs index 6208667..fb1733f 100644 --- a/test/models-dev.test.mjs +++ b/test/models-dev.test.mjs @@ -235,6 +235,27 @@ test('invalid response structure with stale cache - returns stale data', async ( assert.equal(calls, 2, 'Should make 1 + 1 attempts (invalid structure is non-retryable)'); }); +test('invalid provider structure with no cache - returns null', async () => { + let calls = 0; + global.fetch = async (input) => { + const url = input instanceof Request ? input.url : input.toString(); + if (url === MOCK_URL) { + calls++; + return new Response(JSON.stringify({ openai: { id: 'openai', models: null } }), { + status: 200, + headers: { 'Content-Type': 'application/json' }, + }); + } + return new Response(JSON.stringify({}), { status: 200 }); + }; + + const config = createConfig(); + + const result = await fetchModelsDevData(config); + assert.equal(result, null, 'Should return null for malformed provider entries'); + assert.equal(calls, 1, 'Should not retry structurally invalid responses'); +}); + // Integration test: verify getModelsDevIndex uses the improved fetch pipeline test('getModelsDevIndex integrates with retry and cache', async () => { let calls = 0; diff --git a/test/models.test.mjs b/test/models.test.mjs index 8036659..394e708 100644 --- a/test/models.test.mjs +++ b/test/models.test.mjs @@ -9,6 +9,7 @@ import { refreshModels, } from '../dist/runtime.js'; import { calculateLowestCommonCapabilities } from '../dist/src/models-dev.js'; +import { groupVariantModels } from '../dist/src/models.js'; const ORIGINAL_FETCH = global.fetch; @@ -249,6 +250,39 @@ test('subscription provider fallback enriches from public provider', async () => assert.equal(getSubscriptionFallback('unknown-provider'), null); }); +test('groupVariantModels merges capability flags from all variants into synthetic base', () => { + const [model] = groupVariantModels([ + { + id: 'codex/gpt-5.5-high', + name: 'GPT-5.5 High', + contextWindow: 128000, + maxTokens: 32000, + supportsReasoning: true, + }, + { + id: 'codex/gpt-5.5-xhigh', + name: 'GPT-5.5 XHigh', + contextWindow: 256000, + maxTokens: 64000, + supportsVision: true, + supportsTools: true, + supportsStreaming: true, + supportsTemperature: true, + supportsAttachment: true, + }, + ]); + + assert.equal(model.id, 'codex/gpt-5.5'); + assert.equal(model.contextWindow, 256000); + assert.equal(model.maxTokens, 64000); + assert.equal(model.supportsReasoning, true); + assert.equal(model.supportsVision, true); + assert.equal(model.supportsTools, true); + assert.equal(model.supportsStreaming, true); + assert.equal(model.supportsTemperature, true); + assert.equal(model.supportsAttachment, true); +}); + // Task 21: Normalization of snake_case and capabilities fields test('normalizeModel reads snake_case fields', async () => { global.fetch = async (input) => {