diff --git a/CHANGELOG.md b/CHANGELOG.md index 321d0cad..d55dd081 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **`/debate` External Tool Quick Reference** — Added a "External Tool Quick Reference" section to all copies of the debate skill (`plugins/debate/skills/debate/SKILL.md`, OpenCode and Codex adapters) with safe command patterns, effort-to-model mapping tables, and output parsing expressions. The section includes a canonical-source pointer to `plugins/consult/skills/consult/SKILL.md` so the debate orchestrator doesn't duplicate provider logic. Added pointer notes in `debate-orchestrator` agents. Fixes issue #232. +- **`/consult` and `/debate` model defaults update** — Gemini high/max effort now uses `gemini-3.1-pro-preview`; Gemini low/medium uses `gemini-3-flash-preview`. Codex uses `gpt-5.3-codex` for all effort tiers. Updated across all platforms: Claude Code plugin, OpenCode adapter, and Codex adapter for both consult and debate skills and commands. Fixes issue #234. + - **`/consult` model name updates** — Updated stale model names in the consult skill: Codex models are now `o4-mini` (low/medium) and `o3` (high/max); Gemini models include `gemini-3-flash-preview`, `gemini-3-pro-preview`, and `gemini-3.1-pro-preview`. Synced to OpenCode adapter consult skill. Fixes issue #232. - **`/next-task` Phase 12 ship invocation** — Phase 12 now invokes `ship:ship` via `await Skill({ name: "ship:ship", args: ... })` instead of `Task({ subagent_type: "ship:ship", ... })`. `ship:ship` is a skill, not an agent; the previous `Task()` call silently failed, leaving the workflow stuck after delivery validation with no PR created. The Codex adapter is updated in parity and regression tests are added. Fixes issue #230. diff --git a/README.md b/README.md index 6aa4a5df..32395c5e 100644 --- a/README.md +++ b/README.md @@ -651,8 +651,8 @@ agent-knowledge/ | Tool | Default Model (high) | Reasoning Control | |------|---------------------|-------------------| | Claude | claude-opus-4-6 | max-turns | -| Gemini | gemini-3-pro-preview | built-in | -| Codex | o3 | model_reasoning_effort | +| Gemini | gemini-3.1-pro-preview | built-in | +| Codex | gpt-5.3-codex | model_reasoning_effort | | OpenCode | (user-selected or default) | --variant | | Copilot | (default) | none | diff --git a/__tests__/debate-command.test.js b/__tests__/debate-command.test.js index 5a99771a..a47456c4 100644 --- a/__tests__/debate-command.test.js +++ b/__tests__/debate-command.test.js @@ -696,7 +696,7 @@ describe('external tool quick reference (#232)', () => { }); test('current model names present in effort-to-model mapping of each skill copy', () => { - const expectedModels = ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6', 'o4-mini', 'o3', 'gemini-2.5-flash']; + const expectedModels = ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6', 'gpt-5.3-codex', 'gemini-3-flash-preview', 'gemini-3.1-pro-preview']; for (const content of allDebateSkillContents()) { for (const model of expectedModels) { expect(content).toMatch(new RegExp(`Effort-to-Model Mapping[\\s\\S]*${model}`)); @@ -719,19 +719,26 @@ describe('consult skill opencode adapter sync (#232)', () => { expect(openCodeConsultSkillContent).toContain('claude-opus-4-6'); }); - test('opencode consult adapter has updated codex model names (no speculative gpt-5.x)', () => { - expect(openCodeConsultSkillContent).not.toContain('gpt-5.3-codex'); - expect(openCodeConsultSkillContent).not.toContain('gpt-5.2-codex'); - expect(openCodeConsultSkillContent).toContain('o4-mini'); - expect(openCodeConsultSkillContent).toContain('o3'); + test('opencode consult adapter has updated codex model names', () => { + expect(openCodeConsultSkillContent).toContain('gpt-5.3-codex'); + expect(openCodeConsultSkillContent).not.toContain('o4-mini'); + expect(openCodeConsultSkillContent).not.toMatch(/\|\s*(?:low|medium|high|max)\s*\|\s*o3\s*\|/); }); test('canonical consult skill has updated model names', () => { expect(consultSkillContent).toContain('claude-haiku-4-5'); expect(consultSkillContent).toContain('claude-sonnet-4-6'); expect(consultSkillContent).toContain('claude-opus-4-6'); - expect(consultSkillContent).not.toContain('gpt-5.3-codex'); - expect(consultSkillContent).toContain('o4-mini'); - expect(consultSkillContent).toContain('o3'); + expect(consultSkillContent).toContain('gpt-5.3-codex'); + expect(consultSkillContent).not.toContain('o4-mini'); + expect(consultSkillContent).not.toMatch(/\|\s*(?:low|medium|high|max)\s*\|\s*o3\s*\|/); + }); + + test('consult skill uses gemini-3.1-pro-preview as high-effort Gemini default (#234)', () => { + expect(consultSkillContent).toContain('gemini-3.1-pro-preview'); + expect(openCodeConsultSkillContent).toContain('gemini-3.1-pro-preview'); + // Ensure old model is not used as high/max default (may still appear in the models list) + expect(consultSkillContent).not.toMatch(/\|\s*(?:high|max)\s*\|\s*gemini-3-pro-preview/); + expect(openCodeConsultSkillContent).not.toMatch(/\|\s*(?:high|max)\s*\|\s*gemini-3-pro-preview/); }); }); diff --git a/adapters/codex/skills/consult/SKILL.md b/adapters/codex/skills/consult/SKILL.md index 271a7874..e8b4073f 100644 --- a/adapters/codex/skills/consult/SKILL.md +++ b/adapters/codex/skills/consult/SKILL.md @@ -169,8 +169,8 @@ request_user_input: - header: "Model" question: "Which Gemini model?" options: - - label: "gemini-3-pro" description: "Most capable, strong reasoning" - - label: "gemini-3-flash" description: "Fast, 78% SWE-bench" + - label: "gemini-3.1-pro-preview" description: "Most capable, strong reasoning" + - label: "gemini-3-flash-preview" description: "Fast, efficient coding" - label: "gemini-2.5-pro" description: "Previous gen pro model" - label: "gemini-2.5-flash" description: "Previous gen flash model" ``` @@ -214,7 +214,7 @@ request_user_input: - label: "claude-sonnet-4-5" description: "Default Copilot model" - label: "claude-opus-4-6" description: "Most capable Claude model" - label: "gpt-5.3-codex" description: "OpenAI GPT-5.3 Codex" - - label: "gemini-3-pro" description: "Google Gemini 3 Pro" + - label: "gemini-3.1-pro-preview" description: "Google Gemini 3.1 Pro" ``` Map the user's choice to the model string (strip " (Recommended)" suffix if present). @@ -233,7 +233,7 @@ Invoke the `consult` skill directly using the Skill tool: Skill: consult Args: "[question]" --tool=[tool] --effort=[effort] --model=[model] [--context=[context]] [--continue=[session_id]] -Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3-pro +Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview ``` The skill handles the full consultation lifecycle: model resolution, command building, context packaging, execution with 120s timeout, and returns a plain JSON result. diff --git a/adapters/codex/skills/debate/SKILL.md b/adapters/codex/skills/debate/SKILL.md index c4acd101..3b52ef64 100644 --- a/adapters/codex/skills/debate/SKILL.md +++ b/adapters/codex/skills/debate/SKILL.md @@ -289,10 +289,10 @@ Read the consult skill file to get the exact patterns and replacements. | Effort | Claude | Gemini | Codex | OpenCode | Copilot | |--------|--------|--------|-------|----------|---------| -| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control | -| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control | -| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control | -| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control | +| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control | +| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control | +| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control | +| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control | ### Output Parsing diff --git a/adapters/opencode/commands/consult.md b/adapters/opencode/commands/consult.md index 4ec42ac0..f6268bda 100644 --- a/adapters/opencode/commands/consult.md +++ b/adapters/opencode/commands/consult.md @@ -177,8 +177,8 @@ AskUserQuestion: question: "Which Gemini model?" multiSelect: false options: - - label: "gemini-3-pro" description: "Most capable, strong reasoning" - - label: "gemini-3-flash" description: "Fast, 78% SWE-bench" + - label: "gemini-3.1-pro-preview" description: "Most capable, strong reasoning" + - label: "gemini-3-flash-preview" description: "Fast, efficient coding" - label: "gemini-2.5-pro" description: "Previous gen pro model" - label: "gemini-2.5-flash" description: "Previous gen flash model" ``` @@ -222,7 +222,7 @@ AskUserQuestion: - label: "claude-sonnet-4-5" description: "Default Copilot model" - label: "claude-opus-4-6" description: "Most capable Claude model" - label: "gpt-5.3-codex" description: "OpenAI GPT-5.3 Codex" - - label: "gemini-3-pro" description: "Google Gemini 3 Pro" + - label: "gemini-3.1-pro-preview" description: "Google Gemini 3.1 Pro" ``` Map the user's choice to the model string (strip " (Recommended)" suffix if present). @@ -241,7 +241,7 @@ Invoke the `consult` skill directly using the Skill tool: Skill: consult Args: "[question]" --tool=[tool] --effort=[effort] --model=[model] [--context=[context]] [--continue=[session_id]] -Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3-pro +Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview ``` The skill handles the full consultation lifecycle: model resolution, command building, context packaging, execution with 120s timeout, and returns a plain JSON result. diff --git a/adapters/opencode/commands/debate.md b/adapters/opencode/commands/debate.md index 079cbd18..ae1e31c8 100644 --- a/adapters/opencode/commands/debate.md +++ b/adapters/opencode/commands/debate.md @@ -293,10 +293,10 @@ Read the consult skill file to get the exact patterns and replacements. | Effort | Claude | Gemini | Codex | OpenCode | Copilot | |--------|--------|--------|-------|----------|---------| -| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control | -| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control | -| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control | -| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control | +| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control | +| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control | +| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control | +| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control | ### Output Parsing diff --git a/adapters/opencode/skills/consult/SKILL.md b/adapters/opencode/skills/consult/SKILL.md index c0441e71..dd9dc5d4 100644 --- a/adapters/opencode/skills/consult/SKILL.md +++ b/adapters/opencode/skills/consult/SKILL.md @@ -70,9 +70,9 @@ Models: gemini-2.5-flash, gemini-2.5-pro, gemini-3-flash-preview, gemini-3-pro-p | Effort | Model | |--------|-------| -| low | gemini-2.5-flash | +| low | gemini-3-flash-preview | | medium | gemini-3-flash-preview | -| high | gemini-3-pro-preview | +| high | gemini-3.1-pro-preview | | max | gemini-3.1-pro-preview | **Parse output**: `JSON.parse(stdout).response` @@ -89,14 +89,14 @@ Session resume (latest): codex exec resume --last "QUESTION" --json Note: `codex exec` is the non-interactive/headless mode. There is no `-q` flag. The TUI mode is `codex` (no subcommand). -Models: o4-mini, o3 +Models: gpt-5.3-codex | Effort | Model | Reasoning | |--------|-------|-----------| -| low | o4-mini | low | -| medium | o4-mini | medium | -| high | o3 | high | -| max | o3 | high | +| low | gpt-5.3-codex | low | +| medium | gpt-5.3-codex | medium | +| high | gpt-5.3-codex | high | +| max | gpt-5.3-codex | high | **Parse output**: `JSON.parse(stdout).message` or raw text **Session ID**: Codex prints a resume hint at session end (e.g., `codex resume SESSION_ID`). Extract the session ID from stdout or from `JSON.parse(stdout).session_id` if available. @@ -110,7 +110,7 @@ Session resume: opencode run "QUESTION" --format json --model "MODEL" --variant With thinking: add --thinking flag ``` -Models: 75+ via providers (format: provider/model). Top picks: claude-sonnet-4-6, claude-opus-4-6, gpt-5.2, o3, gemini-3-pro-preview, minimax-m2.1 +Models: 75+ via providers (format: provider/model). Top picks: claude-sonnet-4-6, claude-opus-4-6, gpt-5.3-codex, gemini-3.1-pro-preview, minimax-m2.1 | Effort | Model | Variant | |--------|-------|---------| @@ -277,7 +277,7 @@ Return a plain JSON object to stdout (no markers or wrappers): ```json { "tool": "gemini", - "model": "gemini-3-pro-preview", + "model": "gemini-3.1-pro-preview", "effort": "high", "duration_ms": 12300, "response": "The AI's response text here...", @@ -315,4 +315,4 @@ This skill is invoked by: - `consult-agent` for `/consult` command - Direct invocation: `Skill('consult', '"question" --tool=gemini --effort=high')` -Example: `Skill('consult', '"Is this approach correct?" --tool=gemini --effort=high --model=gemini-3-pro-preview')` +Example: `Skill('consult', '"Is this approach correct?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview')` diff --git a/adapters/opencode/skills/debate/SKILL.md b/adapters/opencode/skills/debate/SKILL.md index 08d762b8..95c7d11d 100644 --- a/adapters/opencode/skills/debate/SKILL.md +++ b/adapters/opencode/skills/debate/SKILL.md @@ -222,7 +222,7 @@ Save to `{AI_STATE_DIR}/debate/last-debate.json`: "id": "debate-{ISO timestamp}-{4 char random hex}", "topic": "original topic text", "proposer": {"tool": "claude", "model": "opus"}, - "challenger": {"tool": "gemini", "model": "gemini-3-pro"}, + "challenger": {"tool": "gemini", "model": "gemini-3.1-pro-preview"}, "effort": "high", "rounds_completed": 2, "max_rounds": 2, @@ -277,10 +277,10 @@ Platform state directory: | Effort | Claude | Gemini | Codex | OpenCode | Copilot | |--------|--------|--------|-------|----------|---------| -| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control | -| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control | -| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control | -| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control | +| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control | +| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control | +| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control | +| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control | ### Output Parsing diff --git a/docs/consult-command-test-strategy.md b/docs/consult-command-test-strategy.md index 760b9a86..afc75ed5 100644 --- a/docs/consult-command-test-strategy.md +++ b/docs/consult-command-test-strategy.md @@ -171,10 +171,10 @@ describe('Model Selection', () => { describe('Gemini models', () => { it('should map effort levels correctly', () => { - expect(getGeminiModel('low')).toBe('gemini-2.5-flash'); - expect(getGeminiModel('medium')).toBe('gemini-3-flash'); - expect(getGeminiModel('high')).toBe('gemini-3-pro'); - expect(getGeminiModel('max')).toBe('gemini-3-pro'); + expect(getGeminiModel('low')).toBe('gemini-3-flash-preview'); + expect(getGeminiModel('medium')).toBe('gemini-3-flash-preview'); + expect(getGeminiModel('high')).toBe('gemini-3.1-pro-preview'); + expect(getGeminiModel('max')).toBe('gemini-3.1-pro-preview'); }); }); @@ -244,7 +244,7 @@ describe('Session Management', () => { it('should include question in saved session', () => { const session = { tool: 'gemini', - model: 'gemini-3-pro', + model: 'gemini-3.1-pro-preview', effort: 'medium', session_id: 'xyz-789', timestamp: new Date().toISOString(), @@ -458,7 +458,7 @@ describe('Session Continuation', () => { it('should restore tool from saved session', () => { const session = { tool: 'gemini', - model: 'gemini-3-pro', + model: 'gemini-3.1-pro-preview', effort: 'medium', session_id: 'session-456', timestamp: new Date().toISOString(), @@ -672,18 +672,18 @@ describe('Command Building', () => { describe('Gemini Command', () => { it('should build basic command', () => { - const { command, flags } = buildGeminiCommand('question', 'gemini-3-pro'); + const { command, flags } = buildGeminiCommand('question', 'gemini-3.1-pro-preview'); expect(command).toBe('gemini'); expect(flags).toContain('-p'); expect(flags).toContain('"question"'); expect(flags).toContain('--output-format'); expect(flags).toContain('json'); expect(flags).toContain('-m'); - expect(flags).toContain('gemini-3-pro'); + expect(flags).toContain('gemini-3.1-pro-preview'); }); it('should append session resume for continuation', () => { - const { flags } = buildGeminiCommand('question', 'gemini-3-pro', 'session-456', true); + const { flags } = buildGeminiCommand('question', 'gemini-3.1-pro-preview', 'session-456', true); expect(flags).toContain('--resume'); expect(flags).toContain('session-456'); }); @@ -939,7 +939,7 @@ describe('Full Consultation Flow', () => { jest.spyOn(fs, 'readFileSync').mockReturnValueOnce(JSON.stringify({ tool: 'gemini', session_id: 'session-456', - model: 'gemini-3-pro', + model: 'gemini-3.1-pro-preview', effort: 'medium', timestamp: new Date().toISOString(), question: 'continue', @@ -1139,7 +1139,7 @@ describe('Mocked Tool Outputs', () => { const mockGeminiOutput = `=== CONSULT_RESULT === { "tool": "gemini", - "model": "gemini-3-pro", + "model": "gemini-3.1-pro-preview", "effort": "medium", "duration_ms": 23400, "response": "Based on my analysis, the approach seems sound but could benefit from error handling for edge cases.", @@ -1175,7 +1175,7 @@ describe('Mocked Tool Outputs', () => { it('should parse structured output correctly', () => { const result = parseMockOutput(mockGeminiOutput, 'gemini'); expect(result.tool).toBe('gemini'); - expect(result.model).toBe('gemini-3-pro'); + expect(result.model).toBe('gemini-3.1-pro-preview'); expect(result.duration_ms).toBe(23400); expect(result.session_id).toBe('session-xyz-789'); }); diff --git a/plugins/consult/commands/consult.md b/plugins/consult/commands/consult.md index b8402675..9585b6e5 100644 --- a/plugins/consult/commands/consult.md +++ b/plugins/consult/commands/consult.md @@ -174,8 +174,8 @@ AskUserQuestion: question: "Which Gemini model?" multiSelect: false options: - - label: "gemini-3-pro" description: "Most capable, strong reasoning" - - label: "gemini-3-flash" description: "Fast, 78% SWE-bench" + - label: "gemini-3.1-pro-preview" description: "Most capable, strong reasoning" + - label: "gemini-3-flash-preview" description: "Fast, efficient coding" - label: "gemini-2.5-pro" description: "Previous gen pro model" - label: "gemini-2.5-flash" description: "Previous gen flash model" ``` @@ -219,7 +219,7 @@ AskUserQuestion: - label: "claude-sonnet-4-5" description: "Default Copilot model" - label: "claude-opus-4-6" description: "Most capable Claude model" - label: "gpt-5.3-codex" description: "OpenAI GPT-5.3 Codex" - - label: "gemini-3-pro" description: "Google Gemini 3 Pro" + - label: "gemini-3.1-pro-preview" description: "Google Gemini 3.1 Pro" ``` Map the user's choice to the model string (strip " (Recommended)" suffix if present). @@ -238,7 +238,7 @@ Invoke the `consult` skill directly using the Skill tool: Skill: consult Args: "[question]" --tool=[tool] --effort=[effort] --model=[model] [--context=[context]] [--continue=[session_id]] -Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3-pro +Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview ``` The skill handles the full consultation lifecycle: model resolution, command building, context packaging, execution with 120s timeout, and returns a plain JSON result. diff --git a/plugins/consult/skills/consult/SKILL.md b/plugins/consult/skills/consult/SKILL.md index aa2ebc8b..3b8beb41 100644 --- a/plugins/consult/skills/consult/SKILL.md +++ b/plugins/consult/skills/consult/SKILL.md @@ -64,9 +64,9 @@ Models: gemini-2.5-flash, gemini-2.5-pro, gemini-3-flash-preview, gemini-3-pro-p | Effort | Model | |--------|-------| -| low | gemini-2.5-flash | +| low | gemini-3-flash-preview | | medium | gemini-3-flash-preview | -| high | gemini-3-pro-preview | +| high | gemini-3.1-pro-preview | | max | gemini-3.1-pro-preview | **Parse output**: `JSON.parse(stdout).response` @@ -83,14 +83,14 @@ Session resume (latest): codex exec resume --last "QUESTION" --json Note: `codex exec` is the non-interactive/headless mode. There is no `-q` flag. The TUI mode is `codex` (no subcommand). -Models: o4-mini, o3 +Models: gpt-5.3-codex | Effort | Model | Reasoning | |--------|-------|-----------| -| low | o4-mini | low | -| medium | o4-mini | medium | -| high | o3 | high | -| max | o3 | high | +| low | gpt-5.3-codex | low | +| medium | gpt-5.3-codex | medium | +| high | gpt-5.3-codex | high | +| max | gpt-5.3-codex | high | **Parse output**: `JSON.parse(stdout).message` or raw text **Session ID**: Codex prints a resume hint at session end (e.g., `codex resume SESSION_ID`). Extract the session ID from stdout or from `JSON.parse(stdout).session_id` if available. @@ -104,7 +104,7 @@ Session resume: opencode run "QUESTION" --format json --model "MODEL" --variant With thinking: add --thinking flag ``` -Models: 75+ via providers (format: provider/model). Top picks: claude-sonnet-4-6, claude-opus-4-6, gpt-5.2, o3, gemini-3-pro-preview, minimax-m2.1 +Models: 75+ via providers (format: provider/model). Top picks: claude-sonnet-4-6, claude-opus-4-6, gpt-5.3-codex, gemini-3.1-pro-preview, minimax-m2.1 | Effort | Model | Variant | |--------|-------|---------| @@ -271,7 +271,7 @@ Return a plain JSON object to stdout (no markers or wrappers): ```json { "tool": "gemini", - "model": "gemini-3-pro-preview", + "model": "gemini-3.1-pro-preview", "effort": "high", "duration_ms": 12300, "response": "The AI's response text here...", @@ -309,4 +309,4 @@ This skill is invoked by: - `consult-agent` for `/consult` command - Direct invocation: `Skill('consult', '"question" --tool=gemini --effort=high')` -Example: `Skill('consult', '"Is this approach correct?" --tool=gemini --effort=high --model=gemini-3-pro-preview')` +Example: `Skill('consult', '"Is this approach correct?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview')` diff --git a/plugins/debate/commands/debate.md b/plugins/debate/commands/debate.md index 139c6d28..cf7d120a 100644 --- a/plugins/debate/commands/debate.md +++ b/plugins/debate/commands/debate.md @@ -296,10 +296,10 @@ Read the consult skill file to get the exact patterns and replacements. | Effort | Claude | Gemini | Codex | OpenCode | Copilot | |--------|--------|--------|-------|----------|---------| -| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control | -| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control | -| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control | -| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control | +| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control | +| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control | +| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control | +| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control | ### Output Parsing diff --git a/plugins/debate/skills/debate/SKILL.md b/plugins/debate/skills/debate/SKILL.md index 852e118b..d6cf1fd8 100644 --- a/plugins/debate/skills/debate/SKILL.md +++ b/plugins/debate/skills/debate/SKILL.md @@ -216,7 +216,7 @@ Save to `{AI_STATE_DIR}/debate/last-debate.json`: "id": "debate-{ISO timestamp}-{4 char random hex}", "topic": "original topic text", "proposer": {"tool": "claude", "model": "opus"}, - "challenger": {"tool": "gemini", "model": "gemini-3-pro"}, + "challenger": {"tool": "gemini", "model": "gemini-3.1-pro-preview"}, "effort": "high", "rounds_completed": 2, "max_rounds": 2, @@ -271,10 +271,10 @@ Platform state directory: | Effort | Claude | Gemini | Codex | OpenCode | Copilot | |--------|--------|--------|-------|----------|---------| -| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control | -| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control | -| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control | -| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control | +| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control | +| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control | +| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control | +| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control | ### Output Parsing