2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -21,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- **`/debate` External Tool Quick Reference** — Added an "External Tool Quick Reference" section to all copies of the debate skill (`plugins/debate/skills/debate/SKILL.md`, OpenCode and Codex adapters) with safe command patterns, effort-to-model mapping tables, and output-parsing expressions. The section includes a canonical-source pointer to `plugins/consult/skills/consult/SKILL.md` so the debate orchestrator doesn't duplicate provider logic. Added pointer notes in `debate-orchestrator` agents. Fixes issue #232.

- **`/consult` and `/debate` model defaults update** — Gemini high/max effort now uses `gemini-3.1-pro-preview`; Gemini low/medium uses `gemini-3-flash-preview`. Codex uses `gpt-5.3-codex` for all effort tiers. Updated across all platforms: Claude Code plugin, OpenCode adapter, and Codex adapter for both consult and debate skills and commands. Fixes issue #234.

- **`/consult` model name updates** — Updated stale model names in the consult skill: Codex models are now `o4-mini` (low/medium) and `o3` (high/max); Gemini models include `gemini-3-flash-preview`, `gemini-3-pro-preview`, and `gemini-3.1-pro-preview`. Synced to OpenCode adapter consult skill. Fixes issue #232.

- **`/next-task` Phase 12 ship invocation** — Phase 12 now invokes `ship:ship` via `await Skill({ name: "ship:ship", args: ... })` instead of `Task({ subagent_type: "ship:ship", ... })`. `ship:ship` is a skill, not an agent; the previous `Task()` call silently failed, leaving the workflow stuck after delivery validation with no PR created. The Codex adapter is updated for parity, and regression tests are added. Fixes issue #230.
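The failure mode behind issue #230 can be sketched with hypothetical stubs (the `Task`/`Skill` bodies and registry names below are ours, not the real Claude Code internals): a skill routed through the agent dispatcher resolves to nothing and "succeeds" as a no-op, while the skill dispatcher actually runs it.

```javascript
// Hypothetical stubs illustrating issue #230: `ship:ship` is a skill, not an
// agent, so routing it through Task() resolves to nothing and fails silently.
const skills = {
  "ship:ship": async (args) => ({ status: "pr-created", args }),
};
const agents = {}; // "ship:ship" was never registered as an agent

async function Task({ subagent_type }) {
  const agent = agents[subagent_type];
  return agent ? agent() : null; // unknown agent: silent no-op
}

async function Skill({ name, args }) {
  const skill = skills[name];
  if (!skill) throw new Error(`Unknown skill: ${name}`);
  return skill(args);
}

async function main() {
  const before = await Task({ subagent_type: "ship:ship" }); // null — workflow stalls
  const after = await Skill({ name: "ship:ship", args: "--pr" }); // skill runs
  return { before, after };
}
```

Under these assumptions, the regression tests only need to assert that Phase 12 awaits the `Skill` form and never emits the `Task` form.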
4 changes: 2 additions & 2 deletions README.md
@@ -651,8 +651,8 @@ agent-knowledge/
| Tool | Default Model (high) | Reasoning Control |
|------|---------------------|-------------------|
| Claude | claude-opus-4-6 | max-turns |
| Gemini | gemini-3-pro-preview | built-in |
| Codex | o3 | model_reasoning_effort |
| Gemini | gemini-3.1-pro-preview | built-in |
| Codex | gpt-5.3-codex | model_reasoning_effort |
| OpenCode | (user-selected or default) | --variant |
| Copilot | (default) | none |

25 changes: 16 additions & 9 deletions __tests__/debate-command.test.js
@@ -696,7 +696,7 @@ describe('external tool quick reference (#232)', () => {
});

test('current model names present in effort-to-model mapping of each skill copy', () => {
const expectedModels = ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6', 'o4-mini', 'o3', 'gemini-2.5-flash'];
const expectedModels = ['claude-haiku-4-5', 'claude-sonnet-4-6', 'claude-opus-4-6', 'gpt-5.3-codex', 'gemini-3-flash-preview', 'gemini-3.1-pro-preview'];
for (const content of allDebateSkillContents()) {
for (const model of expectedModels) {
expect(content).toMatch(new RegExp(`Effort-to-Model Mapping[\\s\\S]*${model}`));
@@ -719,19 +719,26 @@ describe('consult skill opencode adapter sync (#232)', () => {
expect(openCodeConsultSkillContent).toContain('claude-opus-4-6');
});

test('opencode consult adapter has updated codex model names (no speculative gpt-5.x)', () => {
expect(openCodeConsultSkillContent).not.toContain('gpt-5.3-codex');
expect(openCodeConsultSkillContent).not.toContain('gpt-5.2-codex');
expect(openCodeConsultSkillContent).toContain('o4-mini');
expect(openCodeConsultSkillContent).toContain('o3');
test('opencode consult adapter has updated codex model names', () => {
expect(openCodeConsultSkillContent).toContain('gpt-5.3-codex');
expect(openCodeConsultSkillContent).not.toContain('o4-mini');
expect(openCodeConsultSkillContent).not.toMatch(/\|\s*(?:low|medium|high|max)\s*\|\s*o3\s*\|/);
});

test('canonical consult skill has updated model names', () => {
expect(consultSkillContent).toContain('claude-haiku-4-5');
expect(consultSkillContent).toContain('claude-sonnet-4-6');
expect(consultSkillContent).toContain('claude-opus-4-6');
expect(consultSkillContent).not.toContain('gpt-5.3-codex');
expect(consultSkillContent).toContain('o4-mini');
expect(consultSkillContent).toContain('o3');
expect(consultSkillContent).toContain('gpt-5.3-codex');
expect(consultSkillContent).not.toContain('o4-mini');
expect(consultSkillContent).not.toMatch(/\|\s*(?:low|medium|high|max)\s*\|\s*o3\s*\|/);
});

test('consult skill uses gemini-3.1-pro-preview as high-effort Gemini default (#234)', () => {
expect(consultSkillContent).toContain('gemini-3.1-pro-preview');
expect(openCodeConsultSkillContent).toContain('gemini-3.1-pro-preview');
// Ensure old model is not used as high/max default (may still appear in the models list)
expect(consultSkillContent).not.toMatch(/\|\s*(?:high|max)\s*\|\s*gemini-3-pro-preview/);
expect(openCodeConsultSkillContent).not.toMatch(/\|\s*(?:high|max)\s*\|\s*gemini-3-pro-preview/);
});
});
8 changes: 4 additions & 4 deletions adapters/codex/skills/consult/SKILL.md
@@ -169,8 +169,8 @@ request_user_input:
- header: "Model"
question: "Which Gemini model?"
options:
- label: "gemini-3-pro" description: "Most capable, strong reasoning"
- label: "gemini-3-flash" description: "Fast, 78% SWE-bench"
- label: "gemini-3.1-pro-preview" description: "Most capable, strong reasoning"
- label: "gemini-3-flash-preview" description: "Fast, efficient coding"
- label: "gemini-2.5-pro" description: "Previous gen pro model"
- label: "gemini-2.5-flash" description: "Previous gen flash model"
```
@@ -214,7 +214,7 @@ request_user_input:
- label: "claude-sonnet-4-5" description: "Default Copilot model"
- label: "claude-opus-4-6" description: "Most capable Claude model"
- label: "gpt-5.3-codex" description: "OpenAI GPT-5.3 Codex"
- label: "gemini-3-pro" description: "Google Gemini 3 Pro"
- label: "gemini-3.1-pro-preview" description: "Google Gemini 3.1 Pro"
```

Map the user's choice to the model string (strip " (Recommended)" suffix if present).
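That mapping step can be sketched as a one-liner (the helper name `labelToModel` is ours, for illustration):

```javascript
// Minimal sketch: map a menu label to a model string by stripping the
// optional " (Recommended)" suffix. Labels without the suffix pass through.
function labelToModel(label) {
  return label.replace(/ \(Recommended\)$/, "");
}
```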
@@ -233,7 +233,7 @@ Invoke the `consult` skill directly using the Skill tool:
Skill: consult
Args: "[question]" --tool=[tool] --effort=[effort] --model=[model] [--context=[context]] [--continue=[session_id]]

Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3-pro
Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview
```

The skill handles the full consultation lifecycle: model resolution, command building, context packaging, execution with 120s timeout, and returns a plain JSON result.
8 changes: 4 changes & 4 deletions adapters/codex/skills/debate/SKILL.md
@@ -289,10 +289,10 @@ Read the consult skill file to get the exact patterns and replacements.

| Effort | Claude | Gemini | Codex | OpenCode | Copilot |
|--------|--------|--------|-------|----------|---------|
| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control |
| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control |
| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control |
| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control |
| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control |
| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control |
| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control |
| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control |
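The table above reduces to a static lookup; a sketch (the `resolveModel` helper is our illustration, not part of the skill):

```javascript
// The effort-to-model mapping table as a lookup. Turn counts, variants, and
// reasoning-effort flags are handled separately by each tool's invocation.
const EFFORT_MODELS = {
  claude: { low: "claude-haiku-4-5", medium: "claude-sonnet-4-6", high: "claude-opus-4-6", max: "claude-opus-4-6" },
  gemini: { low: "gemini-3-flash-preview", medium: "gemini-3-flash-preview", high: "gemini-3.1-pro-preview", max: "gemini-3.1-pro-preview" },
  codex:  { low: "gpt-5.3-codex", medium: "gpt-5.3-codex", high: "gpt-5.3-codex", max: "gpt-5.3-codex" },
};

function resolveModel(tool, effort) {
  const byEffort = EFFORT_MODELS[tool];
  if (!byEffort || !byEffort[effort]) throw new Error(`No mapping for ${tool}/${effort}`);
  return byEffort[effort];
}
```

The same table (and the same reduction) applies to the OpenCode adapter copies of the debate skill below.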

### Output Parsing

8 changes: 4 additions & 4 deletions adapters/opencode/commands/consult.md
@@ -177,8 +177,8 @@ AskUserQuestion:
question: "Which Gemini model?"
multiSelect: false
options:
- label: "gemini-3-pro" description: "Most capable, strong reasoning"
- label: "gemini-3-flash" description: "Fast, 78% SWE-bench"
- label: "gemini-3.1-pro-preview" description: "Most capable, strong reasoning"
- label: "gemini-3-flash-preview" description: "Fast, efficient coding"
- label: "gemini-2.5-pro" description: "Previous gen pro model"
- label: "gemini-2.5-flash" description: "Previous gen flash model"
```
@@ -222,7 +222,7 @@ AskUserQuestion:
- label: "claude-sonnet-4-5" description: "Default Copilot model"
- label: "claude-opus-4-6" description: "Most capable Claude model"
- label: "gpt-5.3-codex" description: "OpenAI GPT-5.3 Codex"
- label: "gemini-3-pro" description: "Google Gemini 3 Pro"
- label: "gemini-3.1-pro-preview" description: "Google Gemini 3.1 Pro"
```

Map the user's choice to the model string (strip " (Recommended)" suffix if present).
@@ -241,7 +241,7 @@ Invoke the `consult` skill directly using the Skill tool:
Skill: consult
Args: "[question]" --tool=[tool] --effort=[effort] --model=[model] [--context=[context]] [--continue=[session_id]]

Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3-pro
Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview
```

The skill handles the full consultation lifecycle: model resolution, command building, context packaging, execution with 120s timeout, and returns a plain JSON result.
8 changes: 4 additions & 4 deletions adapters/opencode/commands/debate.md
@@ -293,10 +293,10 @@ Read the consult skill file to get the exact patterns and replacements.

| Effort | Claude | Gemini | Codex | OpenCode | Copilot |
|--------|--------|--------|-------|----------|---------|
| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control |
| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control |
| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control |
| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control |
| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control |
| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control |
| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control |
| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control |

### Output Parsing

20 changes: 10 additions & 10 deletions adapters/opencode/skills/consult/SKILL.md
@@ -70,9 +70,9 @@ Models: gemini-2.5-flash, gemini-2.5-pro, gemini-3-flash-preview, gemini-3-pro-p

| Effort | Model |
|--------|-------|
| low | gemini-2.5-flash |
| low | gemini-3-flash-preview |
| medium | gemini-3-flash-preview |
| high | gemini-3-pro-preview |
| high | gemini-3.1-pro-preview |
| max | gemini-3.1-pro-preview |

**Parse output**: `JSON.parse(stdout).response`
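A sketch of that parse expression in context (the sample payload is illustrative, not a captured transcript):

```javascript
// Extract the response text from `gemini ... --output-format json` stdout.
function parseGeminiOutput(stdout) {
  return JSON.parse(stdout).response;
}
```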
@@ -89,14 +89,14 @@ Session resume (latest): codex exec resume --last "QUESTION" --json

Note: `codex exec` is the non-interactive/headless mode. There is no `-q` flag. The TUI mode is `codex` (no subcommand).

Models: o4-mini, o3
Models: gpt-5.3-codex

| Effort | Model | Reasoning |
|--------|-------|-----------|
| low | o4-mini | low |
| medium | o4-mini | medium |
| high | o3 | high |
| max | o3 | high |
| low | gpt-5.3-codex | low |
| medium | gpt-5.3-codex | medium |
| high | gpt-5.3-codex | high |
| max | gpt-5.3-codex | high |

**Parse output**: `JSON.parse(stdout).message` or raw text
**Session ID**: Codex prints a resume hint at session end (e.g., `codex resume SESSION_ID`). Extract the session ID from stdout or from `JSON.parse(stdout).session_id` if available.
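That extraction can be sketched as follows (the helper name and the exact resume-hint regex are our assumptions; adjust to the actual Codex output format):

```javascript
// Recover the Codex session ID: prefer structured JSON, then fall back to
// scanning stdout for the `codex resume SESSION_ID` hint.
function extractCodexSessionId(stdout) {
  try {
    const parsed = JSON.parse(stdout);
    if (parsed.session_id) return parsed.session_id;
  } catch (_) {
    // not JSON — fall through to the resume-hint scan
  }
  const match = stdout.match(/codex resume ([\w-]+)/);
  return match ? match[1] : null;
}
```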
@@ -110,7 +110,7 @@ Session resume: opencode run "QUESTION" --format json --model "MODEL" --variant
With thinking: add --thinking flag
```

Models: 75+ via providers (format: provider/model). Top picks: claude-sonnet-4-6, claude-opus-4-6, gpt-5.2, o3, gemini-3-pro-preview, minimax-m2.1
Models: 75+ via providers (format: provider/model). Top picks: claude-sonnet-4-6, claude-opus-4-6, gpt-5.3-codex, gemini-3.1-pro-preview, minimax-m2.1

| Effort | Model | Variant |
|--------|-------|---------|
@@ -277,7 +277,7 @@ Return a plain JSON object to stdout (no markers or wrappers):
```json
{
"tool": "gemini",
"model": "gemini-3-pro-preview",
"model": "gemini-3.1-pro-preview",
"effort": "high",
"duration_ms": 12300,
"response": "The AI's response text here...",
@@ -315,4 +315,4 @@ This skill is invoked by:
- `consult-agent` for `/consult` command
- Direct invocation: `Skill('consult', '"question" --tool=gemini --effort=high')`

Example: `Skill('consult', '"Is this approach correct?" --tool=gemini --effort=high --model=gemini-3-pro-preview')`
Example: `Skill('consult', '"Is this approach correct?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview')`
10 changes: 5 additions & 5 deletions adapters/opencode/skills/debate/SKILL.md
@@ -222,7 +222,7 @@ Save to `{AI_STATE_DIR}/debate/last-debate.json`:
"id": "debate-{ISO timestamp}-{4 char random hex}",
"topic": "original topic text",
"proposer": {"tool": "claude", "model": "opus"},
"challenger": {"tool": "gemini", "model": "gemini-3-pro"},
"challenger": {"tool": "gemini", "model": "gemini-3.1-pro-preview"},
"effort": "high",
"rounds_completed": 2,
"max_rounds": 2,
@@ -277,10 +277,10 @@ Platform state directory:

| Effort | Claude | Gemini | Codex | OpenCode | Copilot |
|--------|--------|--------|-------|----------|---------|
| low | claude-haiku-4-5 (1 turn) | gemini-2.5-flash | o4-mini (low) | default (low) | no control |
| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | o4-mini (medium) | default (medium) | no control |
| high | claude-opus-4-6 (5 turns) | gemini-3-pro-preview | o3 (high) | default (high) | no control |
| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | o3 (high) | default + --thinking | no control |
| low | claude-haiku-4-5 (1 turn) | gemini-3-flash-preview | gpt-5.3-codex (low) | default (low) | no control |
| medium | claude-sonnet-4-6 (3 turns) | gemini-3-flash-preview | gpt-5.3-codex (medium) | default (medium) | no control |
| high | claude-opus-4-6 (5 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default (high) | no control |
| max | claude-opus-4-6 (10 turns) | gemini-3.1-pro-preview | gpt-5.3-codex (high) | default + --thinking | no control |

### Output Parsing

24 changes: 12 additions & 12 deletions docs/consult-command-test-strategy.md
@@ -171,10 +171,10 @@ describe('Model Selection', () => {

describe('Gemini models', () => {
it('should map effort levels correctly', () => {
expect(getGeminiModel('low')).toBe('gemini-2.5-flash');
expect(getGeminiModel('medium')).toBe('gemini-3-flash');
expect(getGeminiModel('high')).toBe('gemini-3-pro');
expect(getGeminiModel('max')).toBe('gemini-3-pro');
expect(getGeminiModel('low')).toBe('gemini-3-flash-preview');
expect(getGeminiModel('medium')).toBe('gemini-3-flash-preview');
expect(getGeminiModel('high')).toBe('gemini-3.1-pro-preview');
expect(getGeminiModel('max')).toBe('gemini-3.1-pro-preview');
});
});

@@ -244,7 +244,7 @@ describe('Session Management', () => {
it('should include question in saved session', () => {
const session = {
tool: 'gemini',
model: 'gemini-3-pro',
model: 'gemini-3.1-pro-preview',
effort: 'medium',
session_id: 'xyz-789',
timestamp: new Date().toISOString(),
@@ -458,7 +458,7 @@ describe('Session Continuation', () => {
it('should restore tool from saved session', () => {
const session = {
tool: 'gemini',
model: 'gemini-3-pro',
model: 'gemini-3.1-pro-preview',
effort: 'medium',
session_id: 'session-456',
timestamp: new Date().toISOString(),
@@ -672,18 +672,18 @@ describe('Command Building', () => {

describe('Gemini Command', () => {
it('should build basic command', () => {
const { command, flags } = buildGeminiCommand('question', 'gemini-3-pro');
const { command, flags } = buildGeminiCommand('question', 'gemini-3.1-pro-preview');
expect(command).toBe('gemini');
expect(flags).toContain('-p');
expect(flags).toContain('"question"');
expect(flags).toContain('--output-format');
expect(flags).toContain('json');
expect(flags).toContain('-m');
expect(flags).toContain('gemini-3-pro');
expect(flags).toContain('gemini-3.1-pro-preview');
});

it('should append session resume for continuation', () => {
const { flags } = buildGeminiCommand('question', 'gemini-3-pro', 'session-456', true);
const { flags } = buildGeminiCommand('question', 'gemini-3.1-pro-preview', 'session-456', true);
expect(flags).toContain('--resume');
expect(flags).toContain('session-456');
});
@@ -939,7 +939,7 @@ describe('Full Consultation Flow', () => {
jest.spyOn(fs, 'readFileSync').mockReturnValueOnce(JSON.stringify({
tool: 'gemini',
session_id: 'session-456',
model: 'gemini-3-pro',
model: 'gemini-3.1-pro-preview',
effort: 'medium',
timestamp: new Date().toISOString(),
question: 'continue',
@@ -1139,7 +1139,7 @@ describe('Mocked Tool Outputs', () => {
const mockGeminiOutput = `=== CONSULT_RESULT ===
{
"tool": "gemini",
"model": "gemini-3-pro",
"model": "gemini-3.1-pro-preview",
"effort": "medium",
"duration_ms": 23400,
"response": "Based on my analysis, the approach seems sound but could benefit from error handling for edge cases.",
@@ -1175,7 +1175,7 @@ describe('Mocked Tool Outputs', () => {
it('should parse structured output correctly', () => {
const result = parseMockOutput(mockGeminiOutput, 'gemini');
expect(result.tool).toBe('gemini');
expect(result.model).toBe('gemini-3-pro');
expect(result.model).toBe('gemini-3.1-pro-preview');
expect(result.duration_ms).toBe(23400);
expect(result.session_id).toBe('session-xyz-789');
});
8 changes: 4 additions & 4 deletions plugins/consult/commands/consult.md
@@ -174,8 +174,8 @@ AskUserQuestion:
question: "Which Gemini model?"
multiSelect: false
options:
- label: "gemini-3-pro" description: "Most capable, strong reasoning"
- label: "gemini-3-flash" description: "Fast, 78% SWE-bench"
- label: "gemini-3.1-pro-preview" description: "Most capable, strong reasoning"
- label: "gemini-3-flash-preview" description: "Fast, efficient coding"
- label: "gemini-2.5-pro" description: "Previous gen pro model"
- label: "gemini-2.5-flash" description: "Previous gen flash model"
```
@@ -219,7 +219,7 @@ AskUserQuestion:
- label: "claude-sonnet-4-5" description: "Default Copilot model"
- label: "claude-opus-4-6" description: "Most capable Claude model"
- label: "gpt-5.3-codex" description: "OpenAI GPT-5.3 Codex"
- label: "gemini-3-pro" description: "Google Gemini 3 Pro"
- label: "gemini-3.1-pro-preview" description: "Google Gemini 3.1 Pro"
```

Map the user's choice to the model string (strip " (Recommended)" suffix if present).
@@ -238,7 +238,7 @@ Invoke the `consult` skill directly using the Skill tool:
Skill: consult
Args: "[question]" --tool=[tool] --effort=[effort] --model=[model] [--context=[context]] [--continue=[session_id]]

Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3-pro
Example: "Is this the right approach?" --tool=gemini --effort=high --model=gemini-3.1-pro-preview
```

The skill handles the full consultation lifecycle: model resolution, command building, context packaging, execution with 120s timeout, and returns a plain JSON result.