Merged
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- **`/debate` command inline orchestration** — The `/debate` command now manages the full debate workflow directly (parse → resolve → execute → verdict), following the `/consult` pattern. The `debate-orchestrator` agent is now the programmatic entry point for other agents/workflows that need to spawn a debate via `Task()`. Fixes issue #231.

## [5.1.0] - 2026-02-18

### Added
2 changes: 1 addition & 1 deletion README.md
@@ -91,7 +91,7 @@ This came from testing on 1,000+ repositories.
| [`/sync-docs`](#sync-docs) | Finds outdated references, stale examples, missing CHANGELOG entries |
| [`/learn`](#learn) | Research any topic, gather online sources, create learning guide with RAG index |
| [`/consult`](#consult) | Consult another AI CLI tool for a second opinion. Use when you want to cross-check ideas, get alternative approaches, or validate decisions with Gemini, Codex, Claude, OpenCode, or Copilot. |
| [`/debate`](#debate) | Structured debate between two AI tools to stress-test ideas. Proposer/Challenger format with a verdict. |
| [`/debate`](#debate) | Use when user asks to "debate", "argue about", "compare perspectives", "stress test idea", "devil advocate", or "tool vs tool". Structured debate between two AI tools with proposer/challenger roles and a verdict. |
<!-- GEN:END:readme-commands -->

Each command works standalone. Together, they compose into end-to-end pipelines.
53 changes: 34 additions & 19 deletions __tests__/debate-command.test.js
@@ -161,8 +161,9 @@ describe('provider configuration - prompt templates', () => {

// ─── 3. Command / Skill / Agent Alignment ───────────────────────────
describe('command/skill/agent alignment', () => {
test('command spawns debate:debate-orchestrator', () => {
expect(commandContent).toMatch(/debate:debate-orchestrator/);
test('command invokes debate and consult skills inline', () => {
expect(commandContent).toMatch(/Skill:\s*debate/);
expect(commandContent).toMatch(/Skill:\s*consult/);
});

test('agent invokes debate skill', () => {
@@ -173,21 +174,23 @@ describe('command/skill/agent alignment', () => {
expect(agentContent).toMatch(/Skill:\s*consult/);
});

test('skill version matches plugin.json version', () => {
const fm = parseFrontmatter(skillContent);
expect(fm.version).toBe(pluginJson.version);
});

test('command invokes skill via Task tool in Phase 3', () => {
expect(commandContent).toMatch(/Task:/);
expect(commandContent).toMatch(/debate:debate-orchestrator/);
test('command invokes skills via Skill blocks in Phase 3', () => {
const phase3Match = commandContent.match(/### Phase 3[\s\S]*$/);
expect(phase3Match).not.toBeNull();
const phase3 = phase3Match[0];
expect(phase3).toMatch(/Skill:\s*debate/);
expect(phase3).toMatch(/Skill:\s*consult/);
});

test('agent has Skill tool for invoking skills', () => {
const fm = parseFrontmatter(agentContent);
const toolsStr = Array.isArray(fm.tools) ? fm.tools.join(', ') : fm.tools;
expect(toolsStr).toContain('Skill');
});

test('command does not spawn debate-orchestrator via Task', () => {
expect(commandContent).not.toMatch(/subagent_type.*debate-orchestrator|debate:debate-orchestrator/);
});
});

// ─── 4. Security Constraints ────────────────────────────────────────
Expand All @@ -210,6 +213,10 @@ describe('security constraints', () => {
expect(agentContent).toMatch(/Output Sanitization/);
});

test('command has output sanitization section', () => {
expect(commandContent).toMatch(/## Output Sanitization/);
});

test('orchestrator mentions 120s timeout', () => {
expect(agentContent).toMatch(/120s?\s*timeout/i);
});
@@ -385,8 +392,10 @@ describe('error handling coverage', () => {
expect(commandContent).toMatch(/context.*file=PATH|--context=.*file/i);
});

test('command handles orchestrator failure', () => {
expect(commandContent).toMatch(/Orchestrator fails|Debate failed/i);
test('command handles tool failure during debate', () => {
expect(commandContent).toMatch(/Proposer fails round 1/i);
expect(commandContent).toMatch(/Challenger fails round 1/i);
expect(commandContent).toMatch(/Any tool fails mid-debate/i);
});
});

@@ -436,10 +445,16 @@ describe('cross-file consistency', () => {
expect(fm.model).toBe('opus');
});

test('command allowed-tools includes Task', () => {
test('command allowed-tools includes Skill', () => {
const fm = parseFrontmatter(commandContent);
const tools = fm['allowed-tools'] || '';
expect(tools).toContain('Skill');
});

test('command allowed-tools does not include Task (least-privilege)', () => {
const fm = parseFrontmatter(commandContent);
const tools = fm['allowed-tools'] || '';
expect(tools).toContain('Task');
expect(tools).not.toContain('Task');
});

test('command allowed-tools includes AskUserQuestion', () => {
@@ -453,10 +468,10 @@
expect(fm.version).toBe(pluginJson.version);
});

test('orchestrator description mentions proposer/challenger', () => {
test('orchestrator description describes programmatic entry point', () => {
const fm = parseFrontmatter(agentContent);
expect(fm.description).toMatch(/proposer/i);
expect(fm.description).toMatch(/challenger/i);
expect(fm.description).toMatch(/programmatic/i);
expect(fm.description).toMatch(/Task\(\)/);
});

test('agent tools list includes all 5 provider CLI tools', () => {
@@ -470,8 +485,8 @@
});

test('command and agent both reference debate skill', () => {
// Command spawns orchestrator which invokes skill
expect(commandContent).toMatch(/debate/i);
// Command executes debate inline via Skill:debate and Skill:consult. Agent is the programmatic entry point for Task() callers.
expect(commandContent).toMatch(/Skill:\s*debate/);
expect(agentContent).toMatch(/Skill:\s*debate/);
});
});
138 changes: 111 additions & 27 deletions adapters/codex/skills/debate/SKILL.md
@@ -5,7 +5,7 @@ description: "Use when user asks to \"debate\", \"argue about\", \"compare persp

# /debate - Structured AI Dialectic

You are executing the /debate command. Your job is to parse the user's request, resolve missing parameters interactively, and spawn the debate orchestrator.
You are executing the /debate command. Your job is to parse the user's request, resolve missing parameters interactively, and execute the debate directly.

## Constraints

Expand All @@ -14,6 +14,7 @@ You are executing the /debate command. Your job is to parse the user's request,
- MUST validate tool names against allow-list: gemini, codex, claude, opencode, copilot
- Proposer and challenger MUST be different tools
- Rounds MUST be 1-5 (default: 2)
- MUST sanitize all tool output before displaying (see Output Sanitization section below)
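The validation constraints above can be sketched as a small checker. This is an illustrative sketch, not code from the repo; the function name is hypothetical, and the error strings follow the Error Handling table below.

```javascript
// Illustrative parameter validation for /debate (names are hypothetical).
const ALLOWED_TOOLS = ["gemini", "codex", "claude", "opencode", "copilot"];

function validateDebateParams({ proposer, challenger, rounds = 2 }) {
  // Tool names must come from the allow-list.
  for (const tool of [proposer, challenger]) {
    if (!ALLOWED_TOOLS.includes(tool)) {
      throw new Error(`[ERROR] Unknown tool: ${tool}`);
    }
  }
  // Proposer and challenger must be different tools.
  if (proposer === challenger) {
    throw new Error("[ERROR] Proposer and challenger must be different tools.");
  }
  // Rounds must be an integer in 1-5 (default: 2).
  if (!Number.isInteger(rounds) || rounds < 1 || rounds > 5) {
    throw new Error(`[ERROR] Rounds must be 1-5. Got: ${rounds}`);
  }
  return { proposer, challenger, rounds };
}
```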

## Execution

Expand Down Expand Up @@ -151,38 +152,119 @@ If context resolved to "file":

If proposer and challenger resolve to the same tool after selection, show error and re-ask for challenger.

### Phase 3: Spawn Debate Orchestrator
### Phase 3: Execute Debate

With all parameters resolved, spawn the debate orchestrator agent:
With all parameters resolved (topic, proposer, challenger, effort, rounds, optional model_proposer, model_challenger, context), execute the debate directly.

#### Phase 3a: Load Debate Templates

Invoke the `debate` skill to load prompt templates, context assembly rules, and synthesis format:

```
Skill: debate
Args: "[topic]" --proposer=[proposer] --challenger=[challenger] --rounds=[rounds] --effort=[effort]
```

> **Review comment (P1):** Avoid self-invoking the Codex debate skill. In the Codex adapter, this file is itself the `debate` skill (`name: debate` at the top), but Phase 3a now tells the runtime to invoke `Skill: debate`; because there is no separate debate-template skill in `adapters/codex/skills/`, this resolves back to the same skill and can recurse instead of loading templates, which blocks `/debate` execution on Codex installs. This was introduced by replacing the prior Task-based flow with an inline `Skill: debate` call.

The skill returns the prompt templates and rules. Use them for all subsequent steps.

#### Phase 3b: Execute Debate Rounds

For each round (1 through N):

**Build Proposer Prompt:**

- **Round 1**: Use the "Round 1: Proposer Opening" template from the skill. Substitute {topic}.
- **Round 2+**: Use the "Round 2+: Proposer Defense" template. Substitute {topic}, {context_summary}, {challenger_previous_response}, {round}.

**Context assembly rules:**
- **Rounds 1-2**: Include full text of all prior exchanges per the skill's context format.
- **Round 3+**: Summarize rounds 1 through {round}-2 (target 500-800 tokens, preserving core positions, key evidence, all concessions as verbatim quotes, points of disagreement, and any contradictions between rounds). Include only the most recent round's responses in full.
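The context assembly rules above can be sketched as follows, assuming an exchange record shape of `{round, role, tool, response}` and treating the LLM summarization step as an injected `summarize` function (both are illustrative assumptions, not the skill's actual data model):

```javascript
// Sketch of the round-based context policy: full history for rounds 1-2,
// summarized older rounds plus the latest round in full from round 3 on.
function buildContext(exchanges, round, summarize) {
  const render = (e) => `[Round ${e.round}] ${e.tool} (${e.role}): ${e.response}`;

  if (round <= 2) {
    // Rounds 1-2: include full text of all prior exchanges.
    return exchanges.map(render).join("\n\n");
  }

  // Round 3+: summarize rounds 1..round-2; keep the most recent round verbatim.
  const older = exchanges.filter((e) => e.round <= round - 2);
  const latest = exchanges.filter((e) => e.round === round - 1);
  return [summarize(older), ...latest.map(render)].join("\n\n");
}
```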

**Invoke Proposer via Consult Skill:**

Only include `--model=[model_proposer]` if the user provided a specific model. If model is "omit", empty, or "auto", do NOT pass --model to the consult skill.

```
Skill: consult
Args: "{proposer_prompt}" --tool=[proposer] --effort=[effort] [--model=[model_proposer]] [--context=[context]]
```
Task:
subagent_type: "debate:debate-orchestrator"
model: opus
prompt: |
Execute a structured debate with these pre-resolved parameters:
- topic: [topic]
- proposer: [proposer tool]
- challenger: [challenger tool]
- effort: [effort]
- rounds: [rounds]
- model_proposer: [model or "omit"]
- model_challenger: [model or "omit"]

If model is "omit" or empty, do NOT include --model in consult skill invocations. The consult skill will use effort-based defaults.
- context: [context or "none"]

Follow the debate skill templates. Display each round progressively.
Deliver a verdict that picks a winner.

Parse the JSON result. Extract the response text. Record: round, role="proposer", tool, response, duration_ms.

If the proposer call fails on round 1, abort: `[ERROR] Debate aborted: proposer ({tool}) failed on opening round. {error}`
If the proposer call fails on round 2+, skip remaining rounds and proceed to Phase 3c (synthesize from completed rounds, note the early stop).

Display to user immediately:
```
--- Round {round}: {proposer_tool} (Proposer) ---

{proposer_response}
```

**Build Challenger Prompt:**

- **Round 1**: Use the "Round 1: Challenger Response" template from the skill. Substitute {topic}, {proposer_tool}, {proposer_round1_response}.
- **Round 2+**: Use the "Round 2+: Challenger Follow-up" template. Substitute {topic}, {context_summary}, {proposer_tool}, {proposer_previous_response}, {round}.

**Invoke Challenger via Consult Skill:**

Only include `--model=[model_challenger]` if the user provided a specific model. If model is "omit", empty, or "auto", do NOT pass --model to the consult skill.

```
Skill: consult
Args: "{challenger_prompt}" --tool=[challenger] --effort=[effort] [--model=[model_challenger]] [--context=[context]]
```

Parse the JSON result. Record: round, role="challenger", tool, response, duration_ms.

If the challenger call fails on round 1, emit `[WARN] Challenger ({tool}) failed on round 1. Proceeding with uncontested proposer position.` then proceed to Phase 3c.
If the challenger call fails on round 2+, skip remaining rounds and proceed to Phase 3c.

Display to user immediately:
```
--- Round {round}: {challenger_tool} (Challenger) ---

{challenger_response}
```

Assemble context for the next round using the context assembly rules above.

#### Phase 3c: Synthesize and Deliver Verdict

After all rounds complete (or after a partial failure), YOU are the JUDGE. Read all exchanges carefully. Use the synthesis format from the debate skill:

1. **Pick a winner.** Which tool made the stronger argument overall? Why? Cite 2-3 specific arguments that were decisive.
2. **List agreements.** What did both tools agree on? Include evidence that supports each agreement.
3. **List disagreements.** Where do they still diverge? What's each side's position?
4. **List unresolved questions.** What did neither side address adequately?
5. **Make a recommendation.** What should the user DO? Be specific and actionable.

**Verdict rules (from the debate skill):**
- You MUST pick a side. "Both approaches have merit" is NOT acceptable.
- Cite specific arguments from the debate as evidence.
- The recommendation must be actionable.
- Be honest about what wasn't resolved.

Display the full synthesis using the format from the debate skill's Synthesis Format section.

#### Phase 3d: Save State

Write the debate state to `{AI_STATE_DIR}/debate/last-debate.json` using the schema from the debate skill.

Platform state directory: use the AI_STATE_DIR environment variable if set. Otherwise:
- Claude Code: `.claude/`
- OpenCode: `.opencode/`
- Codex CLI: `.codex/`

Create the `debate/` subdirectory if it doesn't exist.

## Output Sanitization

### Phase 4: Present Results
Apply the FULL redaction pattern table from the consult skill (`plugins/consult/skills/consult/SKILL.md`, Output Sanitization section). The skill is the canonical source with all 14 patterns. Do NOT maintain a separate subset here.

Display the orchestrator's output directly. It includes:
- Progressive round-by-round output (displayed as each round completes)
- Final synthesis with verdict, agreements, disagreements, and recommendation
The consult skill's table covers: Anthropic keys (`sk-*`, `sk-ant-*`), OpenAI project keys (`sk-proj-*`), Google keys (`AIza*`), GitHub tokens (`ghp_*`, `gho_*`, `github_pat_*`), AWS keys (`AKIA*`, `ASIA*`), env assignments (`ANTHROPIC_API_KEY=*`, `OPENAI_API_KEY=*`, `GOOGLE_API_KEY=*`, `GEMINI_API_KEY=*`), and auth headers (`Bearer *`).

On failure: `[ERROR] Debate Failed: {specific error message}`
Read the consult skill file to get the exact patterns and replacements.
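For illustration only, a few of the pattern families named above could be applied like this. These regexes are simplified stand-ins, NOT the canonical 14-pattern table from the consult skill, which remains the source of truth:

```javascript
// Simplified redaction sketch; the consult skill's table is canonical.
const REDACTIONS = [
  [/sk-[A-Za-z0-9_-]{10,}/g, "[REDACTED_API_KEY]"],          // sk-*, sk-ant-*, sk-proj-*
  [/AIza[0-9A-Za-z_-]{35}/g, "[REDACTED_GOOGLE_KEY]"],       // Google API keys
  [/gh[po]_[A-Za-z0-9]{20,}/g, "[REDACTED_GITHUB_TOKEN]"],   // ghp_*, gho_*
  [/\b(?:AKIA|ASIA)[0-9A-Z]{16}\b/g, "[REDACTED_AWS_KEY]"],  // AWS access keys
  [/Bearer\s+[A-Za-z0-9._~+\/=-]+/g, "Bearer [REDACTED]"],   // Authorization headers
];

function sanitize(text) {
  return REDACTIONS.reduce((out, [re, repl]) => out.replace(re, repl), text);
}
```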

## Error Handling

@@ -194,7 +276,9 @@ On failure: `[ERROR] Debate Failed: {specific error message}`
| Same tool for both | `[ERROR] Proposer and challenger must be different tools.` |
| Rounds out of range | `[ERROR] Rounds must be 1-5. Got: {rounds}` |
| Context file not found | `[ERROR] Context file not found: {PATH}` |
| Orchestrator fails | `[ERROR] Debate failed: {error}` |
| Proposer fails round 1 | `[ERROR] Debate aborted: proposer ({tool}) failed on opening round. {error}` |
| Challenger fails round 1 | `[WARN] Challenger ({tool}) failed on round 1. Proceeding with uncontested proposer position.` Then synthesize from available exchanges. |
| Any tool fails mid-debate | Synthesize from completed rounds. Note the incomplete round in output. |

## Example Usage

16 changes: 8 additions & 8 deletions adapters/opencode/agents/debate-orchestrator.md
@@ -1,6 +1,6 @@
---
name: debate-orchestrator
description: "Orchestrate multi-round debates between AI tools. Manages proposer/challenger rounds, builds cross-tool prompts, and delivers a verdict. Use when the /debate command dispatches a structured debate."
description: "Orchestrate multi-round debates between AI tools. Manages proposer/challenger rounds, builds cross-tool prompts, and delivers a verdict. Programmatic entry point for other agents or workflows that need to spawn a structured debate via Task()."
mode: subagent
---

@@ -16,7 +16,7 @@ mode: subagent

You are the judge and orchestrator of a structured debate between two AI tools. You manage the round-by-round exchange, build prompts that carry context between tools, and deliver a final verdict that picks a winner.

You are spawned by the /debate command with all parameters pre-resolved.
You are spawned programmatically by other agents or workflows that need a structured debate. All parameters are pre-resolved by the caller.

## Why Opus Model

@@ -26,7 +26,7 @@ This is the most judgment-intensive agent in agentsys. You must: evaluate argume

### 1. Parse Input

Extract from prompt (ALL pre-resolved by the /debate command):
Extract from prompt (ALL pre-resolved by the caller):

**Required:**
- **topic**: The debate question
@@ -42,7 +42,7 @@ Extract from prompt (ALL pre-resolved by the /debate command):

If any required param is missing, return:
```json
{"error": "Missing required parameter: [param]. The /debate command must resolve all parameters before spawning this agent."}
{"error": "Missing required parameter: [param]. The caller must resolve all parameters before spawning this agent."}
```

### 2. Invoke Debate Skill
@@ -67,7 +67,7 @@ For each round (1 through N):

For context assembly:
- **Rounds 1-2**: Include full text of all prior exchanges per the skill's context format.
- **Round 3+**: Summarize rounds 1 through N-2 yourself (you have the full exchange history). Include only the most recent round's responses in full.
- **Round 3+**: Summarize rounds 1 through {round}-2 yourself (you have the full exchange history). Include only the most recent round's responses in full.

#### 3b. Invoke Proposer via Consult Skill

@@ -82,7 +82,7 @@ Parse the JSON result. Extract the response text. Record: round, role="proposer"

Display to user immediately:
```
--- Round {N}: {proposer_tool} (Proposer) ---
--- Round {round}: {proposer_tool} (Proposer) ---

{proposer_response}
```
@@ -109,7 +109,7 @@ Parse the JSON result. Record: round, role="challenger", tool, response, duratio

Display to user immediately:
```
--- Round {N}: {challenger_tool} (Challenger) ---
--- Round {round}: {challenger_tool} (Challenger) ---

{challenger_response}
```
@@ -153,7 +153,7 @@ Create the `debate/` subdirectory if it doesn't exist.

Apply the FULL redaction pattern table from the consult skill (`plugins/consult/skills/consult/SKILL.md`, Output Sanitization section). The skill is the canonical source with all 14 patterns. Do NOT maintain a separate subset here.

The consult skill's table covers: Anthropic keys (`sk-*`, `sk-ant-*`, `sk-proj-*`), Google keys (`AIza*`), GitHub tokens (`ghp_*`, `gho_*`, `github_pat_*`), AWS keys (`AKIA*`, `ASIA*`), env assignments (`ANTHROPIC_API_KEY=*`, `OPENAI_API_KEY=*`, `GOOGLE_API_KEY=*`, `GEMINI_API_KEY=*`), and auth headers (`Bearer *`).
The consult skill's table covers: Anthropic keys (`sk-*`, `sk-ant-*`), OpenAI project keys (`sk-proj-*`), Google keys (`AIza*`), GitHub tokens (`ghp_*`, `gho_*`, `github_pat_*`), AWS keys (`AKIA*`, `ASIA*`), env assignments (`ANTHROPIC_API_KEY=*`, `OPENAI_API_KEY=*`, `GOOGLE_API_KEY=*`, `GEMINI_API_KEY=*`), and auth headers (`Bearer *`).

Read the consult skill file to get the exact patterns and replacements.
