Merged
1 change: 1 addition & 0 deletions .gitignore
@@ -66,3 +66,4 @@ TECHNICAL_DEBT.md

# Benchmark results
*-benchmark-report.md
workflow-status.json
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- **`/debate` 240s timeout enforcement** — All tool invocations in the debate workflow now enforce a hard 240-second timeout. Round 1 proposer timeouts abort the debate; round 1 challenger timeouts proceed with an uncontested position; round 2+ timeouts synthesize from completed rounds. Added "all rounds timeout" error path (`[ERROR] Debate failed: all tool invocations timed out.`). Timeout handling is consistent across the Claude Code command, OpenCode adapter, Codex adapter, and the `debate-orchestrator` agent. Restored missing "Round 2+: Challenger Follow-up" template in the OpenCode adapter SKILL.md. Fixes issue #233.

- **`/next-task` review loop exit conditions** — The Phase 9 review loop now continues iterating until all issues are resolved or a stall is detected (MAX_STALLS reduced from 2 to 1, so two consecutive iterations with identical hashes now count as a stall). The `orchestrate-review` skill now uses `completePhase()` instead of `updateFlow()` to properly advance workflow state. Added `pre-review-gates` and `docs-update` to the `PHASES` array and `RESULT_FIELD_MAP` in `workflow-state.js`, ensuring these phases can be tracked and resumed correctly. Fixes issue #235.

- **`/debate` command inline orchestration** — The `/debate` command now manages the full debate workflow directly (parse → resolve → execute → verdict), following the `/consult` pattern. The `debate-orchestrator` agent is now the programmatic entry point for other agents/workflows that need to spawn a debate via `Task()`. Fixes issue #231.
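The per-round timeout policy described in the `/debate` changelog entry above can be sketched as follows. This is a minimal illustration only; the function and field names are hypothetical, not the plugin's actual API:

```javascript
// Hypothetical sketch of the 240s timeout policy: a round-1 proposer
// timeout aborts the debate; a round-1 challenger timeout proceeds with
// the uncontested position; round 2+ timeouts synthesize from completed
// rounds; with no completed rounds at all, the debate fails outright.
const TIMEOUT_MS = 240 * 1000; // hard per-invocation limit

function onTimeout(round, role, completedRounds) {
  if (round === 1 && role === 'proposer') {
    return { action: 'abort',
             message: '[ERROR] Debate aborted: proposer timed out after 240s' };
  }
  if (round === 1 && role === 'challenger') {
    return { action: 'proceed', note: 'uncontested proposer position' };
  }
  if (completedRounds === 0) {
    return { action: 'fail',
             message: '[ERROR] Debate failed: all tool invocations timed out.' };
  }
  return { action: 'synthesize', rounds: completedRounds };
}
```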
73 changes: 71 additions & 2 deletions __tests__/debate-command.test.js
@@ -13,20 +13,30 @@ const fs = require('fs');
const path = require('path');

const pluginsDir = path.join(__dirname, '..', 'plugins');
const adaptersDir = path.join(__dirname, '..', 'adapters');
const debateDir = path.join(pluginsDir, 'debate');
const commandPath = path.join(debateDir, 'commands', 'debate.md');
const skillPath = path.join(debateDir, 'skills', 'debate', 'SKILL.md');
const agentPath = path.join(debateDir, 'agents', 'debate-orchestrator.md');
const pluginJsonPath = path.join(debateDir, '.claude-plugin', 'plugin.json');
const codexSkillPath = path.join(adaptersDir, 'codex', 'skills', 'debate', 'SKILL.md');
const openCodeCommandPath = path.join(adaptersDir, 'opencode', 'commands', 'debate.md');
const openCodeSkillPath = path.join(adaptersDir, 'opencode', 'skills', 'debate', 'SKILL.md');
const openCodeAgentPath = path.join(adaptersDir, 'opencode', 'agents', 'debate-orchestrator.md');

// Load all files once
let commandContent, skillContent, agentContent, pluginJson;
let codexSkillContent, openCodeCommandContent, openCodeSkillContent, openCodeAgentContent;

beforeAll(() => {
commandContent = fs.readFileSync(commandPath, 'utf8');
skillContent = fs.readFileSync(skillPath, 'utf8');
agentContent = fs.readFileSync(agentPath, 'utf8');
pluginJson = JSON.parse(fs.readFileSync(pluginJsonPath, 'utf8'));
codexSkillContent = fs.readFileSync(codexSkillPath, 'utf8');
openCodeCommandContent = fs.readFileSync(openCodeCommandPath, 'utf8');
openCodeSkillContent = fs.readFileSync(openCodeSkillPath, 'utf8');
openCodeAgentContent = fs.readFileSync(openCodeAgentPath, 'utf8');
});

// ─── Helpers ────────────────────────────────────────────────────────
@@ -217,8 +227,12 @@ describe('security constraints', () => {
expect(commandContent).toMatch(/## Output Sanitization/);
});

test('orchestrator mentions 120s timeout', () => {
expect(agentContent).toMatch(/120s?\s*timeout/i);
test('orchestrator mentions 240s timeout', () => {
expect(agentContent).toMatch(/240s?\s*timeout/i);
});

test('command mentions 240s timeout', () => {
expect(commandContent).toMatch(/240s?\s*timeout/i);
});
});

@@ -397,6 +411,18 @@ describe('error handling coverage', () => {
expect(commandContent).toMatch(/Challenger fails round 1/i);
expect(commandContent).toMatch(/Any tool fails mid-debate/i);
});

test('command handles tool invocation timeout', () => {
expect(commandContent).toMatch(/Tool invocation timeout|timed out after 240s/i);
});

test('command documents all-rounds-timeout error', () => {
expect(commandContent).toContain('[ERROR] Debate failed: all tool invocations timed out.');
});

test('skill documents all-rounds-timeout error', () => {
expect(skillContent).toContain('[ERROR] Debate failed: all tool invocations timed out.');
});
});

// ─── 9. Synthesis Format ────────────────────────────────────────────
@@ -577,7 +603,50 @@ describe('anti-convergence mechanisms', () => {
expect(template).toMatch(/"I agree now" without evidence is not/i);
});

test('opencode skill Challenger Follow-up template matches canonical', () => {
const canonicalSection = skillContent.match(
/### Round 2\+: Challenger Follow-up[\s\S]*?```[\s\S]*?```/
);
const adapterSection = openCodeSkillContent.match(
/### Round 2\+: Challenger Follow-up[\s\S]*?```[\s\S]*?```/
);
expect(canonicalSection).not.toBeNull();
expect(adapterSection).not.toBeNull();
// Both should contain the key anti-convergence instruction
expect(adapterSection[0]).toMatch(/"I agree now" without evidence is not/i);
});

test('opencode skill has complete Challenger Follow-up template', () => {
// Must contain the full template, not the stub comment
expect(openCodeSkillContent).toMatch(/Default to suspicion, not acceptance/i);
expect(openCodeSkillContent).toMatch(/Do NOT let the proposer reframe.*as agreements/i);
expect(openCodeSkillContent).not.toContain('*(JavaScript reference - not executable in OpenCode)*');
});

test('debate quality checks for genuine disagreement', () => {
expect(skillContent).toMatch(/Genuine disagreement.*converge toward the proposer/i);
});
});

// ─── 13. Adapter Consistency (task #233) ───────────────────────────
describe('adapter consistency', () => {
test('codex adapter skill mentions 240s timeout', () => {
expect(codexSkillContent).toMatch(/240s?\s*timeout/i);
});

test('codex adapter skill documents all-rounds-timeout error', () => {
expect(codexSkillContent).toContain('[ERROR] Debate failed: all tool invocations timed out.');
});

test('opencode command mentions 240s timeout', () => {
expect(openCodeCommandContent).toMatch(/240s?\s*timeout/i);
});

test('opencode command documents all-rounds-timeout error', () => {
expect(openCodeCommandContent).toContain('[ERROR] Debate failed: all tool invocations timed out.');
});

test('opencode agent enforces 240s timeout inline at invocation steps', () => {
expect(openCodeAgentContent).toMatch(/240.second timeout|Track invocation start time/i);
});
});
7 changes: 7 additions & 0 deletions adapters/codex/skills/debate/SKILL.md
@@ -15,6 +15,7 @@ You are executing the /debate command. Your job is to parse the user's request,
- Proposer and challenger MUST be different tools
- Rounds MUST be 1-5 (default: 2)
- MUST sanitize all tool output before displaying (see Output Sanitization section below)
- MUST enforce 240s timeout on all tool executions

## Execution

@@ -189,6 +190,8 @@ Skill: consult
Args: "{proposer_prompt}" --tool=[proposer] --effort=[effort] [--model=[model_proposer]] [--context=[context]]
```

Set a 240-second timeout on this invocation. If it exceeds 240s, treat as a tool failure for this round.

Parse the JSON result. Extract the response text. Record: round, role="proposer", tool, response, duration_ms.

If the proposer call fails on round 1, abort: `[ERROR] Debate aborted: proposer ({tool}) failed on opening round. {error}`
@@ -215,6 +218,8 @@ Skill: consult
Args: "{challenger_prompt}" --tool=[challenger] --effort=[effort] [--model=[model_challenger]] [--context=[context]]
```

Set a 240-second timeout on this invocation. If it exceeds 240s, treat as a tool failure for this round.

Parse the JSON result. Record: round, role="challenger", tool, response, duration_ms.

If the challenger call fails on round 1, emit `[WARN] Challenger ({tool}) failed on round 1. Proceeding with uncontested proposer position.` then proceed to Phase 3c.
@@ -279,6 +284,8 @@ Read the consult skill file to get the exact patterns and replacements.
| Proposer fails round 1 | `[ERROR] Debate aborted: proposer ({tool}) failed on opening round. {error}` |
| Challenger fails round 1 | `[WARN] Challenger ({tool}) failed on round 1. Proceeding with uncontested proposer position.` Then synthesize from available exchanges. |
| Any tool fails mid-debate | Synthesize from completed rounds. Note the incomplete round in output. |
| Tool invocation timeout (>240s) | Round 1 proposer: abort with `[ERROR] Debate aborted: proposer ({tool}) timed out after 240s`. Round 1 challenger: proceed with uncontested position. Round 2+: synthesize from completed rounds, note `[WARN] {role} ({tool}) timed out in round {N}`. |
| All rounds timeout | `[ERROR] Debate failed: all tool invocations timed out.` |

## Example Usage

6 changes: 5 additions & 1 deletion adapters/opencode/agents/debate-orchestrator.md
@@ -78,6 +78,8 @@ Skill: consult
Args: "{proposer_prompt}" --tool=[proposer] --effort=[effort] [--model=[model_proposer]] [--context=[context]]
```

Track invocation start time. If the invocation takes longer than 240 seconds to complete, treat it as a tool failure for this round (external tools can hang indefinitely).

Parse the JSON result. Extract the response text. Record: round, role="proposer", tool, response, duration_ms.

Display to user immediately:
@@ -105,6 +107,8 @@ Skill: consult
Args: "{challenger_prompt}" --tool=[challenger] --effort=[effort] [--model=[model_challenger]] [--context=[context]]
```

Track invocation start time. If the invocation takes longer than 240 seconds to complete, treat it as a tool failure for this round (external tools can hang indefinitely).

Parse the JSON result. Record: round, role="challenger", tool, response, duration_ms.

Display to user immediately:
@@ -163,7 +167,7 @@ Read the consult skill file to get the exact patterns and replacements.
- NEVER run with permission-bypassing flags
- MUST invoke the debate skill before starting rounds (for templates)
- MUST invoke the consult skill for each tool call (for provider configs)
- MUST set 120s timeout on each Bash execution
- MUST enforce a 240s timeout per invocation — WHY: external tools can hang indefinitely, blocking remaining rounds and wasting user time
- MUST display each round progressively as it completes
- MUST pick a winner in the verdict - no diplomatic non-answers
- MUST sanitize all tool output before displaying
31 changes: 29 additions & 2 deletions adapters/opencode/agents/learn-agent.md
@@ -123,7 +123,34 @@ Store in memory:

Create `agent-knowledge/{slug}.md`:

*(JavaScript reference - not executable in OpenCode)*
````markdown
# Learning Guide: {Topic}

**Generated**: {date}
**Sources**: {count} resources analyzed
**Depth**: {depth}

## Prerequisites

- What you should know before starting
- Required tools/environment

## TL;DR

3-5 bullet points covering the essentials.

## Core Concepts

### Concept 1
{Explanation synthesized from sources}

### Concept 2
{Explanation synthesized from sources}

## Code Examples

### Basic Example
```{language}
{code from sources}
```

@@ -152,7 +179,7 @@ Create `agent-knowledge/{slug}.md`:
---

*This guide was synthesized from {count} sources. See `resources/{slug}-sources.json` for full source list.*
```
````

### 7. Save Source Metadata

7 changes: 7 additions & 0 deletions adapters/opencode/commands/debate.md
@@ -15,6 +15,7 @@ You are executing the /debate command. Your job is to parse the user's request,
- Proposer and challenger MUST be different tools
- Rounds MUST be 1-5 (default: 2)
- MUST sanitize all tool output before displaying (see Output Sanitization section below)
- MUST enforce 240s timeout on all tool executions

## Execution

@@ -193,6 +194,8 @@ Skill: consult
Args: "{proposer_prompt}" --tool=[proposer] --effort=[effort] [--model=[model_proposer]] [--context=[context]]
```

Set a 240-second timeout on this invocation. If it exceeds 240s, treat as a tool failure for this round.

Parse the JSON result. Extract the response text. Record: round, role="proposer", tool, response, duration_ms.

If the proposer call fails on round 1, abort: `[ERROR] Debate aborted: proposer ({tool}) failed on opening round. {error}`
@@ -219,6 +222,8 @@ Skill: consult
Args: "{challenger_prompt}" --tool=[challenger] --effort=[effort] [--model=[model_challenger]] [--context=[context]]
```

Set a 240-second timeout on this invocation. If it exceeds 240s, treat as a tool failure for this round.

Parse the JSON result. Record: round, role="challenger", tool, response, duration_ms.

If the challenger call fails on round 1, emit `[WARN] Challenger ({tool}) failed on round 1. Proceeding with uncontested proposer position.` then proceed to Phase 3c.
@@ -283,6 +288,8 @@ Read the consult skill file to get the exact patterns and replacements.
| Proposer fails round 1 | `[ERROR] Debate aborted: proposer ({tool}) failed on opening round. {error}` |
| Challenger fails round 1 | `[WARN] Challenger ({tool}) failed on round 1. Proceeding with uncontested proposer position.` Then synthesize from available exchanges. |
| Any tool fails mid-debate | Synthesize from completed rounds. Note the incomplete round in output. |
| Tool invocation timeout (>240s) | Round 1 proposer: abort with `[ERROR] Debate aborted: proposer ({tool}) timed out after 240s`. Round 1 challenger: proceed with uncontested position. Round 2+: synthesize from completed rounds, note `[WARN] {role} ({tool}) timed out in round {N}`. |
| All rounds timeout | `[ERROR] Debate failed: all tool invocations timed out.` |

## Example Usage

28 changes: 27 additions & 1 deletion adapters/opencode/skills/debate/SKILL.md
@@ -102,7 +102,32 @@ Provide your defense:

### Round 2+: Challenger Follow-up

*(JavaScript reference - not executable in OpenCode)*
```
You are the CHALLENGER in round {round} of a structured debate.

Topic: {topic}

{context_summary}

The PROPOSER ({proposer_tool}) responded to your challenges:

---
{proposer_previous_response}
---

IMPORTANT: Do NOT let the proposer reframe your challenges as agreements. If they say "we actually agree" but haven't addressed the substance, reject it. Default to suspicion, not acceptance.

Your job: Evaluate the proposer's defense. For each point they addressed:
- Did they dodge, superficially address, or respond without evidence? Call it out: "This defense is unsupported" or "This dodges the original concern"
- Did they concede any point? Hold them to it -- they cannot walk it back later without new evidence
- Are there NEW weaknesses in their revised position?
- Did they adequately address your concern with specific evidence? Only then acknowledge it, and cite what convinced you

You MUST either identify at least one new weakness or unresolved concern, OR explicitly certify a previous concern as genuinely resolved with specific evidence for why you're now satisfied. "I'm convinced because [evidence]" is acceptable. "I agree now" without evidence is not.
If you see new problems, raise them.

Provide your follow-up:
```

## Context Assembly

@@ -231,6 +256,7 @@ Platform state directory:
| Proposer fails round 1 | Abort debate. Cannot proceed without opening position. |
| Challenger fails round 1 | Show proposer's position with note: "[WARN] Challenger failed. Showing proposer's uncontested position." |
| Any tool fails mid-debate | Synthesize from completed rounds. Note incomplete round in output. |
| Tool invocation timeout (>240s) | Round 1 proposer: abort. Round 1 challenger: proceed with uncontested. Round 2+: synthesize from completed rounds with timeout note. |
| All rounds timeout | "[ERROR] Debate failed: all tool invocations timed out." |

## Consult Skill Integration
2 changes: 1 addition & 1 deletion lib/adapter-transforms.js
@@ -68,7 +68,7 @@ function transformBodyForOpenCode(content, repoRoot) {
}

if (code.includes('require(') || code.includes('Task(') ||
code.includes('const ') || code.includes('let ') ||
/^\s*const\s+[a-zA-Z_$[{]/m.test(code) || /^\s*let\s+[a-zA-Z_$[{]/m.test(code) ||
code.includes('function ') || code.includes('=>') ||
code.includes('async ') || code.includes('await ') ||
code.includes('completePhase')) {
2 changes: 1 addition & 1 deletion plugins/audit-project/lib/adapter-transforms.js
@@ -68,7 +68,7 @@ function transformBodyForOpenCode(content, repoRoot) {
}

if (code.includes('require(') || code.includes('Task(') ||
code.includes('const ') || code.includes('let ') ||
/^\s*const\s+[a-zA-Z_$[{]/m.test(code) || /^\s*let\s+[a-zA-Z_$[{]/m.test(code) ||
code.includes('function ') || code.includes('=>') ||
code.includes('async ') || code.includes('await ') ||
code.includes('completePhase')) {
6 changes: 5 additions & 1 deletion plugins/debate/agents/debate-orchestrator.md
@@ -84,6 +84,8 @@ Skill: consult
Args: "{proposer_prompt}" --tool=[proposer] --effort=[effort] [--model=[model_proposer]] [--context=[context]]
```

Track invocation start time. If the invocation takes longer than 240 seconds to complete, treat it as a tool failure for this round (external tools can hang indefinitely).

Parse the JSON result. Extract the response text. Record: round, role="proposer", tool, response, duration_ms.

Display to user immediately:
@@ -111,6 +113,8 @@ Skill: consult
Args: "{challenger_prompt}" --tool=[challenger] --effort=[effort] [--model=[model_challenger]] [--context=[context]]
```

Track invocation start time. If the invocation takes longer than 240 seconds to complete, treat it as a tool failure for this round (external tools can hang indefinitely).

Parse the JSON result. Record: round, role="challenger", tool, response, duration_ms.

Display to user immediately:
@@ -169,7 +173,7 @@ Read the consult skill file to get the exact patterns and replacements.
- NEVER run with permission-bypassing flags
- MUST invoke the debate skill before starting rounds (for templates)
- MUST invoke the consult skill for each tool call (for provider configs)
- MUST set 120s timeout on each Bash execution
- MUST enforce a 240s timeout per invocation — WHY: external tools can hang indefinitely, blocking remaining rounds and wasting user time
- MUST display each round progressively as it completes
- MUST pick a winner in the verdict - no diplomatic non-answers
- MUST sanitize all tool output before displaying