From df860db81eb8b84c6c9dae3e357288a01c8ffd8d Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 13 Apr 2026 13:54:44 +0000 Subject: [PATCH 01/12] feat(pr-review): add sub-agent delegation for file-level reviews Add experimental support for splitting PR reviews across multiple sub-agents using the SDK DelegateTool. When USE_SUB_AGENTS=true: - The main agent becomes a review coordinator - It spawns file_reviewer sub-agents (one per changed file) - Each sub-agent reviews its file and returns structured findings - The coordinator consolidates results and posts a single PR review New components: - SUB_AGENT_PROMPT: coordinator prompt template in prompt.py - FILE_REVIEWER_SKILL: sub-agent persona/instructions - _create_file_reviewer_agent: factory for file_reviewer agents - _register_sub_agents: registers the agent type and DelegateTool The feature is opt-in via the use-sub-agents action input (default: false). All existing behavior is preserved when the flag is not set. Co-authored-by: openhands --- plugins/pr-review/README.md | 2 + plugins/pr-review/action.yml | 5 ++ plugins/pr-review/scripts/agent_script.py | 92 +++++++++++++++++-- plugins/pr-review/scripts/prompt.py | 104 +++++++++++++++++++++- tests/test_pr_review_prompt.py | 60 ++++++++++++- 5 files changed, 252 insertions(+), 11 deletions(-) diff --git a/plugins/pr-review/README.md b/plugins/pr-review/README.md index a8df87a6..383f8cd4 100644 --- a/plugins/pr-review/README.md +++ b/plugins/pr-review/README.md @@ -26,6 +26,7 @@ Then configure the required secrets (see [Installation](#installation) below). 
- **A/B Testing**: Support for testing multiple LLM models - **Review Context Awareness**: Considers previous reviews and unresolved threads - **Evidence Enforcement**: Optional check that PR descriptions include concrete end-to-end proof the code works, not just test output +- **Sub-Agent Delegation** *(Experimental)*: Split large PR reviews across multiple sub-agents, one per file, then consolidate findings - **Observability**: Optional Laminar integration for tracing and evaluation ## Plugin Contents @@ -143,6 +144,7 @@ PR reviews are automatically triggered when: | `llm-base-url` | No | `''` | Custom LLM endpoint URL | | `review-style` | No | `roasted` | Review style: `standard` or `roasted` | | `require-evidence` | No | `'false'` | Require the reviewer to enforce an `Evidence` section in the PR description with end-to-end proof: screenshots/videos for frontend work, commands and runtime output for backend or scripts, and an agent conversation link when applicable. Test output alone does not qualify. | +| `use-sub-agents` | No | `'false'` | **(Experimental)** Enable sub-agent delegation for file-level reviews. The main agent acts as a coordinator that spawns `file_reviewer` sub-agents via the SDK DelegateTool, delegates per-file review work, and consolidates findings into a single PR review. Useful for large PRs with many changed files. 
| | `extensions-repo` | No | `OpenHands/extensions` | Extensions repository | | `extensions-version` | No | `main` | Git ref (tag, branch, or SHA) | | `llm-api-key` | Yes | - | LLM API key | diff --git a/plugins/pr-review/action.yml b/plugins/pr-review/action.yml index 40a8d63a..6d4ef3ef 100644 --- a/plugins/pr-review/action.yml +++ b/plugins/pr-review/action.yml @@ -27,6 +27,10 @@ inputs: description: "When true, require the reviewer to check the PR description for an Evidence section proving the code works end-to-end (screenshots/videos for frontend changes; commands and runtime output for backend, CLI, or script changes; conversation link when agent-generated). Test output alone does not count." required: false default: 'false' + use-sub-agents: + description: "When true, enable sub-agent delegation for file-level reviews. The main agent acts as a coordinator that spawns file_reviewer sub-agents via the DelegateTool, delegates per-file review work, and consolidates findings into a single PR review. Experimental." 
+ required: false + default: 'false' extensions-repo: description: GitHub repository for extensions (owner/repo) required: false @@ -125,6 +129,7 @@ runs: LLM_BASE_URL: ${{ inputs.llm-base-url }} REVIEW_STYLE: ${{ inputs.review-style }} REQUIRE_EVIDENCE: ${{ inputs.require-evidence }} + USE_SUB_AGENTS: ${{ inputs.use-sub-agents }} LLM_API_KEY: ${{ inputs.llm-api-key }} GITHUB_TOKEN: ${{ inputs.github-token }} LMNR_PROJECT_API_KEY: ${{ inputs.lmnr-api-key }} diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index bca96ae8..a64c6973 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -35,6 +35,11 @@ REVIEW_STYLE: Review style ('standard' or 'roasted', default: 'standard') REQUIRE_EVIDENCE: Whether to require PR description evidence showing the code works ('true'/'false', default: 'false') + USE_SUB_AGENTS: Enable sub-agent delegation for file-level reviews + ('true'/'false', default: 'false'). When enabled, the main agent acts + as a coordinator that spawns file_reviewer sub-agents via the + DelegateTool, delegates per-file review work, and consolidates + findings into a single GitHub PR review. For setup instructions, usage examples, and GitHub Actions integration, see README.md in this directory. 
@@ -53,18 +58,22 @@ from typing import Any from lmnr import Laminar -from openhands.sdk import LLM, Agent, AgentContext, Conversation, get_logger +from openhands.sdk import LLM, Agent, AgentContext, Conversation, Tool, get_logger +from openhands.sdk.context import Skill from openhands.sdk.context.skills import load_project_skills from openhands.sdk.conversation import get_agent_final_response from openhands.sdk.git.utils import run_git_command from openhands.sdk.plugin import PluginSource +from openhands.sdk.subagent import register_agent +from openhands.sdk.tool import register_tool +from openhands.tools.delegate import DelegateTool, DelegationVisualizer from openhands.tools.preset.default import get_default_condenser, get_default_tools # Add the script directory to Python path so we can import prompt.py script_dir = Path(__file__).parent sys.path.insert(0, str(script_dir)) -from prompt import format_prompt # noqa: E402 +from prompt import format_prompt, get_file_reviewer_skill_content # noqa: E402 logger = get_logger(__name__) @@ -737,6 +746,7 @@ def validate_environment() -> dict[str, Any]: "base_url": os.getenv("LLM_BASE_URL"), "review_style": review_style, "require_evidence": _get_bool_env("REQUIRE_EVIDENCE"), + "use_sub_agents": _get_bool_env("USE_SUB_AGENTS"), "pr_info": { "number": os.getenv("PR_NUMBER"), "title": os.getenv("PR_TITLE"), @@ -772,6 +782,50 @@ def fetch_pr_context(pr_number: str) -> tuple[str, str, str]: return pr_diff, commit_id, review_context +def _create_file_reviewer_agent(llm: LLM) -> Agent: + """Factory for file_reviewer sub-agents used during delegation. + + Each sub-agent receives a skill that defines its review persona and + expected output format. It has no tools — the coordinator handles + all GitHub API interaction. 
+ """ + # review_style is read at registration time from the environment + review_style = os.getenv("REVIEW_STYLE", "standard").lower() + skill_content = get_file_reviewer_skill_content(review_style) + + skills = [ + Skill( + name="file_review_instructions", + content=skill_content, + trigger=None, + ), + ] + return Agent( + llm=llm, + tools=[], # sub-agents only analyse; coordinator posts the review + agent_context=AgentContext( + skills=skills, + system_message_suffix=( + "You are a file-level code reviewer sub-agent. " + "Return findings as a JSON array. Do NOT call the GitHub API." + ), + ), + ) + + +def _register_sub_agents() -> None: + """Register the file_reviewer agent type and the DelegateTool.""" + register_agent( + name="file_reviewer", + factory_func=_create_file_reviewer_agent, + description=( + "Reviews one or more files from a PR diff and returns structured " + "findings as a JSON array." + ), + ) + register_tool("DelegateTool", DelegateTool) + + def create_conversation( config: dict[str, Any], secrets: dict[str, str], @@ -782,6 +836,9 @@ def create_conversation( handles wiring skills, MCP config, and hooks automatically. Project-specific skills from the workspace are loaded separately. + When ``config["use_sub_agents"]`` is True the coordinator agent is + given the DelegateTool so it can spawn file_reviewer sub-agents. 
+ Args: config: Configuration dictionary from validate_environment() secrets: Secrets to mask in output @@ -813,9 +870,17 @@ def create_conversation( skills=project_skills, ) + tools = get_default_tools(enable_browser=False) + + use_sub_agents = config.get("use_sub_agents", False) + if use_sub_agents: + _register_sub_agents() + tools.append(Tool(name=DelegateTool.name)) + logger.info("Sub-agent delegation enabled — DelegateTool added") + agent = Agent( llm=llm, - tools=get_default_tools(enable_browser=False), + tools=tools, agent_context=agent_context, system_prompt_kwargs={"cli_mode": True}, condenser=get_default_condenser( @@ -825,12 +890,18 @@ def create_conversation( # The plugin directory is the parent of the scripts/ directory plugin_dir = script_dir.parent # plugins/pr-review/ - return Conversation( - agent=agent, - workspace=cwd, - secrets=secrets, - plugins=[PluginSource(source=str(plugin_dir))], - ) + conversation_kwargs: dict[str, Any] = { + "agent": agent, + "workspace": cwd, + "secrets": secrets, + "plugins": [PluginSource(source=str(plugin_dir))], + } + if use_sub_agents: + conversation_kwargs["visualizer"] = DelegationVisualizer( + name="PR Review Coordinator" + ) + + return Conversation(**conversation_kwargs) def run_review( @@ -943,10 +1014,12 @@ def main(): pr_info = config["pr_info"] review_style = config["review_style"] require_evidence = config["require_evidence"] + use_sub_agents = config["use_sub_agents"] logger.info(f"Reviewing PR #{pr_info['number']}: {pr_info['title']}") logger.info(f"Review style: {review_style}") logger.info(f"Require PR evidence: {require_evidence}") + logger.info(f"Sub-agent delegation: {use_sub_agents}") try: pr_diff, commit_id, review_context = fetch_pr_context(pr_info["number"]) @@ -968,6 +1041,7 @@ def main(): diff=pr_diff, review_context=review_context, require_evidence=require_evidence, + use_sub_agents=use_sub_agents, ) secrets = {} diff --git a/plugins/pr-review/scripts/prompt.py 
b/plugins/pr-review/scripts/prompt.py index ef842ab1..673f95ae 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -11,6 +11,10 @@ - {pr_number} - The PR number - {commit_id} - The HEAD commit SHA - {review_context} - Previous review comments and thread resolution status + +When sub-agent delegation is enabled, the main agent acts as a coordinator +that splits the diff by file and delegates individual file reviews to +sub-agents, then consolidates results and posts the final review. """ # Template for when there is review context available @@ -75,6 +79,75 @@ Analyze the changes and post your review using the GitHub API. """ +# Prompt for the main coordinator agent when sub-agent delegation is enabled. +# The coordinator splits the diff into per-file chunks and delegates each +# to a "file_reviewer" sub-agent, then consolidates and posts the review. +SUB_AGENT_PROMPT = """{skill_trigger} +/github-pr-review + +You are a **review coordinator**. Your job is to delegate the actual file-level +review work to sub-agents and then consolidate their findings into a single +GitHub PR review. + +## Pull Request Information + +- **Title**: {title} +- **Description**: {body} +- **Repository**: {repo_name} +- **Base Branch**: {base_branch} +- **Head Branch**: {head_branch} +- **PR Number**: {pr_number} +- **Commit ID**: {commit_id} + +{review_context_section}{evidence_requirements_section} + +## Instructions + +You have access to the **DelegateTool**. Follow these steps: + +1. **Spawn sub-agents** — one `file_reviewer` sub-agent per changed file (or + small group of closely related files). Use `spawn` with descriptive IDs + based on the file paths (e.g. `"review_src_utils"`, `"review_tests"`). + +2. **Delegate** — send each sub-agent the diff chunk for its file(s) together + with the PR context (title, description, base/head branch). 
Ask it to + return a structured list of findings with severity, file path, line number, + and a short description. + +3. **Collect results** — after all sub-agents respond, merge their findings. + De-duplicate and drop low-signal noise. + +4. **Post the review** — use the GitHub API (as described by /github-pr-review) + to submit a single PR review with inline comments on the relevant lines. + Keep the top-level review body brief. + +## Full Diff + +The complete diff is provided below. Split it by file when delegating. + +```diff +{diff} +``` +""" + +# System-level instruction injected into each file_reviewer sub-agent so it +# knows its role, the review style, and the expected output format. +FILE_REVIEWER_SKILL = """\ +You are a **file-level code reviewer**. You will receive a diff for one or more +files from a pull request together with PR metadata. + +Review style: {review_style_description} + +For each issue you find, return a JSON object with: +- `path`: the file path +- `line`: the diff line number (use the NEW file line number) +- `severity`: one of `critical`, `major`, `minor`, `nit` +- `body`: a concise description of the issue with a suggested fix when possible + +Return your findings as a JSON array. If you find no issues, return `[]`. +Do NOT post anything to the GitHub API — the coordinator agent will handle that. +""" + def format_prompt( skill_trigger: str, @@ -88,6 +161,7 @@ def format_prompt( diff: str, review_context: str = "", require_evidence: bool = False, + use_sub_agents: bool = False, ) -> str: """Format the PR review prompt with all parameters. @@ -105,6 +179,9 @@ def format_prompt( the review context section is omitted from the prompt. require_evidence: Whether to instruct the reviewer to enforce PR description evidence showing the code works. + use_sub_agents: When True, use the sub-agent coordinator prompt instead of + the single-agent prompt. The coordinator will delegate + file-level reviews to sub-agents and consolidate results. 
Returns: Formatted prompt string @@ -121,7 +198,9 @@ def format_prompt( _EVIDENCE_REQUIREMENT_SECTION if require_evidence else "" ) - return PROMPT.format( + template = SUB_AGENT_PROMPT if use_sub_agents else PROMPT + + return template.format( skill_trigger=skill_trigger, title=title, body=body, @@ -134,3 +213,26 @@ def format_prompt( evidence_requirements_section=evidence_requirements_section, diff=diff, ) + + +def get_file_reviewer_skill_content(review_style: str = "standard") -> str: + """Return the file_reviewer sub-agent skill content. + + Args: + review_style: 'standard' or 'roasted' + + Returns: + Formatted skill content string for the file_reviewer agent type + """ + style_descriptions = { + "standard": ( + "Balanced review covering correctness, style, readability, " + "and security. Be constructive." + ), + "roasted": ( + "Linus Torvalds-style brutally honest review. Focus on data " + "structures, simplicity, and pragmatism. No hand-holding." + ), + } + description = style_descriptions.get(review_style, style_descriptions["standard"]) + return FILE_REVIEWER_SKILL.format(review_style_description=description) diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index d94ee25d..75bfe9be 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -18,7 +18,9 @@ def _load_prompt_module(): return module -def _format_prompt(*, require_evidence: bool) -> str: +def _format_prompt( + *, require_evidence: bool, use_sub_agents: bool = False +) -> str: module = _load_prompt_module() return module.format_prompt( skill_trigger="/codereview-roasted", @@ -32,6 +34,7 @@ def _format_prompt(*, require_evidence: bool) -> str: diff="diff --git a/file b/file", review_context="", require_evidence=require_evidence, + use_sub_agents=use_sub_agents, ) @@ -52,3 +55,58 @@ def test_format_prompt_includes_evidence_requirements_when_enabled(): assert "real code path end-to-end" in prompt assert "unit test output" in prompt assert 
"https://app.all-hands.dev/conversations/{conversation_id}" in prompt + + +# --- Sub-agent delegation prompt tests --- + + +def test_format_prompt_uses_standard_prompt_by_default(): + prompt = _format_prompt(require_evidence=False, use_sub_agents=False) + + # Standard prompt should NOT mention delegation or sub-agents + assert "review coordinator" not in prompt + assert "DelegateTool" not in prompt + assert "file_reviewer" not in prompt + # Standard prompt should contain the normal review instruction + assert "Analyze the changes and post your review" in prompt + + +def test_format_prompt_uses_sub_agent_prompt_when_enabled(): + prompt = _format_prompt(require_evidence=False, use_sub_agents=True) + + # Sub-agent prompt should mention coordination and delegation + assert "review coordinator" in prompt + assert "DelegateTool" in prompt + assert "Spawn sub-agents" in prompt + assert "file_reviewer" in prompt + # Sub-agent prompt should still include the PR info + assert "Add evidence enforcement" in prompt + assert "OpenHands/extensions" in prompt + assert "abc123" in prompt + # Should include the diff + assert "diff --git a/file b/file" in prompt + + +def test_sub_agent_prompt_includes_evidence_when_enabled(): + prompt = _format_prompt(require_evidence=True, use_sub_agents=True) + + assert "review coordinator" in prompt + assert "## PR Description Evidence Requirement" in prompt + + +def test_get_file_reviewer_skill_content_standard(): + module = _load_prompt_module() + content = module.get_file_reviewer_skill_content("standard") + + assert "file-level code reviewer" in content + assert "Balanced review" in content + assert "JSON array" in content + + +def test_get_file_reviewer_skill_content_roasted(): + module = _load_prompt_module() + content = module.get_file_reviewer_skill_content("roasted") + + assert "file-level code reviewer" in content + assert "Linus Torvalds" in content + assert "JSON array" in content From e0f76813978a9d37f8f0c39342d537eb6d4c9da3 Mon Sep 
17 00:00:00 2001 From: openhands Date: Mon, 13 Apr 2026 14:08:16 +0000 Subject: [PATCH 02/12] fix: use TaskToolSet instead of deprecated DelegateTool TaskToolSet is the current SDK tool for sub-agent delegation: - Sequential/blocking execution (one file review at a time) - Single 'task' tool call with prompt, subagent_type, description - Auto-registers on import (no register_tool() needed) - Built-in resumption via task IDs DelegateTool is the older parallel variant (spawn + delegate). Co-authored-by: openhands --- plugins/pr-review/README.md | 2 +- plugins/pr-review/action.yml | 2 +- plugins/pr-review/scripts/agent_script.py | 22 +++++++++-------- plugins/pr-review/scripts/prompt.py | 30 ++++++++++++----------- tests/test_pr_review_prompt.py | 6 ++--- 5 files changed, 33 insertions(+), 29 deletions(-) diff --git a/plugins/pr-review/README.md b/plugins/pr-review/README.md index 383f8cd4..3e23e6e9 100644 --- a/plugins/pr-review/README.md +++ b/plugins/pr-review/README.md @@ -144,7 +144,7 @@ PR reviews are automatically triggered when: | `llm-base-url` | No | `''` | Custom LLM endpoint URL | | `review-style` | No | `roasted` | Review style: `standard` or `roasted` | | `require-evidence` | No | `'false'` | Require the reviewer to enforce an `Evidence` section in the PR description with end-to-end proof: screenshots/videos for frontend work, commands and runtime output for backend or scripts, and an agent conversation link when applicable. Test output alone does not qualify. | -| `use-sub-agents` | No | `'false'` | **(Experimental)** Enable sub-agent delegation for file-level reviews. The main agent acts as a coordinator that spawns `file_reviewer` sub-agents via the SDK DelegateTool, delegates per-file review work, and consolidates findings into a single PR review. Useful for large PRs with many changed files. | +| `use-sub-agents` | No | `'false'` | **(Experimental)** Enable sub-agent delegation for file-level reviews. 
The main agent acts as a coordinator that delegates per-file review work to `file_reviewer` sub-agents via the SDK TaskToolSet, then consolidates findings into a single PR review. Useful for large PRs with many changed files. | | `extensions-repo` | No | `OpenHands/extensions` | Extensions repository | | `extensions-version` | No | `main` | Git ref (tag, branch, or SHA) | | `llm-api-key` | Yes | - | LLM API key | diff --git a/plugins/pr-review/action.yml b/plugins/pr-review/action.yml index 6d4ef3ef..147437b6 100644 --- a/plugins/pr-review/action.yml +++ b/plugins/pr-review/action.yml @@ -28,7 +28,7 @@ inputs: required: false default: 'false' use-sub-agents: - description: "When true, enable sub-agent delegation for file-level reviews. The main agent acts as a coordinator that spawns file_reviewer sub-agents via the DelegateTool, delegates per-file review work, and consolidates findings into a single PR review. Experimental." + description: "When true, enable sub-agent delegation for file-level reviews. The main agent acts as a coordinator that delegates per-file review work to file_reviewer sub-agents via the TaskToolSet, then consolidates findings into a single PR review. Experimental." required: false default: 'false' extensions-repo: diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index a64c6973..49362761 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -37,8 +37,8 @@ works ('true'/'false', default: 'false') USE_SUB_AGENTS: Enable sub-agent delegation for file-level reviews ('true'/'false', default: 'false'). When enabled, the main agent acts - as a coordinator that spawns file_reviewer sub-agents via the - DelegateTool, delegates per-file review work, and consolidates + as a coordinator that delegates per-file review work to + file_reviewer sub-agents via the TaskToolSet, then consolidates findings into a single GitHub PR review. 
For setup instructions, usage examples, and GitHub Actions integration, @@ -64,10 +64,9 @@ from openhands.sdk.conversation import get_agent_final_response from openhands.sdk.git.utils import run_git_command from openhands.sdk.plugin import PluginSource -from openhands.sdk.subagent import register_agent -from openhands.sdk.tool import register_tool -from openhands.tools.delegate import DelegateTool, DelegationVisualizer +from openhands.tools.delegate import DelegationVisualizer, register_agent from openhands.tools.preset.default import get_default_condenser, get_default_tools +from openhands.tools.task import TaskToolSet # Add the script directory to Python path so we can import prompt.py script_dir = Path(__file__).parent @@ -814,7 +813,11 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: def _register_sub_agents() -> None: - """Register the file_reviewer agent type and the DelegateTool.""" + """Register the file_reviewer agent type. + + TaskToolSet auto-registers on import, so no explicit + ``register_tool()`` call is needed. + """ register_agent( name="file_reviewer", factory_func=_create_file_reviewer_agent, @@ -823,7 +826,6 @@ def _register_sub_agents() -> None: "findings as a JSON array." ), ) - register_tool("DelegateTool", DelegateTool) def create_conversation( @@ -837,7 +839,7 @@ def create_conversation( Project-specific skills from the workspace are loaded separately. When ``config["use_sub_agents"]`` is True the coordinator agent is - given the DelegateTool so it can spawn file_reviewer sub-agents. + given the TaskToolSet so it can delegate to file_reviewer sub-agents. 
Args: config: Configuration dictionary from validate_environment() @@ -875,8 +877,8 @@ def create_conversation( use_sub_agents = config.get("use_sub_agents", False) if use_sub_agents: _register_sub_agents() - tools.append(Tool(name=DelegateTool.name)) - logger.info("Sub-agent delegation enabled — DelegateTool added") + tools.append(Tool(name=TaskToolSet.name)) + logger.info("Sub-agent delegation enabled — TaskToolSet added") agent = Agent( llm=llm, diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index 673f95ae..d3ddfa4c 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -14,7 +14,8 @@ When sub-agent delegation is enabled, the main agent acts as a coordinator that splits the diff by file and delegates individual file reviews to -sub-agents, then consolidates results and posts the final review. +sub-agents via the TaskToolSet, then consolidates results and posts the +final review. """ # Template for when there is review context available @@ -81,7 +82,8 @@ # Prompt for the main coordinator agent when sub-agent delegation is enabled. # The coordinator splits the diff into per-file chunks and delegates each -# to a "file_reviewer" sub-agent, then consolidates and posts the review. +# to a "file_reviewer" sub-agent via the TaskToolSet, then consolidates +# and posts the review. SUB_AGENT_PROMPT = """{skill_trigger} /github-pr-review @@ -103,21 +105,21 @@ ## Instructions -You have access to the **DelegateTool**. Follow these steps: +You have access to the **task** tool (TaskToolSet). Follow these steps: -1. **Spawn sub-agents** — one `file_reviewer` sub-agent per changed file (or - small group of closely related files). Use `spawn` with descriptive IDs - based on the file paths (e.g. `"review_src_utils"`, `"review_tests"`). +1. 
**Delegate file reviews** — for each changed file (or small group of + closely related files), call the task tool with: + - `subagent_type`: `"file_reviewer"` + - `prompt`: the diff chunk for the file(s), together with the PR context + (title, description, base/head branch). Ask it to return a structured + list of findings with severity, file path, line number, and a short + description. + - `description`: a short label like `"Review src/utils.py"` -2. **Delegate** — send each sub-agent the diff chunk for its file(s) together - with the PR context (title, description, base/head branch). Ask it to - return a structured list of findings with severity, file path, line number, - and a short description. +2. **Collect results** — each task tool call returns the sub-agent's findings. + Merge them all together. De-duplicate and drop low-signal noise. -3. **Collect results** — after all sub-agents respond, merge their findings. - De-duplicate and drop low-signal noise. - -4. **Post the review** — use the GitHub API (as described by /github-pr-review) +3. **Post the review** — use the GitHub API (as described by /github-pr-review) to submit a single PR review with inline comments on the relevant lines. Keep the top-level review body brief. 
diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index 75bfe9be..8f7d287c 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -65,7 +65,7 @@ def test_format_prompt_uses_standard_prompt_by_default(): # Standard prompt should NOT mention delegation or sub-agents assert "review coordinator" not in prompt - assert "DelegateTool" not in prompt + assert "TaskToolSet" not in prompt assert "file_reviewer" not in prompt # Standard prompt should contain the normal review instruction assert "Analyze the changes and post your review" in prompt @@ -76,8 +76,8 @@ def test_format_prompt_uses_sub_agent_prompt_when_enabled(): # Sub-agent prompt should mention coordination and delegation assert "review coordinator" in prompt - assert "DelegateTool" in prompt - assert "Spawn sub-agents" in prompt + assert "task" in prompt.lower() + assert "TaskToolSet" in prompt assert "file_reviewer" in prompt # Sub-agent prompt should still include the PR info assert "Add evidence enforcement" in prompt From 6669953dc7bc6e34327032419746bcaf21feb881 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 13 Apr 2026 14:23:49 +0000 Subject: [PATCH 03/12] fix: update test stubs for new agent_script imports Add missing stubs to test_pr_review_review_context.py for: - sdk.Tool (added to agent_script imports) - openhands.sdk.context.Skill - openhands.sdk.plugin.PluginSource - openhands.tools.delegate (DelegationVisualizer, register_agent) - openhands.tools.task (TaskToolSet) Also fix a prompt module cache collision: clear sys.modules['prompt'] before loading agent_script.py so it picks up the correct prompt.py from pr-review/scripts/ instead of release-notes/scripts/. 
Co-authored-by: openhands --- tests/test_pr_review_review_context.py | 27 ++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test_pr_review_review_context.py b/tests/test_pr_review_review_context.py index 6fead292..3a601215 100644 --- a/tests/test_pr_review_review_context.py +++ b/tests/test_pr_review_review_context.py @@ -58,6 +58,7 @@ def set_trace_metadata(metadata): sdk.Agent = object sdk.AgentContext = object sdk.Conversation = object + sdk.Tool = object class _Logger: def info(self, *args, **kwargs): @@ -75,6 +76,10 @@ def debug(self, *args, **kwargs): sdk.get_logger = lambda name: _Logger() sys.modules["openhands.sdk"] = sdk + sdk_context = _ensure_package("openhands.sdk.context") + sdk_context.Skill = object + sys.modules["openhands.sdk.context"] = sdk_context + context_skills = types.ModuleType("openhands.sdk.context.skills") context_skills.load_project_skills = lambda cwd: [] sys.modules["openhands.sdk.context.skills"] = context_skills @@ -87,11 +92,33 @@ def debug(self, *args, **kwargs): git_utils.run_git_command = lambda command, repo_dir: "deadbeef" sys.modules["openhands.sdk.git.utils"] = git_utils + sdk_plugin = types.ModuleType("openhands.sdk.plugin") + sdk_plugin.PluginSource = object + sys.modules["openhands.sdk.plugin"] = sdk_plugin + + tools_delegate = types.ModuleType("openhands.tools.delegate") + tools_delegate.DelegationVisualizer = object + tools_delegate.register_agent = lambda **kwargs: None + sys.modules["openhands.tools.delegate"] = tools_delegate + + tools_task = types.ModuleType("openhands.tools.task") + + class _TaskToolSet: + name = "TaskToolSet" + + tools_task.TaskToolSet = _TaskToolSet + sys.modules["openhands.tools.task"] = tools_task + tools_preset = types.ModuleType("openhands.tools.preset.default") tools_preset.get_default_condenser = lambda llm: None tools_preset.get_default_tools = lambda enable_browser=False: [] sys.modules["openhands.tools.preset.default"] = tools_preset + # Clear any cached 
'prompt' module so agent_script.py picks up the + # correct prompt.py from its own scripts/ directory (not the one from + # another plugin like release-notes). + sys.modules.pop("prompt", None) + script_path = ( Path(__file__).parent.parent / "plugins" From 7dcf8b6dbf04bc40ca8cfcd8fb018f8e2b4eee7c Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 15 Apr 2026 14:43:56 +0000 Subject: [PATCH 04/12] fix: import register_agent from openhands.sdk, add known limitations docs - Move register_agent import from openhands.tools.delegate to openhands.sdk where it actually lives (fixes critical review feedback) - Update test stub to match the corrected import path - Add 'Known Limitations: Sub-Agent Delegation' section to README documenting experimental constraints (LLM-driven JSON parsing, potential consolidation info loss, no integration tests yet) Co-authored-by: openhands --- plugins/pr-review/README.md | 13 ++++++++++++- plugins/pr-review/scripts/agent_script.py | 12 ++++++++++-- tests/test_pr_review_review_context.py | 4 +++- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/plugins/pr-review/README.md b/plugins/pr-review/README.md index 93b383ca..77cb8cca 100644 --- a/plugins/pr-review/README.md +++ b/plugins/pr-review/README.md @@ -24,7 +24,7 @@ Then configure the required secrets (see [Installation](#installation) below). 
- **A/B Testing**: Support for testing multiple LLM models - **Review Context Awareness**: Considers previous reviews and unresolved threads - **Evidence Enforcement**: Optional check that PR descriptions include concrete end-to-end proof the code works, not just test output -- **Sub-Agent Delegation** *(Experimental)*: Split large PR reviews across multiple sub-agents, one per file, then consolidate findings +- **Sub-Agent Delegation** *(Experimental)*: Split large PR reviews across multiple sub-agents, one per file, then consolidate findings (see [Known Limitations](#known-limitations-sub-agent-delegation)) - **Observability**: Optional Laminar integration for tracing and evaluation ## Plugin Contents @@ -149,6 +149,17 @@ PR reviews are automatically triggered when: | `github-token` | Yes | - | GitHub token for API access | | `lmnr-api-key` | No | `''` | Laminar API key for observability | +## Known Limitations: Sub-Agent Delegation + +The `use-sub-agents` feature is **experimental** and has the following known constraints: + +- **LLM-driven JSON parsing**: The coordinator agent relies on the LLM to parse and merge JSON responses from sub-agents. There is no code-level validation of sub-agent output, so malformed responses may cause incomplete reviews. +- **Potential information loss during consolidation**: When merging findings from multiple sub-agents, the coordinator may lose or deduplicate findings imperfectly, especially for cross-file issues. +- **No integration tests yet**: Current test coverage verifies prompt formatting only. End-to-end validation of the delegation flow requires manual workflow testing. +- **Sub-agents have no tools**: File reviewer sub-agents analyse the diff in their context window only — they cannot run commands or query the GitHub API. + +These limitations are acceptable for an opt-in experimental feature and will be addressed as the feature matures. 
+ ## A/B Testing Multiple Models Test different LLM models by providing a comma-separated list: diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index 595ea0d8..278f17bd 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -55,13 +55,21 @@ from typing import Any from lmnr import Laminar -from openhands.sdk import LLM, Agent, AgentContext, Conversation, Tool, get_logger +from openhands.sdk import ( + LLM, + Agent, + AgentContext, + Conversation, + Tool, + get_logger, + register_agent, +) from openhands.sdk.context import Skill from openhands.sdk.context.skills import load_project_skills from openhands.sdk.conversation import get_agent_final_response from openhands.sdk.git.utils import run_git_command from openhands.sdk.plugin import PluginSource -from openhands.tools.delegate import DelegationVisualizer, register_agent +from openhands.tools.delegate import DelegationVisualizer from openhands.tools.preset.default import get_default_condenser, get_default_tools from openhands.tools.task import TaskToolSet diff --git a/tests/test_pr_review_review_context.py b/tests/test_pr_review_review_context.py index 3a601215..c43385dd 100644 --- a/tests/test_pr_review_review_context.py +++ b/tests/test_pr_review_review_context.py @@ -98,9 +98,11 @@ def debug(self, *args, **kwargs): tools_delegate = types.ModuleType("openhands.tools.delegate") tools_delegate.DelegationVisualizer = object - tools_delegate.register_agent = lambda **kwargs: None sys.modules["openhands.tools.delegate"] = tools_delegate + # register_agent lives in openhands.sdk, not openhands.tools.delegate + sdk.register_agent = lambda **kwargs: None + tools_task = types.ModuleType("openhands.tools.task") class _TaskToolSet: From 199020efe44289669bfd58ceb9f815632831c942 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 15 Apr 2026 14:50:23 +0000 Subject: [PATCH 05/12] fix: address second round of review feedback - 
Add JSON schema example to FILE_REVIEWER_SKILL for less ambiguous output - Add error handling guidance to coordinator prompt for malformed sub-agent responses - Fix typo: 'analyse' -> 'analyze' in README - Add smoke tests for _register_sub_agents() and _create_file_reviewer_agent() - Improve test stubs to accept keyword arguments (Skill, Agent, etc.) Co-authored-by: openhands --- plugins/pr-review/README.md | 2 +- plugins/pr-review/scripts/prompt.py | 24 +++++++++++----- tests/test_pr_review_review_context.py | 39 ++++++++++++++++++++++---- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/plugins/pr-review/README.md b/plugins/pr-review/README.md index 77cb8cca..14fcca69 100644 --- a/plugins/pr-review/README.md +++ b/plugins/pr-review/README.md @@ -156,7 +156,7 @@ The `use-sub-agents` feature is **experimental** and has the following known con - **LLM-driven JSON parsing**: The coordinator agent relies on the LLM to parse and merge JSON responses from sub-agents. There is no code-level validation of sub-agent output, so malformed responses may cause incomplete reviews. - **Potential information loss during consolidation**: When merging findings from multiple sub-agents, the coordinator may lose or deduplicate findings imperfectly, especially for cross-file issues. - **No integration tests yet**: Current test coverage verifies prompt formatting only. End-to-end validation of the delegation flow requires manual workflow testing. -- **Sub-agents have no tools**: File reviewer sub-agents analyse the diff in their context window only — they cannot run commands or query the GitHub API. +- **Sub-agents have no tools**: File reviewer sub-agents analyze the diff in their context window only — they cannot run commands or query the GitHub API. These limitations are acceptable for an opt-in experimental feature and will be addressed as the feature matures. 
diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index b8b5fafd..7e87e405 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -116,8 +116,10 @@ description. - `description`: a short label like `"Review src/utils.py"` -2. **Collect results** — each task tool call returns the sub-agent's findings. - Merge them all together. De-duplicate and drop low-signal noise. +2. **Collect results** — each task tool call returns the sub-agent's findings + as a JSON array. Merge them all together. De-duplicate and drop low-signal + noise. If a sub-agent returns malformed output (not valid JSON), skip its + results and note the file in the review body so nothing is silently lost. 3. **Post the review** — use the GitHub API (as described by /github-pr-review) to submit a single PR review with inline comments on the relevant lines. @@ -140,11 +142,19 @@ Review style: {review_style_description} -For each issue you find, return a JSON object with: -- `path`: the file path -- `line`: the diff line number (use the NEW file line number) -- `severity`: one of `critical`, `major`, `minor`, `nit` -- `body`: a concise description of the issue with a suggested fix when possible +For each issue you find, return a JSON object with these exact fields: +- `path` (string): the file path exactly as shown in the diff header +- `line` (integer): the NEW file line number where the issue occurs +- `severity` (string): one of `"critical"`, `"major"`, `"minor"`, `"nit"` +- `body` (string): a concise description of the issue with a suggested fix + +Example output: +```json +[ + {{"path": "src/utils.py", "line": 42, "severity": "major", "body": "Unchecked `None` return — add a guard before accessing `.value`."}}, + {{"path": "src/utils.py", "line": 78, "severity": "nit", "body": "Unused import `os`."}} +] +``` Return your findings as a JSON array. If you find no issues, return `[]`. 
Do NOT post anything to the GitHub API — the coordinator agent will handle that. diff --git a/tests/test_pr_review_review_context.py b/tests/test_pr_review_review_context.py index c43385dd..b999dfbb 100644 --- a/tests/test_pr_review_review_context.py +++ b/tests/test_pr_review_review_context.py @@ -53,12 +53,18 @@ def set_trace_metadata(metadata): lmnr.Laminar = _Laminar sys.modules["lmnr"] = lmnr + class _Stub: + """Generic stub that accepts any arguments.""" + def __init__(self, *args, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + sdk = types.ModuleType("openhands.sdk") - sdk.LLM = object - sdk.Agent = object - sdk.AgentContext = object - sdk.Conversation = object - sdk.Tool = object + sdk.LLM = _Stub + sdk.Agent = _Stub + sdk.AgentContext = _Stub + sdk.Conversation = _Stub + sdk.Tool = _Stub class _Logger: def info(self, *args, **kwargs): @@ -76,8 +82,13 @@ def debug(self, *args, **kwargs): sdk.get_logger = lambda name: _Logger() sys.modules["openhands.sdk"] = sdk + class _Skill: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + sdk_context = _ensure_package("openhands.sdk.context") - sdk_context.Skill = object + sdk_context.Skill = _Skill sys.modules["openhands.sdk.context"] = sdk_context context_skills = types.ModuleType("openhands.sdk.context.skills") @@ -192,3 +203,19 @@ def test_format_thread_includes_rendered_suggestion_text_in_review_context(): assert "- Do **NOT** approve the PR." 
in formatted assert "Dependabot ignores the freshness guardrail" in formatted assert "```suggestion" not in formatted + + +def test_register_sub_agents_completes_without_error(): + """Smoke test: _register_sub_agents() runs without raising.""" + module = _load_agent_script_module() + # _register_sub_agents calls register_agent (stubbed as a no-op) + module._register_sub_agents() + + +def test_create_file_reviewer_agent_factory_is_callable(): + """Smoke test: _create_file_reviewer_agent accepts an LLM and is callable.""" + module = _load_agent_script_module() + # The factory should be callable; a bare object() stands in for the LLM + result = module._create_file_reviewer_agent(object()) + # Agent is stubbed with `_Stub`, so the factory returns a stub instance + assert result is not None From 0ef1afca0e699d043bab8e72b769fb1ca5993cc0 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 15 Apr 2026 14:56:51 +0000 Subject: [PATCH 06/12] fix: address nits from third review round - Fix spelling: 'analyse' -> 'analyze' in code comment - Clarify REVIEW_STYLE deprecation scope in sub-agent factory comment Co-authored-by: openhands --- plugins/pr-review/scripts/agent_script.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index 278f17bd..9fa377f9 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -787,7 +787,8 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: expected output format. It has no tools — the coordinator handles all GitHub API interaction. """ - # review_style is read at registration time from the environment + # REVIEW_STYLE is deprecated for the main reviewer (styles are merged), + # but still used here to configure sub-agent tone. Defaults to "standard".
review_style = os.getenv("REVIEW_STYLE", "standard").lower() skill_content = get_file_reviewer_skill_content(review_style) @@ -800,7 +801,7 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: ] return Agent( llm=llm, - tools=[], # sub-agents only analyse; coordinator posts the review + tools=[], # sub-agents only analyze; coordinator posts the review agent_context=AgentContext( skills=skills, system_message_suffix=( From 4f1e5da2e2bf919deec6d8bedbdcc88aea14dedf Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 20 Apr 2026 13:54:10 +0000 Subject: [PATCH 07/12] feat(pr-review): smart delegation activation + sub-agent tool access Address two review comments on PR #164: 1. Smart activation (simonrosenberg): Add 'auto' mode for use-sub-agents. In auto mode the agent gets the TaskToolSet but decides at runtime whether to delegate based on diff size/complexity, vs reviewing directly for small PRs. 'true' forces delegation, 'false' disables it. 2. Sub-agent terminal access (VascoSch92): Give file_reviewer sub-agents terminal and file_editor tools so they can inspect surrounding code context (cat, grep, view) instead of relying only on the diff snippet. The coordinator still handles all GitHub API interaction. Changes: - action.yml: document 'auto'/'true'/'false' tri-state for use-sub-agents - agent_script.py: add _get_sub_agents_mode() for tri-state parsing, give sub-agents terminal + file_editor tools - prompt.py: add AUTO_DELEGATION_PROMPT with delegation heuristics, update FILE_REVIEWER_SKILL to mention tool access, update format_prompt to handle str|bool use_sub_agents - tests: add 4 new tests for auto mode and tool access All 11 prompt tests pass. 
Co-authored-by: openhands --- plugins/pr-review/action.yml | 7 +- plugins/pr-review/scripts/agent_script.py | 42 +++++++++--- plugins/pr-review/scripts/prompt.py | 81 ++++++++++++++++++++--- tests/test_pr_review_prompt.py | 36 +++++++++- 4 files changed, 147 insertions(+), 19 deletions(-) diff --git a/plugins/pr-review/action.yml b/plugins/pr-review/action.yml index a8939507..34c4b614 100644 --- a/plugins/pr-review/action.yml +++ b/plugins/pr-review/action.yml @@ -28,7 +28,12 @@ inputs: required: false default: 'false' use-sub-agents: - description: "When true, enable sub-agent delegation for file-level reviews. The main agent acts as a coordinator that delegates per-file review work to file_reviewer sub-agents via the TaskToolSet, then consolidates findings into a single PR review. Experimental." + description: > + Controls sub-agent delegation for file-level reviews (experimental). + 'false' (default): single-agent review, no delegation. + 'true': force delegation — coordinator + file_reviewer sub-agents. + 'auto': smart activation — agent gets the TaskToolSet and decides + at runtime whether to delegate based on diff size and complexity. required: false default: 'false' extensions-repo: diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index 9fa377f9..c8059af8 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -166,6 +166,22 @@ def _get_bool_env(name: str, default: bool = False) -> bool: return value.strip().lower() in {"1", "true", "yes", "on"} +def _get_sub_agents_mode() -> str: + """Parse USE_SUB_AGENTS env var into a tri-state mode. 
+ + Returns: + ``"auto"`` – agent decides at runtime whether to delegate + ``"true"`` – force delegation (coordinator + file_reviewer sub-agents) + ``"false"`` – no delegation (single-agent review, the default) + """ + value = os.getenv("USE_SUB_AGENTS", "false").strip().lower() + if value == "auto": + return "auto" + if value in {"1", "true", "yes", "on"}: + return "true" + return "false" + + def _call_github_api( url: str, method: str = "GET", @@ -744,7 +760,7 @@ def validate_environment() -> dict[str, Any]: "model": os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), "base_url": os.getenv("LLM_BASE_URL"), "require_evidence": _get_bool_env("REQUIRE_EVIDENCE"), - "use_sub_agents": _get_bool_env("USE_SUB_AGENTS"), + "use_sub_agents": _get_sub_agents_mode(), "pr_info": { "number": os.getenv("PR_NUMBER"), "title": os.getenv("PR_TITLE"), @@ -784,8 +800,9 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: """Factory for file_reviewer sub-agents used during delegation. Each sub-agent receives a skill that defines its review persona and - expected output format. It has no tools — the coordinator handles - all GitHub API interaction. + expected output format. It gets terminal and file_editor tools to + inspect surrounding code in the PR repo (read-only by convention, not + enforced here); the coordinator handles all GitHub API interaction. """ # REVIEW_STYLE is deprecated for the main reviewer (styles are merged), # but still used here to configure sub-agent tone. Defaults to "standard". @@ -801,11 +818,16 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: ] return Agent( llm=llm, - tools=[], # sub-agents only analyze; coordinator posts the review + tools=[ + Tool(name="terminal"), + Tool(name="file_editor"), + ], agent_context=AgentContext( skills=skills, system_message_suffix=( "You are a file-level code reviewer sub-agent. " + "You can read files with the terminal (cat, grep) and " + "file_editor (view) to understand surrounding context.
" "Return findings as a JSON array. Do NOT call the GitHub API." ), ), @@ -874,11 +896,15 @@ def create_conversation( tools = get_default_tools(enable_browser=False) - use_sub_agents = config.get("use_sub_agents", False) - if use_sub_agents: + sub_agents_mode = config.get("use_sub_agents", "false") + enable_delegation = sub_agents_mode in ("true", "auto") + if enable_delegation: _register_sub_agents() tools.append(Tool(name=TaskToolSet.name)) - logger.info("Sub-agent delegation enabled — TaskToolSet added") + logger.info( + f"Sub-agent delegation enabled (mode={sub_agents_mode}) " + "— TaskToolSet added" + ) agent = Agent( llm=llm, @@ -898,7 +924,7 @@ def create_conversation( "secrets": secrets, "plugins": [PluginSource(source=str(plugin_dir))], } - if use_sub_agents: + if enable_delegation: conversation_kwargs["visualizer"] = DelegationVisualizer( name="PR Review Coordinator" ) diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index 7e87e405..428e6cdb 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -12,10 +12,11 @@ - {commit_id} - The HEAD commit SHA - {review_context} - Previous review comments and thread resolution status -When sub-agent delegation is enabled, the main agent acts as a coordinator -that splits the diff by file and delegates individual file reviews to -sub-agents via the TaskToolSet, then consolidates results and posts the -final review. +Sub-agent delegation modes (controlled by ``use_sub_agents``): +- ``"false"`` (default): single-agent review, no delegation. +- ``"true"``: force delegation — coordinator + file_reviewer sub-agents. +- ``"auto"``: smart activation — agent gets the TaskToolSet and decides + at runtime whether to delegate based on diff size and complexity. 
""" # Template for when there is review context available @@ -134,12 +135,66 @@ ``` """ +# Prompt for "auto" mode: the agent gets the TaskToolSet but decides itself +# whether delegation is worthwhile based on diff size and complexity. +AUTO_DELEGATION_PROMPT = """{skill_trigger} +/github-pr-review + +When posting a review, keep the review body brief unless your active review instructions require a longer structured format. + +Review the PR changes below and identify issues that need to be addressed. + +## Pull Request Information + +- **Title**: {title} +- **Description**: {body} +- **Repository**: {repo_name} +- **Base Branch**: {base_branch} +- **Head Branch**: {head_branch} +- **PR Number**: {pr_number} +- **Commit ID**: {commit_id} + +{review_context_section}{evidence_requirements_section} + +## Delegation Strategy + +You have access to the **task** tool (TaskToolSet) for delegating file-level +reviews to `file_reviewer` sub-agents. **Decide whether to delegate based on +the diff below:** + +- **Delegate** when the diff spans many files (roughly 4+) or is large + (roughly 500+ changed lines). Split by file or small groups of related files + and use `subagent_type: "file_reviewer"` for each chunk. +- **Review directly** when the diff is small or touches only a few files — + delegation overhead is not worth it. + +If you delegate: +1. Send each file/group to a sub-agent with the diff chunk and PR context. +2. Collect and merge findings, de-duplicate, drop noise. +3. Post a single consolidated review via the GitHub API. + +If you review directly: +- Analyze the diff yourself and post the review as usual. + +## Git Diff + +```diff +{diff} +``` + +Analyze the changes and post your review using the GitHub API. +""" + # System-level instruction injected into each file_reviewer sub-agent so it # knows its role, the review style, and the expected output format. FILE_REVIEWER_SKILL = """\ You are a **file-level code reviewer**. 
You will receive a diff for one or more files from a pull request together with PR metadata. +You have access to `terminal` and `file_editor` (read-only) so you can inspect +the full source files for surrounding context — use `cat`, `grep`, or the +file_editor `view` command when the diff alone is not enough to judge an issue. + Review style: {review_style_description} For each issue you find, return a JSON object with these exact fields: @@ -173,7 +228,7 @@ def format_prompt( diff: str, review_context: str = "", require_evidence: bool = False, - use_sub_agents: bool = False, + use_sub_agents: str | bool = False, ) -> str: """Format the PR review prompt with all parameters. @@ -191,9 +246,10 @@ def format_prompt( the review context section is omitted from the prompt. require_evidence: Whether to instruct the reviewer to enforce PR description evidence showing the code works. - use_sub_agents: When True, use the sub-agent coordinator prompt instead of - the single-agent prompt. The coordinator will delegate - file-level reviews to sub-agents and consolidate results. + use_sub_agents: Delegation mode — ``"true"`` forces delegation, + ``"auto"`` lets the agent decide, ``"false"`` (or + ``False``) disables delegation. Accepts legacy + ``bool`` for backward compatibility. 
Returns: Formatted prompt string @@ -210,7 +266,14 @@ def format_prompt( _EVIDENCE_REQUIREMENT_SECTION if require_evidence else "" ) - template = SUB_AGENT_PROMPT if use_sub_agents else PROMPT + # Normalise legacy bool to string mode + mode = str(use_sub_agents).lower() + if mode == "true": + template = SUB_AGENT_PROMPT + elif mode == "auto": + template = AUTO_DELEGATION_PROMPT + else: + template = PROMPT return template.format( skill_trigger=skill_trigger, diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index d23c5ef4..43c17bea 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -19,7 +19,7 @@ def _load_prompt_module(): def _format_prompt( - *, require_evidence: bool, use_sub_agents: bool = False + *, require_evidence: bool, use_sub_agents: str | bool = False ) -> str: module = _load_prompt_module() return module.format_prompt( @@ -113,6 +113,37 @@ def test_sub_agent_prompt_includes_evidence_when_enabled(): assert "## PR Description Evidence Requirement" in prompt +def test_format_prompt_auto_mode_includes_delegation_strategy(): + prompt = _format_prompt(require_evidence=False, use_sub_agents="auto") + + # Auto prompt should include the delegation decision heuristic + assert "Delegation Strategy" in prompt + assert "Delegate" in prompt + assert "Review directly" in prompt + assert "file_reviewer" in prompt + # Should still include PR info and diff + assert "Add evidence enforcement" in prompt + assert "diff --git a/file b/file" in prompt + # Should NOT be the forced-coordinator prompt + assert "review coordinator" not in prompt + + +def test_format_prompt_auto_mode_with_evidence(): + prompt = _format_prompt(require_evidence=True, use_sub_agents="auto") + + assert "Delegation Strategy" in prompt + assert "## PR Description Evidence Requirement" in prompt + + +def test_format_prompt_string_true_behaves_like_bool_true(): + """String 'true' should pick the same template as bool True.""" + prompt_bool = 
_format_prompt(require_evidence=False, use_sub_agents=True) + prompt_str = _format_prompt(require_evidence=False, use_sub_agents="true") + + assert "review coordinator" in prompt_bool + assert "review coordinator" in prompt_str + + def test_get_file_reviewer_skill_content_standard(): module = _load_prompt_module() content = module.get_file_reviewer_skill_content("standard") @@ -120,6 +151,9 @@ def test_get_file_reviewer_skill_content_standard(): assert "file-level code reviewer" in content assert "Balanced review" in content assert "JSON array" in content + # Sub-agents now have tool access + assert "terminal" in content + assert "file_editor" in content def test_get_file_reviewer_skill_content_roasted(): From 322696442962451d43187005b0e90273beb3c282 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 20 Apr 2026 13:58:17 +0000 Subject: [PATCH 08/12] refactor(pr-review): simplify delegation to smart activation only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the tri-state (false/true/auto) in favour of a simple boolean: - false (default): single-agent review, no delegation - true: smart activation — agent gets the TaskToolSet and decides at runtime whether to delegate based on diff size and complexity The forced-coordinator mode (SUB_AGENT_PROMPT) is removed; there is now a single DELEGATION_PROMPT that includes heuristics for when delegation is worthwhile vs overhead. Sub-agent tool access (terminal + file_editor) is kept from the previous commit so file_reviewer sub-agents can inspect surrounding code context. All 8 prompt tests pass. 
Co-authored-by: openhands --- plugins/pr-review/action.yml | 8 +-- plugins/pr-review/scripts/agent_script.py | 30 ++------ plugins/pr-review/scripts/prompt.py | 87 ++++------------------- tests/test_pr_review_prompt.py | 46 +++--------- 4 files changed, 29 insertions(+), 142 deletions(-) diff --git a/plugins/pr-review/action.yml b/plugins/pr-review/action.yml index 34c4b614..898eed4f 100644 --- a/plugins/pr-review/action.yml +++ b/plugins/pr-review/action.yml @@ -29,11 +29,9 @@ inputs: default: 'false' use-sub-agents: description: > - Controls sub-agent delegation for file-level reviews (experimental). - 'false' (default): single-agent review, no delegation. - 'true': force delegation — coordinator + file_reviewer sub-agents. - 'auto': smart activation — agent gets the TaskToolSet and decides - at runtime whether to delegate based on diff size and complexity. + Enable sub-agent delegation for file-level reviews (experimental). + When true, the agent gets the TaskToolSet and decides at runtime + whether to delegate based on diff size and complexity. required: false default: 'false' extensions-repo: diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index c8059af8..cd31c591 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -166,22 +166,6 @@ def _get_bool_env(name: str, default: bool = False) -> bool: return value.strip().lower() in {"1", "true", "yes", "on"} -def _get_sub_agents_mode() -> str: - """Parse USE_SUB_AGENTS env var into a tri-state mode. 
- - Returns: - ``"auto"`` – agent decides at runtime whether to delegate - ``"true"`` – force delegation (coordinator + file_reviewer sub-agents) - ``"false"`` – no delegation (single-agent review, the default) - """ - value = os.getenv("USE_SUB_AGENTS", "false").strip().lower() - if value == "auto": - return "auto" - if value in {"1", "true", "yes", "on"}: - return "true" - return "false" - - def _call_github_api( url: str, method: str = "GET", @@ -760,7 +744,7 @@ def validate_environment() -> dict[str, Any]: "model": os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), "base_url": os.getenv("LLM_BASE_URL"), "require_evidence": _get_bool_env("REQUIRE_EVIDENCE"), - "use_sub_agents": _get_sub_agents_mode(), + "use_sub_agents": _get_bool_env("USE_SUB_AGENTS"), "pr_info": { "number": os.getenv("PR_NUMBER"), "title": os.getenv("PR_TITLE"), @@ -896,15 +880,11 @@ def create_conversation( tools = get_default_tools(enable_browser=False) - sub_agents_mode = config.get("use_sub_agents", "false") - enable_delegation = sub_agents_mode in ("true", "auto") - if enable_delegation: + use_sub_agents = config.get("use_sub_agents", False) + if use_sub_agents: _register_sub_agents() tools.append(Tool(name=TaskToolSet.name)) - logger.info( - f"Sub-agent delegation enabled (mode={sub_agents_mode}) " - "— TaskToolSet added" - ) + logger.info("Sub-agent delegation enabled — TaskToolSet added") agent = Agent( llm=llm, @@ -924,7 +904,7 @@ def create_conversation( "secrets": secrets, "plugins": [PluginSource(source=str(plugin_dir))], } - if enable_delegation: + if use_sub_agents: conversation_kwargs["visualizer"] = DelegationVisualizer( name="PR Review Coordinator" ) diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index 428e6cdb..e88d78d8 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -12,11 +12,9 @@ - {commit_id} - The HEAD commit SHA - {review_context} - Previous review comments and thread 
resolution status -Sub-agent delegation modes (controlled by ``use_sub_agents``): -- ``"false"`` (default): single-agent review, no delegation. -- ``"true"``: force delegation — coordinator + file_reviewer sub-agents. -- ``"auto"``: smart activation — agent gets the TaskToolSet and decides - at runtime whether to delegate based on diff size and complexity. +When sub-agent delegation is enabled (``use_sub_agents=True``), the agent +gets the TaskToolSet and decides at runtime whether to delegate based on +diff size and complexity. """ # Template for when there is review context available @@ -81,63 +79,10 @@ Analyze the changes and post your review using the GitHub API. """ -# Prompt for the main coordinator agent when sub-agent delegation is enabled. -# The coordinator splits the diff into per-file chunks and delegates each -# to a "file_reviewer" sub-agent via the TaskToolSet, then consolidates -# and posts the review. -SUB_AGENT_PROMPT = """{skill_trigger} -/github-pr-review - -You are a **review coordinator**. Your job is to delegate the actual file-level -review work to sub-agents and then consolidate their findings into a single -GitHub PR review. - -## Pull Request Information - -- **Title**: {title} -- **Description**: {body} -- **Repository**: {repo_name} -- **Base Branch**: {base_branch} -- **Head Branch**: {head_branch} -- **PR Number**: {pr_number} -- **Commit ID**: {commit_id} - -{review_context_section}{evidence_requirements_section} - -## Instructions - -You have access to the **task** tool (TaskToolSet). Follow these steps: - -1. **Delegate file reviews** — for each changed file (or small group of - closely related files), call the task tool with: - - `subagent_type`: `"file_reviewer"` - - `prompt`: the diff chunk for the file(s), together with the PR context - (title, description, base/head branch). Ask it to return a structured - list of findings with severity, file path, line number, and a short - description. 
- - `description`: a short label like `"Review src/utils.py"` - -2. **Collect results** — each task tool call returns the sub-agent's findings - as a JSON array. Merge them all together. De-duplicate and drop low-signal - noise. If a sub-agent returns malformed output (not valid JSON), skip its - results and note the file in the review body so nothing is silently lost. - -3. **Post the review** — use the GitHub API (as described by /github-pr-review) - to submit a single PR review with inline comments on the relevant lines. - Keep the top-level review body brief. - -## Full Diff - -The complete diff is provided below. Split it by file when delegating. - -```diff -{diff} -``` -""" - -# Prompt for "auto" mode: the agent gets the TaskToolSet but decides itself -# whether delegation is worthwhile based on diff size and complexity. -AUTO_DELEGATION_PROMPT = """{skill_trigger} +# Prompt used when sub-agent delegation is enabled (use_sub_agents=True). +# The agent gets the TaskToolSet and decides at runtime whether to delegate +# based on diff size and complexity. +DELEGATION_PROMPT = """{skill_trigger} /github-pr-review When posting a review, keep the review body brief unless your active review instructions require a longer structured format. @@ -228,7 +173,7 @@ def format_prompt( diff: str, review_context: str = "", require_evidence: bool = False, - use_sub_agents: str | bool = False, + use_sub_agents: bool = False, ) -> str: """Format the PR review prompt with all parameters. @@ -246,10 +191,9 @@ def format_prompt( the review context section is omitted from the prompt. require_evidence: Whether to instruct the reviewer to enforce PR description evidence showing the code works. - use_sub_agents: Delegation mode — ``"true"`` forces delegation, - ``"auto"`` lets the agent decide, ``"false"`` (or - ``False``) disables delegation. Accepts legacy - ``bool`` for backward compatibility. 
+ use_sub_agents: When True, the agent gets the TaskToolSet and decides + at runtime whether to delegate file-level reviews to + sub-agents based on diff size and complexity. Returns: Formatted prompt string @@ -266,14 +210,7 @@ def format_prompt( _EVIDENCE_REQUIREMENT_SECTION if require_evidence else "" ) - # Normalise legacy bool to string mode - mode = str(use_sub_agents).lower() - if mode == "true": - template = SUB_AGENT_PROMPT - elif mode == "auto": - template = AUTO_DELEGATION_PROMPT - else: - template = PROMPT + template = DELEGATION_PROMPT if use_sub_agents else PROMPT return template.format( skill_trigger=skill_trigger, diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index 43c17bea..95b6ecbe 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -19,7 +19,7 @@ def _load_prompt_module(): def _format_prompt( - *, require_evidence: bool, use_sub_agents: str | bool = False + *, require_evidence: bool, use_sub_agents: bool = False ) -> str: module = _load_prompt_module() return module.format_prompt( @@ -90,15 +90,18 @@ def test_format_prompt_uses_standard_prompt_by_default(): assert "Analyze the changes and post your review" in prompt -def test_format_prompt_uses_sub_agent_prompt_when_enabled(): +def test_format_prompt_uses_delegation_prompt_when_enabled(): prompt = _format_prompt(require_evidence=False, use_sub_agents=True) - # Sub-agent prompt should mention coordination and delegation - assert "review coordinator" in prompt + # Delegation prompt should mention the delegation strategy + assert "Delegation Strategy" in prompt assert "task" in prompt.lower() assert "TaskToolSet" in prompt assert "file_reviewer" in prompt - # Sub-agent prompt should still include the PR info + # Should include smart-activation heuristics + assert "Delegate" in prompt + assert "Review directly" in prompt + # Should still include the PR info assert "Add evidence enforcement" in prompt assert "OpenHands/extensions" in 
prompt assert "abc123" in prompt @@ -106,44 +109,13 @@ def test_format_prompt_uses_sub_agent_prompt_when_enabled(): assert "diff --git a/file b/file" in prompt -def test_sub_agent_prompt_includes_evidence_when_enabled(): +def test_delegation_prompt_includes_evidence_when_enabled(): prompt = _format_prompt(require_evidence=True, use_sub_agents=True) - assert "review coordinator" in prompt - assert "## PR Description Evidence Requirement" in prompt - - -def test_format_prompt_auto_mode_includes_delegation_strategy(): - prompt = _format_prompt(require_evidence=False, use_sub_agents="auto") - - # Auto prompt should include the delegation decision heuristic - assert "Delegation Strategy" in prompt - assert "Delegate" in prompt - assert "Review directly" in prompt - assert "file_reviewer" in prompt - # Should still include PR info and diff - assert "Add evidence enforcement" in prompt - assert "diff --git a/file b/file" in prompt - # Should NOT be the forced-coordinator prompt - assert "review coordinator" not in prompt - - -def test_format_prompt_auto_mode_with_evidence(): - prompt = _format_prompt(require_evidence=True, use_sub_agents="auto") - assert "Delegation Strategy" in prompt assert "## PR Description Evidence Requirement" in prompt -def test_format_prompt_string_true_behaves_like_bool_true(): - """String 'true' should pick the same template as bool True.""" - prompt_bool = _format_prompt(require_evidence=False, use_sub_agents=True) - prompt_str = _format_prompt(require_evidence=False, use_sub_agents="true") - - assert "review coordinator" in prompt_bool - assert "review coordinator" in prompt_str - - def test_get_file_reviewer_skill_content_standard(): module = _load_prompt_module() content = module.get_file_reviewer_skill_content("standard") From 722ea61131957d6062969ebc15999cc7d9452267 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 20 Apr 2026 14:04:57 +0000 Subject: [PATCH 09/12] refactor(pr-review): use suffix instead of separate delegation prompt 
Instead of a full duplicate DELEGATION_PROMPT, append a short _DELEGATION_SUFFIX to the base PROMPT when use_sub_agents=True. The main agent gets the same review prompt it always had, plus a section explaining the task tool is available for large diffs. Rewrote FILE_REVIEWER_SKILL with: - clear section structure (task, tools, review style, output format) - explicit JSON schema table with field types and descriptions - severity guide (critical/major/minor/nit) - example output and empty-array case Removed redundant system_message_suffix from _create_file_reviewer_agent since the skill content now covers everything. All 8 prompt tests pass. Co-authored-by: openhands --- plugins/pr-review/scripts/agent_script.py | 10 +- plugins/pr-review/scripts/prompt.py | 120 ++++++++++------------ tests/test_pr_review_prompt.py | 35 ++++--- 3 files changed, 77 insertions(+), 88 deletions(-) diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index cd31c591..09f7487e 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -806,15 +806,7 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: Tool(name="terminal"), Tool(name="file_editor"), ], - agent_context=AgentContext( - skills=skills, - system_message_suffix=( - "You are a file-level code reviewer sub-agent. " - "You can read files with the terminal (cat, grep) and " - "file_editor (view) to understand surrounding context. " - "Return findings as a JSON array. Do NOT call the GitHub API." 
- ), - ), + agent_context=AgentContext(skills=skills), ) diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index e88d78d8..775dc8f1 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -12,9 +12,9 @@ - {commit_id} - The HEAD commit SHA - {review_context} - Previous review comments and thread resolution status -When sub-agent delegation is enabled (``use_sub_agents=True``), the agent -gets the TaskToolSet and decides at runtime whether to delegate based on -diff size and complexity. +When sub-agent delegation is enabled (``use_sub_agents=True``), a short +delegation suffix is appended to the base prompt giving the agent the +option to delegate file-level reviews via the TaskToolSet. """ # Template for when there is review context available @@ -79,76 +79,62 @@ Analyze the changes and post your review using the GitHub API. """ -# Prompt used when sub-agent delegation is enabled (use_sub_agents=True). -# The agent gets the TaskToolSet and decides at runtime whether to delegate -# based on diff size and complexity. -DELEGATION_PROMPT = """{skill_trigger} -/github-pr-review - -When posting a review, keep the review body brief unless your active review instructions require a longer structured format. - -Review the PR changes below and identify issues that need to be addressed. +# Appended to PROMPT when use_sub_agents=True. Gives the main agent the +# option to delegate via the TaskToolSet without duplicating the base prompt. +_DELEGATION_SUFFIX = """ +## Sub-agent Delegation -## Pull Request Information +You have access to the **task** tool for delegating file-level reviews to +`file_reviewer` sub-agents. Use it when the diff is large — roughly 4+ files +or 500+ changed lines. For smaller diffs, just review directly. 
-- **Title**: {title} -- **Description**: {body} -- **Repository**: {repo_name} -- **Base Branch**: {base_branch} -- **Head Branch**: {head_branch} -- **PR Number**: {pr_number} -- **Commit ID**: {commit_id} - -{review_context_section}{evidence_requirements_section} +When delegating, split the diff by file (or small group of related files) and +call the task tool with `subagent_type: "file_reviewer"`. Each sub-agent will +return a JSON array of findings. Merge them, de-duplicate, drop noise, and +post a single consolidated review via the GitHub API. +""" -## Delegation Strategy +# Skill content injected into each file_reviewer sub-agent. +# Defines the review persona, available tools, and — most importantly — the +# exact JSON schema the sub-agent must return. +FILE_REVIEWER_SKILL = """\ +You are a **file-level code reviewer** sub-agent. -You have access to the **task** tool (TaskToolSet) for delegating file-level -reviews to `file_reviewer` sub-agents. **Decide whether to delegate based on -the diff below:** +## Your Task -- **Delegate** when the diff spans many files (roughly 4+) or is large - (roughly 500+ changed lines). Split by file or small groups of related files - and use `subagent_type: "file_reviewer"` for each chunk. -- **Review directly** when the diff is small or touches only a few files — - delegation overhead is not worth it. +You will receive a diff for one or more files from a pull request. +Review the changes and return structured findings. -If you delegate: -1. Send each file/group to a sub-agent with the diff chunk and PR context. -2. Collect and merge findings, de-duplicate, drop noise. -3. Post a single consolidated review via the GitHub API. +## Tools -If you review directly: -- Analyze the diff yourself and post the review as usual. +You have `terminal` and `file_editor` so you can inspect the full source +files for surrounding context — use `cat`, `grep`, or `file_editor view` +when the diff alone is not enough to judge an issue. 
-## Git Diff +## Review Style -```diff -{diff} -``` +{review_style_description} -Analyze the changes and post your review using the GitHub API. -""" +## Output Format -# System-level instruction injected into each file_reviewer sub-agent so it -# knows its role, the review style, and the expected output format. -FILE_REVIEWER_SKILL = """\ -You are a **file-level code reviewer**. You will receive a diff for one or more -files from a pull request together with PR metadata. +Return a JSON array wrapped in a ```json fenced code block. +Each element must have exactly these fields: -You have access to `terminal` and `file_editor` (read-only) so you can inspect -the full source files for surrounding context — use `cat`, `grep`, or the -file_editor `view` command when the diff alone is not enough to judge an issue. +| Field | Type | Description | +|------------|--------|-------------| +| `path` | string | File path exactly as shown in the diff header (e.g. `src/utils.py`) | +| `line` | int | Line number in the **new** file where the issue occurs | +| `severity` | string | One of: `"critical"`, `"major"`, `"minor"`, `"nit"` | +| `body` | string | Concise description of the issue, including a suggested fix | -Review style: {review_style_description} +### Severity guide +- **critical** — bug, security vulnerability, or data loss +- **major** — incorrect logic, missing error handling, performance issue +- **minor** — style, readability, or minor correctness concern +- **nit** — cosmetic or trivial preference -For each issue you find, return a JSON object with these exact fields: -- `path` (string): the file path exactly as shown in the diff header -- `line` (integer): the NEW file line number where the issue occurs -- `severity` (string): one of `"critical"`, `"major"`, `"minor"`, `"nit"` -- `body` (string): a concise description of the issue with a suggested fix +### Example -Example output: ```json [ {{"path": "src/utils.py", "line": 42, "severity": "major", "body": 
"Unchecked `None` return — add a guard before accessing `.value`."}}, @@ -156,8 +142,13 @@ ] ``` -Return your findings as a JSON array. If you find no issues, return `[]`. -Do NOT post anything to the GitHub API — the coordinator agent will handle that. +If you find no issues, return: +```json +[] +``` + +**Important**: Return ONLY the JSON array. Do NOT post anything to the GitHub +API — the coordinator agent handles that. """ @@ -210,9 +201,7 @@ def format_prompt( _EVIDENCE_REQUIREMENT_SECTION if require_evidence else "" ) - template = DELEGATION_PROMPT if use_sub_agents else PROMPT - - return template.format( + prompt = PROMPT.format( skill_trigger=skill_trigger, title=title, body=body, @@ -226,6 +215,11 @@ def format_prompt( diff=diff, ) + if use_sub_agents: + prompt += _DELEGATION_SUFFIX + + return prompt + def get_file_reviewer_skill_content(review_style: str = "standard") -> str: """Return the file_reviewer sub-agent skill content. diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index 95b6ecbe..4d4bb9d2 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -90,29 +90,25 @@ def test_format_prompt_uses_standard_prompt_by_default(): assert "Analyze the changes and post your review" in prompt -def test_format_prompt_uses_delegation_prompt_when_enabled(): +def test_format_prompt_appends_delegation_suffix_when_enabled(): prompt = _format_prompt(require_evidence=False, use_sub_agents=True) - # Delegation prompt should mention the delegation strategy - assert "Delegation Strategy" in prompt - assert "task" in prompt.lower() - assert "TaskToolSet" in prompt - assert "file_reviewer" in prompt - # Should include smart-activation heuristics - assert "Delegate" in prompt - assert "Review directly" in prompt - # Should still include the PR info + # Should still include the base prompt content assert "Add evidence enforcement" in prompt assert "OpenHands/extensions" in prompt assert "abc123" in prompt - # Should 
include the diff assert "diff --git a/file b/file" in prompt + assert "Analyze the changes and post your review" in prompt + # Delegation suffix appended + assert "Sub-agent Delegation" in prompt + assert "file_reviewer" in prompt + assert "task" in prompt.lower() -def test_delegation_prompt_includes_evidence_when_enabled(): +def test_delegation_suffix_with_evidence(): prompt = _format_prompt(require_evidence=True, use_sub_agents=True) - assert "Delegation Strategy" in prompt + assert "Sub-agent Delegation" in prompt assert "## PR Description Evidence Requirement" in prompt @@ -122,10 +118,17 @@ def test_get_file_reviewer_skill_content_standard(): assert "file-level code reviewer" in content assert "Balanced review" in content - assert "JSON array" in content - # Sub-agents now have tool access + # JSON schema documented + assert "path" in content + assert "line" in content + assert "severity" in content + assert "body" in content + assert "critical" in content + # Tool access documented assert "terminal" in content assert "file_editor" in content + # Must not touch GitHub API + assert "Do NOT post anything to the GitHub" in content def test_get_file_reviewer_skill_content_roasted(): @@ -134,4 +137,4 @@ def test_get_file_reviewer_skill_content_roasted(): assert "file-level code reviewer" in content assert "Linus Torvalds" in content - assert "JSON array" in content + assert "severity" in content From bc425c48f78194aef5c7cf7df693e3d866172330 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 20 Apr 2026 14:17:30 +0000 Subject: [PATCH 10/12] refactor(pr-review): remove deprecated review styles from sub-agent Review styles (standard/roasted) were already merged into a single unified style for the main agent. The sub-agent still had the old style_descriptions dict and REVIEW_STYLE env var dispatch. 
- Bake unified review description directly into FILE_REVIEWER_SKILL - Simplify get_file_reviewer_skill_content() to take no arguments - Remove REVIEW_STYLE env var usage from _create_file_reviewer_agent - Merge two style-specific tests into one unified test All 7 prompt tests pass. Co-authored-by: openhands --- plugins/pr-review/scripts/agent_script.py | 5 +---- plugins/pr-review/scripts/prompt.py | 27 +++++------------------ tests/test_pr_review_prompt.py | 16 ++++---------- 3 files changed, 10 insertions(+), 38 deletions(-) diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index 09f7487e..74a410e4 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -788,10 +788,7 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: access so it can inspect surrounding code context in the PR repo, but the coordinator handles all GitHub API interaction. """ - # REVIEW_STYLE is deprecated for the main reviewer (styles are merged), - # but still used here to configure sub-agent tone. Defaults to "standard". - review_style = os.getenv("REVIEW_STYLE", "standard").lower() - skill_content = get_file_reviewer_skill_content(review_style) + skill_content = get_file_reviewer_skill_content() skills = [ Skill( diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index 775dc8f1..ff42d885 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -113,7 +113,8 @@ ## Review Style -{review_style_description} +Be direct, pragmatic, and thorough. Focus on correctness, security, +simplicity, and maintainability. Call out real problems; skip trivial noise. ## Output Format @@ -221,24 +222,6 @@ def format_prompt( return prompt -def get_file_reviewer_skill_content(review_style: str = "standard") -> str: - """Return the file_reviewer sub-agent skill content. 
- - Args: - review_style: 'standard' or 'roasted' - - Returns: - Formatted skill content string for the file_reviewer agent type - """ - style_descriptions = { - "standard": ( - "Balanced review covering correctness, style, readability, " - "and security. Be constructive." - ), - "roasted": ( - "Linus Torvalds-style brutally honest review. Focus on data " - "structures, simplicity, and pragmatism. No hand-holding." - ), - } - description = style_descriptions.get(review_style, style_descriptions["standard"]) - return FILE_REVIEWER_SKILL.format(review_style_description=description) +def get_file_reviewer_skill_content() -> str: + """Return the file_reviewer sub-agent skill content.""" + return FILE_REVIEWER_SKILL diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index 4d4bb9d2..31437d5b 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -112,12 +112,13 @@ def test_delegation_suffix_with_evidence(): assert "## PR Description Evidence Requirement" in prompt -def test_get_file_reviewer_skill_content_standard(): +def test_get_file_reviewer_skill_content(): module = _load_prompt_module() - content = module.get_file_reviewer_skill_content("standard") + content = module.get_file_reviewer_skill_content() assert "file-level code reviewer" in content - assert "Balanced review" in content + # Unified review style (no more standard/roasted split) + assert "pragmatic" in content # JSON schema documented assert "path" in content assert "line" in content @@ -129,12 +130,3 @@ def test_get_file_reviewer_skill_content_standard(): assert "file_editor" in content # Must not touch GitHub API assert "Do NOT post anything to the GitHub" in content - - -def test_get_file_reviewer_skill_content_roasted(): - module = _load_prompt_module() - content = module.get_file_reviewer_skill_content("roasted") - - assert "file-level code reviewer" in content - assert "Linus Torvalds" in content - assert "severity" in content From 
c0447cae572378cefef1f57a661323b5326ad170 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 20 Apr 2026 14:22:00 +0000 Subject: [PATCH 11/12] refactor(pr-review): remove get_file_reviewer_skill_content, use finish tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete get_file_reviewer_skill_content() — callers now import the FILE_REVIEWER_SKILL constant directly - Sub-agent returns its JSON findings via the finish tool instead of a vague 'return ONLY the JSON array' instruction Co-authored-by: openhands --- plugins/pr-review/scripts/agent_script.py | 6 ++---- plugins/pr-review/scripts/prompt.py | 8 +------- tests/test_pr_review_prompt.py | 10 +++++----- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/plugins/pr-review/scripts/agent_script.py b/plugins/pr-review/scripts/agent_script.py index 74a410e4..d3564939 100644 --- a/plugins/pr-review/scripts/agent_script.py +++ b/plugins/pr-review/scripts/agent_script.py @@ -77,7 +77,7 @@ script_dir = Path(__file__).parent sys.path.insert(0, str(script_dir)) -from prompt import format_prompt, get_file_reviewer_skill_content # noqa: E402 +from prompt import FILE_REVIEWER_SKILL, format_prompt # noqa: E402 logger = get_logger(__name__) @@ -788,12 +788,10 @@ def _create_file_reviewer_agent(llm: LLM) -> Agent: access so it can inspect surrounding code context in the PR repo, but the coordinator handles all GitHub API interaction. """ - skill_content = get_file_reviewer_skill_content() - skills = [ Skill( name="file_review_instructions", - content=skill_content, + content=FILE_REVIEWER_SKILL, trigger=None, ), ] diff --git a/plugins/pr-review/scripts/prompt.py b/plugins/pr-review/scripts/prompt.py index ff42d885..4f33629a 100644 --- a/plugins/pr-review/scripts/prompt.py +++ b/plugins/pr-review/scripts/prompt.py @@ -148,8 +148,7 @@ [] ``` -**Important**: Return ONLY the JSON array. Do NOT post anything to the GitHub -API — the coordinator agent handles that. 
+When you are done, call the `finish` tool with the JSON array as the message. """ @@ -220,8 +219,3 @@ def format_prompt( prompt += _DELEGATION_SUFFIX return prompt - - -def get_file_reviewer_skill_content() -> str: - """Return the file_reviewer sub-agent skill content.""" - return FILE_REVIEWER_SKILL diff --git a/tests/test_pr_review_prompt.py b/tests/test_pr_review_prompt.py index 31437d5b..80c16a5e 100644 --- a/tests/test_pr_review_prompt.py +++ b/tests/test_pr_review_prompt.py @@ -112,12 +112,12 @@ def test_delegation_suffix_with_evidence(): assert "## PR Description Evidence Requirement" in prompt -def test_get_file_reviewer_skill_content(): +def test_file_reviewer_skill_content(): module = _load_prompt_module() - content = module.get_file_reviewer_skill_content() + content = module.FILE_REVIEWER_SKILL assert "file-level code reviewer" in content - # Unified review style (no more standard/roasted split) + # Unified review style assert "pragmatic" in content # JSON schema documented assert "path" in content @@ -128,5 +128,5 @@ def test_get_file_reviewer_skill_content(): # Tool access documented assert "terminal" in content assert "file_editor" in content - # Must not touch GitHub API - assert "Do NOT post anything to the GitHub" in content + # Sub-agent returns results via finish tool + assert "finish" in content From 2b1fcf3ed2f1a905cbde4f8c95e95819a6d04164 Mon Sep 17 00:00:00 2001 From: openhands Date: Mon, 20 Apr 2026 17:29:06 +0000 Subject: [PATCH 12/12] fix(pr-review): correct Known Limitation about sub-agent tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sub-agents DO have terminal and file_editor tools for reading source files and context — the README incorrectly stated they had no tools. 
Co-authored-by: openhands --- plugins/pr-review/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/pr-review/README.md b/plugins/pr-review/README.md index 14fcca69..f75e86d9 100644 --- a/plugins/pr-review/README.md +++ b/plugins/pr-review/README.md @@ -156,7 +156,7 @@ The `use-sub-agents` feature is **experimental** and has the following known con - **LLM-driven JSON parsing**: The coordinator agent relies on the LLM to parse and merge JSON responses from sub-agents. There is no code-level validation of sub-agent output, so malformed responses may cause incomplete reviews. - **Potential information loss during consolidation**: When merging findings from multiple sub-agents, the coordinator may lose or deduplicate findings imperfectly, especially for cross-file issues. - **No integration tests yet**: Current test coverage verifies prompt formatting only. End-to-end validation of the delegation flow requires manual workflow testing. -- **Sub-agents have no tools**: File reviewer sub-agents analyze the diff in their context window only — they cannot run commands or query the GitHub API. +- **Sub-agents have inspection tools only**: File reviewer sub-agents have access to `terminal` and `file_editor`, intended for inspecting full source files and surrounding context. They are not expected to query the GitHub API or post reviews — only the coordinator handles GitHub interaction. These limitations are acceptable for an opt-in experimental feature and will be addressed as the feature matures.