OpenHands · simonrosenberg · Mar 13, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026
diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml
@@ -99,6 +99,18 @@ on:
                     - gemini
                     - gpt5
                     - planning
+            agent_type:
+                description: >-
+                    Agent type: 'default' for standard Agent,
+                    'acp-claude' for ACPAgent with Claude Code,
+                    'acp-codex' for ACPAgent with Codex.
+                required: false
+                default: default
+                type: choice
+                options:
+                    - default
+                    - acp-claude
+                    - acp-codex
 
 
 env:
@@ -276,6 +288,7 @@ jobs:
                   ENABLE_CONVERSATION_EVENT_LOGGING: ${{ github.event.inputs.enable_conversation_event_logging || false }}
                   MAX_RETRIES: ${{ github.event.inputs.max_retries || '3' }}
                   TOOL_PRESET: ${{ github.event.inputs.tool_preset || 'default' }}
+                  AGENT_TYPE: ${{ github.event.inputs.agent_type || 'default' }}
                   TRIGGERED_BY: ${{ github.actor }}
               run: |
                   echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, tool preset: $TOOL_PRESET)"
@@ -294,8 +307,9 @@ jobs:
                     --argjson enable_conversation_event_logging "$ENABLE_CONVERSATION_EVENT_LOGGING" \
                     --arg max_retries "$MAX_RETRIES" \
                     --arg tool_preset "$TOOL_PRESET" \
+                    --arg agent_type "$AGENT_TYPE" \
                     --arg triggered_by "$TRIGGERED_BY" \
-                    '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, triggered_by: $triggered_by}}')
+                    '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, triggered_by: $triggered_by}}')
                   RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \
                     -H "Authorization: token $PAT_TOKEN" \
                     -H "Accept: application/vnd.github+json" \

diff --git a/examples/01_standalone_sdk/40_acp_agent_example.py b/examples/01_standalone_sdk/40_acp_agent_example.py
@@ -1,6 +1,6 @@
 """Example: Using ACPAgent with Claude Code ACP server.
 
-This example shows how to use an ACP-compatible server (claude-code-acp)
+This example shows how to use an ACP-compatible server (claude-agent-acp)
 as the agent backend instead of direct LLM calls.  It also demonstrates
 ``ask_agent()`` — a stateless side-question that forks the ACP session
 and leaves the main conversation untouched.
@@ -19,7 +19,7 @@
 from openhands.sdk.conversation import Conversation
 
 
-agent = ACPAgent(acp_command=["npx", "-y", "@zed-industries/claude-code-acp"])
+agent = ACPAgent(acp_command=["npx", "-y", "@zed-industries/claude-agent-acp"])
 
 try:
     cwd = os.getcwd()

diff --git a/examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py b/examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py
@@ -0,0 +1,75 @@
+"""Example: ACPAgent with Remote Runtime via API.
+
+This example demonstrates running an ACPAgent (Claude Code via ACP protocol)
+in a remote sandboxed environment via Runtime API. It follows the same pattern
+as 04_convo_with_api_sandboxed_server.py but uses ACPAgent instead of the
+default LLM-based Agent.
+
+Usage:
+  uv run examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py
+
+Requirements:
+  - ANTHROPIC_API_KEY: API key for Claude (forwarded to the container)
+  - RUNTIME_API_KEY: API key for runtime API access
+"""
+
+import os
+import time
+
+from openhands.sdk import (
+    Conversation,
+    RemoteConversation,
+    get_logger,
+)
+from openhands.sdk.agent import ACPAgent
+from openhands.workspace import APIRemoteWorkspace
+
+logger = get_logger(__name__)
+
+
+anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
+assert anthropic_api_key, "ANTHROPIC_API_KEY required"
+
+runtime_api_key = os.getenv("RUNTIME_API_KEY")
+assert runtime_api_key, "RUNTIME_API_KEY required"
+
+# If GITHUB_SHA is set (e.g. in CI for a PR), tag the image with its 7-char short
+# SHA for consistency with that commit; otherwise fall back to the `main` tag.
+server_image_sha = os.getenv("GITHUB_SHA") or "main"
+server_image = f"ghcr.io/openhands/agent-server:{server_image_sha[:7]}-python-amd64"
+logger.info(f"Using server image: {server_image}")
+
+with APIRemoteWorkspace(
+    runtime_api_url=os.getenv("RUNTIME_API_URL", "https://runtime.eval.all-hands.dev"),
+    runtime_api_key=runtime_api_key,
+    server_image=server_image,
+    image_pull_policy="Always",
+    target_type="binary",  # CI builds binary target images
+    forward_env=["ANTHROPIC_API_KEY"],
+) as workspace:
+    agent = ACPAgent(
+        acp_command=["claude-agent-acp"],  # Pre-installed in Docker image
+    )
+
+    received_events: list = []
+    last_event_time = {"ts": time.time()}
+
+    def event_callback(event) -> None:
+        received_events.append(event)
+        last_event_time["ts"] = time.time()
+
+    conversation = Conversation(
+        agent=agent, workspace=workspace, callbacks=[event_callback]
+    )
+    assert isinstance(conversation, RemoteConversation)
+
+    try:
+        conversation.send_message(
+            "List the files in /workspace and describe what you see."
+        )
+        conversation.run()
+
+        while time.time() - last_event_time["ts"] < 2.0:
+            time.sleep(0.1)
+    finally:
+        conversation.close()
diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py
@@ -37,6 +37,7 @@
 from openhands.agent_server.tool_router import tool_router
 from openhands.agent_server.vscode_router import vscode_router
 from openhands.agent_server.vscode_service import get_vscode_service
+from openhands.sdk.agent import ACPAgent  # noqa: F401  — register in DiscriminatedUnionMixin
 from openhands.sdk.logger import DEBUG, get_logger
 
 

diff --git a/openhands-agent-server/openhands/agent_server/docker/Dockerfile b/openhands-agent-server/openhands/agent_server/docker/Dockerfile
@@ -78,7 +78,22 @@ RUN set -eux; \
     chown -R ${USERNAME}:${USERNAME} /workspace; \
     rm -rf /var/lib/apt/lists/*
 
-# NOTE: we should NOT include UV_PROJECT_ENVIRONMENT here, 
+# Pre-install ACP servers for ACPAgent support (Claude Code + Codex)
+# Install Node.js/npm if not present (SWE-bench base images may lack them)
+RUN set -eux; \
+    if ! command -v npm >/dev/null 2>&1; then \
+        curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \
+        apt-get install -y --no-install-recommends nodejs && \
+        rm -rf /var/lib/apt/lists/*; \
+    fi; \
+    npm install -g @zed-industries/claude-agent-acp @zed-industries/codex-acp
+
+# Configure Claude Code managed settings for headless operation:
+# pre-approve the Edit, Read, and Bash tools (no human in the loop to approve).
+RUN mkdir -p /etc/claude-code && \
+    echo '{"permissions":{"allow":["Edit","Read","Bash"]}}' > /etc/claude-code/managed-settings.json
+
+# NOTE: we should NOT include UV_PROJECT_ENVIRONMENT here,
 # since the agent might use it to perform other work (e.g. tools that use Python)
 COPY --from=ghcr.io/astral-sh/uv /uv /uvx /bin/
 

diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py
@@ -425,7 +425,8 @@ async def start(self):
         workspace = self.stored.workspace
         assert isinstance(workspace, LocalWorkspace)
         Path(workspace.working_dir).mkdir(parents=True, exist_ok=True)
-        agent = Agent.model_validate(
+        agent_cls = type(self.stored.agent)
+        agent = agent_cls.model_validate(
             self.stored.agent.model_dump(context={"expose_secrets": True}),
         )
 
@@ -605,7 +606,7 @@ async def close(self):
         await self._pub_sub.close()
         if self._conversation:
             loop = asyncio.get_running_loop()
-            loop.run_in_executor(None, self._conversation.close)
+            await loop.run_in_executor(None, self._conversation.close)
 
     async def generate_title(
         self, llm: "LLM | None" = None, max_length: int = 50