Skip to content

Commit 7a76a1d

Browse files
Debug Agentclaude
andcommitted
refactor(acp): inline vision probe into supports_vision()
Map values were all True and the helper's only caller was ACPAgent.supports_vision(); drop the wrapper and check substring membership inline. Replaces the provider→bool dict with a plain tuple of known vision-capable ACP server markers. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e85cbb9 commit 7a76a1d

2 files changed

Lines changed: 33 additions & 90 deletions

File tree

openhands-sdk/openhands/sdk/agent/acp_agent.py

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -134,40 +134,21 @@ def _make_dummy_llm() -> LLM:
134134
}
135135
_DEFAULT_BYPASS_MODE = "full-access"
136136

137-
# ACP server identifier substrings → whether the server forwards ACP
138-
# ImageContentBlock prompts to a vision-capable underlying model.
139-
#
140-
# Verified against upstream source (all declare prompt_capabilities.image=true
141-
# and map ContentBlock::Image to their provider's native image format):
142-
# claude-agent-acp → Claude Sonnet/Opus/Haiku 3.5+ (vision)
137+
# ACP servers known to forward ImageContentBlock prompts to a vision-capable
138+
# underlying model. Verified against upstream source — all declare
139+
# prompt_capabilities.image=true and map ContentBlock::Image to their
140+
# provider's native image format:
141+
# claude-agent-acp → Claude Sonnet/Opus/Haiku 3.5+
143142
# src/acp-agent.ts: promptToClaude() case "image"
144-
# gemini-cli → Gemini 1.5/2.x Pro/Flash (vision)
143+
# gemini-cli → Gemini 1.5/2.x Pro/Flash
145144
# packages/cli/src/acp/acpClient.ts: #resolvePrompt() case "image"
146-
# codex-acp → GPT-5 family via Codex CLI (vision)
145+
# codex-acp → GPT-5 family via Codex CLI
147146
# src/thread.rs: build_prompt_items() image → UserInput::Image
148-
_ACP_VISION_PROVIDER_MARKERS: dict[str, bool] = {
149-
"claude-agent-acp": True,
150-
"gemini-cli": True,
151-
"codex-acp": True,
152-
}
153-
154-
155-
def _acp_command_supports_vision(command: list[str], agent_name: str) -> bool:
156-
"""Return True if the ACP server forwards inline images to a vision model.
157-
158-
Matches on either the runtime agent name from InitializeResponse (when
159-
available) or the configured launch command (always available). Falls
160-
back to False for unknown servers so we do not silently send images a
161-
server may drop.
162-
"""
163-
haystacks: list[str] = []
164-
if agent_name:
165-
haystacks.append(agent_name.lower())
166-
haystacks.append(" ".join(command).lower())
167-
for marker, supports in _ACP_VISION_PROVIDER_MARKERS.items():
168-
if any(marker in h for h in haystacks):
169-
return supports
170-
return False
147+
_VISION_CAPABLE_ACP_SERVERS: tuple[str, ...] = (
148+
"claude-agent-acp",
149+
"gemini-cli",
150+
"codex-acp",
151+
)
171152

172153

173154
# ACP auth method ID → environment variable that supplies the credential.
@@ -815,9 +796,11 @@ def supports_vision(self) -> bool:
815796
does not recognise, so the base-class ``llm.vision_is_active()``
816797
always returns False for ACP agents. Resolve the real capability
817798
from the ACP server identity (launch command or the agent name
818-
reported by InitializeResponse) instead.
799+
reported by InitializeResponse) instead. Unknown servers fall back
800+
to False so we don't silently send images a server may drop.
819801
"""
820-
return _acp_command_supports_vision(self.acp_command, self._agent_name)
802+
probe = f"{self._agent_name} {' '.join(self.acp_command)}".lower()
803+
return any(marker in probe for marker in _VISION_CAPABLE_ACP_SERVERS)
821804

822805
def get_all_llms(self) -> Generator[LLM]:
823806
yield self.llm

tests/sdk/agent/test_acp_agent.py

Lines changed: 17 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414
from openhands.sdk.agent.acp_agent import (
1515
ACPAgent,
16-
_acp_command_supports_vision,
1716
_build_session_meta,
1817
_estimate_cost_from_tokens,
1918
_extract_token_usage,
@@ -2069,80 +2068,41 @@ def test_empty_name_defaults_to_full_access(self):
20692068

20702069

20712070
# ---------------------------------------------------------------------------
2072-
# _acp_command_supports_vision + ACPAgent.supports_vision()
2071+
# ACPAgent.supports_vision()
20732072
# ---------------------------------------------------------------------------
20742073

20752074

2076-
class TestACPCommandSupportsVision:
2075+
class TestACPAgentSupportsVision:
20772076
"""All three ACP providers OpenHands supports — claude-agent-acp,
20782077
gemini-cli, codex-acp — declare prompt_capabilities.image=true and
20792078
forward images to a vision-capable underlying model."""
20802079

2081-
def test_claude_agent_acp_command(self):
2082-
assert (
2083-
_acp_command_supports_vision(
2084-
["npx", "-y", "@agentclientprotocol/claude-agent-acp"],
2085-
"",
2086-
)
2087-
is True
2088-
)
2089-
2090-
def test_claude_agent_acp_from_agent_name(self):
2091-
assert _acp_command_supports_vision(["/some/opaque/binary"], "claude-agent-acp")
2092-
assert _acp_command_supports_vision([], "claude-agent-acp")
2093-
2094-
def test_gemini_cli_command(self):
2095-
assert (
2096-
_acp_command_supports_vision(["gemini-cli", "--experimental-acp"], "")
2097-
is True
2098-
)
2099-
2100-
def test_gemini_cli_from_agent_name(self):
2101-
assert _acp_command_supports_vision(["node", "server.js"], "gemini-cli")
2102-
2103-
def test_codex_acp_command(self):
2104-
assert _acp_command_supports_vision(["codex-acp"], "") is True
2105-
2106-
def test_codex_acp_from_agent_name(self):
2107-
assert _acp_command_supports_vision([], "codex-acp")
2108-
2109-
def test_agent_name_takes_precedence_over_generic_command(self):
2110-
# A user can launch any of our three servers via ``node`` or a
2111-
# local path; the agent name from InitializeResponse still lets
2112-
# us identify the provider.
2113-
assert (
2114-
_acp_command_supports_vision(
2115-
["/usr/local/bin/node", "/opt/server.js"],
2116-
"claude-agent-acp",
2117-
)
2118-
is True
2119-
)
2120-
2121-
def test_unknown_server_defaults_to_false(self):
2122-
# Safe default: unknown servers may silently drop image blocks,
2123-
# so advertise no-vision rather than send content that gets lost.
2124-
assert _acp_command_supports_vision(["some-random-acp-server"], "") is False
2125-
2126-
def test_empty_inputs_default_to_false(self):
2127-
assert _acp_command_supports_vision([], "") is False
2128-
2129-
2130-
class TestACPAgentSupportsVision:
2131-
def test_claude_agent_supports_vision(self):
2080+
def test_claude_agent_via_command(self):
21322081
agent = ACPAgent(
21332082
acp_command=["npx", "-y", "@agentclientprotocol/claude-agent-acp"]
21342083
)
21352084
assert agent.supports_vision() is True
21362085

2137-
def test_gemini_cli_supports_vision(self):
2086+
def test_gemini_cli_via_command(self):
21382087
agent = ACPAgent(acp_command=["gemini-cli", "--experimental-acp"])
21392088
assert agent.supports_vision() is True
21402089

2141-
def test_codex_acp_supports_vision(self):
2090+
def test_codex_acp_via_command(self):
21422091
agent = ACPAgent(acp_command=["codex-acp"])
21432092
assert agent.supports_vision() is True
21442093

2145-
def test_unknown_acp_server_does_not_claim_vision(self):
2094+
def test_agent_name_resolves_opaque_launcher(self):
2095+
# A user can launch any of our three servers via ``node`` or a
2096+
# local path; the agent name from InitializeResponse still lets
2097+
# us identify the provider.
2098+
agent = ACPAgent(acp_command=["/usr/local/bin/node", "/opt/server.js"])
2099+
assert agent.supports_vision() is False
2100+
agent._agent_name = "claude-agent-acp"
2101+
assert agent.supports_vision() is True
2102+
2103+
def test_unknown_acp_server_defaults_to_false(self):
2104+
# Unknown servers may silently drop image blocks, so advertise
2105+
# no-vision rather than send content that gets lost.
21462106
agent = ACPAgent(acp_command=["echo", "test"])
21472107
assert agent.supports_vision() is False
21482108

0 commit comments

Comments
 (0)