Skip to content

Commit 472328d

Browse files
neubigopenhands-agentVascoSch92
authored
test(sdk): reproduce delegate resume compatibility regression (#2382)
Co-authored-by: openhands <openhands@all-hands.dev> Co-authored-by: Vasco Schiavo <115561717+VascoSch92@users.noreply.github.com>
1 parent ed924a3 commit 472328d

File tree

5 files changed

+224
-70
lines changed

5 files changed

+224
-70
lines changed

openhands-sdk/openhands/sdk/agent/base.py

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -415,11 +415,11 @@ def verify(
415415
416416
Compatibility requirements:
417417
- Agent class/type must match.
418-
- Tools must match exactly (same tool names).
418+
- Tools may only be added, never removed.
419419
420-
Tools are part of the system prompt and cannot be changed mid-conversation.
421-
To use different tools, start a new conversation or use conversation forking
422-
(see https://github.com/OpenHands/OpenHands/issues/8560).
420+
Removing tools breaks backward compatibility because the LLM may have
421+
already been told about them. Adding new tools is safe — the LLM
422+
simply gains new capabilities on the next turn.
423423
424424
All other configuration (LLM, agent_context, condenser, etc.) can be
425425
freely changed between sessions.
@@ -457,24 +457,18 @@ def verify(
457457
if tool_class is not None:
458458
persisted_names.add(tool_class.name)
459459

460-
if runtime_names == persisted_names:
461-
return self
462-
463-
# Tools don't match - this is not allowed
460+
# Removing tools breaks backward compatibility because the LLM may
461+
# have already been told about them. Adding new tools is safe — the
462+
# LLM simply gains new capabilities on the next turn.
464463
missing_in_runtime = persisted_names - runtime_names
465-
added_in_runtime = runtime_names - persisted_names
466-
467-
details: list[str] = []
468464
if missing_in_runtime:
469-
details.append(f"removed: {sorted(missing_in_runtime)}")
470-
if added_in_runtime:
471-
details.append(f"added: {sorted(added_in_runtime)}")
472-
473-
raise ValueError(
474-
f"Cannot resume conversation: tools cannot be changed mid-conversation "
475-
f"({'; '.join(details)}). "
476-
f"To use different tools, start a new conversation."
477-
)
465+
raise ValueError(
466+
f"Cannot resume conversation: tools were removed mid-conversation "
467+
f"(removed: {sorted(missing_in_runtime)}). "
468+
f"To use different tools, start a new conversation."
469+
)
470+
471+
return self
478472

479473
def model_dump_succint(self, **kwargs):
480474
"""Like model_dump, but excludes None fields by default."""

tests/cross/test_agent_loading.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -160,17 +160,15 @@ def test_conversation_fails_when_removing_tools():
160160
visualizer=None,
161161
)
162162

163-
assert "tools cannot be changed mid-conversation" in str(exc_info.value)
163+
assert "tools were removed mid-conversation" in str(exc_info.value)
164164
assert "removed:" in str(exc_info.value)
165165
assert "FileEditorTool" in str(exc_info.value)
166166

167167

168-
def test_conversation_fails_when_adding_tools():
169-
"""Test that adding new tools fails.
168+
def test_conversation_succeeds_when_adding_tools():
169+
"""Test that adding new tools succeeds on resume.
170170
171-
Tools are part of the system prompt and cannot be changed mid-conversation.
172-
To use different tools, start a new conversation or use conversation forking.
173-
See: https://github.com/OpenHands/OpenHands/issues/8560
171+
Adding tools is allowed — only removing tools is rejected.
174172
"""
175173
with tempfile.TemporaryDirectory() as temp_dir:
176174
# Create conversation with only one tool
@@ -194,7 +192,7 @@ def test_conversation_fails_when_adding_tools():
194192
conversation_id = conversation.state.id
195193
del conversation
196194

197-
# Resume with additional tools - should FAIL (tools must match exactly)
195+
# Resume with additional tools - should SUCCEED (adding tools is allowed)
198196
expanded_tools = [
199197
Tool(name="TerminalTool"),
200198
Tool(name="FileEditorTool"), # New tool added
@@ -204,18 +202,14 @@ def test_conversation_fails_when_adding_tools():
204202
)
205203
expanded_agent = Agent(llm=llm2, tools=expanded_tools)
206204

207-
with pytest.raises(ValueError) as exc_info:
208-
LocalConversation(
209-
agent=expanded_agent,
210-
workspace=temp_dir,
211-
persistence_dir=temp_dir,
212-
conversation_id=conversation_id,
213-
visualizer=None,
214-
)
215-
216-
assert "tools cannot be changed mid-conversation" in str(exc_info.value)
217-
assert "added:" in str(exc_info.value)
218-
assert "FileEditorTool" in str(exc_info.value)
205+
conversation = LocalConversation(
206+
agent=expanded_agent,
207+
workspace=temp_dir,
208+
persistence_dir=temp_dir,
209+
conversation_id=conversation_id,
210+
visualizer=None,
211+
)
212+
assert conversation is not None
219213

220214

221215
def test_conversation_fails_when_used_tool_is_missing():
@@ -274,10 +268,8 @@ def test_conversation_fails_when_used_tool_is_missing():
274268
)
275269
reduced_agent = Agent(llm=llm2, tools=reduced_tools)
276270

277-
# This should raise - tools cannot be changed mid-conversation
278-
with pytest.raises(
279-
ValueError, match="tools cannot be changed mid-conversation"
280-
):
271+
# This should raise - tools were removed mid-conversation
272+
with pytest.raises(ValueError, match="tools were removed mid-conversation"):
281273
LocalConversation(
282274
agent=reduced_agent,
283275
workspace=temp_dir,

tests/cross/test_conversation_restore_behavior.py

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ def test_conversation_restore_fails_when_removing_tools(mock_completion):
231231
)
232232

233233
with pytest.raises(
234-
ValueError, match="tools cannot be changed mid-conversation"
234+
ValueError, match="tools were removed mid-conversation"
235235
) as exc:
236236
lifecycle.restore(runtime_agent)
237237

@@ -240,8 +240,11 @@ def test_conversation_restore_fails_when_removing_tools(mock_completion):
240240

241241

242242
@patch("openhands.sdk.llm.llm.litellm_completion")
243-
def test_conversation_restore_fails_when_adding_tools(mock_completion):
244-
"""Restore must fail when runtime tools add a new tool."""
243+
def test_conversation_restore_succeeds_when_adding_tools(mock_completion):
244+
"""Restore must succeed when runtime tools add a new tool.
245+
246+
Adding tools is allowed — only removing tools is rejected.
247+
"""
245248

246249
mock_completion.return_value = create_mock_litellm_response(
247250
content="I'll help you with that.", finish_reason="stop"
@@ -274,13 +277,8 @@ def test_conversation_restore_fails_when_adding_tools(mock_completion):
274277
skill_keyword="alpha",
275278
)
276279

277-
with pytest.raises(
278-
ValueError, match="tools cannot be changed mid-conversation"
279-
) as exc:
280-
lifecycle.restore(runtime_agent)
281-
282-
assert "added:" in str(exc.value)
283-
assert "FileEditorTool" in str(exc.value)
280+
conversation = lifecycle.restore(runtime_agent)
281+
assert conversation is not None
284282

285283

286284
@patch("openhands.sdk.llm.llm.litellm_completion")
@@ -364,7 +362,7 @@ def test_conversation_restore_fails_when_default_tools_removed(mock_completion):
364362
)
365363

366364
with pytest.raises(
367-
ValueError, match="tools cannot be changed mid-conversation"
365+
ValueError, match="tools were removed mid-conversation"
368366
) as exc:
369367
lifecycle.restore(runtime_agent)
370368

@@ -373,8 +371,11 @@ def test_conversation_restore_fails_when_default_tools_removed(mock_completion):
373371

374372

375373
@patch("openhands.sdk.llm.llm.litellm_completion")
376-
def test_conversation_restore_fails_when_default_tools_added(mock_completion):
377-
"""Restore must fail if include_default_tools adds a built-in tool."""
374+
def test_conversation_restore_succeeds_when_default_tools_added(mock_completion):
375+
"""Restore must succeed if include_default_tools adds a built-in tool.
376+
377+
Adding tools is allowed — only removing tools is rejected.
378+
"""
378379

379380
mock_completion.return_value = create_mock_litellm_response(
380381
content="I'll help you with that.", finish_reason="stop"
@@ -409,13 +410,8 @@ def test_conversation_restore_fails_when_default_tools_added(mock_completion):
409410
include_default_tools=["FinishTool", "ThinkTool"],
410411
)
411412

412-
with pytest.raises(
413-
ValueError, match="tools cannot be changed mid-conversation"
414-
) as exc:
415-
lifecycle.restore(runtime_agent)
416-
417-
assert "added:" in str(exc.value)
418-
assert "think" in str(exc.value)
413+
conversation = lifecycle.restore(runtime_agent)
414+
assert conversation is not None
419415

420416

421417
@patch("openhands.sdk.llm.llm.litellm_completion")
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
{
2+
"id": "11111111-2222-3333-4444-555555555555",
3+
"agent": {
4+
"llm": {
5+
"model": "gpt-4o-mini",
6+
"api_key": "**********",
7+
"openrouter_site_url": "https://docs.all-hands.dev/",
8+
"openrouter_app_name": "OpenHands",
9+
"num_retries": 5,
10+
"retry_multiplier": 8.0,
11+
"retry_min_wait": 8,
12+
"retry_max_wait": 64,
13+
"timeout": 300,
14+
"max_message_chars": 30000,
15+
"temperature": 0.0,
16+
"top_p": 1.0,
17+
"max_input_tokens": 128000,
18+
"max_output_tokens": 16384,
19+
"stream": false,
20+
"drop_params": true,
21+
"modify_params": true,
22+
"disable_stop_word": false,
23+
"caching_prompt": true,
24+
"log_completions": false,
25+
"log_completions_folder": "logs/completions",
26+
"native_tool_calling": true,
27+
"reasoning_effort": "high",
28+
"enable_encrypted_reasoning": true,
29+
"prompt_cache_retention": "24h",
30+
"extended_thinking_budget": 200000,
31+
"usage_id": "test-llm",
32+
"litellm_extra_body": {}
33+
},
34+
"tools": [
35+
{
36+
"name": "terminal",
37+
"params": {}
38+
},
39+
{
40+
"name": "file_editor",
41+
"params": {}
42+
},
43+
{
44+
"name": "task_tracker",
45+
"params": {}
46+
}
47+
],
48+
"mcp_config": {},
49+
"include_default_tools": [
50+
"FinishTool",
51+
"ThinkTool"
52+
],
53+
"system_prompt_filename": "system_prompt.j2",
54+
"security_policy_filename": "security_policy.j2",
55+
"system_prompt_kwargs": {
56+
"cli_mode": true,
57+
"llm_security_analyzer": true
58+
},
59+
"condenser": {
60+
"llm": {
61+
"model": "gpt-4o-mini",
62+
"api_key": "**********",
63+
"openrouter_site_url": "https://docs.all-hands.dev/",
64+
"openrouter_app_name": "OpenHands",
65+
"num_retries": 5,
66+
"retry_multiplier": 8.0,
67+
"retry_min_wait": 8,
68+
"retry_max_wait": 64,
69+
"timeout": 300,
70+
"max_message_chars": 30000,
71+
"temperature": 0.0,
72+
"top_p": 1.0,
73+
"max_input_tokens": 128000,
74+
"max_output_tokens": 16384,
75+
"stream": false,
76+
"drop_params": true,
77+
"modify_params": true,
78+
"disable_stop_word": false,
79+
"caching_prompt": true,
80+
"log_completions": false,
81+
"log_completions_folder": "logs/completions",
82+
"native_tool_calling": true,
83+
"reasoning_effort": "high",
84+
"enable_encrypted_reasoning": true,
85+
"prompt_cache_retention": "24h",
86+
"extended_thinking_budget": 200000,
87+
"usage_id": "condenser",
88+
"litellm_extra_body": {}
89+
},
90+
"max_size": 80,
91+
"keep_first": 4,
92+
"minimum_progress": 0.1,
93+
"hard_context_reset_max_retries": 5,
94+
"hard_context_reset_context_scaling": 0.8,
95+
"kind": "LLMSummarizingCondenser"
96+
},
97+
"kind": "Agent"
98+
},
99+
"workspace": {
100+
"working_dir": "/workspace/project/software-agent-sdk/.agent_tmp/repro/persistence",
101+
"kind": "LocalWorkspace"
102+
},
103+
"persistence_dir": "/workspace/project/software-agent-sdk/.agent_tmp/repro/persistence/11111111222233334444555555555555",
104+
"max_iterations": 500,
105+
"stuck_detection": true,
106+
"execution_status": "idle",
107+
"confirmation_policy": {
108+
"kind": "NeverConfirm"
109+
},
110+
"activated_knowledge_skills": [],
111+
"blocked_actions": {},
112+
"blocked_messages": {},
113+
"stats": {
114+
"usage_to_metrics": {}
115+
},
116+
"secret_registry": {
117+
"secret_sources": {}
118+
},
119+
"agent_state": {}
120+
}

0 commit comments

Comments
 (0)