Skip to content

Commit 46f3d78

Browse files
fix: improve conversation resilience for long-running and resumed sessions (#2384)
Co-authored-by: openhands <openhands@all-hands.dev>
1 parent c3bb8e7 commit 46f3d78

File tree

7 files changed

+234
-16
lines changed

7 files changed

+234
-16
lines changed

openhands-sdk/openhands/sdk/agent/agent.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
fix_malformed_tool_arguments,
1111
make_llm_completion,
1212
prepare_llm_messages,
13+
sanitize_json_control_chars,
1314
)
1415
from openhands.sdk.conversation import (
1516
ConversationCallbackType,
@@ -574,7 +575,10 @@ def _get_action_event(
574575
# Validate arguments
575576
security_risk: risk.SecurityRisk = risk.SecurityRisk.UNKNOWN
576577
try:
577-
arguments = json.loads(tool_call.arguments)
578+
# Sanitize raw control characters (U+0000–U+001F) that some
579+
# models emit as literal bytes instead of JSON escape sequences.
580+
sanitized_args = sanitize_json_control_chars(tool_call.arguments)
581+
arguments = json.loads(sanitized_args)
578582

579583
# Fix malformed arguments (e.g., JSON strings for list/dict fields)
580584
arguments = fix_malformed_tool_arguments(arguments, tool.action_type)

openhands-sdk/openhands/sdk/agent/utils.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import re
23
import types
34
from collections.abc import Sequence
45
from typing import (
@@ -19,6 +20,41 @@
1920
from openhands.sdk.tool import Action, ToolDefinition
2021

2122

23+
# Regex matching raw ASCII control characters (U+0000–U+001F) that are
24+
# illegal inside JSON strings per RFC 8259 §7.
25+
_CONTROL_CHAR_RE = re.compile(r"[\x00-\x1f]")
26+
27+
# Mapping from raw control-char ordinals to their JSON-legal two-character
28+
# escape sequences. Characters without a short alias fall back to \uXXXX.
29+
_CTRL_ESCAPE_TABLE: dict[int, str] = {
30+
0x08: "\\b",
31+
0x09: "\\t",
32+
0x0A: "\\n",
33+
0x0C: "\\f",
34+
0x0D: "\\r",
35+
}
36+
37+
38+
def _escape_control_char(m: re.Match[str]) -> str:
39+
"""Replace a single raw control character with its JSON escape."""
40+
ch = m.group(0)
41+
return _CTRL_ESCAPE_TABLE.get(ord(ch), f"\\u{ord(ch):04x}")
42+
43+
44+
def sanitize_json_control_chars(raw: str) -> str:
45+
"""Escape raw control characters in a JSON string produced by an LLM.
46+
47+
Some models (e.g. kimi-k2.5, minimax-m2.5) emit literal control
48+
characters (newline, tab, …) inside ``tool_call.arguments`` instead of
49+
their proper two-character JSON escape sequences (``\\n``, ``\\t``, …).
50+
``json.loads`` rejects these per RFC 8259.
51+
52+
This function replaces every raw U+0000–U+001F byte with the correct
53+
escape sequence so the string becomes valid JSON.
54+
"""
55+
return _CONTROL_CHAR_RE.sub(_escape_control_char, raw)
56+
57+
2258
def fix_malformed_tool_arguments(
2359
arguments: dict[str, Any], action_type: type[Action]
2460
) -> dict[str, Any]:

openhands-sdk/openhands/sdk/conversation/event_store.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,19 @@ def _get_single_item(self, idx: SupportsIndex) -> Event:
7979
i += self._length
8080
if i < 0 or i >= self._length:
8181
raise IndexError("Event index out of range")
82-
txt = self._fs.read(self._path(i))
82+
try:
83+
path = self._path(i)
84+
except KeyError:
85+
# In-memory index is stale (e.g., external file modifications
86+
# or concurrent writes). Rebuild from disk and retry once.
87+
logger.warning("Stale EventLog index at %d; rebuilding from disk.", i)
88+
self._length = self._scan_and_build_index()
89+
if i >= self._length:
90+
raise IndexError("Event index out of range")
91+
path = self._path(i)
92+
txt = self._fs.read(path)
8393
if not txt:
84-
raise FileNotFoundError(f"Missing event file: {self._path(i)}")
94+
raise FileNotFoundError(f"Missing event file: {path}")
8595
return Event.model_validate_json(txt)
8696

8797
def __iter__(self) -> Iterator[Event]:

openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,10 +526,13 @@ def send_message(self, message: str | Message, sender: str | None = None) -> Non
526526
"Only user messages are allowed to be sent to the agent."
527527
)
528528
with self._state:
529-
if self._state.execution_status == ConversationExecutionStatus.FINISHED:
529+
if self._state.execution_status in (
530+
ConversationExecutionStatus.FINISHED,
531+
ConversationExecutionStatus.STUCK,
532+
):
530533
self._state.execution_status = (
531534
ConversationExecutionStatus.IDLE
532-
) # now we have a new message
535+
) # new message resets terminal states
533536

534537
# TODO: We should add test cases for all these scenarios
535538
activated_skill_names: list[str] = []
@@ -584,6 +587,7 @@ def run(self) -> None:
584587
ConversationExecutionStatus.IDLE,
585588
ConversationExecutionStatus.PAUSED,
586589
ConversationExecutionStatus.ERROR,
590+
ConversationExecutionStatus.STUCK,
587591
]:
588592
self._state.execution_status = ConversationExecutionStatus.RUNNING
589593

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""Tests for sanitize_json_control_chars helper function.
2+
3+
This module tests the sanitize_json_control_chars helper that escapes raw
4+
control characters (U+0000–U+001F) in JSON strings produced by LLMs. Some
5+
models (e.g. kimi-k2.5, minimax-m2.5) emit literal control bytes instead of
6+
legal two-character JSON escape sequences, which causes json.loads() to fail.
7+
"""
8+
9+
import json
10+
11+
from openhands.sdk.agent.utils import sanitize_json_control_chars
12+
13+
14+
def test_valid_json_unchanged():
15+
"""Already-valid JSON is returned unmodified."""
16+
raw = '{"command": "echo hello", "path": "/tmp"}'
17+
assert sanitize_json_control_chars(raw) == raw
18+
19+
20+
def test_literal_newline_escaped():
21+
"""A raw 0x0A byte inside a JSON string is replaced with \\n."""
22+
raw = '{"command": "line1\nline2"}'
23+
sanitized = sanitize_json_control_chars(raw)
24+
assert "\n" not in sanitized
25+
parsed = json.loads(sanitized)
26+
assert parsed["command"] == "line1\nline2"
27+
28+
29+
def test_literal_tab_escaped():
30+
"""A raw 0x09 byte inside a JSON string is replaced with \\t."""
31+
raw = '{"indent": "col1\tcol2"}'
32+
sanitized = sanitize_json_control_chars(raw)
33+
assert "\t" not in sanitized
34+
parsed = json.loads(sanitized)
35+
assert parsed["indent"] == "col1\tcol2"
36+
37+
38+
def test_multiple_control_chars():
39+
"""Multiple different control characters are all escaped."""
40+
raw = '{"text": "a\tb\nc\rd"}'
41+
sanitized = sanitize_json_control_chars(raw)
42+
parsed = json.loads(sanitized)
43+
assert parsed["text"] == "a\tb\nc\rd"
44+
45+
46+
def test_null_byte_escaped():
47+
"""A raw NUL (0x00) byte is escaped to \\u0000."""
48+
raw = '{"data": "before\x00after"}'
49+
sanitized = sanitize_json_control_chars(raw)
50+
assert "\\u0000" in sanitized
51+
parsed = json.loads(sanitized)
52+
assert parsed["data"] == "before\x00after"
53+
54+
55+
def test_form_feed_and_backspace():
56+
"""Form-feed and backspace get their short escape aliases."""
57+
raw = '{"x": "a\x08b\x0cc"}'
58+
sanitized = sanitize_json_control_chars(raw)
59+
assert "\\b" in sanitized
60+
assert "\\f" in sanitized
61+
parsed = json.loads(sanitized)
62+
assert parsed["x"] == "a\x08b\x0cc"
63+
64+
65+
def test_already_escaped_sequences_preserved():
66+
"""Properly escaped sequences (\\n, \\t) are NOT double-escaped."""
67+
raw = r'{"command": "echo \"hello\\nworld\""}'
68+
sanitized = sanitize_json_control_chars(raw)
69+
# Already-valid escape sequences should parse correctly
70+
parsed = json.loads(sanitized)
71+
assert "hello\\nworld" in parsed["command"]
72+
73+
74+
def test_empty_string():
75+
"""Empty input returns empty output."""
76+
assert sanitize_json_control_chars("") == ""
77+
78+
79+
def test_realistic_tool_call_arguments():
80+
"""Simulates a realistic malformed tool_call.arguments from an LLM."""
81+
# The LLM emitted a literal newline inside the "command" value
82+
raw = '{"command": "cd /workspace && \\\npython test.py", "path": "/workspace"}'
83+
sanitized = sanitize_json_control_chars(raw)
84+
parsed = json.loads(sanitized)
85+
assert "python test.py" in parsed["command"]
86+
assert parsed["path"] == "/workspace"

tests/sdk/conversation/local/test_agent_status_transition.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
- PAUSED -> RUNNING (when run() is called after pause)
1313
- WAITING_FOR_CONFIRMATION -> RUNNING (when run() is called to confirm)
1414
- FINISHED -> IDLE -> RUNNING (when new message sent after completion)
15-
- FINISHED/STUCK -> remain unchanged (run() exits immediately)
15+
- STUCK -> IDLE (when new message sent) -> RUNNING (when run() is called)
16+
- STUCK -> RUNNING (when run() is called directly)
17+
- FINISHED -> remain unchanged (run() exits immediately without new message)
1618
"""
1719

1820
import threading
@@ -357,23 +359,60 @@ def test_run_exits_immediately_when_already_finished():
357359
assert llm.call_count == initial_call_count
358360

359361

360-
def test_run_exits_immediately_when_stuck():
361-
"""Test that run() exits immediately when status is STUCK."""
362-
# Use TestLLM with no scripted responses (should not be called)
363-
llm = TestLLM.from_messages([])
362+
def test_run_recovers_from_stuck():
363+
"""Test that run() resets STUCK status and lets the agent continue.
364+
365+
When a conversation is STUCK (e.g. stuck detector triggered or
366+
persisted STUCK state from a previous session), calling run() should
367+
reset the status to RUNNING so the agent can retry. Without this
368+
reset, a persisted STUCK state would permanently kill the session.
369+
"""
370+
# Provide a finish response so the agent can complete after unsticking.
371+
llm = TestLLM.from_messages(
372+
[Message(role="assistant", content=[TextContent(text="Recovered")])]
373+
)
364374
agent = Agent(llm=llm, tools=[])
365375
conversation = Conversation(agent=agent)
366376

367-
# Manually set status to STUCK (simulating stuck detection)
377+
# Seed a user message so the agent has context to work with
378+
conversation.send_message(
379+
Message(role="user", content=[TextContent(text="Please continue")])
380+
)
381+
382+
# Simulate stuck detection persisted from previous session
368383
conversation._state.execution_status = ConversationExecutionStatus.STUCK
369384

370-
# Call run - should exit immediately
371385
conversation.run()
372386

373-
# Status should still be STUCK
374-
assert conversation.state.execution_status == ConversationExecutionStatus.STUCK
375-
# LLM should not be called
376-
assert llm.call_count == 0
387+
# Agent should have recovered and finished normally
388+
assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED
389+
assert llm.call_count == 1
390+
391+
392+
def test_send_message_resets_stuck_to_idle():
393+
"""Test STUCK → IDLE transition when a new user message arrives.
394+
395+
A new user message is an implicit signal to unstick the conversation,
396+
analogous to how FINISHED → IDLE works.
397+
"""
398+
llm = TestLLM.from_messages(
399+
[Message(role="assistant", content=[TextContent(text="Done")])]
400+
)
401+
agent = Agent(llm=llm, tools=[])
402+
conversation = Conversation(agent=agent)
403+
404+
# Simulate stuck state
405+
conversation._state.execution_status = ConversationExecutionStatus.STUCK
406+
407+
# Sending a new message should reset STUCK → IDLE
408+
conversation.send_message(
409+
Message(role="user", content=[TextContent(text="Try again")])
410+
)
411+
assert conversation.state.execution_status == ConversationExecutionStatus.IDLE
412+
413+
# Running should proceed normally: IDLE → RUNNING → FINISHED
414+
conversation.run()
415+
assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED
377416

378417

379418
def test_execution_status_error_on_max_iterations():

tests/sdk/conversation/test_event_store.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,3 +402,42 @@ def test_event_log_concurrent_writes_serialized():
402402

403403
files = [f for f in fs.list("events") if not f.endswith(".lock")]
404404
assert len(files) == 2
405+
406+
407+
def test_get_single_item_recovers_from_stale_index():
408+
"""_get_single_item rebuilds the index when _idx_to_id is stale."""
409+
fs = InMemoryFileStore()
410+
log = EventLog(fs)
411+
412+
# Use UUID-like IDs to match EVENT_NAME_RE pattern
413+
evt_id = "00000000-0000-0000-0000-000000000001"
414+
event = create_test_event(evt_id, "Should recover")
415+
log.append(event)
416+
assert log[0].id == evt_id
417+
418+
# Simulate a stale in-memory index (e.g., external file modification)
419+
log._idx_to_id.clear()
420+
log._id_to_idx.clear()
421+
422+
# Access should rebuild the index transparently and succeed
423+
recovered = log[0]
424+
assert recovered.id == evt_id
425+
426+
427+
def test_get_single_item_stale_index_out_of_range():
428+
"""After index rebuild, raise IndexError if the index no longer exists."""
429+
fs = InMemoryFileStore()
430+
log = EventLog(fs)
431+
432+
evt_id = "00000000-0000-0000-0000-000000000002"
433+
event = create_test_event(evt_id, "Only one")
434+
log.append(event)
435+
436+
# Clear index AND artificially inflate length to simulate stale state
437+
log._idx_to_id.clear()
438+
log._id_to_idx.clear()
439+
log._length = 5 # pretend there are 5 events
440+
441+
# Index 3 doesn't exist on disk; should raise IndexError after rebuild
442+
with pytest.raises(IndexError, match="Event index out of range"):
443+
log[3]

0 commit comments

Comments
 (0)