Skip to content

Commit 071702b

Browse files
test: update example to demonstrate on_stop hook with syntax validation
This commit updates the RemoteConversation example to demonstrate:

1. Setting up a Stop hook that runs Python syntax validation
2. The hook denying the agent's attempt to stop while syntax errors exist
3. Feedback being sent back to the agent
4. The agent continuing to run and attempting to fix the issues

The example shows the complete flow:

- Agent creates a file with a syntax error
- Agent tries to finish
- Stop hook validates and denies with SyntaxError feedback
- Agent receives the feedback and continues
- Cycle repeats until the agent fixes the issue or max retries are reached

Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 988b89b commit 071702b

File tree

2 files changed

+141
-29
lines changed

2 files changed

+141
-29
lines changed

examples/02_remote_agent_server/01_convo_with_local_agent_server.py

Lines changed: 114 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,15 @@
99
from pydantic import SecretStr
1010

1111
from openhands.sdk import LLM, Conversation, RemoteConversation, Workspace, get_logger
12-
from openhands.sdk.event import ConversationStateUpdateEvent
12+
from openhands.sdk.event import ConversationStateUpdateEvent, HookExecutionEvent
1313
from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher
1414
from openhands.tools.preset.default import get_default_agent
1515

1616

1717
logger = get_logger(__name__)
1818

19-
# Hook script directory - reuse the hook scripts from 33_hooks example
20-
HOOK_SCRIPTS_DIR = (
21-
Path(__file__).parent.parent / "01_standalone_sdk/33_hooks/hook_scripts"
22-
)
19+
# Hook script directory for this example
20+
HOOK_SCRIPTS_DIR = Path(__file__).parent / "hook_scripts"
2321

2422

2523
def _stream_output(stream, prefix, target_stream):
@@ -179,21 +177,19 @@ def event_callback(event):
179177
# Server-side hooks (PreToolUse, PostToolUse, UserPromptSubmit, Stop) are
180178
# executed by the agent server. Client-side hooks (SessionStart, SessionEnd)
181179
# are executed locally.
182-
log_file = Path("/tmp/tool_usage.log")
183180

184181
hook_config = HookConfig(
185-
# PostToolUse hook - logs all tool usage to a file.
186-
# Note: PostToolUse hooks run on the agent server, so the referenced
187-
# script path must be accessible in the server environment.
188-
post_tool_use=[
182+
# Stop hook - run Python syntax check before allowing agent to finish.
183+
# If any Python file has syntax errors, the hook returns "deny" with the
184+
# error output, which gets sent back to the agent as feedback, and the
185+
# agent continues working to fix the issue.
186+
stop=[
189187
HookMatcher(
190-
matcher="*",
188+
matcher="*", # Match all stop reasons
191189
hooks=[
192190
HookDefinition(
193-
command=(
194-
f"LOG_FILE={log_file} {HOOK_SCRIPTS_DIR / 'log_tools.sh'}"
195-
),
196-
timeout=5,
191+
command=str(HOOK_SCRIPTS_DIR / "pre_commit_check.sh"),
192+
timeout=60,
197193
)
198194
],
199195
)
@@ -208,35 +204,124 @@ def event_callback(event):
208204
)
209205
assert isinstance(conversation, RemoteConversation)
210206

207+
# Track hook execution events
208+
hook_events: list[HookExecutionEvent] = []
209+
210+
def hook_event_tracker(event):
    """Collect HookExecutionEvent instances into ``hook_events`` and log each one."""
    # Ignore everything that is not a hook execution event.
    if not isinstance(event, HookExecutionEvent):
        return
    hook_events.append(event)
    logger.info(f"🪝 HookExecutionEvent captured: {event.hook_event_type}")
215+
216+
# Append our hook tracker to the existing callbacks
217+
conversation._callbacks.append(hook_event_tracker)
218+
211219
try:
212220
logger.info(f"\n📋 Conversation ID: {conversation.state.id}")
213221

214-
# Send first message and run
215-
logger.info("📝 Sending first message...")
222+
# Test scenario: Ask the agent to create a Python file with syntax errors
223+
# The stop hook should detect the syntax error and send feedback back
224+
# to the agent to fix it
225+
logger.info("📝 Sending message to test on_stop hook with syntax check...")
216226
conversation.send_message(
217-
"Read the current repo and write 3 facts about the project into FACTS.txt."
227+
"Create a Python file called 'test_broken.py' in the current directory "
228+
"with an obvious syntax error (like 'def broken(:\n pass' - missing "
229+
"closing parenthesis). After creating the file, immediately use the "
230+
"finish action. Do NOT fix the syntax error yourself - I want to test "
231+
"the validation hook."
218232
)
219233

220234
# Generate title using a specific LLM
221235
title = conversation.generate_title(max_length=60, llm=title_gen_llm)
222236
logger.info(f"Generated conversation title: {title}")
223237

224238
logger.info("🚀 Running conversation...")
225-
conversation.run()
239+
logger.info(
240+
"Expected behavior: Agent creates broken .py file -> tries to finish "
241+
"-> stop hook runs syntax check -> check fails -> hook sends feedback "
242+
"-> agent fixes the syntax error -> tries to finish again -> passes"
243+
)
226244

227-
logger.info("✅ First task completed!")
228-
logger.info(f"Agent status: {conversation.state.execution_status}")
245+
# Keep running until the agent actually finishes
246+
# When a stop hook denies, the state goes: running -> finished -> running
247+
# The client's run() may return when it sees 'finished', so we need to
248+
# check if the agent is still running and continue
249+
max_runs = 5 # Prevent infinite loops
250+
run_count = 0
251+
while run_count < max_runs:
252+
run_count += 1
253+
logger.info(f"🔄 Run attempt #{run_count}")
254+
conversation.run()
255+
current_status = conversation.state.execution_status
256+
logger.info(f" After run(), status = {current_status}")
257+
258+
# Small delay to let any pending state updates arrive
259+
time.sleep(0.5)
260+
current_status = conversation.state.execution_status
261+
logger.info(f" After delay, status = {current_status}")
262+
263+
if current_status.value == "finished":
264+
logger.info(" ✅ Agent finished!")
265+
break
266+
elif current_status.value == "running":
267+
logger.info(" Agent still running (hook denied stop), continuing...")
268+
else:
269+
logger.info(f" Unexpected status: {current_status}, stopping")
270+
break
271+
272+
logger.info("✅ Task completed!")
273+
logger.info(f"Final agent status: {conversation.state.execution_status}")
229274

230275
# Wait for events to stop coming (no events for 2 seconds)
231276
logger.info("⏳ Waiting for events to stop...")
232277
while time.time() - event_tracker["last_event_time"] < 2.0:
233278
time.sleep(0.1)
234279
logger.info("✅ Events have stopped")
235280

236-
logger.info("🚀 Running conversation again...")
237-
conversation.send_message("Great! Now delete that file.")
238-
conversation.run()
239-
logger.info("✅ Second task completed!")
281+
# Analyze hook execution events
282+
logger.info("\n" + "=" * 50)
283+
logger.info("📊 Hook Execution Events Analysis")
284+
logger.info("=" * 50)
285+
286+
logger.info(f"Total HookExecutionEvents received: {len(hook_events)}")
287+
for i, he in enumerate(hook_events, 1):
288+
logger.info(f"\n Hook Event #{i}:")
289+
logger.info(f" Type: {he.hook_event_type}")
290+
logger.info(f" Command: {he.hook_command}")
291+
logger.info(f" Success: {he.success}")
292+
logger.info(f" Blocked: {he.blocked}")
293+
logger.info(f" Exit Code: {he.exit_code}")
294+
if he.additional_context:
295+
# Truncate for readability
296+
ctx = (
297+
he.additional_context[:500] + "..."
298+
if len(he.additional_context) > 500
299+
else he.additional_context
300+
)
301+
logger.info(f" Additional Context: {ctx}")
302+
if he.error:
303+
logger.info(f" Error: {he.error}")
304+
305+
# Count stop hooks that were denied (pre-commit failed)
306+
stop_events = [e for e in hook_events if e.hook_event_type == "Stop"]
307+
denied_stops = [e for e in stop_events if e.blocked]
308+
309+
logger.info(f"\nStop hook events: {len(stop_events)}")
310+
logger.info(f"Denied stops (pre-commit failures): {len(denied_stops)}")
311+
312+
if denied_stops:
313+
logger.info(
314+
"\n✅ SUCCESS: Stop hook denied at least once due to "
315+
"pre-commit failure!"
316+
)
317+
logger.info(
318+
" The agent should have received feedback and fixed the issue."
319+
)
320+
else:
321+
logger.info(
322+
"\n⚠️ No denied stops detected. Either pre-commit passed on first "
323+
"try or the hook didn't work as expected."
324+
)
240325

241326
# Demonstrate state.events functionality
242327
logger.info("\n" + "=" * 50)
@@ -247,18 +332,18 @@ def event_callback(event):
247332
total_events = len(conversation.state.events)
248333
logger.info(f"📈 Total events in conversation: {total_events}")
249334

250-
# Get recent events (last 5) using state.events
251-
logger.info("\n🔍 Getting last 5 events using state.events...")
335+
# Get recent events (last 10) using state.events
336+
logger.info("\n🔍 Getting last 10 events using state.events...")
252337
all_events = conversation.state.events
253-
recent_events = all_events[-5:] if len(all_events) >= 5 else all_events
338+
recent_events = all_events[-10:] if len(all_events) >= 10 else all_events
254339

255340
for i, event in enumerate(recent_events, 1):
256341
event_type = type(event).__name__
257342
timestamp = getattr(event, "timestamp", "Unknown")
258343
logger.info(f" {i}. {event_type} at {timestamp}")
259344

260345
# Let's see what the actual event types are
261-
logger.info("\n🔍 Event types found:")
346+
logger.info("\n🔍 Event types found in recent events:")
262347
event_types = set()
263348
for event in recent_events:
264349
event_type = type(event).__name__
examples/02_remote_agent_server/hook_scripts/pre_commit_check.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
# Stop hook: run a Python syntax check on all .py files in the workspace.
#
# Behavior of this script:
#   - Every file compiles: no output, exit status 0.
#   - At least one file fails: print a JSON object with "decision": "deny"
#     and the collected compiler output in "additionalContext", then exit
#     with status 2.
#
# This hook validates that the agent hasn't broken any Python files.
# Environment variable CHECK_DIR can override the default working directory.

CHECK_DIR="${CHECK_DIR:-.}"

# Find all Python files and collect the diagnostics of those that fail.
# NUL-delimited names keep paths with spaces or newlines intact.
ERRORS=""
while IFS= read -r -d '' file; do
    # Capture both streams so the compiler diagnostic is kept; the exit
    # status of the assignment is the exit status of py_compile.
    if ! result=$(python3 -m py_compile "$file" 2>&1); then
        # Join with a real newline instead of a literal "\n" so that no later
        # escape expansion is needed; expanding escapes would also mangle
        # backslash sequences that appear inside the captured diagnostics.
        ERRORS="${ERRORS}${result}"$'\n'
    fi
done < <(find "$CHECK_DIR" -name "*.py" -print0 2>/dev/null)

if [ -n "$ERRORS" ]; then
    # Keep at most 50 lines of feedback and let Python produce a correctly
    # escaped JSON string literal from them.
    ESCAPED_OUTPUT=$(printf '%s' "$ERRORS" | head -n 50 | python3 -c 'import json,sys; print(json.dumps(sys.stdin.read()))')
    # printf emits the JSON verbatim; the escaped payload contains backslash
    # sequences that must reach the reader untouched.
    printf '{"decision": "deny", "additionalContext": %s}\n' "$ESCAPED_OUTPUT"
    exit 2
fi

exit 0

0 commit comments

Comments
 (0)