diff --git a/examples/02_remote_agent_server/01_convo_with_local_agent_server.py b/examples/02_remote_agent_server/01_convo_with_local_agent_server.py index 3dbace89dd..ab12d1e64a 100644 --- a/examples/02_remote_agent_server/01_convo_with_local_agent_server.py +++ b/examples/02_remote_agent_server/01_convo_with_local_agent_server.py @@ -4,16 +4,21 @@ import tempfile import threading import time +from pathlib import Path from pydantic import SecretStr from openhands.sdk import LLM, Conversation, RemoteConversation, Workspace, get_logger -from openhands.sdk.event import ConversationStateUpdateEvent +from openhands.sdk.event import ConversationStateUpdateEvent, HookExecutionEvent +from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher from openhands.tools.preset.default import get_default_agent logger = get_logger(__name__) +# Hook script directory for this example +HOOK_SCRIPTS_DIR = Path(__file__).parent / "hook_scripts" + def _stream_output(stream, prefix, target_stream): """Stream output from subprocess to target stream with prefix.""" @@ -168,20 +173,62 @@ def event_callback(event): ) logger.info(f"Output: {result.stdout}") + # Configure hooks - demonstrating the hooks system with RemoteConversation + # Server-side hooks (PreToolUse, PostToolUse, UserPromptSubmit, Stop) are + # executed by the agent server. Client-side hooks (SessionStart, SessionEnd) + # are executed locally. + + hook_config = HookConfig( + # Stop hook - run Python syntax check before allowing agent to finish. + # If any Python file has syntax errors, the hook returns "deny" with the + # error output, which gets sent back to the agent as feedback, and the + # agent continues working to fix the issue. + stop=[ + HookMatcher( + matcher="*", # Match all stop reasons + hooks=[ + HookDefinition( + command=str(HOOK_SCRIPTS_DIR / "pycompile_check.sh"), + timeout=60, + ) + ], + ) + ], + ) + conversation = Conversation( agent=agent, workspace=workspace, callbacks=[event_callback], + hook_config=hook_config, ) assert isinstance(conversation, RemoteConversation) + # Track hook execution events + hook_events: list[HookExecutionEvent] = [] + + def hook_event_tracker(event): + """Additional callback to track hook execution events.""" + if isinstance(event, HookExecutionEvent): + hook_events.append(event) + logger.info(f"šŸŖ HookExecutionEvent captured: {event.hook_event_type}") + + # Append our hook tracker to the existing callbacks + conversation._callbacks.append(hook_event_tracker) + try: logger.info(f"\nšŸ“‹ Conversation ID: {conversation.state.id}") - # Send first message and run - logger.info("šŸ“ Sending first message...") + # Test scenario: Ask the agent to create a Python file with syntax errors + # The stop hook should detect the syntax error and send feedback back + # to the agent to fix it + logger.info("šŸ“ Sending message to test on_stop hook with syntax check...") conversation.send_message( - "Read the current repo and write 3 facts about the project into FACTS.txt." + "Create a Python file called 'test_broken.py' in the current directory " + "with an obvious syntax error (like 'def broken(:\n pass' - missing " + "closing parenthesis). After creating the file, immediately use the " + "finish action. If you receive any feedback about errors, fix them and " + "try to finish again." ) # Generate title using a specific LLM @@ -189,10 +236,41 @@ def event_callback(event): logger.info(f"Generated conversation title: {title}") logger.info("šŸš€ Running conversation...") - conversation.run() + logger.info( + "Expected behavior: Agent creates broken .py file -> tries to finish " + "-> stop hook runs syntax check -> check fails -> hook sends feedback " + "-> agent fixes the syntax error -> tries to finish again -> passes" + ) - logger.info("āœ… First task completed!") - logger.info(f"Agent status: {conversation.state.execution_status}") + # Keep running until the agent actually finishes + # When a stop hook denies, the state goes: running -> finished -> running + # The client's run() may return when it sees 'finished', so we need to + # check if the agent is still running and continue + max_runs = 10 # Allow enough retries for agent to fix issues + run_count = 0 + while run_count < max_runs: + run_count += 1 + logger.info(f"šŸ”„ Run attempt #{run_count}") + conversation.run() + current_status = conversation.state.execution_status + logger.info(f" After run(), status = {current_status}") + + # Small delay to let any pending state updates arrive + time.sleep(0.5) + current_status = conversation.state.execution_status + logger.info(f" After delay, status = {current_status}") + + if current_status.value == "finished": + logger.info(" āœ… Agent finished!") + break + elif current_status.value == "running": + logger.info(" Agent still running (hook denied stop), continuing...") + else: + logger.info(f" Unexpected status: {current_status}, stopping") + break + + logger.info("āœ… Task completed!") + logger.info(f"Final agent status: {conversation.state.execution_status}") # Wait for events to stop coming (no events for 2 seconds) logger.info("ā³ Waiting for events to stop...") @@ -200,10 +278,50 @@ def event_callback(event): time.sleep(0.1) logger.info("āœ… Events have stopped") - logger.info("šŸš€ Running conversation again...") - conversation.send_message("Great! Now delete that file.") - conversation.run() - logger.info("āœ… Second task completed!") + # Analyze hook execution events + logger.info("\n" + "=" * 50) + logger.info("šŸ“Š Hook Execution Events Analysis") + logger.info("=" * 50) + + logger.info(f"Total HookExecutionEvents received: {len(hook_events)}") + for i, he in enumerate(hook_events, 1): + logger.info(f"\n Hook Event #{i}:") + logger.info(f" Type: {he.hook_event_type}") + logger.info(f" Command: {he.hook_command}") + logger.info(f" Success: {he.success}") + logger.info(f" Blocked: {he.blocked}") + logger.info(f" Exit Code: {he.exit_code}") + if he.additional_context: + # Truncate for readability + ctx = ( + he.additional_context[:500] + "..." + if len(he.additional_context) > 500 + else he.additional_context + ) + logger.info(f" Additional Context: {ctx}") + if he.error: + logger.info(f" Error: {he.error}") + + # Count stop hooks that were denied (pre-commit failed) + stop_events = [e for e in hook_events if e.hook_event_type == "Stop"] + denied_stops = [e for e in stop_events if e.blocked] + + logger.info(f"\nStop hook events: {len(stop_events)}") + logger.info(f"Denied stops (pre-commit failures): {len(denied_stops)}") + + if denied_stops: + logger.info( + "\nāœ… SUCCESS: Stop hook denied at least once due to " + "pre-commit failure!" + ) + logger.info( + " The agent should have received feedback and fixed the issue." + ) + else: + logger.info( + "\nāš ļø No denied stops detected. Either pre-commit passed on first " + "try or the hook didn't work as expected." + ) # Demonstrate state.events functionality logger.info("\n" + "=" * 50) @@ -214,10 +332,10 @@ def event_callback(event): total_events = len(conversation.state.events) logger.info(f"šŸ“ˆ Total events in conversation: {total_events}") - # Get recent events (last 5) using state.events - logger.info("\nšŸ” Getting last 5 events using state.events...") + # Get recent events (last 10) using state.events + logger.info("\nšŸ” Getting last 10 events using state.events...") all_events = conversation.state.events - recent_events = all_events[-5:] if len(all_events) >= 5 else all_events + recent_events = all_events[-10:] if len(all_events) >= 10 else all_events for i, event in enumerate(recent_events, 1): event_type = type(event).__name__ @@ -225,7 +343,7 @@ def event_callback(event): logger.info(f" {i}. {event_type} at {timestamp}") # Let's see what the actual event types are - logger.info("\nšŸ” Event types found:") + logger.info("\nšŸ” Event types found in recent events:") event_types = set() for event in recent_events: event_type = type(event).__name__ diff --git a/examples/02_remote_agent_server/hook_scripts/pycompile_check.sh b/examples/02_remote_agent_server/hook_scripts/pycompile_check.sh new file mode 100755 index 0000000000..ca420a3cb1 --- /dev/null +++ b/examples/02_remote_agent_server/hook_scripts/pycompile_check.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Stop hook: Run Python syntax check on all .py files in the workspace +# Returns deny if any Python file has syntax errors, with the error output as feedback +# +# This hook validates that the agent hasn't broken any Python files. +# Environment variable CHECK_DIR can override the default working directory. + +CHECK_DIR="${CHECK_DIR:-.}" + +# Find all Python files and check for syntax errors +ERRORS="" +while IFS= read -r -d '' file; do + # Run python syntax check + result=$(python3 -m py_compile "$file" 2>&1) + if [ $? -ne 0 ]; then + ERRORS="${ERRORS}\n${result}" + fi +done < <(find "$CHECK_DIR" -name "*.py" -print0 2>/dev/null) + +if [ -n "$ERRORS" ]; then + # Escape the output for JSON + ESCAPED_OUTPUT=$(echo -e "$ERRORS" | head -50 | python3 -c 'import json,sys; print(json.dumps(sys.stdin.read()))') + echo "{\"decision\": \"deny\", \"additionalContext\": $ESCAPED_OUTPUT}" + exit 2 +fi + +exit 0 diff --git a/openhands-sdk/openhands/sdk/__init__.py b/openhands-sdk/openhands/sdk/__init__.py index 4687d77279..0ef640d442 100644 --- a/openhands-sdk/openhands/sdk/__init__.py +++ b/openhands-sdk/openhands/sdk/__init__.py @@ -22,7 +22,7 @@ RemoteConversation, ) from openhands.sdk.conversation.conversation_stats import ConversationStats -from openhands.sdk.event import Event, LLMConvertibleEvent +from openhands.sdk.event import Event, HookExecutionEvent, LLMConvertibleEvent from openhands.sdk.event.llm_convertible import MessageEvent from openhands.sdk.io import FileStore, LocalFileStore from openhands.sdk.llm import ( @@ -99,6 +99,7 @@ "MCPToolDefinition", "MCPToolObservation", "MessageEvent", + "HookExecutionEvent", "create_mcp_tools", "get_logger", "Conversation", diff --git a/openhands-sdk/openhands/sdk/conversation/base.py b/openhands-sdk/openhands/sdk/conversation/base.py index bfb3aa366a..f131889d1e 100644 --- a/openhands-sdk/openhands/sdk/conversation/base.py +++ b/openhands-sdk/openhands/sdk/conversation/base.py @@ -30,6 +30,7 @@ if TYPE_CHECKING: from openhands.sdk.agent.base import AgentBase from openhands.sdk.conversation.state import ConversationExecutionStatus + from openhands.sdk.hooks import HookConfig CallbackType = TypeVar( @@ -95,6 +96,11 @@ def stats(self) -> ConversationStats: """The conversation statistics.""" ... + @property + def hook_config(self) -> "HookConfig | None": + """The hook configuration for this conversation.""" + ... + class BaseConversation(ABC): """Abstract base class for conversation implementations. diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 29c003b875..671d253fd8 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -399,6 +399,9 @@ def _ensure_plugins_loaded(self) -> None: # Set up hook processor with the combined config if final_hook_config is not None: + # Store final hook_config in state for observability + self._state.hook_config = final_hook_config + self._hook_processor, self._on_event = create_hook_callback( hook_config=final_hook_config, working_dir=str(self.workspace.working_dir), diff --git a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py index f0f813432e..e3b763fa7a 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py @@ -43,12 +43,7 @@ ConversationStateUpdateEvent, ) from openhands.sdk.event.llm_completion_log import LLMCompletionLogEvent -from openhands.sdk.hooks import ( - HookConfig, - HookEventProcessor, - HookEventType, - HookManager, -) +from openhands.sdk.hooks import HookConfig from openhands.sdk.llm import LLM, Message, TextContent from openhands.sdk.logger import DEBUG, get_logger from openhands.sdk.observability.laminar import observe @@ -531,6 +526,15 @@ def stats(self) -> ConversationStats: stats_data = info.get("stats", {}) return ConversationStats.model_validate(stats_data) + @property + def hook_config(self) -> HookConfig | None: + """Get hook configuration (fetched from remote).""" + info = self._get_conversation_info() + hook_config_data = info.get("hook_config") + if hook_config_data is not None: + return HookConfig.model_validate(hook_config_data) + return None + def model_dump(self, **_kwargs): """Get a dictionary representation of the remote state.""" info = self._get_conversation_info() @@ -558,7 +562,6 @@ class RemoteConversation(BaseConversation): max_iteration_per_run: int workspace: RemoteWorkspace _client: httpx.Client - _hook_processor: HookEventProcessor | None _cleanup_initiated: bool _terminal_status_queue: Queue[str] # Thread-safe queue for terminal status from WS delete_on_close: bool = False @@ -599,7 +602,8 @@ def __init__( a dict with keys: 'action_observation', 'action_error', 'monologue', 'alternating_pattern'. Values are integers representing the number of repetitions before triggering. - hook_config: Optional hook configuration for session hooks + hook_config: Optional hook configuration sent to the server. + All hooks are executed server-side. visualizer: Visualization configuration. Can be: - ConversationVisualizerBase subclass: Class to instantiate (default: ConversationVisualizer) @@ -613,7 +617,6 @@ def __init__( self.max_iteration_per_run = max_iteration_per_run self.workspace = workspace self._client = workspace.client - self._hook_processor = None self._cleanup_initiated = False self._terminal_status_queue: Queue[str] = Queue() @@ -662,6 +665,8 @@ def __init__( "agent_definitions": serialized_defs, # Include plugins to load on server "plugins": [p.model_dump() for p in plugins] if plugins else None, + # Include hook_config for server-side hooks + "hook_config": hook_config.model_dump() if hook_config else None, } if stuck_detection_thresholds is not None: # Convert to StuckDetectionThresholds if dict, then serialize @@ -776,25 +781,8 @@ def run_complete_callback(event: Event) -> None: self.update_secrets(secret_values) self._start_observability_span(str(self._id)) - if hook_config is not None: - unsupported = ( - HookEventType.PRE_TOOL_USE, - HookEventType.POST_TOOL_USE, - HookEventType.USER_PROMPT_SUBMIT, - HookEventType.STOP, - ) - if any(hook_config.has_hooks_for_event(t) for t in unsupported): - logger.warning( - "RemoteConversation only supports SessionStart/SessionEnd hooks; " - "other hook types will not be enforced." - ) - hook_manager = HookManager( - config=hook_config, - working_dir=os.getcwd(), - session_id=str(self._id), - ) - self._hook_processor = HookEventProcessor(hook_manager=hook_manager) - self._hook_processor.run_session_start() + # All hooks (including SessionStart/SessionEnd) are executed server-side. + # hook_config is sent in the creation payload. self.delete_on_close = delete_on_close def _create_llm_completion_log_callback(self) -> ConversationCallbackType: @@ -1239,8 +1227,7 @@ def close(self) -> None: if self._cleanup_initiated: return self._cleanup_initiated = True - if self._hook_processor is not None: - self._hook_processor.run_session_end() + # SessionEnd hooks are executed server-side (via hook_config in payload). try: # Stop WebSocket client if it exists if self._ws_client: diff --git a/openhands-sdk/openhands/sdk/conversation/state.py b/openhands-sdk/openhands/sdk/conversation/state.py index 0b204a2e48..2f396dc116 100644 --- a/openhands-sdk/openhands/sdk/conversation/state.py +++ b/openhands-sdk/openhands/sdk/conversation/state.py @@ -22,6 +22,7 @@ ) from openhands.sdk.event.base import Event from openhands.sdk.event.types import EventID +from openhands.sdk.hooks import HookConfig from openhands.sdk.io import FileStore, InMemoryFileStore, LocalFileStore from openhands.sdk.logger import get_logger from openhands.sdk.security.analyzer import SecurityAnalyzerBase @@ -169,6 +170,17 @@ class ConversationState(OpenHandsModel): "See https://docs.openhands.dev/sdk/guides/convo-persistence#how-state-persistence-works", ) + # Hook configuration for the conversation + hook_config: HookConfig | None = Field( + default=None, + description=( + "Hook configuration for this conversation. Includes definitions for " + "PreToolUse, PostToolUse, UserPromptSubmit, SessionStart, SessionEnd, " + "and Stop hooks. When set, these hooks are executed at the appropriate " + "points during conversation execution." + ), + ) + # ===== Private attrs (NOT Fields) ===== _fs: FileStore = PrivateAttr() # filestore for persistence _events: EventLog = PrivateAttr() # now the storage for events diff --git a/openhands-sdk/openhands/sdk/event/__init__.py b/openhands-sdk/openhands/sdk/event/__init__.py index 27da310db4..2390cee433 100644 --- a/openhands-sdk/openhands/sdk/event/__init__.py +++ b/openhands-sdk/openhands/sdk/event/__init__.py @@ -6,6 +6,7 @@ CondensationSummaryEvent, ) from openhands.sdk.event.conversation_state import ConversationStateUpdateEvent +from openhands.sdk.event.hook_execution import HookExecutionEvent from openhands.sdk.event.llm_completion_log import LLMCompletionLogEvent from openhands.sdk.event.llm_convertible import ( ActionEvent, @@ -40,6 +41,7 @@ "CondensationRequest", "CondensationSummaryEvent", "ConversationStateUpdateEvent", + "HookExecutionEvent", "LLMCompletionLogEvent", "EventID", "ToolCallID", diff --git a/openhands-sdk/openhands/sdk/event/hook_execution.py b/openhands-sdk/openhands/sdk/event/hook_execution.py new file mode 100644 index 0000000000..9dde48ba69 --- /dev/null +++ b/openhands-sdk/openhands/sdk/event/hook_execution.py @@ -0,0 +1,129 @@ +"""Hook execution event for observability into hook execution.""" + +from typing import Any, Literal + +from pydantic import Field +from rich.text import Text + +from openhands.sdk.event.base import Event +from openhands.sdk.event.types import SourceType + + +HookEventType = Literal[ + "PreToolUse", + "PostToolUse", + "UserPromptSubmit", + "SessionStart", + "SessionEnd", + "Stop", +] + + +class HookExecutionEvent(Event): + """Event emitted when a hook is executed. + + This event provides observability into hook execution, including: + - Which hook type was triggered + - The command that was run + - The result (success/blocked/error) + - Any output from the hook + + This allows clients to track hook execution via the event stream. + """ + + source: SourceType = Field( + default="hook", description="Source is always 'hook' for hook execution events" + ) + + # Hook identification + hook_event_type: HookEventType = Field( + ..., description="The type of hook event that triggered this execution" + ) + hook_command: str = Field(..., description="The hook command that was executed") + tool_name: str | None = Field( + default=None, + description="Tool name for PreToolUse/PostToolUse hooks", + ) + + # Execution result + success: bool = Field(..., description="Whether the hook executed successfully") + blocked: bool = Field( + default=False, + description="Whether the hook blocked the operation (exit code 2 or deny)", + ) + exit_code: int = Field(..., description="Exit code from the hook command") + + # Output + stdout: str = Field(default="", description="Standard output from the hook") + stderr: str = Field(default="", description="Standard error from the hook") + reason: str | None = Field( + default=None, description="Reason provided by hook (for blocking)" + ) + additional_context: str | None = Field( + default=None, + description="Additional context injected by hook (e.g., for UserPromptSubmit)", + ) + error: str | None = Field( + default=None, description="Error message if hook execution failed" + ) + + # Context + action_id: str | None = Field( + default=None, + description="ID of the action this hook is associated with (PreToolUse/PostToolUse)", # noqa: E501 + ) + message_id: str | None = Field( + default=None, + description="ID of the message this hook is associated with (UserPromptSubmit)", + ) + hook_input: dict[str, Any] | None = Field( + default=None, + description="The input data that was passed to the hook", + ) + + @property + def visualize(self) -> Text: + """Return Rich Text representation of this hook execution event.""" + content = Text() + content.append("Hook: ", style="bold") + content.append(f"{self.hook_event_type}") + if self.tool_name: + content.append(f" ({self.tool_name})") + content.append("\n") + + # Status + if self.blocked: + content.append("Status: ", style="bold") + content.append("BLOCKED", style="bold red") + if self.reason: + content.append(f" - {self.reason}") + elif self.success: + content.append("Status: ", style="bold") + content.append("SUCCESS", style="bold green") + else: + content.append("Status: ", style="bold") + content.append("FAILED", style="bold red") + if self.error: + content.append(f" - {self.error}") + + content.append(f"\nExit Code: {self.exit_code}") + + # Output (truncated) + if self.stdout: + output_preview = self.stdout[:200] + if len(self.stdout) > 200: + output_preview += "..." + content.append(f"\nOutput: {output_preview}") + + if self.additional_context: + content.append(f"\nInjected Context: {self.additional_context[:100]}...") + + return content + + def __str__(self) -> str: + """Plain text string representation for HookExecutionEvent.""" + status = ( + "BLOCKED" if self.blocked else ("SUCCESS" if self.success else "FAILED") + ) + tool_info = f" ({self.tool_name})" if self.tool_name else "" + return f"HookExecutionEvent: {self.hook_event_type}{tool_info} - {status}" diff --git a/openhands-sdk/openhands/sdk/event/types.py b/openhands-sdk/openhands/sdk/event/types.py index 28c2f3d713..417e58c305 100644 --- a/openhands-sdk/openhands/sdk/event/types.py +++ b/openhands-sdk/openhands/sdk/event/types.py @@ -2,7 +2,7 @@ EventType = Literal["action", "observation", "message", "system_prompt", "agent_error"] -SourceType = Literal["agent", "user", "environment"] +SourceType = Literal["agent", "user", "environment", "hook"] EventID = str """Type alias for event IDs.""" diff --git a/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py b/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py index 0fc77425c3..4eaac1c1ac 100644 --- a/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py +++ b/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py @@ -1,9 +1,17 @@ """Hook integration for conversations.""" +from collections.abc import Callable from typing import TYPE_CHECKING, Any -from openhands.sdk.event import ActionEvent, Event, MessageEvent, ObservationEvent +from openhands.sdk.event import ( + ActionEvent, + Event, + HookExecutionEvent, + MessageEvent, + ObservationEvent, +) from openhands.sdk.hooks.config import HookConfig +from openhands.sdk.hooks.executor import HookResult from openhands.sdk.hooks.manager import HookManager from openhands.sdk.hooks.types import HookEventType from openhands.sdk.llm import TextContent @@ -15,31 +23,82 @@ logger = get_logger(__name__) +# Max number of characters we persist in HookExecutionEvent log fields. +# Hooks can emit arbitrary output; truncation prevents event persistence bloat. +MAX_HOOK_LOG_CHARS = 50_000 +_TRUNCATION_SUFFIX = "\n" + + +def _truncate_hook_log(value: str | None) -> str | None: + if value is None: + return None + if len(value) <= MAX_HOOK_LOG_CHARS: + return value + if MAX_HOOK_LOG_CHARS <= len(_TRUNCATION_SUFFIX): + return value[:MAX_HOOK_LOG_CHARS] + return value[: MAX_HOOK_LOG_CHARS - len(_TRUNCATION_SUFFIX)] + _TRUNCATION_SUFFIX + + +# Type alias for the callback function that emits events +EventEmitter = Callable[[Event], None] + class HookEventProcessor: """Processes events and runs hooks at appropriate points. Call set_conversation_state() after creating Conversation for blocking to work. - Note on persistence: HookEvent/HookResult are ephemeral (for hook script I/O). - If hook execution traces need to be persisted (e.g., for observability), create - a HookExecutionObservation inheriting from Observation and emit it through the - event stream, rather than modifying these hook classes. + HookExecutionEvent is emitted for each hook execution when emit_hook_events=True, + providing full observability into hook execution for clients. """ def __init__( self, hook_manager: HookManager, original_callback: Any = None, + emit_hook_events: bool = True, ): self.hook_manager = hook_manager self.original_callback = original_callback self._conversation_state: ConversationState | None = None + self.emit_hook_events = emit_hook_events def set_conversation_state(self, state: "ConversationState") -> None: """Set conversation state for blocking support.""" self._conversation_state = state + def _emit_hook_execution_event( + self, + hook_event_type: HookEventType, + hook_command: str, + result: HookResult, + tool_name: str | None = None, + action_id: str | None = None, + message_id: str | None = None, + hook_input: dict[str, Any] | None = None, + ) -> None: + """Emit a HookExecutionEvent for observability.""" + if not self.emit_hook_events or not self.original_callback: + return + + event = HookExecutionEvent( + hook_event_type=hook_event_type.value, + hook_command=hook_command, + tool_name=tool_name, + success=result.success, + blocked=result.blocked, + exit_code=result.exit_code, + stdout=_truncate_hook_log(result.stdout) or "", + stderr=_truncate_hook_log(result.stderr) or "", + reason=_truncate_hook_log(result.reason), + additional_context=_truncate_hook_log(result.additional_context), + error=_truncate_hook_log(result.error), + action_id=action_id, + message_id=message_id, + hook_input=hook_input, + ) + self.original_callback(event) + def on_event(self, event: Event) -> None: """Process an event and run appropriate hooks.""" # Track the event to pass to callbacks (may be modified by hooks) @@ -67,7 +126,7 @@ def _handle_pre_tool_use(self, event: ActionEvent) -> None: return tool_name = event.tool_name - tool_input = {} + tool_input: dict[str, Any] = {} # Extract tool input from action if event.action is not None: @@ -76,11 +135,27 @@ def _handle_pre_tool_use(self, event: ActionEvent) -> None: except Exception as e: logger.debug(f"Could not extract tool input: {e}") + # Get hooks to emit events with command info + hooks = self.hook_manager.config.get_hooks_for_event( + HookEventType.PRE_TOOL_USE, tool_name + ) + should_continue, results = self.hook_manager.run_pre_tool_use( tool_name=tool_name, tool_input=tool_input, ) + # Emit HookExecutionEvents for each hook + for hook, result in zip(hooks, results, strict=False): + self._emit_hook_execution_event( + hook_event_type=HookEventType.PRE_TOOL_USE, + hook_command=hook.command, + result=result, + tool_name=tool_name, + action_id=event.id, + hook_input={"tool_name": tool_name, "tool_input": tool_input}, + ) + if not should_continue: reason = self.hook_manager.get_blocking_reason(results) logger.warning(f"Hook blocked action {tool_name}: {reason}") @@ -134,14 +209,31 @@ def _handle_post_tool_use(self, event: ObservationEvent) -> None: except Exception as e: logger.debug(f"Could not extract tool response: {e}") + # Get hooks to emit events with command info + hooks = self.hook_manager.config.get_hooks_for_event( + HookEventType.POST_TOOL_USE, tool_name + ) + results = self.hook_manager.run_post_tool_use( tool_name=tool_name, tool_input=tool_input, tool_response=tool_response, ) - # Log any hook errors - for result in results: + # Emit HookExecutionEvents for each hook and log errors + for hook, result in zip(hooks, results, strict=False): + self._emit_hook_execution_event( + hook_event_type=HookEventType.POST_TOOL_USE, + hook_command=hook.command, + result=result, + tool_name=tool_name, + action_id=action_event.id, + hook_input={ + "tool_name": tool_name, + "tool_input": tool_input, + "tool_response": tool_response, + }, + ) if result.error: logger.warning(f"PostToolUse hook error: {result.error}") @@ -161,10 +253,25 @@ def _handle_user_prompt_submit(self, event: MessageEvent) -> MessageEvent: if isinstance(content, TextContent): message += content.text + # Get hooks to emit events with command info + hooks = self.hook_manager.config.get_hooks_for_event( + HookEventType.USER_PROMPT_SUBMIT + ) + should_continue, additional_context, results = ( self.hook_manager.run_user_prompt_submit(message=message) ) + # Emit HookExecutionEvents for each hook + for hook, result in zip(hooks, results, strict=False): + self._emit_hook_execution_event( + hook_event_type=HookEventType.USER_PROMPT_SUBMIT, + hook_command=hook.command, + result=result, + message_id=event.id, + hook_input={"message": message}, + ) + if not should_continue: reason = self.hook_manager.get_blocking_reason(results) logger.warning(f"Hook blocked user message: {reason}") @@ -213,29 +320,52 @@ def is_message_blocked(self, message_id: str) -> bool: def run_session_start(self) -> None: """Run SessionStart hooks. Call after conversation is created.""" + hooks = self.hook_manager.config.get_hooks_for_event( + HookEventType.SESSION_START + ) results = self.hook_manager.run_session_start() - for r in results: - if r.error: - logger.warning(f"SessionStart hook error: {r.error}") + + for hook, result in zip(hooks, results, strict=False): + self._emit_hook_execution_event( + hook_event_type=HookEventType.SESSION_START, + hook_command=hook.command, + result=result, + ) + if result.error: + logger.warning(f"SessionStart hook error: {result.error}") def run_session_end(self) -> None: """Run SessionEnd hooks. Call before conversation is closed.""" + hooks = self.hook_manager.config.get_hooks_for_event(HookEventType.SESSION_END) results = self.hook_manager.run_session_end() - for r in results: - if r.error: - logger.warning(f"SessionEnd hook error: {r.error}") + + for hook, result in zip(hooks, results, strict=False): + self._emit_hook_execution_event( + hook_event_type=HookEventType.SESSION_END, + hook_command=hook.command, + result=result, + ) + if result.error: + logger.warning(f"SessionEnd hook error: {result.error}") def run_stop(self, reason: str | None = None) -> tuple[bool, str | None]: """Run Stop hooks. Returns (should_stop, feedback).""" if not self.hook_manager.has_hooks(HookEventType.STOP): return True, None + hooks = self.hook_manager.config.get_hooks_for_event(HookEventType.STOP) should_stop, results = self.hook_manager.run_stop(reason=reason) - # Log any errors - for r in results: - if r.error: - logger.warning(f"Stop hook error: {r.error}") + # Emit events and log errors + for hook, result in zip(hooks, results, strict=False): + self._emit_hook_execution_event( + hook_event_type=HookEventType.STOP, + hook_command=hook.command, + result=result, + hook_input={"reason": reason} if reason else None, + ) + if result.error: + logger.warning(f"Stop hook error: {result.error}") # Collect feedback if denied feedback = None @@ -258,8 +388,21 @@ def create_hook_callback( working_dir: str | None = None, session_id: str | None = None, original_callback: Any = None, + emit_hook_events: bool = True, ) -> tuple[HookEventProcessor, Any]: - """Create a hook-enabled event callback. Returns (processor, callback).""" + """Create a hook-enabled event callback. Returns (processor, callback). + + Args: + hook_config: Configuration for hooks to run. + working_dir: Working directory for hook execution. + session_id: Session ID passed to hooks. + original_callback: Callback to chain after hook processing. + emit_hook_events: If True, emit HookExecutionEvent for each hook execution. + Defaults to True for full observability. + + Returns: + Tuple of (HookEventProcessor, callback function). + """ hook_manager = HookManager( config=hook_config, working_dir=working_dir, @@ -269,6 +412,7 @@ def create_hook_callback( processor = HookEventProcessor( hook_manager=hook_manager, original_callback=original_callback, + emit_hook_events=emit_hook_events, ) return processor, processor.on_event diff --git a/tests/cross/test_remote_conversation_live_server.py b/tests/cross/test_remote_conversation_live_server.py index 5426d5ab90..3cef7a1f4b 100644 --- a/tests/cross/test_remote_conversation_live_server.py +++ b/tests/cross/test_remote_conversation_live_server.py @@ -24,12 +24,14 @@ CondensationSummaryEvent, ConversationStateUpdateEvent, Event, + HookExecutionEvent, LLMConvertibleEvent, MessageEvent, ObservationEvent, PauseEvent, SystemPromptEvent, ) +from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher from openhands.sdk.subagent import AgentDefinition from openhands.sdk.subagent.registry import ( _reset_registry_for_tests, @@ -114,7 +116,6 @@ def server_env(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Generator[dic thread.start() # Wait for the server to be ready with health check - import httpx base_url = f"http://127.0.0.1:{port}" server_ready = False @@ -587,7 +588,6 @@ def test_events_not_lost_during_client_disconnection( See PR #1791 review for details: https://github.com/OpenHands/software-agent-sdk/pull/1791#pullrequestreview-3694259068 """ - import httpx def fake_completion_with_finish_tool( self, @@ -749,8 +749,6 @@ def test_post_run_reconcile_needed_under_ws_callback_lag( except for injecting a delay into the client-side callback. """ - import httpx - ws_delay_s = 0.75 def fake_completion_with_finish_tool( @@ -1028,6 +1026,182 @@ def fake_completion_with_tool_calls( # 2. ActionEvent always has a security_risk attribute +def test_hook_config_sent_to_server( + server_env, monkeypatch: pytest.MonkeyPatch, tmp_path: Path +): + """Test that hook_config is properly sent to the server and hooks are executed. + + This validates the fix for the bug where hook_config was accepted by + RemoteConversation but never sent to the server, meaning server-side hooks + (PreToolUse, PostToolUse, UserPromptSubmit, Stop) were never executed. + + The test: + 1. Configures both post_tool_use and stop hooks + 2. Uses a patched LLM that returns a finish tool call + 3. Verifies HookExecutionEvent events are received for both hook types + """ + # Create hook scripts that output JSON to indicate successful execution + post_tool_hook = tmp_path / "post_tool_hook.sh" + post_tool_hook.write_text('#!/bin/bash\necho \'{"decision": "allow"}\'\nexit 0\n') + post_tool_hook.chmod(0o755) + + stop_hook = tmp_path / "stop_hook.sh" + stop_hook.write_text('#!/bin/bash\necho \'{"decision": "allow"}\'\nexit 0\n') + stop_hook.chmod(0o755) + + hook_config = HookConfig( + post_tool_use=[ + HookMatcher( + matcher="*", + hooks=[ + HookDefinition( + command=str(post_tool_hook), + timeout=5, + ) + ], + ) + ], + stop=[ + HookMatcher( + matcher="*", + hooks=[ + HookDefinition( + command=str(stop_hook), + timeout=5, + ) + ], + ) + ], + ) + + # Create a patched LLM that returns a finish tool call to trigger hooks + call_count = {"count": 0} + + def fake_completion_with_finish( + self, + messages, + tools, + return_metrics=False, + add_security_risk_prediction=False, + **kwargs, + ): # type: ignore[no-untyped-def] + from openhands.sdk.llm.llm_response import LLMResponse + from openhands.sdk.llm.message import Message + from openhands.sdk.llm.utils.metrics import MetricsSnapshot + + call_count["count"] += 1 + + # First call: return finish tool call (triggers PostToolUse and Stop hooks) + if call_count["count"] == 1: + litellm_msg = LiteLLMMessage.model_validate( + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "finish", + "arguments": '{"message": "Task complete"}', + }, + } + ], + } + ) + else: + # Subsequent calls: simple message + litellm_msg = LiteLLMMessage.model_validate( + {"role": "assistant", "content": "Done"} + ) + + raw_response = ModelResponse( + id=f"test-resp-{call_count['count']}", + created=int(time.time()), + model="test-model", + choices=[Choices(index=0, finish_reason="stop", message=litellm_msg)], + ) + + message = Message.from_llm_chat_message(litellm_msg) + metrics_snapshot = MetricsSnapshot( + model_name="test-model", + accumulated_cost=0.0, + max_budget_per_task=None, + accumulated_token_usage=None, + ) + + return LLMResponse( + message=message, metrics=metrics_snapshot, raw_response=raw_response + ) + + monkeypatch.setattr(LLM, "completion", fake_completion_with_finish, raising=True) + + # Create an Agent + llm = LLM(model="gpt-4o-mini", api_key=SecretStr("test")) + agent = Agent(llm=llm, tools=[]) + + # Create conversation via factory with hook_config + workspace = RemoteWorkspace( + host=server_env["host"], working_dir="/tmp/workspace/project" + ) + conv: RemoteConversation = Conversation( + agent=agent, + workspace=workspace, + hook_config=hook_config, + ) + + # Verify the conversation was created successfully + assert conv._id is not None + + # Send a message and run - this triggers the finish tool call + conv.send_message("Complete the task") + conv.run() + + # Wait for events to be received and check for HookExecutionEvents + found_post_tool_use_hook = False + found_stop_hook = False + events: list[Event] = [] + + for attempt in range(50): # up to ~5s + events = list(conv.state.events) + for e in events: + if isinstance(e, HookExecutionEvent): + if e.hook_event_type == "PostToolUse": + found_post_tool_use_hook = True + # Verify hook executed successfully + assert e.success is True + assert e.blocked is False + assert e.exit_code == 0 + assert str(post_tool_hook) in e.hook_command + elif e.hook_event_type == "Stop": + found_stop_hook = True + # Verify hook executed successfully + assert e.success is True + assert e.blocked is False + assert e.exit_code == 0 + assert str(stop_hook) in e.hook_command + + if found_post_tool_use_hook and found_stop_hook: + break + time.sleep(0.1) + + # Assert both hooks were executed and their events were received + assert found_post_tool_use_hook, ( + "Expected HookExecutionEvent for PostToolUse hook. " + f"Events received: {[type(e).__name__ for e in events]}" + ) + assert found_stop_hook, ( + "Expected HookExecutionEvent for Stop hook. " + f"Events received: {[type(e).__name__ for e in events]}" + ) + + # Verify state transitions occurred (proves the conversation ran successfully) + state = conv.state + assert state.execution_status.value in {"finished", "idle", "running"} + + conv.close() + + def test_subagent_definitions_forwarded_to_server(server_env, patched_llm): """Agent definitions registered on the client survive the HTTP roundtrip. diff --git a/tests/sdk/hooks/test_integration.py b/tests/sdk/hooks/test_integration.py index 36e76b2031..5b1a7feea5 100644 --- a/tests/sdk/hooks/test_integration.py +++ b/tests/sdk/hooks/test_integration.py @@ -3,7 +3,7 @@ import pytest from openhands.sdk.conversation.state import ConversationState -from openhands.sdk.event import ActionEvent, MessageEvent +from openhands.sdk.event import ActionEvent, HookExecutionEvent, MessageEvent from openhands.sdk.hooks.config import HookConfig from openhands.sdk.hooks.conversation_hooks import ( HookEventProcessor, @@ -603,10 +603,10 @@ def capture_callback(event): processor.on_event(original_event) - # Check that the callback received a modified event - assert len(processed_events) == 1 - processed_event = processed_events[0] - assert isinstance(processed_event, MessageEvent) + # Filter for MessageEvent (excluding HookExecutionEvent) + message_events = [e for e in processed_events if isinstance(e, MessageEvent)] + assert len(message_events) == 1 + processed_event = message_events[0] # The extended_content should contain the hook's additional context assert len(processed_event.extended_content) == 1 @@ -653,8 +653,10 @@ def capture_callback(event): processor.on_event(original_event) - # Get the LLM message from the processed event - processed_event = processed_events[0] + # Filter for MessageEvent (excluding HookExecutionEvent) + message_events = [e for e in processed_events if isinstance(e, MessageEvent)] + assert len(message_events) == 1 + processed_event = message_events[0] llm_message = processed_event.to_llm_message() # The content should include both original message and hook context @@ -707,7 +709,10 @@ def capture_callback(event): processor.on_event(original_event) - processed_event = processed_events[0] + # Filter for MessageEvent (excluding HookExecutionEvent) + message_events = [e for e in processed_events if isinstance(e, MessageEvent)] + assert len(message_events) == 1 + processed_event = message_events[0] # Both existing and hook context should be present assert len(processed_event.extended_content) == 2 @@ -941,3 +946,256 @@ def mock_step(self, conversation, on_event, on_token=None): ) ] assert len(feedback_messages) == 1, "Feedback message should be injected once" + + +class TestHookExecutionEventEmission: + """Tests for HookExecutionEvent emission during hook execution.""" + + @pytest.fixture + def mock_conversation_state(self, tmp_path): + """Create a mock conversation state using the factory method.""" + import uuid + + from pydantic import SecretStr + + from openhands.sdk.agent import Agent + from openhands.sdk.llm import LLM + from openhands.sdk.workspace import LocalWorkspace + + llm = LLM(model="test-model", api_key=SecretStr("test-key")) + agent = Agent(llm=llm, tools=[]) + workspace = LocalWorkspace(working_dir=str(tmp_path)) + + return ConversationState.create( + id=uuid.uuid4(), + agent=agent, + workspace=workspace, + persistence_dir=None, + ) + + def test_hook_execution_event_emitted_for_user_prompt_submit( + self, tmp_path, mock_conversation_state + ): + """Test that HookExecutionEvent is emitted when UserPromptSubmit hooks run.""" + script = tmp_path / "user_hook.sh" + script.write_text('#!/bin/bash\necho \'{"decision": "allow"}\'\nexit 0') + script.chmod(0o755) + + config = HookConfig.from_dict( + { + "hooks": { + "UserPromptSubmit": [ + {"hooks": [{"type": "command", "command": str(script)}]} + ] + } + } + ) + + manager = HookManager(config=config, working_dir=str(tmp_path)) + processed_events = [] + + def capture_callback(event): + processed_events.append(event) + + processor = HookEventProcessor( + hook_manager=manager, + original_callback=capture_callback, + emit_hook_events=True, + ) + processor.set_conversation_state(mock_conversation_state) + + original_event = MessageEvent( + source="user", + llm_message=Message(role="user", content=[TextContent(text="Hello")]), + ) + + processor.on_event(original_event) + + # Should have both HookExecutionEvent and MessageEvent + hook_events = [e for e in processed_events if isinstance(e, HookExecutionEvent)] + message_events = [e for e in processed_events if isinstance(e, MessageEvent)] + + assert len(hook_events) == 1 + assert len(message_events) == 1 + + hook_event = hook_events[0] + assert hook_event.hook_event_type == "UserPromptSubmit" + assert hook_event.hook_command == str(script) + assert hook_event.success is True + assert hook_event.blocked is False + assert hook_event.exit_code == 0 + assert hook_event.source == "hook" + + def test_hook_execution_event_not_emitted_when_disabled( + self, tmp_path, mock_conversation_state + ): + """Test that HookExecutionEvent is not emitted when emit_hook_events=False.""" + script = tmp_path / "user_hook.sh" + script.write_text('#!/bin/bash\necho \'{"decision": "allow"}\'\nexit 0') + script.chmod(0o755) + + config = HookConfig.from_dict( + { + "hooks": { + "UserPromptSubmit": [ + {"hooks": [{"type": "command", "command": str(script)}]} + ] + } + } + ) + + manager = HookManager(config=config, working_dir=str(tmp_path)) + processed_events = [] + + def capture_callback(event): + processed_events.append(event) + + processor = HookEventProcessor( + hook_manager=manager, + original_callback=capture_callback, + emit_hook_events=False, # Disabled + ) + processor.set_conversation_state(mock_conversation_state) + + original_event = MessageEvent( + source="user", + llm_message=Message(role="user", content=[TextContent(text="Hello")]), + ) + + processor.on_event(original_event) + + # Should only have MessageEvent, no HookExecutionEvent + hook_events = [e for e in processed_events if isinstance(e, HookExecutionEvent)] + message_events = [e for e in processed_events if isinstance(e, MessageEvent)] + + assert len(hook_events) == 0 + assert len(message_events) == 1 + + def test_hook_execution_event_captures_blocking( + self, tmp_path, mock_conversation_state + ): + """Test that HookExecutionEvent captures blocking status correctly.""" + script = tmp_path / "block_hook.sh" + script.write_text( + '#!/bin/bash\necho \'{"decision": "deny", "reason": "Blocked!"}\'\nexit 2' + ) + script.chmod(0o755) + + config = HookConfig.from_dict( + { + "hooks": { + "UserPromptSubmit": [ + {"hooks": [{"type": "command", "command": str(script)}]} + ] + } + } + ) + + manager = HookManager(config=config, working_dir=str(tmp_path)) + processed_events = [] + + def capture_callback(event): + processed_events.append(event) + + processor = HookEventProcessor( + hook_manager=manager, + original_callback=capture_callback, + emit_hook_events=True, + ) + processor.set_conversation_state(mock_conversation_state) + + original_event = MessageEvent( + source="user", + llm_message=Message(role="user", content=[TextContent(text="Hello")]), + ) + + processor.on_event(original_event) + + hook_events = [e for e in processed_events if isinstance(e, HookExecutionEvent)] + assert len(hook_events) == 1 + + hook_event = hook_events[0] + assert hook_event.blocked is True + assert hook_event.reason == "Blocked!" + assert hook_event.exit_code == 2 + + def test_hook_execution_event_emitted_for_session_start( + self, tmp_path, mock_conversation_state + ): + """Test that HookExecutionEvent is emitted for SessionStart hooks.""" + script = tmp_path / "session_start.sh" + script.write_text("#!/bin/bash\necho 'Session started'\nexit 0") + script.chmod(0o755) + + config = HookConfig.from_dict( + { + "hooks": { + "SessionStart": [ + {"hooks": [{"type": "command", "command": str(script)}]} + ] + } + } + ) + + manager = HookManager(config=config, working_dir=str(tmp_path)) + processed_events = [] + + def capture_callback(event): + processed_events.append(event) + + processor = HookEventProcessor( + hook_manager=manager, + original_callback=capture_callback, + emit_hook_events=True, + ) + processor.set_conversation_state(mock_conversation_state) + + processor.run_session_start() + + hook_events = [e for e in processed_events if isinstance(e, HookExecutionEvent)] + assert len(hook_events) == 1 + + hook_event = hook_events[0] + assert hook_event.hook_event_type == "SessionStart" + assert hook_event.success is True + + def test_hook_execution_event_emitted_for_stop( + self, tmp_path, mock_conversation_state + ): + """Test that HookExecutionEvent is emitted for Stop hooks.""" + script = tmp_path / "stop_hook.sh" + script.write_text('#!/bin/bash\necho \'{"decision": "allow"}\'\nexit 0') + script.chmod(0o755) + + config = HookConfig.from_dict( + { + "hooks": { + "Stop": [{"hooks": [{"type": "command", "command": str(script)}]}] + } + } + ) + + manager = HookManager(config=config, working_dir=str(tmp_path)) + processed_events = [] + + def capture_callback(event): + processed_events.append(event) + + processor = HookEventProcessor( + hook_manager=manager, + original_callback=capture_callback, + emit_hook_events=True, + ) + processor.set_conversation_state(mock_conversation_state) + + should_stop, _ = processor.run_stop(reason="finish") + + assert should_stop is True + + hook_events = [e for e in processed_events if isinstance(e, HookExecutionEvent)] + assert len(hook_events) == 1 + + hook_event = hook_events[0] + assert hook_event.hook_event_type == "Stop" + assert hook_event.success is True + assert hook_event.hook_input == {"reason": "finish"}