diff --git a/libs/langchain_v1/langchain/agents/middleware/human_in_the_loop.py b/libs/langchain_v1/langchain/agents/middleware/human_in_the_loop.py
index cc1a4f2df3fd5..394603b806cba 100644
--- a/libs/langchain_v1/langchain/agents/middleware/human_in_the_loop.py
+++ b/libs/langchain_v1/langchain/agents/middleware/human_in_the_loop.py
@@ -1,8 +1,9 @@
 """Human in the loop middleware."""
 
+import json
 from typing import Any, Literal, Protocol
 
-from langchain_core.messages import AIMessage, ToolCall, ToolMessage
+from langchain_core.messages import AIMessage, HumanMessage, ToolCall, ToolMessage
 from langgraph.runtime import Runtime
 from langgraph.types import interrupt
 from typing_extensions import NotRequired, TypedDict
@@ -157,7 +158,111 @@ def format_tool_description(
 
 
 class HumanInTheLoopMiddleware(AgentMiddleware):
-    """Human in the loop middleware."""
+    """Human in the loop middleware.
+
+    This middleware allows human review and editing of tool calls before execution.
+
+    Design Note - Message Ordering for OpenAI Compatibility:
+        When a user edits a tool call, the edit notification is embedded in the
+        AIMessage.content rather than created as a separate message. This design
+        choice ensures compatibility with OpenAI's strict message ordering rule:
+        AIMessage with tool_calls must be immediately followed by ToolMessage.
+
+        The embedded notifications use clear visual separators (e.g., "=" lines)
+        and explicit language ("SYSTEM NOTIFICATION - NOT AI RESPONSE") to minimize
+        semantic confusion and help the model distinguish between its own output
+        and framework-generated metadata.
+
+        For optimal results, provide appropriate system prompts to guide the model's
+        behavior. Use `get_recommended_system_prompt()` to generate recommended
+        prompts for your LLM provider.
+
+    Example:
+        ```python
+        system_prompt = HumanInTheLoopMiddleware.get_recommended_system_prompt("openai")
+        agent = create_agent(
+            model="gpt-4",
+            tools=[...],
+            middleware=[HumanInTheLoopMiddleware(...)],
+            system_prompt=system_prompt,
+        )
+        ```
+
+    Future Enhancement:
+        A provider-specific message adapter could generate optimal message formats
+        for each LLM provider (embedded for OpenAI, separate messages for others).
+        This would provide the best of both worlds: API compatibility and semantic
+        clarity.
+    """
+
+    @staticmethod
+    def get_recommended_system_prompt(provider: str = "openai") -> str:
+        """Get recommended system prompt for HITL middleware.
+
+        This helper generates system prompts that help models correctly interpret
+        embedded edit notifications and avoid semantic confusion.
+
+        Args:
+            provider: The LLM provider ("openai", "anthropic", "groq", "google", etc.)
+
+        Returns:
+            Recommended system prompt to guide model behavior with HITL middleware.
+
+        Example:
+            ```python
+            system_prompt = HumanInTheLoopMiddleware.get_recommended_system_prompt("openai")
+            agent = create_agent(
+                model="gpt-4",
+                tools=[...],
+                middleware=[HumanInTheLoopMiddleware(...)],
+                system_prompt=system_prompt,
+            )
+            ```
+        """
+        base_prompt = """You are a helpful assistant.
+
+CRITICAL INSTRUCTIONS FOR SYSTEM NOTIFICATIONS:
+1. You may see [SYSTEM NOTIFICATION] sections in messages.
+2. These are NOT your words - they are framework-generated metadata.
+3. DO NOT reference system notifications as if you said them.
+4. Examples of INCORRECT responses:
+   ❌ "As I mentioned earlier, the user edited..."
+   ❌ "According to the system notification..."
+   ❌ "I noted that the parameters were modified..."
+5. Examples of CORRECT responses:
+   ✅ "The task has been completed successfully."
+   ✅ "The file has been written to /path/to/file."
+6. Focus on tool execution results, not on system metadata."""
+
+        provider_specific = {
+            "openai": """
+
+TOOL EXECUTION RULES:
+- When you see a ToolMessage, the tool has ALREADY been executed.
+- Do NOT execute the same tool again.
+- Report the result directly to the user.""",
+            "anthropic": """
+
+TOOL EXECUTION RULES:
+- When you see a ToolMessage, the tool has ALREADY been executed.
+- Do NOT execute the same tool again.
+- Provide a clear summary of the result.""",
+            "groq": """
+
+TOOL EXECUTION RULES:
+- When you see a ToolMessage, the tool has ALREADY been executed.
+- Do NOT execute the same tool again.
+- Do NOT attempt to re-execute with different parameters.
+- Report only the actual execution result.""",
+            "google": """
+
+TOOL EXECUTION RULES:
+- When you see a ToolMessage, the tool has ALREADY been executed.
+- Do NOT execute the same tool again.
+- Summarize the result for the user.""",
+        }
+
+        return base_prompt + provider_specific.get(provider.lower(), provider_specific["openai"])
 
     def __init__(
         self,
@@ -194,6 +299,53 @@ def __init__(
             resolved_configs[tool_name] = tool_config
         self.interrupt_on = resolved_configs
         self.description_prefix = description_prefix
+        # Track edits to provide context after tool execution
+        self._pending_edit_contexts: dict[str, dict[str, Any]] = {}
+
+    def before_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | None:  # noqa: ARG002
+        """Inject context messages after tool execution for edited tool calls."""
+        if not self._pending_edit_contexts:
+            return None
+
+        messages = state["messages"]
+        if not messages:
+            return None
+
+        # Check recent messages for ToolMessages that correspond to edited tool calls
+        context_messages: list[HumanMessage] = []
+        completed_edits: list[str] = []
+
+        # Look through recent messages for ToolMessages
+        for msg in reversed(messages):
+            if isinstance(msg, ToolMessage):
+                tool_call_id = msg.tool_call_id
+                if tool_call_id in self._pending_edit_contexts:
+                    # Found a ToolMessage for an edited tool call
+                    edit_info = self._pending_edit_contexts[tool_call_id]
+                    args_json = json.dumps(edit_info["args"], indent=2)
+                    context_msg = HumanMessage(
+                        content=(
+                            f"[IMPORTANT - DO NOT IGNORE] The tool '{edit_info['name']}' "
+                            f"has ALREADY BEEN EXECUTED SUCCESSFULLY with edited parameters: "
+                            f"{args_json}. The task is COMPLETE. DO NOT execute this tool again. "
+                            f"Your response must reference the edited parameters shown above, "
+                            f"NOT the user's original request."
+                        ),
+                    )
+                    context_messages.append(context_msg)
+                    completed_edits.append(tool_call_id)
+            # Only check recent messages (stop at the last AIMessage with tool calls)
+            elif isinstance(msg, AIMessage) and msg.tool_calls:
+                break
+
+        # Clear completed edits from pending contexts
+        for tool_call_id in completed_edits:
+            del self._pending_edit_contexts[tool_call_id]
+
+        if context_messages:
+            return {"messages": context_messages}
+
+        return None
 
     def _create_action_and_config(
         self,
@@ -231,12 +383,67 @@ def _create_action_and_config(
 
         return action_request, review_config
 
+    def _build_updated_content(
+        self,
+        original_content: str | list[str | dict[Any, Any]],
+        edit_info: dict[str, dict[str, Any]],
+    ) -> str | list[str | dict[Any, Any]]:
+        """Build updated AIMessage content with embedded edit information.
+
+        For OpenAI API compatibility, edit notifications are embedded in
+        AIMessage.content rather than separate messages. This ensures the
+        message ordering rule (AIMessage with tool_calls must be immediately
+        followed by ToolMessage) is respected.
+
+        The notifications use clear visual separators and explicit language
+        to minimize semantic confusion.
+
+        Args:
+            original_content: The original AIMessage content
+            edit_info: Dictionary mapping tool_call_id to edit information
+
+        Returns:
+            Updated content with edit notices embedded
+        """
+        if not edit_info:
+            return original_content
+
+        # Build edit context messages with clear visual separation
+        separator = "=" * 60
+        edit_notices = []
+
+        for info in edit_info.values():
+            args_json = json.dumps(info["args"], indent=2)
+            notice = f"""{separator}
+[SYSTEM NOTIFICATION - NOT AI RESPONSE]
+This is framework-generated metadata. Do not attribute to AI.
+
+User edited the tool call: '{info["name"]}'
+Modified parameters:
+{args_json}
+
+⚠️ IMPORTANT: Do not reference this notification in your response.
+   Report only the tool execution results to the user.
+{separator}"""
+            edit_notices.append(notice)
+
+        edit_context = "\n\n".join(edit_notices)
+
+        # For now, only handle string content. If content is a list, return as-is
+        # (this is a rare case and embedding edit info in list content is complex)
+        if isinstance(original_content, str):
+            if original_content:
+                return f"{original_content}\n\n{edit_context}"
+            return edit_context
+        # If content is a list, return original (could be enhanced in future)
+        return original_content
+
     def _process_decision(
         self,
         decision: Decision,
         tool_call: ToolCall,
         config: InterruptOnConfig,
-    ) -> tuple[ToolCall | None, ToolMessage | None]:
+    ) -> tuple[ToolCall | None, ToolMessage | HumanMessage | None]:
         """Process a single decision and return the revised tool call and optional tool message."""
 
         allowed_decisions = config["allowed_decisions"]
@@ -244,15 +451,17 @@ def _process_decision(
             return tool_call, None
         if decision["type"] == "edit" and "edit" in allowed_decisions:
             edited_action = decision["edited_action"]
-            return (
-                ToolCall(
-                    type="tool_call",
-                    name=edited_action["name"],
-                    args=edited_action["args"],
-                    id=tool_call["id"],
-                ),
-                None,
+            edited_tool_call = ToolCall(
+                type="tool_call",
+                name=edited_action["name"],
+                args=edited_action["args"],
+                id=tool_call["id"],
             )
+            # Don't create a separate HumanMessage here - it would break OpenAI's
+            # message ordering rule (AIMessage with tool_calls must be immediately
+            # followed by ToolMessage). Instead, we'll embed edit info in AIMessage.content
+            # in the after_model method.
+            return edited_tool_call, None
         if decision["type"] == "reject" and "reject" in allowed_decisions:
             # Create a tool message with the human's text response
             content = decision.get("message") or (
@@ -298,7 +507,9 @@ def after_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | N
 
         # Process all tool calls that require interrupts
         revised_tool_calls: list[ToolCall] = auto_approved_tool_calls.copy()
-        artificial_tool_messages: list[ToolMessage] = []
+        artificial_tool_messages: list[ToolMessage | HumanMessage] = []
+        # Track edits for post-execution context messages
+        edit_info: dict[str, dict[str, Any]] = {}
 
         # Create action requests and review configs for all tools that need approval
         action_requests: list[ActionRequest] = []
@@ -342,10 +553,44 @@ def after_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | N
             revised_tool_call, tool_message = self._process_decision(decision, tool_call, config)
             if revised_tool_call:
                 revised_tool_calls.append(revised_tool_call)
+                # Track if this was an edit for post-execution context
+                if decision["type"] == "edit":
+                    tool_call_id = revised_tool_call["id"]
+                    if tool_call_id:  # Type guard for mypy
+                        edit_info[tool_call_id] = {
+                            "name": revised_tool_call["name"],
+                            "args": revised_tool_call["args"],
+                        }
             if tool_message:
                 artificial_tool_messages.append(tool_message)
 
-        # Update the AI message to only include approved tool calls
-        last_ai_msg.tool_calls = revised_tool_calls
+        # Create a new AIMessage with updated tool calls instead of mutating the original
+        # This ensures LangGraph's state management properly persists the changes
+        # (fixes Issues #33787 and #33784 where edits weren't persisted correctly)
+        #
+        # CRITICAL: We must ensure last_ai_msg has an ID for the add_messages reducer
+        # to properly replace it (not append a duplicate). If no ID exists, generate one.
+        if last_ai_msg.id is None:
+            import uuid
+
+            last_ai_msg.id = str(uuid.uuid4())
+
+        # Embed edit information in AIMessage.content to comply with OpenAI's message
+        # ordering rule (AIMessage with tool_calls must be immediately followed by ToolMessage)
+        updated_content = self._build_updated_content(last_ai_msg.content, edit_info)
+
+        updated_ai_msg = AIMessage(
+            content=updated_content,
+            tool_calls=revised_tool_calls,
+            id=last_ai_msg.id,  # Same ID ensures replacement, not appending
+            name=last_ai_msg.name,
+            additional_kwargs=last_ai_msg.additional_kwargs,
+            response_metadata=last_ai_msg.response_metadata,
+            usage_metadata=last_ai_msg.usage_metadata,
+        )
+
+        # Store edit info for use in before_model to inject post-execution context
+        if edit_info:
+            self._pending_edit_contexts.update(edit_info)
 
-        return {"messages": [last_ai_msg, *artificial_tool_messages]}
+        return {"messages": [updated_ai_msg, *artificial_tool_messages]}
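Reviewer note on the mechanism: the fix above leans on LangGraph's `add_messages` reducer, which replaces an existing message when an incoming message reuses its `id`, instead of appending a duplicate. Below is a minimal sketch of that behavior, assuming `add_messages` is importable from `langgraph.graph.message`; the message and tool-call IDs are illustrative only.

```python
from langchain_core.messages import AIMessage
from langgraph.graph.message import add_messages

# Existing state: the model proposed a tool call with the original args.
original = AIMessage(
    content="",
    id="msg-1",  # illustrative message ID
    tool_calls=[{"type": "tool_call", "id": "call_001",
                 "name": "send_email_tool",
                 "args": {"to": "alice@example.com"}}],
)

# The middleware returns a NEW message that reuses the same ID with edited args.
edited = AIMessage(
    content="[SYSTEM NOTIFICATION - NOT AI RESPONSE] ...",
    id="msg-1",  # same ID -> the reducer replaces rather than appends
    tool_calls=[{"type": "tool_call", "id": "call_001",
                 "name": "send_email_tool",
                 "args": {"to": "alice@test.com"}}],
)

merged = add_messages([original], [edited])
assert len(merged) == 1  # replaced in place, no duplicate
assert merged[0].tool_calls[0]["args"]["to"] == "alice@test.com"
```

This is why `after_model` now constructs `updated_ai_msg` with `id=last_ai_msg.id` rather than mutating `last_ai_msg.tool_calls` in place: the reducer sees the matching ID and swaps the stored message, so the next model call reads the edited tool calls.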
diff --git a/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py b/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py
index 02fa96e6b65af..a8f245cab527d 100644
--- a/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py
+++ b/libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py
@@ -565,9 +565,13 @@ def mock_edit(requests):
     result = middleware.after_model(state, None)
     assert result is not None
     assert "messages" in result
+    # Should have only AIMessage (edit context embedded to comply with OpenAI)
     assert len(result["messages"]) == 1
-    assert result["messages"][0].tool_calls[0]["args"] == {"input": "edited"}
-    assert result["messages"][0].tool_calls[0]["id"] == "1"  # ID should be preserved
+    updated_ai_msg = result["messages"][0]
+    assert updated_ai_msg.tool_calls[0]["args"] == {"input": "edited"}
+    assert updated_ai_msg.tool_calls[0]["id"] == "1"  # ID should be preserved
+    # Check edit context is embedded in AIMessage.content
+    assert "edited" in updated_ai_msg.content.lower()
 
 
 def test_human_in_the_loop_middleware_single_tool_response() -> None:
@@ -594,10 +598,9 @@ def mock_response(requests):
     assert "messages" in result
     assert len(result["messages"]) == 2
     assert isinstance(result["messages"][0], AIMessage)
-    assert isinstance(result["messages"][1], ToolMessage)
+    assert isinstance(result["messages"][1], ToolMessage)  # Reject creates ToolMessage
     assert result["messages"][1].content == "Custom response message"
     assert result["messages"][1].name == "test_tool"
-    assert result["messages"][1].tool_call_id == "1"
 
 
 def test_human_in_the_loop_middleware_multiple_tools_mixed_responses() -> None:
@@ -695,6 +698,7 @@ def mock_edit_responses(requests):
     result = middleware.after_model(state, None)
     assert result is not None
     assert "messages" in result
+    # Should have only 1 AIMessage (edit context embedded in content to comply with OpenAI)
     assert len(result["messages"]) == 1
 
     updated_ai_message = result["messages"][0]
@@ -703,6 +707,13 @@ def mock_edit_responses(requests):
     assert updated_ai_message.tool_calls[1]["args"] == {"location": "New York"}
     assert updated_ai_message.tool_calls[1]["id"] == "2"  # ID preserved
 
+    # Check that edit context is embedded in AIMessage.content (not separate HumanMessages)
+    # This ensures compliance with OpenAI's message ordering rule
+    assert "edited" in updated_ai_message.content.lower()
+    assert "get_forecast" in updated_ai_message.content
+    assert "get_temperature" in updated_ai_message.content
+    assert "New York" in updated_ai_message.content
+
 
 def test_human_in_the_loop_middleware_edit_with_modified_args() -> None:
     """Test HumanInTheLoopMiddleware with edit action that includes modified args."""
@@ -737,13 +748,17 @@ def mock_edit_with_args(requests):
     result = middleware.after_model(state, None)
     assert result is not None
     assert "messages" in result
-    assert len(result["messages"]) == 1
+    assert len(result["messages"]) == 1  # Only AIMessage (edit context embedded)
 
-    # Should have modified args
+    # The AIMessage should have modified args
     updated_ai_message = result["messages"][0]
     assert updated_ai_message.tool_calls[0]["args"] == {"input": "modified"}
     assert updated_ai_message.tool_calls[0]["id"] == "1"  # ID preserved
 
+    # Edit context should be embedded in AIMessage.content
+    assert "edited" in updated_ai_message.content.lower()
+    assert "modified" in updated_ai_message.content
+
 
 def test_human_in_the_loop_middleware_unknown_response_type() -> None:
     """Test HumanInTheLoopMiddleware with unknown response type."""
@@ -921,8 +936,12 @@ def test_human_in_the_loop_middleware_boolean_configs() -> None:
     result = middleware.after_model(state, None)
     assert result is not None
     assert "messages" in result
+    # Should have only AIMessage (edit context embedded)
     assert len(result["messages"]) == 1
-    assert result["messages"][0].tool_calls[0]["args"] == {"input": "edited"}
+    updated_ai_msg = result["messages"][0]
+    assert updated_ai_msg.tool_calls[0]["args"] == {"input": "edited"}
+    # Check edit context is embedded in AIMessage.content
+    assert "edited" in updated_ai_msg.content.lower()
 
     middleware = HumanInTheLoopMiddleware(interrupt_on={"test_tool": False})
 
@@ -1024,6 +1043,192 @@ def mock_capture_requests(request):
     assert captured_request["action_requests"][1]["description"] == "Static description"
 
 
+def test_human_in_the_loop_middleware_edit_actually_executes_with_edited_args(
+    sync_checkpointer: BaseCheckpointSaver,
+) -> None:
+    """Test that HITL edit decision properly replaces original tool call (Issues #33787, #33784).
+
+    This test reproduces the bug where after editing a tool call:
+    1. The edited tool executes correctly
+    2. BUT the agent's next model call sees the ORIGINAL (unedited) tool_calls in the AIMessage
+    3. This causes the agent to re-attempt the original tool call
+
+    The bug happens because HumanInTheLoopMiddleware directly modifies the AIMessage
+    object's tool_calls attribute, but LangGraph's state management may not properly
+    persist this mutation, causing subsequent reads to see the original values.
+    """
+    # Track what arguments tools were actually called with
+    send_email_calls = []
+
+    @tool
+    def send_email_tool(to: str, subject: str, body: str) -> str:
+        """Send an email to a recipient.
+
+        Args:
+            to: Email address of the recipient
+            subject: Subject line of the email
+            body: Body content of the email
+        """
+        send_email_calls.append({"to": to, "subject": subject, "body": body})
+        return f"Email sent successfully to {to} with subject: {subject}"
+
+    # Create agent with HITL middleware
+    # Simulate the exact scenario from issue #33787:
+    # 1. First model call: agent wants to send email to alice@example.com
+    # 2. After edit (to alice@test.com), the model should see the edited params
+    #    and not re-attempt the original call
+    agent = create_agent(
+        model=FakeToolCallingModel(
+            tool_calls=[
+                # First call: agent proposes sending to alice@example.com
+                [
+                    {
+                        "args": {"to": "alice@example.com", "subject": "Test", "body": "Hello"},
+                        "id": "call_001",
+                        "name": "send_email_tool",
+                    }
+                ],
+                # Second call (after edited tool execution): Agent should see the EDITED
+                # tool call in message history. If model still had original params in its
+                # context, it might try again. But with the fix, it sees edited params.
+                # For testing, we configure model to not make additional tool calls
+                # (empty list) to verify the agent loop completes successfully.
+                [],  # No more tools - task completed
+            ]
+        ),
+        tools=[send_email_tool],
+        middleware=[
+            HumanInTheLoopMiddleware(
+                interrupt_on={
+                    "send_email_tool": {"allowed_decisions": ["approve", "edit", "reject"]}
+                }
+            )
+        ],
+        checkpointer=sync_checkpointer,
+    )
+
+    thread = {"configurable": {"thread_id": "test-hitl-bug-33787"}}
+
+    # === STEP 1: First invocation - should interrupt before sending email ===
+    result = agent.invoke(
+        {
+            "messages": [
+                HumanMessage(
+                    "Send an email to alice@example.com with subject 'Test' and body 'Hello'"
+                )
+            ]
+        },
+        thread,
+    )
+
+    # Verify we got an interrupt (email not sent yet)
+    assert len(send_email_calls) == 0, "Email should not have been sent yet"
+    last_ai_msg = result["messages"][-1]
+    assert isinstance(last_ai_msg, AIMessage)
+    assert len(last_ai_msg.tool_calls) == 1
+    assert last_ai_msg.tool_calls[0]["args"]["to"] == "alice@example.com"
+
+    # === STEP 2: Resume with edit decision - change recipient to alice@test.com ===
+    from langgraph.types import Command
+
+    result = agent.invoke(
+        Command(
+            resume={
+                "decisions": [
+                    {
+                        "type": "edit",
+                        "message": "Changing recipient to test address",
+                        "edited_action": Action(
+                            name="send_email_tool",
+                            args={
+                                "to": "alice@test.com",
+                                "subject": "this is a test",
+                                "body": "don't reply",
+                            },
+                        ),
+                    }
+                ]
+            }
+        ),
+        thread,
+    )
+
+    # CRITICAL ASSERTION 1: Email should be sent to EDITED address (alice@test.com)
+    assert len(send_email_calls) == 1, "Exactly one email should have been sent"
+    assert send_email_calls[0]["to"] == "alice@test.com", (
+        f"Email should be sent to edited address 'alice@test.com', got '{send_email_calls[0]['to']}'"
+    )
+    assert send_email_calls[0]["subject"] == "this is a test"
+
+    # Verify there's context about the edit AND the actual tool execution result
+    human_messages = [m for m in result["messages"] if isinstance(m, HumanMessage)]
+    tool_messages = [m for m in result["messages"] if isinstance(m, ToolMessage)]
+    ai_messages = [m for m in result["messages"] if isinstance(m, AIMessage)]
+
+    # Check for context messages:
+    # 1. Pre-execution: Embedded in AIMessage.content (to comply with OpenAI message ordering)
+    # 2. Post-execution: Separate HumanMessage with "[IMPORTANT - DO NOT IGNORE]"
+    context_messages = [m for m in human_messages if "edited" in m.content.lower()]
+    assert len(context_messages) == 1, (
+        f"Should have exactly one edit context HumanMessage (post-execution only), "
+        f"but got {len(context_messages)}"
+    )
+    # Post-execution message should have strong language and mention edited params
+    post_exec_msg = context_messages[0]
+    assert "ALREADY BEEN EXECUTED" in post_exec_msg.content
+    assert "alice@test.com" in post_exec_msg.content
+
+    # Check that pre-execution context is embedded in AIMessage.content
+    # (This ensures OpenAI's message ordering rule is respected)
+    ai_msg_with_tool_calls = None
+    for msg in ai_messages:
+        if msg.tool_calls:
+            ai_msg_with_tool_calls = msg
+            break
+    assert ai_msg_with_tool_calls is not None, "Should find AIMessage with tool calls"
+    assert "edited" in ai_msg_with_tool_calls.content.lower(), (
+        "Pre-execution edit context should be embedded in AIMessage.content"
+    )
+    assert "alice@test.com" in ai_msg_with_tool_calls.content
+
+    # Check for execution result (ToolMessage)
+    exec_messages = [m for m in tool_messages if "Email sent" in m.content]
+    assert len(exec_messages) == 1, "Should have exactly one tool execution result"
+    assert "alice@test.com" in exec_messages[0].content
+
+    # CRITICAL ASSERTION 2: Verify the AIMessage in history was properly updated
+    # This is the core fix - the AIMessage with original params should be replaced
+    # with the edited version so that subsequent model calls see the correct context
+    ai_msg_with_original_id = None
+    for msg in result["messages"]:
+        if isinstance(msg, AIMessage) and getattr(msg, "id", None) == "0":
+            ai_msg_with_original_id = msg
+            break
+
+    assert ai_msg_with_original_id is not None, "Should find the AIMessage that was edited"
+    assert len(ai_msg_with_original_id.tool_calls) == 1
+
+    # THE KEY ASSERTION: The AIMessage stored in state should have EDITED params
+    # Without the fix, this would still have alice@example.com due to mutation issues
+    assert ai_msg_with_original_id.tool_calls[0]["args"]["to"] == "alice@test.com", (
+        f"BUG DETECTED (Issue #33787): AIMessage in state still has original params "
+        f"'{ai_msg_with_original_id.tool_calls[0]['args']['to']}' instead of edited 'alice@test.com'. "
+        f"The middleware should have created a NEW AIMessage with edited params to replace the original."
+    )
+
+    # CRITICAL ASSERTION 3: Agent should not have re-executed original tool call
+    # Only the EDITED call should have been executed
+    assert len(send_email_calls) == 1, (
+        f"Expected exactly 1 email (the edited one), but {len(send_email_calls)} were sent. "
+        f"This suggests the agent re-attempted the original tool call."
+    )
+
+    # Final verification: the execution log confirms only edited params were used
+    assert send_email_calls[0]["to"] == "alice@test.com"
+    assert send_email_calls[0]["subject"] == "this is a test"
+    assert send_email_calls[0]["body"] == "don't reply"
+
+
 # Tests for SummarizationMiddleware
 def test_summarization_middleware_initialization() -> None:
     """Test SummarizationMiddleware initialization."""
diff --git a/libs/langchain_v1/uv.lock b/libs/langchain_v1/uv.lock
index 9f4c90b46a4da..ebd3ea685ace7 100644
--- a/libs/langchain_v1/uv.lock
+++ b/libs/langchain_v1/uv.lock
@@ -1922,7 +1922,7 @@ wheels = [
 
 [[package]]
 name = "langchain-openai"
-version = "1.0.1"
+version = "1.0.2"
 source = { editable = "../partners/openai" }
 dependencies = [
     { name = "langchain-core" },
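For reviewers who want to exercise the flow end to end, here is a condensed sketch of the scenario the new test covers. It is a sketch under assumptions, not part of the patch: the import paths, model name, tool, checkpointer, and thread ID are placeholders, and `edited_action` is shown as a plain mapping where the test constructs an `Action` object.

```python
from langchain.agents import create_agent  # assumed v1 entry point, as in the diff's docstring examples
from langchain.agents.middleware.human_in_the_loop import HumanInTheLoopMiddleware
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import InMemorySaver  # placeholder checkpointer
from langgraph.types import Command

agent = create_agent(
    model="gpt-4",  # placeholder model
    tools=[send_email_tool],  # placeholder: a tool like the test's send_email_tool
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={"send_email_tool": {"allowed_decisions": ["approve", "edit", "reject"]}}
        )
    ],
    checkpointer=InMemorySaver(),
)
thread = {"configurable": {"thread_id": "demo"}}

# Step 1: the run interrupts before the tool executes.
agent.invoke({"messages": [HumanMessage("Send the email to alice@example.com")]}, thread)

# Step 2: resume with an edit decision; the tool runs with the EDITED args and
# the middleware rewrites the stored AIMessage so later model calls see them.
agent.invoke(
    Command(
        resume={
            "decisions": [
                {
                    "type": "edit",
                    "edited_action": {  # the test passes an Action object here
                        "name": "send_email_tool",
                        "args": {"to": "alice@test.com", "subject": "this is a test", "body": "don't reply"},
                    },
                }
            ]
        }
    ),
    thread,
)
```

After step 2, the checkpointed history should contain the edited tool call in the AIMessage (same message ID, new args), the ToolMessage from the edited execution, and the post-execution HumanMessage context injected by `before_model`.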