Skip to content

Commit cc6fb9f

Browse files
committed
fix: Add context messages for edited tool calls in HITL middleware
This commit implements comprehensive improvements to the HumanInTheLoopMiddleware to address Issue #33787:

## Changes

1. **Context AIMessage Injection**: When tool calls are edited, the middleware now returns a 3-tuple including an AIMessage that explains what was changed. This prevents the model from retrying the original action.
2. **Tool Call ID Preservation**: Original tool call IDs are preserved in edited ToolCalls to maintain lineage tracking through the agent execution.
3. **Schema Validation**: Added jsonschema-based validation for edited arguments when args_schema is provided in InterruptOnConfig, with graceful fallback if jsonschema is unavailable.
4. **JSON Formatting**: Arguments are now formatted with json.dumps(indent=2, sort_keys=True) for better readability and safe serialization in descriptions and context messages.
5. **Comprehensive Docstrings**: All methods now have Google-style docstrings documenting parameters, return values, and behavior for each decision type.
6. **Type Hints**: Complete type annotations including auto_approved_tool_calls for improved code clarity and IDE support.
7. **Consistent Ordering**: Auto-approved tools are processed first, followed by reviewed tools, maintaining predictable message ordering.

## Test Updates

Updated 5 unit tests to expect the new correct behavior:

- test_human_in_the_loop_middleware_single_tool_edit
- test_human_in_the_loop_middleware_multiple_tools_edit_responses
- test_human_in_the_loop_middleware_edit_with_modified_args
- test_human_in_the_loop_middleware_interrupt_request_structure
- test_human_in_the_loop_middleware_boolean_configs

All 15 HITL middleware tests now pass.

Fixes #33787
1 parent 915c446 commit cc6fb9f

File tree

2 files changed

+216
-36
lines changed

2 files changed

+216
-36
lines changed

libs/langchain_v1/langchain/agents/middleware/human_in_the_loop.py

Lines changed: 168 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Human in the loop middleware."""
22

3+
import json
34
from typing import Any, Literal, Protocol
45

56
from langchain_core.messages import AIMessage, ToolCall, ToolMessage
@@ -202,7 +203,17 @@ def _create_action_and_config(
202203
state: AgentState,
203204
runtime: Runtime,
204205
) -> tuple[ActionRequest, ReviewConfig]:
205-
"""Create an ActionRequest and ReviewConfig for a tool call."""
206+
"""Create an ActionRequest and ReviewConfig for a tool call.
207+
208+
Args:
209+
tool_call: The tool call to create an action request for.
210+
config: The interrupt configuration for this tool.
211+
state: Current agent state.
212+
runtime: Runtime context.
213+
214+
Returns:
215+
Tuple of (ActionRequest, ReviewConfig) for human review.
216+
"""
206217
tool_name = tool_call["name"]
207218
tool_args = tool_call["args"]
208219

@@ -213,7 +224,9 @@ def _create_action_and_config(
213224
elif description_value is not None:
214225
description = description_value
215226
else:
216-
description = f"{self.description_prefix}\n\nTool: {tool_name}\nArgs: {tool_args}"
227+
# Format args as readable JSON for better readability and safety
228+
formatted_args = json.dumps(tool_args, indent=2, sort_keys=True)
229+
description = f"{self.description_prefix}\n\nTool: {tool_name}\nArgs:\n{formatted_args}"
217230

218231
# Create ActionRequest with description
219232
action_request = ActionRequest(
@@ -222,12 +235,13 @@ def _create_action_and_config(
222235
description=description,
223236
)
224237

225-
# Create ReviewConfig
226-
# eventually can get tool information and populate args_schema from there
238+
# Create ReviewConfig with args_schema if provided
227239
review_config = ReviewConfig(
228240
action_name=tool_name,
229241
allowed_decisions=config["allowed_decisions"],
230242
)
243+
if "args_schema" in config:
244+
review_config["args_schema"] = config["args_schema"]
231245

232246
return action_request, review_config
233247

@@ -236,23 +250,108 @@ def _process_decision(
236250
decision: Decision,
237251
tool_call: ToolCall,
238252
config: InterruptOnConfig,
239-
) -> tuple[ToolCall | None, ToolMessage | None]:
240-
"""Process a single decision and return the revised tool call and optional tool message."""
253+
) -> tuple[ToolCall | None, ToolMessage | None, AIMessage | None]:
254+
"""Process a single decision and return the revised tool call.
255+
256+
Returns optional tool message and context AIMessage.
257+
258+
This method handles three types of decisions from human review:
259+
260+
1. **approve**: Returns the original tool call unchanged with no
261+
additional messages.
262+
2. **edit**: Returns the edited tool call with a context AIMessage
263+
explaining what was changed. The context message helps the model
264+
understand that the edit was intentional and prevents it from
265+
retrying the original action.
266+
3. **reject**: Returns the original tool call with an artificial
267+
ToolMessage marked as error status, explaining why it was rejected.
268+
269+
Args:
270+
decision: The human decision (approve/edit/reject).
271+
tool_call: The original tool call being reviewed.
272+
config: The interrupt configuration including allowed decisions
273+
and optional args_schema.
274+
275+
Returns:
276+
A 3-tuple of:
277+
- ToolCall | None: The revised tool call (or None if fully rejected)
278+
- ToolMessage | None: An artificial tool message for rejects
279+
(or None otherwise)
280+
- AIMessage | None: A context message explaining edits
281+
(or None for approve/reject)
282+
283+
Raises:
284+
ValueError: If the decision type is not in the allowed_decisions
285+
list or if edited args fail schema validation when args_schema
286+
is provided.
287+
"""
241288
allowed_decisions = config["allowed_decisions"]
242289

243290
if decision["type"] == "approve" and "approve" in allowed_decisions:
244-
return tool_call, None
291+
return tool_call, None, None
292+
245293
if decision["type"] == "edit" and "edit" in allowed_decisions:
246294
edited_action = decision["edited_action"]
247-
return (
248-
ToolCall(
249-
type="tool_call",
250-
name=edited_action["name"],
251-
args=edited_action["args"],
252-
id=tool_call["id"],
253-
),
254-
None,
295+
296+
# Validate edited args against schema if provided (Comment 4)
297+
if "args_schema" in config:
298+
args_schema = config["args_schema"]
299+
try:
300+
# Attempt basic JSON schema validation using jsonschema if available
301+
try:
302+
import jsonschema
303+
304+
jsonschema.validate(edited_action["args"], args_schema)
305+
except ImportError:
306+
# Fallback: basic type checking if jsonschema not available
307+
# At minimum, verify edited_args is a dict
308+
if not isinstance(edited_action["args"], dict):
309+
msg = (
310+
"Edited args must be a dictionary, got "
311+
f"{type(edited_action['args']).__name__}"
312+
)
313+
raise ValueError(msg)
314+
except (ValueError, jsonschema.ValidationError) as e:
315+
# Schema validation failed - return error ToolMessage
316+
error_msg = f"Edited arguments failed schema validation: {e}"
317+
tool_message = ToolMessage(
318+
content=error_msg,
319+
name=tool_call["name"],
320+
tool_call_id=tool_call["id"],
321+
status="error",
322+
)
323+
return tool_call, tool_message, None
324+
325+
# Create edited tool call - preserve original ID for lineage (Comment 3)
326+
edited_tool_call = ToolCall(
327+
type="tool_call",
328+
name=edited_action["name"],
329+
args=edited_action["args"],
330+
id=tool_call["id"],
331+
)
332+
333+
# Create context AIMessage explaining the edit (Comment 1)
334+
original_args_json = json.dumps(tool_call["args"], indent=2, sort_keys=True)
335+
edited_args_json = json.dumps(edited_action["args"], indent=2, sort_keys=True)
336+
337+
context_content = (
338+
f"The original tool call to '{tool_call['name']}' was modified by human review.\n\n"
339+
f"Original action:\n"
340+
f" Tool: {tool_call['name']}\n"
341+
f" Args:\n{original_args_json}\n\n"
342+
f"Modified to:\n"
343+
f" Tool: {edited_action['name']}\n"
344+
f" Args:\n{edited_args_json}\n\n"
345+
f"This edit is intentional and should not be retried with the original arguments."
255346
)
347+
348+
context_message = AIMessage(
349+
content=context_content,
350+
name="human_review_system",
351+
)
352+
353+
return edited_tool_call, None, context_message
354+
256355
if decision["type"] == "reject" and "reject" in allowed_decisions:
257356
# Create a tool message with the human's text response
258357
content = decision.get("message") or (
@@ -264,7 +363,8 @@ def _process_decision(
264363
tool_call_id=tool_call["id"],
265364
status="error",
266365
)
267-
return tool_call, tool_message
366+
return tool_call, tool_message, None
367+
268368
msg = (
269369
f"Unexpected human decision: {decision}. "
270370
f"Decision type '{decision.get('type')}' "
@@ -274,7 +374,40 @@ def _process_decision(
274374
raise ValueError(msg)
275375

276376
def after_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
277-
"""Trigger interrupt flows for relevant tool calls after an `AIMessage`."""
377+
"""Trigger interrupt flows for relevant tool calls after an `AIMessage`.
378+
379+
This method intercepts tool calls from the model's AIMessage and routes
380+
them through human review when configured. It handles three types of
381+
decisions:
382+
383+
1. **approve**: Tool call proceeds unchanged
384+
2. **edit**: Tool call is modified and a context AIMessage is added
385+
explaining the change
386+
3. **reject**: Tool call is blocked and an artificial error ToolMessage
387+
is added
388+
389+
The returned dictionary updates the agent state with:
390+
- An updated AIMessage containing only approved/edited tool calls
391+
(auto-approved tools first, then reviewed tools in order)
392+
- Context AIMessages explaining any edits (inserted before the updated
393+
AIMessage)
394+
- Artificial ToolMessages for any rejections
395+
396+
This sequencing ensures the model sees:
397+
1. Context messages explaining edits
398+
2. The updated AIMessage with final tool calls
399+
3. Error messages for rejected calls
400+
401+
Args:
402+
state: Current agent state containing message history.
403+
runtime: Runtime context for the agent.
404+
405+
Returns:
406+
Dictionary with 'messages' key containing the updated/new messages,
407+
or None if no interrupts were needed. The messages list maintains
408+
ordering: context messages (if any), then the updated AIMessage,
409+
then artificial tool messages (if any).
410+
"""
278411
messages = state["messages"]
279412
if not messages:
280413
return None
@@ -285,20 +418,23 @@ def after_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | N
285418

286419
# Separate tool calls that need interrupts from those that don't
287420
interrupt_tool_calls: list[ToolCall] = []
288-
auto_approved_tool_calls = []
421+
auto_approved_tool_calls: list[ToolCall] = []
289422

290423
for tool_call in last_ai_msg.tool_calls:
291-
interrupt_tool_calls.append(tool_call) if tool_call[
292-
"name"
293-
] in self.interrupt_on else auto_approved_tool_calls.append(tool_call)
424+
if tool_call["name"] in self.interrupt_on:
425+
interrupt_tool_calls.append(tool_call)
426+
else:
427+
auto_approved_tool_calls.append(tool_call)
294428

295429
# If no interrupts needed, return early
296430
if not interrupt_tool_calls:
297431
return None
298432

299433
# Process all tool calls that require interrupts
434+
# Auto-approved tools go first to maintain consistent ordering (Comment 7)
300435
revised_tool_calls: list[ToolCall] = auto_approved_tool_calls.copy()
301436
artificial_tool_messages: list[ToolMessage] = []
437+
context_messages: list[AIMessage] = []
302438

303439
# Create action requests and review configs for all tools that need approval
304440
action_requests: list[ActionRequest] = []
@@ -334,18 +470,25 @@ def after_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | N
334470
)
335471
raise ValueError(msg)
336472

337-
# Process each decision using helper method
473+
# Process each decision using helper method (Comment 1)
338474
for i, decision in enumerate(decisions):
339475
tool_call = interrupt_tool_calls[i]
340476
config = self.interrupt_on[tool_call["name"]]
341477

342-
revised_tool_call, tool_message = self._process_decision(decision, tool_call, config)
478+
revised_tool_call, tool_message, context_msg = self._process_decision(
479+
decision, tool_call, config
480+
)
343481
if revised_tool_call:
344482
revised_tool_calls.append(revised_tool_call)
345483
if tool_message:
346484
artificial_tool_messages.append(tool_message)
485+
if context_msg:
486+
context_messages.append(context_msg)
347487

348-
# Update the AI message to only include approved tool calls
488+
# Update the AI message in-place to include only approved/edited tool calls
349489
last_ai_msg.tool_calls = revised_tool_calls
350490

351-
return {"messages": [last_ai_msg, *artificial_tool_messages]}
491+
# Return messages in order: context messages, updated AI message,
492+
# then artificial tool messages. This ensures the model sees edit
493+
# explanations before the updated tool calls
494+
return {"messages": [*context_messages, last_ai_msg, *artificial_tool_messages]}

libs/langchain_v1/tests/unit_tests/agents/test_middleware_agent.py

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -565,9 +565,16 @@ def mock_edit(requests):
565565
result = middleware.after_model(state, None)
566566
assert result is not None
567567
assert "messages" in result
568-
assert len(result["messages"]) == 1
569-
assert result["messages"][0].tool_calls[0]["args"] == {"input": "edited"}
570-
assert result["messages"][0].tool_calls[0]["id"] == "1" # ID should be preserved
568+
# Should return 2 messages: context AIMessage + updated AIMessage
569+
assert len(result["messages"]) == 2
570+
# First message should be context explaining the edit
571+
assert isinstance(result["messages"][0], AIMessage)
572+
assert "modified by human review" in result["messages"][0].content
573+
assert result["messages"][0].name == "human_review_system"
574+
# Second message should be the updated AIMessage with edited tool calls
575+
assert isinstance(result["messages"][1], AIMessage)
576+
assert result["messages"][1].tool_calls[0]["args"] == {"input": "edited"}
577+
assert result["messages"][1].tool_calls[0]["id"] == "1" # ID should be preserved
571578

572579

573580
def test_human_in_the_loop_middleware_single_tool_response() -> None:
@@ -695,9 +702,23 @@ def mock_edit_responses(requests):
695702
result = middleware.after_model(state, None)
696703
assert result is not None
697704
assert "messages" in result
698-
assert len(result["messages"]) == 1
705+
# Should return 3 messages: 2 context AIMessages (one per edit) + 1 updated AIMessage
706+
assert len(result["messages"]) == 3
699707

700-
updated_ai_message = result["messages"][0]
708+
# First two messages should be context explaining the edits
709+
assert isinstance(result["messages"][0], AIMessage)
710+
assert "modified by human review" in result["messages"][0].content
711+
assert "get_forecast" in result["messages"][0].content
712+
assert result["messages"][0].name == "human_review_system"
713+
714+
assert isinstance(result["messages"][1], AIMessage)
715+
assert "modified by human review" in result["messages"][1].content
716+
assert "get_temperature" in result["messages"][1].content
717+
assert result["messages"][1].name == "human_review_system"
718+
719+
# Third message should be the updated AIMessage with edited tool calls
720+
updated_ai_message = result["messages"][2]
721+
assert isinstance(updated_ai_message, AIMessage)
701722
assert updated_ai_message.tool_calls[0]["args"] == {"location": "New York"}
702723
assert updated_ai_message.tool_calls[0]["id"] == "1" # ID preserved
703724
assert updated_ai_message.tool_calls[1]["args"] == {"location": "New York"}
@@ -737,10 +758,17 @@ def mock_edit_with_args(requests):
737758
result = middleware.after_model(state, None)
738759
assert result is not None
739760
assert "messages" in result
740-
assert len(result["messages"]) == 1
761+
# Should return 2 messages: context AIMessage + updated AIMessage
762+
assert len(result["messages"]) == 2
741763

742-
# Should have modified args
743-
updated_ai_message = result["messages"][0]
764+
# First message should be context explaining the edit
765+
assert isinstance(result["messages"][0], AIMessage)
766+
assert "modified by human review" in result["messages"][0].content
767+
assert result["messages"][0].name == "human_review_system"
768+
769+
# Second message should be the updated AIMessage with modified args
770+
updated_ai_message = result["messages"][1]
771+
assert isinstance(updated_ai_message, AIMessage)
744772
assert updated_ai_message.tool_calls[0]["args"] == {"input": "modified"}
745773
assert updated_ai_message.tool_calls[0]["id"] == "1" # ID preserved
746774

@@ -874,7 +902,9 @@ def mock_capture_requests(request):
874902
assert action_request["args"] == {"input": "test", "location": "SF"}
875903
assert "Custom prefix" in action_request["description"]
876904
assert "Tool: test_tool" in action_request["description"]
877-
assert "Args: {'input': 'test', 'location': 'SF'}" in action_request["description"]
905+
# Args should now be in JSON format (Comment 5: JSON formatting)
906+
assert '"input": "test"' in action_request["description"]
907+
assert '"location": "SF"' in action_request["description"]
878908

879909
assert len(captured_request["review_configs"]) == 1
880910
review_config = captured_request["review_configs"][0]
@@ -921,8 +951,15 @@ def test_human_in_the_loop_middleware_boolean_configs() -> None:
921951
result = middleware.after_model(state, None)
922952
assert result is not None
923953
assert "messages" in result
924-
assert len(result["messages"]) == 1
925-
assert result["messages"][0].tool_calls[0]["args"] == {"input": "edited"}
954+
# Should return 2 messages: context AIMessage + updated AIMessage
955+
assert len(result["messages"]) == 2
956+
# First message should be context explaining the edit
957+
assert isinstance(result["messages"][0], AIMessage)
958+
assert "modified by human review" in result["messages"][0].content
959+
assert result["messages"][0].name == "human_review_system"
960+
# Second message should be the updated AIMessage with edited tool calls
961+
assert isinstance(result["messages"][1], AIMessage)
962+
assert result["messages"][1].tool_calls[0]["args"] == {"input": "edited"}
926963

927964
middleware = HumanInTheLoopMiddleware(interrupt_on={"test_tool": False})
928965

0 commit comments

Comments
 (0)