@@ -1072,6 +1072,7 @@ async def _start_streaming(
10721072 tool_use_tracker ,
10731073 all_tools ,
10741074 server_conversation_tracker ,
1075+ session ,
10751076 )
10761077 should_run_agent_start_hooks = False
10771078
@@ -1084,6 +1085,24 @@ async def _start_streaming(
10841085 if server_conversation_tracker is not None :
10851086 server_conversation_tracker .track_server_items (turn_result .model_response )
10861087
1088+ # Check for soft cancel after tool execution completes (before next step)
1089+ if streamed_result ._cancel_mode == "after_turn" : # type: ignore[comparison-overlap]
1090+ # Save session with complete tool execution (tool calls + tool results)
1091+ if session is not None :
1092+ should_skip_session_save = (
1093+ await AgentRunner ._input_guardrail_tripwire_triggered_for_stream (
1094+ streamed_result
1095+ )
1096+ )
1097+ if should_skip_session_save is False :
1098+ await AgentRunner ._save_result_to_session (
1099+ session , [], turn_result .new_step_items
1100+ )
1101+
1102+ streamed_result .is_complete = True
1103+ streamed_result ._event_queue .put_nowait (QueueCompleteSentinel ())
1104+ break
1105+
10871106 if isinstance (turn_result .next_step , NextStepHandoff ):
10881107 # Save the conversation to session if enabled (before handoff)
10891108 # Note: Non-streaming path doesn't save handoff turns immediately,
@@ -1106,12 +1125,6 @@ async def _start_streaming(
11061125 streamed_result ._event_queue .put_nowait (
11071126 AgentUpdatedStreamEvent (new_agent = current_agent )
11081127 )
1109-
1110- # Check for soft cancel after handoff
1111- if streamed_result ._cancel_mode == "after_turn" : # type: ignore[comparison-overlap]
1112- streamed_result .is_complete = True
1113- streamed_result ._event_queue .put_nowait (QueueCompleteSentinel ())
1114- break
11151128 elif isinstance (turn_result .next_step , NextStepFinalOutput ):
11161129 streamed_result ._output_guardrails_task = asyncio .create_task (
11171130 cls ._run_output_guardrails (
@@ -1157,12 +1170,6 @@ async def _start_streaming(
11571170 await AgentRunner ._save_result_to_session (
11581171 session , [], turn_result .new_step_items
11591172 )
1160-
1161- # Check for soft cancel after turn completion
1162- if streamed_result ._cancel_mode == "after_turn" : # type: ignore[comparison-overlap]
1163- streamed_result .is_complete = True
1164- streamed_result ._event_queue .put_nowait (QueueCompleteSentinel ())
1165- break
11661173 except AgentsException as exc :
11671174 streamed_result .is_complete = True
11681175 streamed_result ._event_queue .put_nowait (QueueCompleteSentinel ())
@@ -1217,6 +1224,7 @@ async def _run_single_turn_streamed(
12171224 tool_use_tracker : AgentToolUseTracker ,
12181225 all_tools : list [Tool ],
12191226 server_conversation_tracker : _ServerConversationTracker | None = None ,
1227+ session : Session | None = None ,
12201228 ) -> SingleStepResult :
12211229 emitted_tool_call_ids : set [str ] = set ()
12221230 emitted_reasoning_item_ids : set [str ] = set ()
@@ -1369,6 +1377,113 @@ async def _run_single_turn_streamed(
13691377 if not final_response :
13701378 raise ModelBehaviorError ("Model did not produce a final response!" )
13711379
1380+ # Check for soft cancel after LLM response streaming completes (before tool execution)
1381+ # Only cancel here if there are no tools/handoffs to execute - otherwise let tools execute
1382+ # and the cancel will be honored after tool execution completes
1383+ if streamed_result ._cancel_mode == "after_turn" :
1384+ # Process the model response to check if there are tools/handoffs to execute
1385+ processed_response = RunImpl .process_model_response (
1386+ agent = agent ,
1387+ all_tools = all_tools ,
1388+ response = final_response ,
1389+ output_schema = output_schema ,
1390+ handoffs = handoffs ,
1391+ )
1392+
1393+ # If there are tools, handoffs, or approvals to execute, let normal flow continue
1394+ # The cancel will be honored after tool execution completes (before next step)
1395+ if processed_response .has_tools_or_approvals_to_run () or processed_response .handoffs :
1396+ # Continue with normal flow - tools will execute,
1397+ # then cancel after execution completes
1398+ pass
1399+ else :
1400+ # No tools/handoffs to execute - safe to cancel here and skip tool execution
1401+ # Note: We intentionally skip execute_tools_and_side_effects() since there are
1402+ # no tools to execute. This allows faster cancellation when the LLM response
1403+ # contains no actions.
1404+ tool_use_tracker .add_tool_use (agent , processed_response .tools_used )
1405+
1406+ # Filter out items that have already been sent to avoid duplicates
1407+ items_to_save = list (processed_response .new_items )
1408+
1409+ if emitted_tool_call_ids :
1410+ # Filter out tool call items that were already emitted during streaming
1411+ items_to_save = [
1412+ item
1413+ for item in items_to_save
1414+ if not (
1415+ isinstance (item , ToolCallItem )
1416+ and (
1417+ call_id := getattr (
1418+ item .raw_item , "call_id" , getattr (item .raw_item , "id" , None )
1419+ )
1420+ )
1421+ and call_id in emitted_tool_call_ids
1422+ )
1423+ ]
1424+
1425+ if emitted_reasoning_item_ids :
1426+ # Filter out reasoning items that were already emitted during streaming
1427+ items_to_save = [
1428+ item
1429+ for item in items_to_save
1430+ if not (
1431+ isinstance (item , ReasoningItem )
1432+ and (reasoning_id := getattr (item .raw_item , "id" , None ))
1433+ and reasoning_id in emitted_reasoning_item_ids
1434+ )
1435+ ]
1436+
1437+ # Filter out HandoffCallItem to avoid duplicates (already sent earlier)
1438+ items_to_save = [
1439+ item for item in items_to_save if not isinstance (item , HandoffCallItem )
1440+ ]
1441+
1442+ # Create SingleStepResult with NextStepRunAgain (we're stopping mid-turn)
1443+ single_step_result = SingleStepResult (
1444+ original_input = streamed_result .input ,
1445+ model_response = final_response ,
1446+ pre_step_items = streamed_result .new_items ,
1447+ new_step_items = items_to_save ,
1448+ next_step = NextStepRunAgain (),
1449+ tool_input_guardrail_results = [],
1450+ tool_output_guardrail_results = [],
1451+ )
1452+
1453+ # Save session with the model response items
1454+ # Exclude ToolCallItem objects to avoid saving incomplete tool calls without outputs
1455+ if session is not None :
1456+ should_skip_session_save = (
1457+ await AgentRunner ._input_guardrail_tripwire_triggered_for_stream (
1458+ streamed_result
1459+ )
1460+ )
1461+ if should_skip_session_save is False :
1462+ # Filter out tool calls - they don't have outputs yet, so shouldn't be saved
1463+ # This prevents saving incomplete tool calls that violate API requirements
1464+ items_for_session = [
1465+ item
1466+ for item in items_to_save
1467+ if not isinstance (item , ToolCallItem )
1468+ ]
1469+ # Type ignore: intentionally filtering out ToolCallItem to avoid saving
1470+ # incomplete tool calls without corresponding outputs
1471+ await AgentRunner ._save_result_to_session (
1472+ session , [], items_for_session # type: ignore[arg-type]
1473+ )
1474+
1475+ # Stream the items to the event queue
1476+ import dataclasses as _dc
1477+ RunImpl .stream_step_result_to_queue (
1478+ single_step_result , streamed_result ._event_queue
1479+ )
1480+
1481+ # Mark as complete and signal completion
1482+ streamed_result .is_complete = True
1483+ streamed_result ._event_queue .put_nowait (QueueCompleteSentinel ())
1484+
1485+ return single_step_result
1486+
13721487 # 3. Now, we can process the turn as we do in the non-streaming case
13731488 single_step_result = await cls ._get_single_step_result_from_response (
13741489 agent = agent ,
0 commit comments