@@ -44,12 +44,56 @@ def _get_last_input_tokens(event: AgentEvent) -> int:
4444 return last_generation_event .usage .input_tokens if last_generation_event .usage else 0
4545
4646
def _find_tool_aware_boundary(
    messages: "list[rg.Message]",
    min_messages_to_keep: int,
) -> int:
    """
    Find the best summarization boundary while preserving tool call/response pairs.

    A split at index ``b`` is rejected when a kept ``tool`` message
    (index >= ``b``) answers a tool call issued by an assistant message that
    would be summarized away (index < ``b``). Strict providers reject a tool
    response whose originating call is no longer present in the conversation.

    The search starts at ``len(messages) - min_messages_to_keep`` and moves
    toward the start of the list, returning the first (i.e. largest) boundary
    that does not separate such a pair. Tool responses whose ``tool_call_id``
    does not match any assistant tool call in ``messages`` are ignored, since
    no choice of boundary can re-attach them.

    Args:
        messages: List of messages to analyze (excluding system message)
        min_messages_to_keep: Minimum messages that must be kept after boundary

    Returns:
        Index where to split (messages[:idx] summarized, messages[idx:] kept)
        Returns 0 if no valid boundary found
    """
    total = len(messages)
    start = total - min_messages_to_keep
    if start <= 0:
        # Every message has to be kept, so there is nothing to summarize.
        return 0
    if start >= total:
        # Nothing would be kept after the split, so no response can be orphaned.
        return start

    # Map each tool_call_id to the index of the assistant message that issued it.
    # A later assistant message reusing an id overwrites the earlier entry.
    call_index_by_id: dict[str, int] = {}
    for idx, msg in enumerate(messages):
        if msg.role != "assistant":
            continue
        for call in getattr(msg, "tool_calls", None) or []:
            if hasattr(call, "id"):
                call_index_by_id[call.id] = idx

    # Single backward pass. `earliest_call` is the smallest assistant index
    # referenced by any tool response in messages[idx:]; a boundary at `idx`
    # is safe exactly when that assistant message is itself kept
    # (earliest_call >= idx). `total` acts as the "no reference seen" sentinel.
    earliest_call = total
    for idx in range(total - 1, -1, -1):
        msg = messages[idx]
        if msg.role == "tool" and hasattr(msg, "tool_call_id"):
            call_idx = call_index_by_id.get(msg.tool_call_id)
            if call_idx is not None and call_idx < earliest_call:
                earliest_call = call_idx

        if idx <= start and earliest_call >= idx:
            return idx

    return 0  # No valid boundary found
88+
89+
4790@component
4891def summarize_when_long (
4992 model : str | rg .Generator | None = None ,
5093 max_tokens : int = 100_000 ,
5194 min_messages_to_keep : int = 5 ,
5295 guidance : str = "" ,
96+ preserve_tool_pairs : bool = True ,
5397) -> "Hook" :
5498 """
5599 Creates a hook to manage the agent's context window by summarizing the conversation history.
@@ -66,6 +110,9 @@ def summarize_when_long(
66110 (default is None, meaning no proactive summarization).
67111 min_messages_to_keep: The minimum number of messages to retain after summarization (default is 5).
68112 guidance: Additional guidance for the summarization process (default is "").
113+ preserve_tool_pairs: If True, ensures tool call/response pairs stay together to avoid breaking
114+ strict API requirements (OpenAI, Anthropic). Defaults to True. Set to False to use legacy
115+ behavior that may break tool pairs but allows more aggressive summarization.
69116 """
70117
71118 if min_messages_to_keep < 2 :
@@ -91,6 +138,10 @@ async def summarize_when_long( # noqa: PLR0912
91138 guidance ,
92139 help = "Additional guidance for the summarization process" ,
93140 ),
141+ preserve_tool_pairs : bool = Config (
142+ preserve_tool_pairs ,
143+ help = "Preserve tool call/response pairs to avoid breaking strict API requirements" ,
144+ ),
94145 ) -> Reaction | None :
95146 should_summarize = False
96147
@@ -123,26 +174,30 @@ async def summarize_when_long( # noqa: PLR0912
123174 messages .pop (0 ) if messages and messages [0 ].role == "system" else None
124175 )
125176
126- # Find the best point to summarize by walking the message list once.
127- # A boundary is valid after a simple assistant message or a finished tool block.
128- best_summarize_boundary = 0
129- for i , message in enumerate (messages ):
130- # If the remaining messages are less than or equal to our minimum, we can't slice any further.
131- if len (messages ) - i <= min_messages_to_keep :
132- break
133-
134- # Condition 1: The message is an assistant response without tool calls.
135- is_simple_assistant = message .role == "assistant" and not getattr (
136- message , "tool_calls" , None
137- )
138-
139- # Condition 2: The message is the last in a block of tool responses.
140- is_last_tool_in_block = message .role == "tool" and (
141- i + 1 == len (messages ) or messages [i + 1 ].role != "tool"
142- )
143-
144- if is_simple_assistant or is_last_tool_in_block :
145- best_summarize_boundary = i + 1
177+ # Find the best point to summarize
178+ if preserve_tool_pairs :
179+ # Use tool-aware boundary finding to prevent breaking tool call/response pairs
180+ best_summarize_boundary = _find_tool_aware_boundary (messages , min_messages_to_keep )
181+ else :
182+ # Legacy behavior: walk the message list once looking for simple boundaries
183+ best_summarize_boundary = 0
184+ for i , message in enumerate (messages ):
185+ # If the remaining messages are less than or equal to our minimum, we can't slice any further.
186+ if len (messages ) - i <= min_messages_to_keep :
187+ break
188+
189+ # Condition 1: The message is an assistant response without tool calls.
190+ is_simple_assistant = message .role == "assistant" and not getattr (
191+ message , "tool_calls" , None
192+ )
193+
194+ # Condition 2: The message is the last in a block of tool responses.
195+ is_last_tool_in_block = message .role == "tool" and (
196+ i + 1 == len (messages ) or messages [i + 1 ].role != "tool"
197+ )
198+
199+ if is_simple_assistant or is_last_tool_in_block :
200+ best_summarize_boundary = i + 1
146201
147202 if best_summarize_boundary == 0 :
148203 return None # No valid slice point was found.
0 commit comments