@@ -805,33 +805,45 @@ impl RlmEngine {
805805 ) -> ( Vec < Message > , String ) {
806806 let metadata = Self :: context_metadata ( cached_doc) ;
807807
808- // Walk backward from end, collecting messages until budget
809- let mut recent = Vec :: new ( ) ;
810- let mut tokens = 0 ;
811-
812- for msg in messages. iter ( ) . rev ( ) {
808+ // Walk backward from end, collecting messages until budget.
809+ //
810+ // Anthropic requires every tool_result block to have a matching tool_use
811+ // in the immediately preceding assistant message. This means we must NEVER
812+ // cut a conversation between an Assistant (tool_use) and the Tool (tool_result)
813+ // that follows it — they must always appear together.
814+ //
815+ // Strategy: collect greedily backward, then snap the cut point forward to
816+ // the nearest clean exchange boundary: the position just before the next
817+ // User message (or the end of the array when no User message follows).
818+ //
819+ // In Anthropic's message structure:
820+ // User → Assistant(tool_use) → Tool(tool_result) → [repeat] → Assistant(text)
821+ // A "clean cut point" is the boundary BEFORE a User message (i.e., after
822+ // a complete Tool → User transition), or the very start of the array.
823+
824+ let mut tokens = 0usize ;
825+ let mut cut_at = 0usize ; // include all by default
826+
827+ for ( i, msg) in messages. iter ( ) . enumerate ( ) . rev ( ) {
813828 let msg_tokens = msg. estimate_tokens ( ) ;
814- if tokens + msg_tokens > max_recent_tokens && !recent. is_empty ( ) {
829+ // Only cut if we already have something AND adding this would exceed budget.
830+ // (The very last message is never the overage point. NOTE(review): the snap below can still move past it and return an empty window when no User message follows.)
831+ if tokens + msg_tokens > max_recent_tokens && i < messages. len ( ) - 1 {
832+ // Budget exceeded. Find the nearest clean cut point at or after i+1.
833+ // A clean cut point is just before a User message. This ensures we
834+ // never start the window with a Tool (tool_result) or mid-exchange
835+ // Assistant (tool_use) that has no matching context.
836+ let mut clean = i + 1 ;
837+ while clean < messages. len ( ) && messages[ clean] . role != Role :: User {
838+ clean += 1 ;
839+ }
840+ cut_at = clean;
815841 break ;
816842 }
817- recent. push ( msg. clone ( ) ) ;
818843 tokens += msg_tokens;
819844 }
820845
821- recent. reverse ( ) ;
822-
823- // Enforce Anthropic API constraint: every tool_result must have a matching
824- // tool_use in the immediately preceding assistant message.
825- // If the window starts mid-exchange (i.e., starts with a Tool role message),
826- // drop Tool messages from the front until the window starts with User or Assistant.
827- while let Some ( first) = recent. first ( ) {
828- if first. role == Role :: Tool {
829- recent. remove ( 0 ) ;
830- } else {
831- break ;
832- }
833- }
834-
846+ let recent = messages[ cut_at..] . to_vec ( ) ;
835847 ( recent, metadata)
836848 }
837849
@@ -1281,31 +1293,80 @@ mod tests {
12811293
12821294 #[ test]
12831295 fn context_window_never_starts_with_tool_result ( ) {
1284- // Simulate a conversation where context cut happens mid-exchange.
1285- // The RLM window must not start with a tool_result (no matching tool_use).
1296+ // Simulate a conversation where budget cut happens mid-exchange.
1297+ // The window must not start with a Tool role (no matching tool_use).
1298+ // It must snap to the next User message boundary.
12861299 let messages = vec ! [
1287- Message :: user( "turn 1 question" ) ,
1288- Message :: assistant( "answer 1" ) ,
1300+ Message :: user( "turn 1 question" ) , // [0]
1301+ Message :: assistant( "answer 1" ) , // [1]
12891302 // turn 2: tool exchange
1290- Message :: user( "turn 2 question" ) ,
1291- Message :: assistant( "calling tool" ) ,
1292- Message :: tool_result( "tc1" , "tool output" , false ) ,
1303+ Message :: user( "turn 2 question" ) , // [2]
1304+ Message :: assistant( "calling tool" ) , // [3] — has tool_use
1305+ Message :: tool_result( "tc1" , "tool output that is somewhat long to force a cut " , false ) , // [4]
12931306 // turn 3: next user
1294- Message :: user( "turn 3 question" ) ,
1295- Message :: assistant( "answer 3" ) ,
1307+ Message :: user( "turn 3 question" ) , // [5]
1308+ Message :: assistant( "answer 3" ) , // [6]
12961309 ] ;
12971310
1298- // Budget so small it cuts after the tool_result but includes the tool_result message
1299- // (walk backward: "answer 3" + "turn 3 question" + "tool output" fit, but "calling tool" does not)
1300- let small_budget = 50 ; // just enough for last 3 messages
1311+ // Use a budget that fits [4][5][6] but NOT [3][4][5][6].
1312+ // Messages [5]+[6] ≈ 8 tokens, [4] ≈ 15 tokens, [3] ≈ 4 tokens.
1313+ // Budget of 30 should fit [4][5][6] (≈27) but not add [3] (≈31). NOTE(review): the per-message estimates above sum to ≈23 and ≈27, not ≈27 and ≈31 — confirm against estimate_tokens.
1314+ // The algorithm must snap the cut to [5] (the next User after the overage point).
1315+ let budget = 30 ;
13011316 let doc = RlmEngine :: serialize_conversation ( & messages) ;
1302- let ( recent, _) = RlmEngine :: build_context_window_with_doc ( & messages, small_budget , & doc) ;
1317+ let ( recent, _) = RlmEngine :: build_context_window_with_doc ( & messages, budget , & doc) ;
13031318
1304- // First message must not be a Tool role
1305- assert ! (
1306- recent. is_empty( ) || recent[ 0 ] . role != Role :: Tool ,
1307- "Window must not start with a tool_result — first msg role: {:?}" ,
1308- recent. first( ) . map( |m| & m. role)
1319+ // Window must not be empty and must start with a User message. Note: these checks also hold when no cut happens at all, because messages[0] is a User message.
1320+ assert ! ( !recent. is_empty( ) , "Window should not be empty" ) ;
1321+ assert_eq ! (
1322+ recent[ 0 ] . role, Role :: User ,
1323+ "Window must start with User, not {:?}" , recent[ 0 ] . role
1324+ ) ;
1325+ // The last message must always be included
1326+ assert_eq ! (
1327+ recent. last( ) . unwrap( ) . text_content( ) , "answer 3" ,
1328+ "Most recent message must always be in window"
13091329 ) ;
13101330 }
1331+
1332+ #[ test]
1333+ fn context_window_keeps_tool_use_and_result_together ( ) {
1334+ // When budget is tight, the algorithm must keep tool_use + tool_result together
1335+ // or exclude both — never include just the tool_result without tool_use.
1336+ let messages = vec ! [
1337+ Message :: user( "q1" ) , // [0]
1338+ Message :: assistant( "a1" ) , // [1]
1339+ Message :: user( "q2" ) , // [2]
1340+ Message :: assistant( "calling" ) , // [3] tool_use
1341+ Message :: tool_result( "tc1" , "result" , false ) , // [4] tool_result
1342+ Message :: user( "q3" ) , // [5]
1343+ Message :: assistant( "a3" ) , // [6]
1344+ ] ;
1345+
1346+ // Try a fixed spread of budgets (5 through 10_000), verify the invariants hold for each
1347+ let doc = RlmEngine :: serialize_conversation ( & messages) ;
1348+ for budget in [ 5 , 10 , 15 , 20 , 25 , 30 , 50 , 100 , 10_000 ] {
1349+ let ( recent, _) = RlmEngine :: build_context_window_with_doc ( & messages, budget, & doc) ;
1350+
1351+ // Invariant: if a Tool message is in the window, the preceding message
1352+ // must be an Assistant message (the tool_use that matches it)
1353+ for ( i, msg) in recent. iter ( ) . enumerate ( ) {
1354+ if msg. role == Role :: Tool {
1355+ assert ! (
1356+ i > 0 && recent[ i - 1 ] . role == Role :: Assistant ,
1357+ "budget={budget}: Tool at window[{i}] has no preceding Assistant. Window roles: {:?}" ,
1358+ recent. iter( ) . map( |m| & m. role) . collect:: <Vec <_>>( )
1359+ ) ;
1360+ }
1361+ }
1362+
1363+ // Invariant: first message is never Tool role (an empty window passes trivially)
1364+ if let Some ( first) = recent. first ( ) {
1365+ assert_ne ! (
1366+ first. role, Role :: Tool ,
1367+ "budget={budget}: Window starts with Tool role"
1368+ ) ;
1369+ }
1370+ }
1371+ }
13111372}
0 commit comments