fix: auto compaction failing, add truncation if compact request fails (#365)

brandonskiser · web-flow · commit 62b53719f326 · 2025-07-02T11:15:31.000-07:00
diff --git a/crates/chat-cli/src/cli/chat/cli/compact.rs b/crates/chat-cli/src/cli/chat/cli/compact.rs
@@ -35,6 +35,6 @@ pub struct CompactArgs {
 
 impl CompactArgs {
     pub async fn execute(self, os: &Os, session: &mut ChatSession) -> Result<ChatState, ChatError> {
-        session.compact_history(os, self.prompt, self.show_summary).await
+        session.compact_history(os, self.prompt, self.show_summary, true).await
     }
 }
diff --git a/crates/chat-cli/src/cli/chat/conversation.rs b/crates/chat-cli/src/cli/chat/conversation.rs
@@ -534,16 +534,10 @@ impl ConversationState {
         })
     }
 
-    /// Whether or not it is possible to create a summary out of this conversation state.
-    ///
-    /// Currently only checks if we have enough messages in the history to create a summary out of.
-    pub async fn can_create_summary_request(&mut self, os: &Os) -> Result<bool, ChatError> {
-        Ok(self
-            .backend_conversation_state(os, false, &mut vec![])
-            .await?
-            .history
-            .len()
-            >= 2)
+    pub async fn truncate_large_user_messages(&mut self) {
+        for (user_message, _) in &mut self.history {
+            user_message.truncate_safe(25_000);
+        }
     }
 
     /// Returns a [FigConversationState] capable of replacing the history of the current
@@ -597,14 +591,7 @@ impl ConversationState {
         };
 
         let conv_state = self.backend_conversation_state(os, false, &mut vec![]).await?;
-
-        // Include everything but the last message in the history.
-        let history_len = conv_state.history.len();
-        let history = if history_len < 2 {
-            vec![]
-        } else {
-            flatten_history(conv_state.history.take(history_len.saturating_sub(1)))
-        };
+        let history = flatten_history(conv_state.history);
 
         let user_input_message_context = UserInputMessageContext {
             env_state: Some(build_env_state()),
diff --git a/crates/chat-cli/src/cli/chat/message.rs b/crates/chat-cli/src/cli/chat/message.rs
@@ -4,7 +4,10 @@ use serde::{
     Deserialize,
     Serialize,
 };
-use tracing::error;
+use tracing::{
+    error,
+    warn,
+};
 
 use super::consts::MAX_CURRENT_WORKING_DIRECTORY_LEN;
 use super::tools::{
@@ -15,6 +18,7 @@ use super::util::{
     document_to_serde_value,
     serde_value_to_document,
     truncate_safe,
+    truncate_safe_in_place,
 };
 use crate::api_client::model::{
     AssistantResponseMessage,
@@ -55,6 +59,30 @@ pub enum UserMessageContent {
     },
 }
 
+impl UserMessageContent {
+    fn truncate_safe(&mut self, max_bytes: usize) {
+        match self {
+            UserMessageContent::Prompt { prompt } => {
+                truncate_safe_in_place(prompt, max_bytes);
+            },
+            UserMessageContent::CancelledToolUses {
+                prompt,
+                tool_use_results,
+            } => {
+                if let Some(prompt) = prompt {
+                    truncate_safe_in_place(prompt, max_bytes / 2);
+                    truncate_safe_tool_use_results(tool_use_results.as_mut_slice(), max_bytes / 2);
+                } else {
+                    truncate_safe_tool_use_results(tool_use_results.as_mut_slice(), max_bytes);
+                }
+            },
+            UserMessageContent::ToolUseResults { tool_use_results } => {
+                truncate_safe_tool_use_results(tool_use_results.as_mut_slice(), max_bytes);
+            },
+        }
+    }
+}
+
 impl UserMessage {
     /// Creates a new [UserMessage::Prompt], automatically detecting and adding the user's
     /// environment [UserEnvContext].
@@ -193,6 +221,14 @@ impl UserMessage {
             UserMessageContent::ToolUseResults { .. } => None,
         }
     }
+
+    /// Truncates the content contained in this user message to a maximum length of `max_bytes`.
+    ///
+    /// This isn't a perfect truncation - JSON tool use results are ignored, and only the content
+    /// of the user message is truncated, ignoring extra context fields.
+    pub fn truncate_safe(&mut self, max_bytes: usize) {
+        self.content.truncate_safe(max_bytes);
+    }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -225,6 +261,20 @@ impl From<ToolUseResult> for ToolResult {
     }
 }
 
+fn truncate_safe_tool_use_results(tool_use_results: &mut [ToolUseResult], max_bytes: usize) {
+    let max_bytes = max_bytes / tool_use_results.len();
+    for result in tool_use_results {
+        for content in &mut result.content {
+            match content {
+                ToolUseResultBlock::Json(_) => {
+                    warn!("Unable to truncate JSON safely");
+                },
+                ToolUseResultBlock::Text(t) => truncate_safe_in_place(t, max_bytes),
+            }
+        }
+    }
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub enum ToolUseResultBlock {
     Json(serde_json::Value),
diff --git a/crates/chat-cli/src/cli/chat/mod.rs b/crates/chat-cli/src/cli/chat/mod.rs
@@ -426,6 +426,8 @@ pub enum ChatError {
         "Tool approval required but --no-interactive was specified. Use --trust-all-tools to automatically approve tools."
     )]
     NonInteractiveToolApproval,
+    #[error("The conversation history is too large to compact")]
+    CompactHistoryFailure,
 }
 
 impl ChatError {
@@ -440,6 +442,7 @@ impl ChatError {
             ChatError::Interrupted { .. } => None,
             ChatError::GetPromptError(_) => None,
             ChatError::NonInteractiveToolApproval => None,
+            ChatError::CompactHistoryFailure => None,
         }
     }
 }
@@ -456,6 +459,7 @@ impl ReasonCode for ChatError {
             ChatError::GetPromptError(_) => "GetPromptError".to_string(),
             ChatError::Auth(_) => "AuthError".to_string(),
             ChatError::NonInteractiveToolApproval => "NonInteractiveToolApproval".to_string(),
+            ChatError::CompactHistoryFailure => "CompactHistoryFailure".to_string(),
         }
     }
 }
@@ -618,9 +622,13 @@ impl ChatSession {
                     Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: Some(self.tool_uses.clone()) })
                 }
             },
-            ChatState::CompactHistory { prompt, show_summary } => {
+            ChatState::CompactHistory {
+                prompt,
+                show_summary,
+                attempt_truncated_compact_retry,
+            } => {
                 tokio::select! {
-                    res = self.compact_history(os, prompt, show_summary) => res,
+                    res = self.compact_history(os, prompt, show_summary, attempt_truncated_compact_retry) => res,
                     Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: Some(self.tool_uses.clone()) })
                 }
             },
@@ -697,40 +705,40 @@ impl ChatSession {
 
                 ("Tool use was interrupted", Report::from(err), false)
             },
+            ChatError::CompactHistoryFailure => {
+                // This error is not retryable - the user must intervene manually to manage
+                // their context.
+                execute!(
+                    self.stderr,
+                    style::SetForegroundColor(Color::Red),
+                    style::Print("Your conversation is too large to continue.\n"),
+                    style::SetForegroundColor(Color::Reset),
+                    style::Print(format!("• Run {} to analyze your context usage\n", "/usage".green())),
+                    style::Print(format!("• Run {} to reset your conversation state\n", "/clear".green())),
+                    style::SetAttribute(Attribute::Reset),
+                    style::Print("\n\n"),
+                )?;
+                ("Unable to compact the conversation history", eyre!(err), true)
+            },
             ChatError::Client(err) => match *err {
                 // Errors from attempting to send too large of a conversation history. In
                 // this case, attempt to automatically compact the history for the user.
                 ApiClientError::ContextWindowOverflow { .. } => {
-                    if !self.conversation.can_create_summary_request(os).await? {
-                        execute!(
-                            self.stderr,
-                            style::SetForegroundColor(Color::Red),
-                            style::Print("Your conversation is too large to continue.\n"),
-                            style::SetForegroundColor(Color::Reset),
-                            style::Print(format!("• Run {} to analyze your context usage\n", "/usage".green())),
-                            style::Print(format!("• Run {} to reset your conversation state\n", "/clear".green())),
-                            style::SetAttribute(Attribute::Reset),
-                            style::Print("\n\n"),
-                        )?;
-
-                        self.conversation.reset_next_user_message();
-                        self.inner = Some(ChatState::PromptUser {
-                            skip_printing_tools: false,
-                        });
-
-                        return Ok(());
-                    }
-
                     self.inner = Some(ChatState::CompactHistory {
                         prompt: None,
                         show_summary: false,
+                        attempt_truncated_compact_retry: true,
                     });
 
-                    (
-                        "The context window has overflowed, summarizing the history...",
-                        Report::from(err),
-                        true,
-                    )
+                    execute!(
+                        self.stdout,
+                        style::SetForegroundColor(Color::Yellow),
+                        style::Print("The context window has overflowed, summarizing the history..."),
+                        style::SetAttribute(Attribute::Reset),
+                        style::Print("\n\n"),
+                    )?;
+
+                    return Ok(());
                 },
                 ApiClientError::QuotaBreach { message, .. } => (message, Report::from(err), true),
                 ApiClientError::ModelOverloadedError { request_id, .. } => {
@@ -890,6 +898,11 @@ enum ChatState {
         prompt: Option<String>,
         /// Whether or not the summary should be shown on compact success.
         show_summary: bool,
+        /// Whether or not we should truncate large messages in the conversation history if we
+        /// encounter a context window overflow while attempting compaction.
+        ///
+        /// This should be `true` everywhere except the retry state built in [ChatSession::compact_history].
+        attempt_truncated_compact_retry: bool,
     },
     /// Exit the chat.
     Exit,
@@ -995,17 +1008,21 @@ impl ChatSession {
     /// Compacts the conversation history, replacing the history with a summary generated by the
     /// model.
     ///
-    /// The last two user messages in the history are not included in the compaction process.
+    /// If `attempt_truncated_compact_retry` is true, then if we encounter a context window
+    /// overflow while attempting compaction, large user messages will be heavily truncated and
+    /// the compaction attempt will be retried, failing with [ChatError::CompactHistoryFailure] if
+    /// we fail again.
     async fn compact_history(
         &mut self,
         os: &Os,
         custom_prompt: Option<String>,
         show_summary: bool,
+        attempt_truncated_compact_retry: bool,
     ) -> Result<ChatState, ChatError> {
         let hist = self.conversation.history();
         debug!(?hist, "compacting history");
 
-        if self.conversation.history().len() < 2 {
+        if self.conversation.history().is_empty() {
             execute!(
                 self.stderr,
                 style::SetForegroundColor(Color::Yellow),
@@ -1046,23 +1063,29 @@ impl ChatSession {
                 .await;
                 match err {
                     ApiClientError::ContextWindowOverflow { .. } => {
-                        self.conversation.clear(true);
-
-                        self.spinner.take();
-                        execute!(
-                            self.stderr,
-                            terminal::Clear(terminal::ClearType::CurrentLine),
-                            cursor::MoveToColumn(0),
-                            style::SetForegroundColor(Color::Yellow),
-                            style::Print(
-                                "The context window usage has overflowed. Clearing the conversation history.\n\n"
-                            ),
-                            style::SetAttribute(Attribute::Reset)
-                        )?;
-
-                        return Ok(ChatState::PromptUser {
-                            skip_printing_tools: true,
-                        });
+                        error!(?attempt_truncated_compact_retry, "failed to send compaction request");
+                        if attempt_truncated_compact_retry {
+                            self.conversation.truncate_large_user_messages().await;
+                            if self.spinner.is_some() {
+                                drop(self.spinner.take());
+                                execute!(
+                                    self.stderr,
+                                    terminal::Clear(terminal::ClearType::CurrentLine),
+                                    cursor::MoveToColumn(0),
+                                    style::SetForegroundColor(Color::Yellow),
+                                    style::Print("Reducing context..."),
+                                    style::SetAttribute(Attribute::Reset),
+                                    style::Print("\n\n"),
+                                )?;
+                            }
+                            return Ok(ChatState::CompactHistory {
+                                prompt: custom_prompt,
+                                show_summary,
+                                attempt_truncated_compact_retry: false,
+                            });
+                        } else {
+                            return Err(ChatError::CompactHistoryFailure);
+                        }
                     },
                     err => return Err(err.into()),
                 }
@@ -1195,10 +1218,8 @@ impl ChatSession {
         // Check token usage and display warnings if needed
         if self.pending_tool_index.is_none() {
             // Only display warnings when not waiting for tool approval
-            if self.conversation.can_create_summary_request(os).await? {
-                if let Err(err) = self.display_char_warnings(os).await {
-                    warn!("Failed to display character limit warnings: {}", err);
-                }
+            if let Err(err) = self.display_char_warnings(os).await {
+                warn!("Failed to display character limit warnings: {}", err);
             }
         }
 
diff --git a/crates/chat-cli/src/cli/chat/util/mod.rs b/crates/chat-cli/src/cli/chat/util/mod.rs
@@ -34,6 +34,11 @@ pub fn truncate_safe(s: &str, max_bytes: usize) -> &str {
     &s[..byte_count]
 }
 
+pub fn truncate_safe_in_place(s: &mut String, max_chars: usize) {
+    let bytes = s.char_indices().nth(max_chars).map_or(s.len(), |(idx, _)| idx);
+    s.truncate(bytes);
+}
+
 pub fn animate_output(output: &mut impl Write, bytes: &[u8]) -> Result<(), ChatError> {
     for b in bytes.chunks(12) {
         output.write_all(b)?;
@@ -175,10 +180,30 @@ mod tests {
 
     #[test]
     fn test_truncate_safe() {
-        assert_eq!(truncate_safe("Hello World", 5), "Hello");
-        assert_eq!(truncate_safe("Hello ", 5), "Hello");
-        assert_eq!(truncate_safe("Hello World", 11), "Hello World");
-        assert_eq!(truncate_safe("Hello World", 15), "Hello World");
+        let tests = &[
+            ("Hello World", 5, "Hello"),
+            ("Hello ", 5, "Hello"),
+            ("Hello World", 11, "Hello World"),
+            ("Hello World", 15, "Hello World"),
+        ];
+        for (input, max_bytes, expected) in tests {
+            assert_eq!(
+                truncate_safe(input, *max_bytes),
+                *expected,
+                "input: {} with max bytes: {} failed",
+                input,
+                max_bytes
+            );
+            let mut in_place = input.to_string();
+            truncate_safe_in_place(&mut in_place, *max_bytes);
+            assert_eq!(
+                in_place.as_str(),
+                *expected,
+                "input: {} with max bytes: {} failed",
+                input,
+                max_bytes
+            );
+        }
     }
 
     #[test]

Original file line number	Diff line number	Diff line change
`@@ -35,6 +35,6 @@ pub struct CompactArgs {`
`35`	`35`
`36`	`36`	`impl CompactArgs {`
`37`	`37`	`pub async fn execute(self, os: &Os, session: &mut ChatSession) -> Result<ChatState, ChatError> {`
`38`		`- session.compact_history(os, self.prompt, self.show_summary).await`
	`38`	`+ session.compact_history(os, self.prompt, self.show_summary, true).await`
`39`	`39`	`}`
`40`	`40`	`}`