Skip to content

Commit 62b5371

Browse files
fix: auto compaction failing, add truncation if compact request fails (#365)
1 parent c1210b3 commit 62b5371

File tree

5 files changed

+157
-74
lines changed

5 files changed

+157
-74
lines changed

crates/chat-cli/src/cli/chat/cli/compact.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,6 @@ pub struct CompactArgs {
3535

3636
impl CompactArgs {
3737
pub async fn execute(self, os: &Os, session: &mut ChatSession) -> Result<ChatState, ChatError> {
38-
session.compact_history(os, self.prompt, self.show_summary).await
38+
session.compact_history(os, self.prompt, self.show_summary, true).await
3939
}
4040
}

crates/chat-cli/src/cli/chat/conversation.rs

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -534,16 +534,10 @@ impl ConversationState {
534534
})
535535
}
536536

537-
/// Whether or not it is possible to create a summary out of this conversation state.
538-
///
539-
/// Currently only checks if we have enough messages in the history to create a summary out of.
540-
pub async fn can_create_summary_request(&mut self, os: &Os) -> Result<bool, ChatError> {
541-
Ok(self
542-
.backend_conversation_state(os, false, &mut vec![])
543-
.await?
544-
.history
545-
.len()
546-
>= 2)
537+
pub async fn truncate_large_user_messages(&mut self) {
538+
for (user_message, _) in &mut self.history {
539+
user_message.truncate_safe(25_000);
540+
}
547541
}
548542

549543
/// Returns a [FigConversationState] capable of replacing the history of the current
@@ -597,14 +591,7 @@ impl ConversationState {
597591
};
598592

599593
let conv_state = self.backend_conversation_state(os, false, &mut vec![]).await?;
600-
601-
// Include everything but the last message in the history.
602-
let history_len = conv_state.history.len();
603-
let history = if history_len < 2 {
604-
vec![]
605-
} else {
606-
flatten_history(conv_state.history.take(history_len.saturating_sub(1)))
607-
};
594+
let history = flatten_history(conv_state.history);
608595

609596
let user_input_message_context = UserInputMessageContext {
610597
env_state: Some(build_env_state()),

crates/chat-cli/src/cli/chat/message.rs

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ use serde::{
44
Deserialize,
55
Serialize,
66
};
7-
use tracing::error;
7+
use tracing::{
8+
error,
9+
warn,
10+
};
811

912
use super::consts::MAX_CURRENT_WORKING_DIRECTORY_LEN;
1013
use super::tools::{
@@ -15,6 +18,7 @@ use super::util::{
1518
document_to_serde_value,
1619
serde_value_to_document,
1720
truncate_safe,
21+
truncate_safe_in_place,
1822
};
1923
use crate::api_client::model::{
2024
AssistantResponseMessage,
@@ -55,6 +59,30 @@ pub enum UserMessageContent {
5559
},
5660
}
5761

62+
impl UserMessageContent {
63+
fn truncate_safe(&mut self, max_bytes: usize) {
64+
match self {
65+
UserMessageContent::Prompt { prompt } => {
66+
truncate_safe_in_place(prompt, max_bytes);
67+
},
68+
UserMessageContent::CancelledToolUses {
69+
prompt,
70+
tool_use_results,
71+
} => {
72+
if let Some(prompt) = prompt {
73+
truncate_safe_in_place(prompt, max_bytes / 2);
74+
truncate_safe_tool_use_results(tool_use_results.as_mut_slice(), max_bytes / 2);
75+
} else {
76+
truncate_safe_tool_use_results(tool_use_results.as_mut_slice(), max_bytes);
77+
}
78+
},
79+
UserMessageContent::ToolUseResults { tool_use_results } => {
80+
truncate_safe_tool_use_results(tool_use_results.as_mut_slice(), max_bytes);
81+
},
82+
}
83+
}
84+
}
85+
5886
impl UserMessage {
5987
/// Creates a new [UserMessage::Prompt], automatically detecting and adding the user's
6088
/// environment [UserEnvContext].
@@ -193,6 +221,14 @@ impl UserMessage {
193221
UserMessageContent::ToolUseResults { .. } => None,
194222
}
195223
}
224+
225+
/// Truncates the content contained in this user message to a maximum length of `max_bytes`.
226+
///
227+
/// This isn't a perfect truncation - JSON tool use results are ignored, and only the content
228+
/// of the user message is truncated, ignoring extra context fields.
229+
pub fn truncate_safe(&mut self, max_bytes: usize) {
230+
self.content.truncate_safe(max_bytes);
231+
}
196232
}
197233

198234
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -225,6 +261,20 @@ impl From<ToolUseResult> for ToolResult {
225261
}
226262
}
227263

264+
fn truncate_safe_tool_use_results(tool_use_results: &mut [ToolUseResult], max_bytes: usize) {
265+
let max_bytes = max_bytes / tool_use_results.len();
266+
for result in tool_use_results {
267+
for content in &mut result.content {
268+
match content {
269+
ToolUseResultBlock::Json(_) => {
270+
warn!("Unable to truncate JSON safely");
271+
},
272+
ToolUseResultBlock::Text(t) => truncate_safe_in_place(t, max_bytes),
273+
}
274+
}
275+
}
276+
}
277+
228278
#[derive(Debug, Clone, Serialize, Deserialize)]
229279
pub enum ToolUseResultBlock {
230280
Json(serde_json::Value),

crates/chat-cli/src/cli/chat/mod.rs

Lines changed: 71 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,8 @@ pub enum ChatError {
426426
"Tool approval required but --no-interactive was specified. Use --trust-all-tools to automatically approve tools."
427427
)]
428428
NonInteractiveToolApproval,
429+
#[error("The conversation history is too large to compact")]
430+
CompactHistoryFailure,
429431
}
430432

431433
impl ChatError {
@@ -440,6 +442,7 @@ impl ChatError {
440442
ChatError::Interrupted { .. } => None,
441443
ChatError::GetPromptError(_) => None,
442444
ChatError::NonInteractiveToolApproval => None,
445+
ChatError::CompactHistoryFailure => None,
443446
}
444447
}
445448
}
@@ -456,6 +459,7 @@ impl ReasonCode for ChatError {
456459
ChatError::GetPromptError(_) => "GetPromptError".to_string(),
457460
ChatError::Auth(_) => "AuthError".to_string(),
458461
ChatError::NonInteractiveToolApproval => "NonInteractiveToolApproval".to_string(),
462+
ChatError::CompactHistoryFailure => "CompactHistoryFailure".to_string(),
459463
}
460464
}
461465
}
@@ -618,9 +622,13 @@ impl ChatSession {
618622
Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: Some(self.tool_uses.clone()) })
619623
}
620624
},
621-
ChatState::CompactHistory { prompt, show_summary } => {
625+
ChatState::CompactHistory {
626+
prompt,
627+
show_summary,
628+
attempt_truncated_compact_retry,
629+
} => {
622630
tokio::select! {
623-
res = self.compact_history(os, prompt, show_summary) => res,
631+
res = self.compact_history(os, prompt, show_summary, attempt_truncated_compact_retry) => res,
624632
Ok(_) = ctrl_c_stream => Err(ChatError::Interrupted { tool_uses: Some(self.tool_uses.clone()) })
625633
}
626634
},
@@ -697,40 +705,40 @@ impl ChatSession {
697705

698706
("Tool use was interrupted", Report::from(err), false)
699707
},
708+
ChatError::CompactHistoryFailure => {
709+
// This error is not retryable - the user must take manual intervention to manage
710+
// their context.
711+
execute!(
712+
self.stderr,
713+
style::SetForegroundColor(Color::Red),
714+
style::Print("Your conversation is too large to continue.\n"),
715+
style::SetForegroundColor(Color::Reset),
716+
style::Print(format!("• Run {} to analyze your context usage\n", "/usage".green())),
717+
style::Print(format!("• Run {} to reset your conversation state\n", "/clear".green())),
718+
style::SetAttribute(Attribute::Reset),
719+
style::Print("\n\n"),
720+
)?;
721+
("Unable to compact the conversation history", eyre!(err), true)
722+
},
700723
ChatError::Client(err) => match *err {
701724
// Errors from attempting to send too large of a conversation history. In
702725
// this case, attempt to automatically compact the history for the user.
703726
ApiClientError::ContextWindowOverflow { .. } => {
704-
if !self.conversation.can_create_summary_request(os).await? {
705-
execute!(
706-
self.stderr,
707-
style::SetForegroundColor(Color::Red),
708-
style::Print("Your conversation is too large to continue.\n"),
709-
style::SetForegroundColor(Color::Reset),
710-
style::Print(format!("• Run {} to analyze your context usage\n", "/usage".green())),
711-
style::Print(format!("• Run {} to reset your conversation state\n", "/clear".green())),
712-
style::SetAttribute(Attribute::Reset),
713-
style::Print("\n\n"),
714-
)?;
715-
716-
self.conversation.reset_next_user_message();
717-
self.inner = Some(ChatState::PromptUser {
718-
skip_printing_tools: false,
719-
});
720-
721-
return Ok(());
722-
}
723-
724727
self.inner = Some(ChatState::CompactHistory {
725728
prompt: None,
726729
show_summary: false,
730+
attempt_truncated_compact_retry: true,
727731
});
728732

729-
(
730-
"The context window has overflowed, summarizing the history...",
731-
Report::from(err),
732-
true,
733-
)
733+
execute!(
734+
self.stdout,
735+
style::SetForegroundColor(Color::Yellow),
736+
style::Print("The context window has overflowed, summarizing the history..."),
737+
style::SetAttribute(Attribute::Reset),
738+
style::Print("\n\n"),
739+
)?;
740+
741+
return Ok(());
734742
},
735743
ApiClientError::QuotaBreach { message, .. } => (message, Report::from(err), true),
736744
ApiClientError::ModelOverloadedError { request_id, .. } => {
@@ -890,6 +898,11 @@ enum ChatState {
890898
prompt: Option<String>,
891899
/// Whether or not the summary should be shown on compact success.
892900
show_summary: bool,
901+
/// Whether or not we should truncate large messages in the conversation history if we
902+
/// encounter a context window overflow while attempting compaction.
903+
///
904+
/// This should be `true` everywhere other than [ChatSession::compact_history].
905+
attempt_truncated_compact_retry: bool,
893906
},
894907
/// Exit the chat.
895908
Exit,
@@ -995,17 +1008,21 @@ impl ChatSession {
9951008
/// Compacts the conversation history, replacing the history with a summary generated by the
9961009
/// model.
9971010
///
998-
/// The last two user messages in the history are not included in the compaction process.
1011+
/// If `attempt_truncated_compact_retry` is true, then if we encounter a context window
1012+
/// overflow while attempting compaction, large user messages will be heavily truncated and
1013+
/// the compaction attempt will be retried, failing with [ChatError::CompactHistoryFailure] if
1014+
/// we fail again.
9991015
async fn compact_history(
10001016
&mut self,
10011017
os: &Os,
10021018
custom_prompt: Option<String>,
10031019
show_summary: bool,
1020+
attempt_truncated_compact_retry: bool,
10041021
) -> Result<ChatState, ChatError> {
10051022
let hist = self.conversation.history();
10061023
debug!(?hist, "compacting history");
10071024

1008-
if self.conversation.history().len() < 2 {
1025+
if self.conversation.history().is_empty() {
10091026
execute!(
10101027
self.stderr,
10111028
style::SetForegroundColor(Color::Yellow),
@@ -1046,23 +1063,29 @@ impl ChatSession {
10461063
.await;
10471064
match err {
10481065
ApiClientError::ContextWindowOverflow { .. } => {
1049-
self.conversation.clear(true);
1050-
1051-
self.spinner.take();
1052-
execute!(
1053-
self.stderr,
1054-
terminal::Clear(terminal::ClearType::CurrentLine),
1055-
cursor::MoveToColumn(0),
1056-
style::SetForegroundColor(Color::Yellow),
1057-
style::Print(
1058-
"The context window usage has overflowed. Clearing the conversation history.\n\n"
1059-
),
1060-
style::SetAttribute(Attribute::Reset)
1061-
)?;
1062-
1063-
return Ok(ChatState::PromptUser {
1064-
skip_printing_tools: true,
1065-
});
1066+
error!(?attempt_truncated_compact_retry, "failed to send compaction request");
1067+
if attempt_truncated_compact_retry {
1068+
self.conversation.truncate_large_user_messages().await;
1069+
if self.spinner.is_some() {
1070+
drop(self.spinner.take());
1071+
execute!(
1072+
self.stderr,
1073+
terminal::Clear(terminal::ClearType::CurrentLine),
1074+
cursor::MoveToColumn(0),
1075+
style::SetForegroundColor(Color::Yellow),
1076+
style::Print("Reducing context..."),
1077+
style::SetAttribute(Attribute::Reset),
1078+
style::Print("\n\n"),
1079+
)?;
1080+
}
1081+
return Ok(ChatState::CompactHistory {
1082+
prompt: custom_prompt,
1083+
show_summary,
1084+
attempt_truncated_compact_retry: false,
1085+
});
1086+
} else {
1087+
return Err(ChatError::CompactHistoryFailure);
1088+
}
10661089
},
10671090
err => return Err(err.into()),
10681091
}
@@ -1195,10 +1218,8 @@ impl ChatSession {
11951218
// Check token usage and display warnings if needed
11961219
if self.pending_tool_index.is_none() {
11971220
// Only display warnings when not waiting for tool approval
1198-
if self.conversation.can_create_summary_request(os).await? {
1199-
if let Err(err) = self.display_char_warnings(os).await {
1200-
warn!("Failed to display character limit warnings: {}", err);
1201-
}
1221+
if let Err(err) = self.display_char_warnings(os).await {
1222+
warn!("Failed to display character limit warnings: {}", err);
12021223
}
12031224
}
12041225

crates/chat-cli/src/cli/chat/util/mod.rs

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ pub fn truncate_safe(s: &str, max_bytes: usize) -> &str {
3434
&s[..byte_count]
3535
}
3636

37+
/// Truncates `s` in place so that it occupies at most `max_bytes` bytes.
///
/// The cut is made at the largest UTF-8 character boundary that does not exceed `max_bytes`,
/// so the string always remains valid and this never panics. Strings already within the
/// limit are left unchanged.
///
/// The limit is measured in bytes (not characters), matching how callers pass a byte budget.
pub fn truncate_safe_in_place(s: &mut String, max_bytes: usize) {
    if s.len() <= max_bytes {
        return;
    }

    // Walk back to the nearest character boundary; index 0 is always a boundary, so this
    // loop terminates.
    let mut end = max_bytes;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    s.truncate(end);
}
41+
3742
pub fn animate_output(output: &mut impl Write, bytes: &[u8]) -> Result<(), ChatError> {
3843
for b in bytes.chunks(12) {
3944
output.write_all(b)?;
@@ -175,10 +180,30 @@ mod tests {
175180

176181
#[test]
177182
fn test_truncate_safe() {
178-
assert_eq!(truncate_safe("Hello World", 5), "Hello");
179-
assert_eq!(truncate_safe("Hello ", 5), "Hello");
180-
assert_eq!(truncate_safe("Hello World", 11), "Hello World");
181-
assert_eq!(truncate_safe("Hello World", 15), "Hello World");
183+
let tests = &[
184+
("Hello World", 5, "Hello"),
185+
("Hello ", 5, "Hello"),
186+
("Hello World", 11, "Hello World"),
187+
("Hello World", 15, "Hello World"),
188+
];
189+
for (input, max_bytes, expected) in tests {
190+
assert_eq!(
191+
truncate_safe(input, *max_bytes),
192+
*expected,
193+
"input: {} with max bytes: {} failed",
194+
input,
195+
max_bytes
196+
);
197+
let mut in_place = input.to_string();
198+
truncate_safe_in_place(&mut in_place, *max_bytes);
199+
assert_eq!(
200+
in_place.as_str(),
201+
*expected,
202+
"input: {} with max bytes: {} failed",
203+
input,
204+
max_bytes
205+
);
206+
}
182207
}
183208

184209
#[test]

0 commit comments

Comments
 (0)