Skip to content

Commit 687a13b

Browse files
authored
feat: truncate on compact (#4942)
Truncate the message during compaction if it is just too large. Do it iteratively, as tokenization is basically free on the server side.
1 parent fe8122e commit 687a13b

File tree

2 files changed

+148
-6
lines changed

2 files changed

+148
-6
lines changed

codex-rs/core/src/codex/compact.rs

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,10 @@ async fn run_compact_task_inner(
7070
input: Vec<InputItem>,
7171
) {
7272
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
73-
let turn_input = sess
73+
let mut turn_input = sess
7474
.turn_input_with_history(vec![initial_input_for_turn.clone().into()])
7575
.await;
76-
77-
let prompt = Prompt {
78-
input: turn_input,
79-
..Default::default()
80-
};
76+
let mut truncated_count = 0usize;
8177

8278
let max_retries = turn_context.client.get_provider().stream_max_retries();
8379
let mut retries = 0;
@@ -93,17 +89,36 @@ async fn run_compact_task_inner(
9389
sess.persist_rollout_items(&[rollout_item]).await;
9490

9591
loop {
92+
let prompt = Prompt {
93+
input: turn_input.clone(),
94+
..Default::default()
95+
};
9696
let attempt_result =
9797
drain_to_completed(&sess, turn_context.as_ref(), &sub_id, &prompt).await;
9898

9999
match attempt_result {
100100
Ok(()) => {
101+
if truncated_count > 0 {
102+
sess.notify_background_event(
103+
&sub_id,
104+
format!(
105+
"Trimmed {truncated_count} older conversation item(s) before compacting so the prompt fits the model context window."
106+
),
107+
)
108+
.await;
109+
}
101110
break;
102111
}
103112
Err(CodexErr::Interrupted) => {
104113
return;
105114
}
106115
Err(e @ CodexErr::ContextWindowExceeded) => {
116+
if turn_input.len() > 1 {
117+
turn_input.remove(0);
118+
truncated_count += 1;
119+
retries = 0;
120+
continue;
121+
}
107122
sess.set_total_tokens_full(&sub_id, turn_context.as_ref())
108123
.await;
109124
let event = Event {

codex-rs/core/tests/suite/compact.rs

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use core_test_support::responses::ev_function_call;
2222
use core_test_support::responses::mount_sse_once_match;
2323
use core_test_support::responses::mount_sse_sequence;
2424
use core_test_support::responses::sse;
25+
use core_test_support::responses::sse_failed;
2526
use core_test_support::responses::start_mock_server;
2627
use pretty_assertions::assert_eq;
2728
// --- Test helpers -----------------------------------------------------------
@@ -38,6 +39,8 @@ const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
3839
const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";
3940
const SECOND_AUTO_SUMMARY: &str = "SECOND_AUTO_SUMMARY";
4041
const FINAL_REPLY: &str = "FINAL_REPLY";
42+
const CONTEXT_LIMIT_MESSAGE: &str =
43+
"Your input exceeds the context window of this model. Please adjust your input and try again.";
4144
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
4245
const DUMMY_CALL_ID: &str = "call-multi-auto";
4346

@@ -622,6 +625,130 @@ async fn auto_compact_stops_after_failed_attempt() {
622625
);
623626
}
624627

628+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
629+
async fn manual_compact_retries_after_context_window_error() {
630+
skip_if_no_network!();
631+
632+
let server = start_mock_server().await;
633+
634+
let user_turn = sse(vec![
635+
ev_assistant_message("m1", FIRST_REPLY),
636+
ev_completed("r1"),
637+
]);
638+
let compact_failed = sse_failed(
639+
"resp-fail",
640+
"context_length_exceeded",
641+
CONTEXT_LIMIT_MESSAGE,
642+
);
643+
let compact_succeeds = sse(vec![
644+
ev_assistant_message("m2", SUMMARY_TEXT),
645+
ev_completed("r2"),
646+
]);
647+
648+
let request_log = mount_sse_sequence(
649+
&server,
650+
vec![
651+
user_turn.clone(),
652+
compact_failed.clone(),
653+
compact_succeeds.clone(),
654+
],
655+
)
656+
.await;
657+
658+
let model_provider = ModelProviderInfo {
659+
base_url: Some(format!("{}/v1", server.uri())),
660+
..built_in_model_providers()["openai"].clone()
661+
};
662+
663+
let home = TempDir::new().unwrap();
664+
let mut config = load_default_config_for_test(&home);
665+
config.model_provider = model_provider;
666+
config.model_auto_compact_token_limit = Some(200_000);
667+
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
668+
.new_conversation(config)
669+
.await
670+
.unwrap()
671+
.conversation;
672+
673+
codex
674+
.submit(Op::UserInput {
675+
items: vec![InputItem::Text {
676+
text: "first turn".into(),
677+
}],
678+
})
679+
.await
680+
.unwrap();
681+
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
682+
683+
codex.submit(Op::Compact).await.unwrap();
684+
685+
let EventMsg::BackgroundEvent(event) =
686+
wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
687+
else {
688+
panic!("expected background event after compact retry");
689+
};
690+
assert!(
691+
event.message.contains("Trimmed 1 older conversation item"),
692+
"background event should mention trimmed item count: {}",
693+
event.message
694+
);
695+
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
696+
697+
let requests = request_log.requests();
698+
assert_eq!(
699+
requests.len(),
700+
3,
701+
"expected user turn and two compact attempts"
702+
);
703+
704+
let compact_attempt = requests[1].body_json();
705+
let retry_attempt = requests[2].body_json();
706+
707+
let compact_input = compact_attempt["input"]
708+
.as_array()
709+
.unwrap_or_else(|| panic!("compact attempt missing input array: {compact_attempt}"));
710+
let retry_input = retry_attempt["input"]
711+
.as_array()
712+
.unwrap_or_else(|| panic!("retry attempt missing input array: {retry_attempt}"));
713+
assert_eq!(
714+
compact_input
715+
.last()
716+
.and_then(|item| item.get("content"))
717+
.and_then(|v| v.as_array())
718+
.and_then(|items| items.first())
719+
.and_then(|entry| entry.get("text"))
720+
.and_then(|text| text.as_str()),
721+
Some(SUMMARIZATION_PROMPT),
722+
"compact attempt should include summarization prompt"
723+
);
724+
assert_eq!(
725+
retry_input
726+
.last()
727+
.and_then(|item| item.get("content"))
728+
.and_then(|v| v.as_array())
729+
.and_then(|items| items.first())
730+
.and_then(|entry| entry.get("text"))
731+
.and_then(|text| text.as_str()),
732+
Some(SUMMARIZATION_PROMPT),
733+
"retry attempt should include summarization prompt"
734+
);
735+
assert_eq!(
736+
retry_input.len(),
737+
compact_input.len().saturating_sub(1),
738+
"retry should drop exactly one history item (before {} vs after {})",
739+
compact_input.len(),
740+
retry_input.len()
741+
);
742+
if let (Some(first_before), Some(first_after)) = (compact_input.first(), retry_input.first()) {
743+
assert_ne!(
744+
first_before, first_after,
745+
"retry should drop the oldest conversation item"
746+
);
747+
} else {
748+
panic!("expected non-empty compact inputs");
749+
}
750+
}
751+
625752
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
626753
async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
627754
skip_if_no_network!();

0 commit comments

Comments
 (0)