Skip to content

Commit b3ddd50

Browse files
authored
Remote compact for API-key users (#7835)
1 parent 9429e8b commit b3ddd50

File tree

11 files changed

+101
-78
lines changed

11 files changed

+101
-78
lines changed

codex-rs/codex-api/src/requests/chat.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ impl<'a> ChatRequestBuilder<'a> {
7474
ResponseItem::CustomToolCallOutput { .. } => {}
7575
ResponseItem::WebSearchCall { .. } => {}
7676
ResponseItem::GhostSnapshot { .. } => {}
77-
ResponseItem::CompactionSummary { .. } => {}
77+
ResponseItem::Compaction { .. } => {}
7878
}
7979
}
8080

@@ -303,7 +303,7 @@ impl<'a> ChatRequestBuilder<'a> {
303303
ResponseItem::Reasoning { .. }
304304
| ResponseItem::WebSearchCall { .. }
305305
| ResponseItem::Other
306-
| ResponseItem::CompactionSummary { .. } => {
306+
| ResponseItem::Compaction { .. } => {
307307
continue;
308308
}
309309
}

codex-rs/core/src/codex.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2175,7 +2175,10 @@ pub(crate) async fn run_task(
21752175

21762176
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
21772177
if token_limit_reached {
2178-
if should_use_remote_compact_task(&sess) {
2178+
if should_use_remote_compact_task(
2179+
sess.as_ref(),
2180+
&turn_context.client.get_provider(),
2181+
) {
21792182
run_inline_remote_auto_compact_task(sess.clone(), turn_context.clone())
21802183
.await;
21812184
} else {

codex-rs/core/src/compact.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::sync::Arc;
22

3+
use crate::ModelProviderInfo;
34
use crate::Prompt;
45
use crate::client_common::ResponseEvent;
56
use crate::codex::Session;
@@ -18,7 +19,6 @@ use crate::truncate::TruncationPolicy;
1819
use crate::truncate::approx_token_count;
1920
use crate::truncate::truncate_text;
2021
use crate::util::backoff;
21-
use codex_app_server_protocol::AuthMode;
2222
use codex_protocol::items::TurnItem;
2323
use codex_protocol::models::ContentItem;
2424
use codex_protocol::models::ResponseInputItem;
@@ -32,13 +32,11 @@ pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt
3232
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
3333
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
3434

35-
pub(crate) fn should_use_remote_compact_task(session: &Session) -> bool {
36-
session
37-
.services
38-
.auth_manager
39-
.auth()
40-
.is_some_and(|auth| auth.mode == AuthMode::ChatGPT)
41-
&& session.enabled(Feature::RemoteCompaction)
35+
pub(crate) fn should_use_remote_compact_task(
36+
session: &Session,
37+
provider: &ModelProviderInfo,
38+
) -> bool {
39+
provider.is_openai() && session.enabled(Feature::RemoteCompaction)
4240
}
4341

4442
pub(crate) async fn run_inline_auto_compact_task(

codex-rs/core/src/context_manager/history.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ impl ContextManager {
9292
encrypted_content: Some(content),
9393
..
9494
}
95-
| ResponseItem::CompactionSummary {
95+
| ResponseItem::Compaction {
9696
encrypted_content: content,
9797
} => estimate_reasoning_length(content.len()) as i64,
9898
item => {
@@ -258,7 +258,7 @@ impl ContextManager {
258258
| ResponseItem::FunctionCall { .. }
259259
| ResponseItem::WebSearchCall { .. }
260260
| ResponseItem::CustomToolCall { .. }
261-
| ResponseItem::CompactionSummary { .. }
261+
| ResponseItem::Compaction { .. }
262262
| ResponseItem::GhostSnapshot { .. }
263263
| ResponseItem::Other => item.clone(),
264264
}
@@ -277,7 +277,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
277277
| ResponseItem::LocalShellCall { .. }
278278
| ResponseItem::Reasoning { .. }
279279
| ResponseItem::WebSearchCall { .. }
280-
| ResponseItem::CompactionSummary { .. } => true,
280+
| ResponseItem::Compaction { .. } => true,
281281
ResponseItem::GhostSnapshot { .. } => false,
282282
ResponseItem::Other => false,
283283
}

codex-rs/core/src/model_provider_info.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ const MAX_STREAM_MAX_RETRIES: u64 = 100;
2828
const MAX_REQUEST_MAX_RETRIES: u64 = 100;
2929
pub const CHAT_WIRE_API_DEPRECATION_SUMMARY: &str = r#"Support for the "chat" wire API is deprecated and will soon be removed. Update your model provider definition in config.toml to use wire_api = "responses"."#;
3030

31+
const OPENAI_PROVIDER_NAME: &str = "OpenAI";
32+
3133
/// Wire protocol that the provider speaks. Most third-party services only
3234
/// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
3335
/// itself (and a handful of others) additionally expose the more modern
@@ -210,7 +212,7 @@ impl ModelProviderInfo {
210212
}
211213
pub fn create_openai_provider() -> ModelProviderInfo {
212214
ModelProviderInfo {
213-
name: "OpenAI".into(),
215+
name: OPENAI_PROVIDER_NAME.into(),
214216
// Allow users to override the default OpenAI endpoint by
215217
// exporting `OPENAI_BASE_URL`. This is useful when pointing
216218
// Codex at a proxy, mock server, or Azure-style deployment
@@ -247,6 +249,10 @@ impl ModelProviderInfo {
247249
requires_openai_auth: true,
248250
}
249251
}
252+
253+
pub fn is_openai(&self) -> bool {
254+
self.name == OPENAI_PROVIDER_NAME
255+
}
250256
}
251257

252258
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;

codex-rs/core/src/rollout/policy.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
2828
| ResponseItem::CustomToolCallOutput { .. }
2929
| ResponseItem::WebSearchCall { .. }
3030
| ResponseItem::GhostSnapshot { .. }
31-
| ResponseItem::CompactionSummary { .. } => true,
31+
| ResponseItem::Compaction { .. } => true,
3232
ResponseItem::Other => false,
3333
}
3434
}

codex-rs/core/src/tasks/compact.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ impl SessionTask for CompactTask {
2525
_cancellation_token: CancellationToken,
2626
) -> Option<String> {
2727
let session = session.clone_session();
28-
if crate::compact::should_use_remote_compact_task(&session) {
28+
if crate::compact::should_use_remote_compact_task(
29+
session.as_ref(),
30+
&ctx.client.get_provider(),
31+
) {
2932
crate::compact_remote::run_remote_compact_task(session, ctx).await
3033
} else {
3134
crate::compact::run_compact_task(session, ctx, input).await

codex-rs/core/tests/suite/compact.rs

Lines changed: 37 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ use core_test_support::responses::sse_failed;
3838
use core_test_support::responses::start_mock_server;
3939
use pretty_assertions::assert_eq;
4040
use serde_json::json;
41+
use wiremock::MockServer;
4142
// --- Test helpers -----------------------------------------------------------
4243

4344
pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
@@ -100,6 +101,13 @@ fn json_fragment(text: &str) -> String {
100101
.to_string()
101102
}
102103

104+
fn non_openai_model_provider(server: &MockServer) -> ModelProviderInfo {
105+
let mut provider = built_in_model_providers()["openai"].clone();
106+
provider.name = "OpenAI (test)".into();
107+
provider.base_url = Some(format!("{}/v1", server.uri()));
108+
provider
109+
}
110+
103111
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
104112
async fn summarize_context_three_requests_and_instructions() {
105113
skip_if_no_network!();
@@ -127,10 +135,7 @@ async fn summarize_context_three_requests_and_instructions() {
127135
let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3]).await;
128136

129137
// Build config pointing to the mock server and spawn Codex.
130-
let model_provider = ModelProviderInfo {
131-
base_url: Some(format!("{}/v1", server.uri())),
132-
..built_in_model_providers()["openai"].clone()
133-
};
138+
let model_provider = non_openai_model_provider(&server);
134139
let home = TempDir::new().unwrap();
135140
let mut config = load_default_config_for_test(&home);
136141
config.model_provider = model_provider;
@@ -324,10 +329,7 @@ async fn manual_compact_uses_custom_prompt() {
324329

325330
let custom_prompt = "Use this compact prompt instead";
326331

327-
let model_provider = ModelProviderInfo {
328-
base_url: Some(format!("{}/v1", server.uri())),
329-
..built_in_model_providers()["openai"].clone()
330-
};
332+
let model_provider = non_openai_model_provider(&server);
331333
let home = TempDir::new().unwrap();
332334
let mut config = load_default_config_for_test(&home);
333335
config.model_provider = model_provider;
@@ -407,10 +409,7 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
407409
]);
408410
mount_sse_once(&server, sse_compact).await;
409411

410-
let model_provider = ModelProviderInfo {
411-
base_url: Some(format!("{}/v1", server.uri())),
412-
..built_in_model_providers()["openai"].clone()
413-
};
412+
let model_provider = non_openai_model_provider(&server);
414413
let home = TempDir::new().unwrap();
415414
let mut config = load_default_config_for_test(&home);
416415
config.model_provider = model_provider;
@@ -467,7 +466,11 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
467466

468467
let server = start_mock_server().await;
469468

469+
let non_openai_provider_name = non_openai_model_provider(&server).name;
470470
let codex = test_codex()
471+
.with_config(move |config| {
472+
config.model_provider.name = non_openai_provider_name;
473+
})
471474
.build(&server)
472475
.await
473476
.expect("build codex")
@@ -1050,10 +1053,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
10501053
};
10511054
mount_sse_once_match(&server, fourth_matcher, sse4).await;
10521055

1053-
let model_provider = ModelProviderInfo {
1054-
base_url: Some(format!("{}/v1", server.uri())),
1055-
..built_in_model_providers()["openai"].clone()
1056-
};
1056+
let model_provider = non_openai_model_provider(&server);
10571057

10581058
let home = TempDir::new().unwrap();
10591059
let mut config = load_default_config_for_test(&home);
@@ -1299,10 +1299,7 @@ async fn auto_compact_persists_rollout_entries() {
12991299
};
13001300
mount_sse_once_match(&server, third_matcher, sse3).await;
13011301

1302-
let model_provider = ModelProviderInfo {
1303-
base_url: Some(format!("{}/v1", server.uri())),
1304-
..built_in_model_providers()["openai"].clone()
1305-
};
1302+
let model_provider = non_openai_model_provider(&server);
13061303

13071304
let home = TempDir::new().unwrap();
13081305
let mut config = load_default_config_for_test(&home);
@@ -1403,10 +1400,7 @@ async fn manual_compact_retries_after_context_window_error() {
14031400
)
14041401
.await;
14051402

1406-
let model_provider = ModelProviderInfo {
1407-
base_url: Some(format!("{}/v1", server.uri())),
1408-
..built_in_model_providers()["openai"].clone()
1409-
};
1403+
let model_provider = non_openai_model_provider(&server);
14101404

14111405
let home = TempDir::new().unwrap();
14121406
let mut config = load_default_config_for_test(&home);
@@ -1539,10 +1533,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
15391533
)
15401534
.await;
15411535

1542-
let model_provider = ModelProviderInfo {
1543-
base_url: Some(format!("{}/v1", server.uri())),
1544-
..built_in_model_providers()["openai"].clone()
1545-
};
1536+
let model_provider = non_openai_model_provider(&server);
15461537

15471538
let home = TempDir::new().unwrap();
15481539
let mut config = load_default_config_for_test(&home);
@@ -1743,10 +1734,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
17431734

17441735
mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4, sse5, sse6]).await;
17451736

1746-
let model_provider = ModelProviderInfo {
1747-
base_url: Some(format!("{}/v1", server.uri())),
1748-
..built_in_model_providers()["openai"].clone()
1749-
};
1737+
let model_provider = non_openai_model_provider(&server);
17501738

17511739
let home = TempDir::new().unwrap();
17521740
let mut config = load_default_config_for_test(&home);
@@ -1856,10 +1844,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
18561844
// We don't assert on the post-compact request, so no need to keep its mock.
18571845
mount_sse_once(&server, post_auto_compact_turn).await;
18581846

1859-
let model_provider = ModelProviderInfo {
1860-
base_url: Some(format!("{}/v1", server.uri())),
1861-
..built_in_model_providers()["openai"].clone()
1862-
};
1847+
let model_provider = non_openai_model_provider(&server);
18631848

18641849
let home = TempDir::new().unwrap();
18651850
let mut config = load_default_config_for_test(&home);
@@ -1961,13 +1946,18 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
19611946
)
19621947
.await;
19631948

1964-
let compacted_history = vec![codex_protocol::models::ResponseItem::Message {
1965-
id: None,
1966-
role: "assistant".to_string(),
1967-
content: vec![codex_protocol::models::ContentItem::OutputText {
1968-
text: "REMOTE_COMPACT_SUMMARY".to_string(),
1969-
}],
1970-
}];
1949+
let compacted_history = vec![
1950+
codex_protocol::models::ResponseItem::Message {
1951+
id: None,
1952+
role: "assistant".to_string(),
1953+
content: vec![codex_protocol::models::ContentItem::OutputText {
1954+
text: "REMOTE_COMPACT_SUMMARY".to_string(),
1955+
}],
1956+
},
1957+
codex_protocol::models::ResponseItem::Compaction {
1958+
encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
1959+
},
1960+
];
19711961
let compact_mock =
19721962
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
19731963

@@ -2028,4 +2018,8 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
20282018
resume_body.contains("REMOTE_COMPACT_SUMMARY") || resume_body.contains(FINAL_REPLY),
20292019
"resume request should follow remote compact and use compacted history"
20302020
);
2021+
assert!(
2022+
resume_body.contains("ENCRYPTED_COMPACTION_SUMMARY"),
2023+
"resume request should include compaction summary item"
2024+
);
20312025
}

codex-rs/core/tests/suite/compact_remote.rs

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,13 +51,18 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
5151
)
5252
.await;
5353

54-
let compacted_history = vec![ResponseItem::Message {
55-
id: None,
56-
role: "user".to_string(),
57-
content: vec![ContentItem::InputText {
58-
text: "REMOTE_COMPACTED_SUMMARY".to_string(),
59-
}],
60-
}];
54+
let compacted_history = vec![
55+
ResponseItem::Message {
56+
id: None,
57+
role: "user".to_string(),
58+
content: vec![ContentItem::InputText {
59+
text: "REMOTE_COMPACTED_SUMMARY".to_string(),
60+
}],
61+
},
62+
ResponseItem::Compaction {
63+
encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
64+
},
65+
];
6166
let compact_mock = responses::mount_compact_json_once(
6267
harness.server(),
6368
serde_json::json!({ "output": compacted_history.clone() }),
@@ -120,6 +125,10 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
120125
follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"),
121126
"expected follow-up request to use compacted history"
122127
);
128+
assert!(
129+
follow_up_body.contains("ENCRYPTED_COMPACTION_SUMMARY"),
130+
"expected follow-up request to include compaction summary item"
131+
);
123132
assert!(
124133
!follow_up_body.contains("FIRST_REMOTE_REPLY"),
125134
"expected follow-up request to drop pre-compaction assistant messages"
@@ -159,13 +168,18 @@ async fn remote_compact_runs_automatically() -> Result<()> {
159168
)
160169
.await;
161170

162-
let compacted_history = vec![ResponseItem::Message {
163-
id: None,
164-
role: "user".to_string(),
165-
content: vec![ContentItem::InputText {
166-
text: "REMOTE_COMPACTED_SUMMARY".to_string(),
167-
}],
168-
}];
171+
let compacted_history = vec![
172+
ResponseItem::Message {
173+
id: None,
174+
role: "user".to_string(),
175+
content: vec![ContentItem::InputText {
176+
text: "REMOTE_COMPACTED_SUMMARY".to_string(),
177+
}],
178+
},
179+
ResponseItem::Compaction {
180+
encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
181+
},
182+
];
169183
let compact_mock = responses::mount_compact_json_once(
170184
harness.server(),
171185
serde_json::json!({ "output": compacted_history.clone() }),
@@ -190,6 +204,7 @@ async fn remote_compact_runs_automatically() -> Result<()> {
190204
assert_eq!(compact_mock.requests().len(), 1);
191205
let follow_up_body = responses_mock.single_request().body_json().to_string();
192206
assert!(follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"));
207+
assert!(follow_up_body.contains("ENCRYPTED_COMPACTION_SUMMARY"));
193208

194209
Ok(())
195210
}
@@ -226,6 +241,9 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()>
226241
text: "COMPACTED_USER_SUMMARY".to_string(),
227242
}],
228243
},
244+
ResponseItem::Compaction {
245+
encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
246+
},
229247
ResponseItem::Message {
230248
id: None,
231249
role: "assistant".to_string(),

0 commit comments

Comments (0)