Skip to content

Commit 1a89f70

Browse files
authored
refactor Conversation history file into its own directory (openai#6229)
This is just a refactor of `conversation_history` file by breaking it up into multiple smaller ones with helper. This refactor will help us move more functionality related to context management here. in a clean way.
1 parent 62474a3 commit 1a89f70

File tree

10 files changed

+1372
-1359
lines changed

10 files changed

+1372
-1359
lines changed

codex-rs/core/src/codex.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ use crate::client_common::ResponseEvent;
5858
use crate::config::Config;
5959
use crate::config::types::McpServerTransportConfig;
6060
use crate::config::types::ShellEnvironmentPolicy;
61-
use crate::conversation_history::ConversationHistory;
61+
use crate::context_manager::ContextManager;
6262
use crate::environment_context::EnvironmentContext;
6363
use crate::error::CodexErr;
6464
use crate::error::Result as CodexResult;
@@ -945,7 +945,7 @@ impl Session {
945945
turn_context: &TurnContext,
946946
rollout_items: &[RolloutItem],
947947
) -> Vec<ResponseItem> {
948-
let mut history = ConversationHistory::new();
948+
let mut history = ContextManager::new();
949949
for item in rollout_items {
950950
match item {
951951
RolloutItem::ResponseItem(response_item) => {
@@ -1032,7 +1032,7 @@ impl Session {
10321032
}
10331033
}
10341034

1035-
pub(crate) async fn clone_history(&self) -> ConversationHistory {
1035+
pub(crate) async fn clone_history(&self) -> ContextManager {
10361036
let state = self.state.lock().await;
10371037
state.clone_history()
10381038
}
@@ -2834,7 +2834,7 @@ mod tests {
28342834
turn_context: &TurnContext,
28352835
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
28362836
let mut rollout_items = Vec::new();
2837-
let mut live_history = ConversationHistory::new();
2837+
let mut live_history = ContextManager::new();
28382838

28392839
let initial_context = session.build_initial_context(turn_context);
28402840
for item in &initial_context {
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
use codex_protocol::models::FunctionCallOutputPayload;
2+
use codex_protocol::models::ResponseItem;
3+
use codex_protocol::protocol::TokenUsage;
4+
use codex_protocol::protocol::TokenUsageInfo;
5+
use std::ops::Deref;
6+
7+
use crate::context_manager::normalize;
8+
use crate::context_manager::truncate::format_output_for_model_body;
9+
use crate::context_manager::truncate::globally_truncate_function_output_items;
10+
11+
/// Transcript of conversation history
12+
#[derive(Debug, Clone, Default)]
13+
pub(crate) struct ContextManager {
14+
/// The oldest items are at the beginning of the vector.
15+
items: Vec<ResponseItem>,
16+
token_info: Option<TokenUsageInfo>,
17+
}
18+
19+
impl ContextManager {
20+
pub(crate) fn new() -> Self {
21+
Self {
22+
items: Vec::new(),
23+
token_info: TokenUsageInfo::new_or_append(&None, &None, None),
24+
}
25+
}
26+
27+
pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
28+
self.token_info.clone()
29+
}
30+
31+
pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
32+
match &mut self.token_info {
33+
Some(info) => info.fill_to_context_window(context_window),
34+
None => {
35+
self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
36+
}
37+
}
38+
}
39+
40+
/// `items` is ordered from oldest to newest.
41+
pub(crate) fn record_items<I>(&mut self, items: I)
42+
where
43+
I: IntoIterator,
44+
I::Item: std::ops::Deref<Target = ResponseItem>,
45+
{
46+
for item in items {
47+
let item_ref = item.deref();
48+
let is_ghost_snapshot = matches!(item_ref, ResponseItem::GhostSnapshot { .. });
49+
if !is_api_message(item_ref) && !is_ghost_snapshot {
50+
continue;
51+
}
52+
53+
let processed = Self::process_item(&item);
54+
self.items.push(processed);
55+
}
56+
}
57+
58+
pub(crate) fn get_history(&mut self) -> Vec<ResponseItem> {
59+
self.normalize_history();
60+
self.contents()
61+
}
62+
63+
// Returns the history prepared for sending to the model.
64+
// With extra response items filtered out and GhostCommits removed.
65+
pub(crate) fn get_history_for_prompt(&mut self) -> Vec<ResponseItem> {
66+
let mut history = self.get_history();
67+
Self::remove_ghost_snapshots(&mut history);
68+
history
69+
}
70+
71+
pub(crate) fn remove_first_item(&mut self) {
72+
if !self.items.is_empty() {
73+
// Remove the oldest item (front of the list). Items are ordered from
74+
// oldest → newest, so index 0 is the first entry recorded.
75+
let removed = self.items.remove(0);
76+
// If the removed item participates in a call/output pair, also remove
77+
// its corresponding counterpart to keep the invariants intact without
78+
// running a full normalization pass.
79+
normalize::remove_corresponding_for(&mut self.items, &removed);
80+
}
81+
}
82+
83+
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
84+
self.items = items;
85+
}
86+
87+
pub(crate) fn update_token_info(
88+
&mut self,
89+
usage: &TokenUsage,
90+
model_context_window: Option<i64>,
91+
) {
92+
self.token_info = TokenUsageInfo::new_or_append(
93+
&self.token_info,
94+
&Some(usage.clone()),
95+
model_context_window,
96+
);
97+
}
98+
99+
/// This function enforces a couple of invariants on the in-memory history:
100+
/// 1. every call (function/custom) has a corresponding output entry
101+
/// 2. every output has a corresponding call entry
102+
fn normalize_history(&mut self) {
103+
// all function/tool calls must have a corresponding output
104+
normalize::ensure_call_outputs_present(&mut self.items);
105+
106+
// all outputs must have a corresponding function/tool call
107+
normalize::remove_orphan_outputs(&mut self.items);
108+
}
109+
110+
/// Returns a clone of the contents in the transcript.
111+
fn contents(&self) -> Vec<ResponseItem> {
112+
self.items.clone()
113+
}
114+
115+
fn remove_ghost_snapshots(items: &mut Vec<ResponseItem>) {
116+
items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. }));
117+
}
118+
119+
fn process_item(item: &ResponseItem) -> ResponseItem {
120+
match item {
121+
ResponseItem::FunctionCallOutput { call_id, output } => {
122+
let truncated = format_output_for_model_body(output.content.as_str());
123+
let truncated_items = output
124+
.content_items
125+
.as_ref()
126+
.map(|items| globally_truncate_function_output_items(items));
127+
ResponseItem::FunctionCallOutput {
128+
call_id: call_id.clone(),
129+
output: FunctionCallOutputPayload {
130+
content: truncated,
131+
content_items: truncated_items,
132+
success: output.success,
133+
},
134+
}
135+
}
136+
ResponseItem::CustomToolCallOutput { call_id, output } => {
137+
let truncated = format_output_for_model_body(output);
138+
ResponseItem::CustomToolCallOutput {
139+
call_id: call_id.clone(),
140+
output: truncated,
141+
}
142+
}
143+
ResponseItem::Message { .. }
144+
| ResponseItem::Reasoning { .. }
145+
| ResponseItem::LocalShellCall { .. }
146+
| ResponseItem::FunctionCall { .. }
147+
| ResponseItem::WebSearchCall { .. }
148+
| ResponseItem::CustomToolCall { .. }
149+
| ResponseItem::GhostSnapshot { .. }
150+
| ResponseItem::Other => item.clone(),
151+
}
152+
}
153+
}
154+
155+
/// API messages include every non-system item (user/assistant messages, reasoning,
156+
/// tool calls, tool outputs, shell calls, and web-search calls).
157+
fn is_api_message(message: &ResponseItem) -> bool {
158+
match message {
159+
ResponseItem::Message { role, .. } => role.as_str() != "system",
160+
ResponseItem::FunctionCallOutput { .. }
161+
| ResponseItem::FunctionCall { .. }
162+
| ResponseItem::CustomToolCall { .. }
163+
| ResponseItem::CustomToolCallOutput { .. }
164+
| ResponseItem::LocalShellCall { .. }
165+
| ResponseItem::Reasoning { .. }
166+
| ResponseItem::WebSearchCall { .. } => true,
167+
ResponseItem::GhostSnapshot { .. } => false,
168+
ResponseItem::Other => false,
169+
}
170+
}
171+
172+
#[cfg(test)]
173+
#[path = "history_tests.rs"]
174+
mod tests;

0 commit comments

Comments
 (0)