Skip to content

Commit 2437a8d

Browse files
authored
Record Git metadata to rollout (openai#1598)
# Summary

- Writing effective evals for Codex sessions requires context about the overall repository state at the moment the session began.
- This change adds that metadata (git repository, branch, commit hash) to the top of the session's rollout when it is available; when it is not, nothing is added.
- Currently this is only effective on a clean working tree, because the current metadata set cannot track uncommitted or untracked changes. In the future we may want to track unclean changes somehow, or perhaps prompt the user to stash or commit them.

# Testing

- Added unit tests
- `cargo test && cargo clippy --tests && cargo fmt -- --config imports_granularity=Item`

### Resulting Rollout

<img width="1243" height="127" alt="Screenshot 2025-07-17 at 1 50 00 PM" src="https://github.com/user-attachments/assets/68108941-f015-45b2-985c-ea315ce05415" />
1 parent d2be072 commit 2437a8d

File tree

5 files changed

+475
-13
lines changed

5 files changed

+475
-13
lines changed

codex-rs/core/src/codex.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,7 @@ async fn submission_loop(
594594
let mut restored_items: Option<Vec<ResponseItem>> = None;
595595
let rollout_recorder: Option<RolloutRecorder> =
596596
if let Some(path) = resume_path.as_ref() {
597-
match RolloutRecorder::resume(path).await {
597+
match RolloutRecorder::resume(path, cwd.clone()).await {
598598
Ok((rec, saved)) => {
599599
session_id = saved.session_id;
600600
if !saved.items.is_empty() {

codex-rs/core/src/git_info.rs

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
use std::path::Path;
2+
3+
use serde::Deserialize;
4+
use serde::Serialize;
5+
use tokio::process::Command;
6+
use tokio::time::Duration as TokioDuration;
7+
use tokio::time::timeout;
8+
9+
/// Timeout for git commands to prevent freezing on large repositories
10+
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
11+
12+
#[derive(Serialize, Deserialize, Clone)]
13+
pub struct GitInfo {
14+
/// Current commit hash (SHA)
15+
#[serde(skip_serializing_if = "Option::is_none")]
16+
pub commit_hash: Option<String>,
17+
/// Current branch name
18+
#[serde(skip_serializing_if = "Option::is_none")]
19+
pub branch: Option<String>,
20+
/// Repository URL (if available from remote)
21+
#[serde(skip_serializing_if = "Option::is_none")]
22+
pub repository_url: Option<String>,
23+
}
24+
25+
/// Collect git repository information from the given working directory using command-line git.
26+
/// Returns None if no git repository is found or if git operations fail.
27+
/// Uses timeouts to prevent freezing on large repositories.
28+
/// All git commands (except the initial repo check) run in parallel for better performance.
29+
pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
30+
// Check if we're in a git repository first
31+
let is_git_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
32+
.await?
33+
.status
34+
.success();
35+
36+
if !is_git_repo {
37+
return None;
38+
}
39+
40+
// Run all git info collection commands in parallel
41+
let (commit_result, branch_result, url_result) = tokio::join!(
42+
run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
43+
run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
44+
run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
45+
);
46+
47+
let mut git_info = GitInfo {
48+
commit_hash: None,
49+
branch: None,
50+
repository_url: None,
51+
};
52+
53+
// Process commit hash
54+
if let Some(output) = commit_result {
55+
if output.status.success() {
56+
if let Ok(hash) = String::from_utf8(output.stdout) {
57+
git_info.commit_hash = Some(hash.trim().to_string());
58+
}
59+
}
60+
}
61+
62+
// Process branch name
63+
if let Some(output) = branch_result {
64+
if output.status.success() {
65+
if let Ok(branch) = String::from_utf8(output.stdout) {
66+
let branch = branch.trim();
67+
if branch != "HEAD" {
68+
git_info.branch = Some(branch.to_string());
69+
}
70+
}
71+
}
72+
}
73+
74+
// Process repository URL
75+
if let Some(output) = url_result {
76+
if output.status.success() {
77+
if let Ok(url) = String::from_utf8(output.stdout) {
78+
git_info.repository_url = Some(url.trim().to_string());
79+
}
80+
}
81+
}
82+
83+
Some(git_info)
84+
}
85+
86+
/// Run a git command with a timeout to prevent blocking on large repositories
87+
async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
88+
let result = timeout(
89+
GIT_COMMAND_TIMEOUT,
90+
Command::new("git").args(args).current_dir(cwd).output(),
91+
)
92+
.await;
93+
94+
match result {
95+
Ok(Ok(output)) => Some(output),
96+
_ => None, // Timeout or error
97+
}
98+
}
99+
100+
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used)]
    #![allow(clippy::unwrap_used)]

    use super::*;

    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;

    /// Run `git` with `args` inside `repo_path` and return its captured output.
    ///
    /// Panics if the process cannot be spawned or exits unsuccessfully, so a
    /// broken fixture fails at the step that broke instead of surfacing later
    /// as an unrelated assertion failure.
    async fn run_git(repo_path: &Path, args: &[&str]) -> std::process::Output {
        let output = Command::new("git")
            .args(args)
            .current_dir(repo_path)
            .output()
            .await
            .expect("Failed to spawn git");
        assert!(
            output.status.success(),
            "`git {}` failed: {}",
            args.join(" "),
            String::from_utf8_lossy(&output.stderr)
        );
        output
    }

    /// Create a git repository in `temp_dir` holding a single commit on branch `main`.
    async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
        let repo_path = temp_dir.path().to_path_buf();

        run_git(&repo_path, &["init"]).await;
        // Pin the initial branch name so assertions do not depend on the
        // `init.defaultBranch` setting of whoever runs the tests.
        run_git(&repo_path, &["symbolic-ref", "HEAD", "refs/heads/main"]).await;

        // Configure an identity (required for commits) and turn off commit
        // signing so a user-level signing setup cannot break the fixture.
        run_git(&repo_path, &["config", "user.name", "Test User"]).await;
        run_git(&repo_path, &["config", "user.email", "test@example.com"]).await;
        run_git(&repo_path, &["config", "commit.gpgsign", "false"]).await;

        // Create a test file and commit it
        let test_file = repo_path.join("test.txt");
        fs::write(&test_file, "test content").expect("Failed to write test file");

        run_git(&repo_path, &["add", "."]).await;
        run_git(&repo_path, &["commit", "-m", "Initial commit"]).await;

        repo_path
    }

    #[tokio::test]
    async fn test_collect_git_info_non_git_directory() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let result = collect_git_info(temp_dir.path()).await;
        assert!(result.is_none());
    }

    #[tokio::test]
    async fn test_collect_git_info_git_repository() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let repo_path = create_test_git_repo(&temp_dir).await;

        let git_info = collect_git_info(&repo_path)
            .await
            .expect("Should collect git info from repo");

        // Should have commit hash
        let commit_hash = git_info
            .commit_hash
            .expect("Should have a commit hash");
        assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
        assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));

        // The fixture pins the initial branch to "main".
        assert_eq!(git_info.branch.as_deref(), Some("main"));

        // Repository URL might be None for local repos without remote
        // This is acceptable behavior
    }

    #[tokio::test]
    async fn test_collect_git_info_with_remote() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let repo_path = create_test_git_repo(&temp_dir).await;

        // Add a remote origin
        run_git(
            &repo_path,
            &[
                "remote",
                "add",
                "origin",
                "https://github.com/example/repo.git",
            ],
        )
        .await;

        let git_info = collect_git_info(&repo_path)
            .await
            .expect("Should collect git info from repo");

        // Should have repository URL
        assert_eq!(
            git_info.repository_url,
            Some("https://github.com/example/repo.git".to_string())
        );
    }

    #[tokio::test]
    async fn test_collect_git_info_detached_head() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let repo_path = create_test_git_repo(&temp_dir).await;

        // Get the current commit hash
        let head = run_git(&repo_path, &["rev-parse", "HEAD"]).await;
        let commit_hash = String::from_utf8(head.stdout)
            .expect("HEAD hash should be valid UTF-8")
            .trim()
            .to_string();

        // Checkout the commit directly (detached HEAD)
        run_git(&repo_path, &["checkout", commit_hash.as_str()]).await;

        let git_info = collect_git_info(&repo_path)
            .await
            .expect("Should collect git info from repo");

        // Should have commit hash
        assert!(git_info.commit_hash.is_some());
        // Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
        assert!(git_info.branch.is_none());
    }

    #[tokio::test]
    async fn test_collect_git_info_with_branch() {
        let temp_dir = TempDir::new().expect("Failed to create temp dir");
        let repo_path = create_test_git_repo(&temp_dir).await;

        // Create and checkout a new branch
        run_git(&repo_path, &["checkout", "-b", "feature-branch"]).await;

        let git_info = collect_git_info(&repo_path)
            .await
            .expect("Should collect git info from repo");

        // Should have the new branch name
        assert_eq!(git_info.branch, Some("feature-branch".to_string()));
    }

    #[test]
    fn test_git_info_serialization() {
        let git_info = GitInfo {
            commit_hash: Some("abc123def456".to_string()),
            branch: Some("main".to_string()),
            repository_url: Some("https://github.com/example/repo.git".to_string()),
        };

        let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
        let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");

        assert_eq!(parsed["commit_hash"], "abc123def456");
        assert_eq!(parsed["branch"], "main");
        assert_eq!(
            parsed["repository_url"],
            "https://github.com/example/repo.git"
        );
    }

    #[test]
    fn test_git_info_serialization_with_nones() {
        let git_info = GitInfo {
            commit_hash: None,
            branch: None,
            repository_url: None,
        };

        let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
        let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");

        // Fields with None values should be omitted due to skip_serializing_if
        let fields = parsed.as_object().expect("Should serialize to a JSON object");
        assert!(!fields.contains_key("commit_hash"));
        assert!(!fields.contains_key("branch"));
        assert!(!fields.contains_key("repository_url"));
    }
}

codex-rs/core/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ pub mod error;
1919
pub mod exec;
2020
pub mod exec_env;
2121
mod flags;
22+
pub mod git_info;
2223
mod is_safe_command;
2324
mod mcp_connection_manager;
2425
mod mcp_tool_call;

0 commit comments

Comments
 (0)