Skip to content

Commit f52320b

Browse files
authored
feat: grep_files as a tool (#4820)
Add `grep_files` as a tool so that more actions can be performed in parallel
1 parent a43ae86 commit f52320b

File tree

7 files changed

+591
-9
lines changed

7 files changed

+591
-9
lines changed

codex-rs/core/src/model_family.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,10 @@ pub fn find_family_for_model(mut slug: &str) -> Option<ModelFamily> {
119119
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
120120
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
121121
experimental_supported_tools: vec![
122-
"read_file".to_string(),
122+
"grep_files".to_string(),
123123
"list_dir".to_string(),
124-
"test_sync_tool".to_string()
124+
"read_file".to_string(),
125+
"test_sync_tool".to_string(),
125126
],
126127
supports_parallel_tool_calls: true,
127128
)
@@ -134,7 +135,11 @@ pub fn find_family_for_model(mut slug: &str) -> Option<ModelFamily> {
134135
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
135136
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
136137
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
137-
experimental_supported_tools: vec!["read_file".to_string(), "list_dir".to_string()],
138+
experimental_supported_tools: vec![
139+
"grep_files".to_string(),
140+
"list_dir".to_string(),
141+
"read_file".to_string(),
142+
],
138143
supports_parallel_tool_calls: true,
139144
)
140145

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
1+
use std::path::Path;
2+
use std::time::Duration;
3+
4+
use async_trait::async_trait;
5+
use serde::Deserialize;
6+
use tokio::process::Command;
7+
use tokio::time::timeout;
8+
9+
use crate::function_tool::FunctionCallError;
10+
use crate::tools::context::ToolInvocation;
11+
use crate::tools::context::ToolOutput;
12+
use crate::tools::context::ToolPayload;
13+
use crate::tools::registry::ToolHandler;
14+
use crate::tools::registry::ToolKind;
15+
16+
/// Tool handler for `grep_files`; the search itself is delegated to ripgrep (`rg`).
pub struct GrepFilesHandler;

/// Result cap used when the caller omits `limit`.
const DEFAULT_LIMIT: usize = 100;
/// Hard ceiling that any caller-supplied `limit` is clamped to.
const MAX_LIMIT: usize = 2000;
/// Maximum time a single `rg` invocation is allowed to run.
const COMMAND_TIMEOUT: Duration = Duration::from_secs(30);

/// Serde default provider for the `limit` argument; returns [`DEFAULT_LIMIT`].
fn default_limit() -> usize {
    DEFAULT_LIMIT
}
25+
26+
#[derive(Deserialize)]
27+
struct GrepFilesArgs {
28+
pattern: String,
29+
#[serde(default)]
30+
include: Option<String>,
31+
#[serde(default)]
32+
path: Option<String>,
33+
#[serde(default = "default_limit")]
34+
limit: usize,
35+
}
36+
37+
#[async_trait]
38+
impl ToolHandler for GrepFilesHandler {
39+
fn kind(&self) -> ToolKind {
40+
ToolKind::Function
41+
}
42+
43+
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
44+
let ToolInvocation { payload, turn, .. } = invocation;
45+
46+
let arguments = match payload {
47+
ToolPayload::Function { arguments } => arguments,
48+
_ => {
49+
return Err(FunctionCallError::RespondToModel(
50+
"grep_files handler received unsupported payload".to_string(),
51+
));
52+
}
53+
};
54+
55+
let args: GrepFilesArgs = serde_json::from_str(&arguments).map_err(|err| {
56+
FunctionCallError::RespondToModel(format!(
57+
"failed to parse function arguments: {err:?}"
58+
))
59+
})?;
60+
61+
let pattern = args.pattern.trim();
62+
if pattern.is_empty() {
63+
return Err(FunctionCallError::RespondToModel(
64+
"pattern must not be empty".to_string(),
65+
));
66+
}
67+
68+
if args.limit == 0 {
69+
return Err(FunctionCallError::RespondToModel(
70+
"limit must be greater than zero".to_string(),
71+
));
72+
}
73+
74+
let limit = args.limit.min(MAX_LIMIT);
75+
let search_path = turn.resolve_path(args.path.clone());
76+
77+
verify_path_exists(&search_path).await?;
78+
79+
let include = args.include.as_deref().map(str::trim).and_then(|val| {
80+
if val.is_empty() {
81+
None
82+
} else {
83+
Some(val.to_string())
84+
}
85+
});
86+
87+
let search_results =
88+
run_rg_search(pattern, include.as_deref(), &search_path, limit, &turn.cwd).await?;
89+
90+
if search_results.is_empty() {
91+
Ok(ToolOutput::Function {
92+
content: "No matches found.".to_string(),
93+
success: Some(false),
94+
})
95+
} else {
96+
Ok(ToolOutput::Function {
97+
content: search_results.join("\n"),
98+
success: Some(true),
99+
})
100+
}
101+
}
102+
}
103+
104+
async fn verify_path_exists(path: &Path) -> Result<(), FunctionCallError> {
105+
tokio::fs::metadata(path).await.map_err(|err| {
106+
FunctionCallError::RespondToModel(format!("unable to access `{}`: {err}", path.display()))
107+
})?;
108+
Ok(())
109+
}
110+
111+
async fn run_rg_search(
112+
pattern: &str,
113+
include: Option<&str>,
114+
search_path: &Path,
115+
limit: usize,
116+
cwd: &Path,
117+
) -> Result<Vec<String>, FunctionCallError> {
118+
let mut command = Command::new("rg");
119+
command
120+
.current_dir(cwd)
121+
.arg("--files-with-matches")
122+
.arg("--sortr=modified")
123+
.arg("--regexp")
124+
.arg(pattern)
125+
.arg("--no-messages");
126+
127+
if let Some(glob) = include {
128+
command.arg("--glob").arg(glob);
129+
}
130+
131+
command.arg("--").arg(search_path);
132+
133+
let output = timeout(COMMAND_TIMEOUT, command.output())
134+
.await
135+
.map_err(|_| {
136+
FunctionCallError::RespondToModel("rg timed out after 30 seconds".to_string())
137+
})?
138+
.map_err(|err| {
139+
FunctionCallError::RespondToModel(format!(
140+
"failed to launch rg: {err}. Ensure ripgrep is installed and on PATH."
141+
))
142+
})?;
143+
144+
match output.status.code() {
145+
Some(0) => Ok(parse_results(&output.stdout, limit)),
146+
Some(1) => Ok(Vec::new()),
147+
_ => {
148+
let stderr = String::from_utf8_lossy(&output.stderr);
149+
Err(FunctionCallError::RespondToModel(format!(
150+
"rg failed: {stderr}"
151+
)))
152+
}
153+
}
154+
}
155+
156+
/// Splits ripgrep's stdout into at most `limit` file paths.
///
/// Lines are separated by `\n`. Empty lines and lines that are not valid UTF-8 are
/// skipped and do not count toward `limit`. A `limit` of zero yields an empty vector.
fn parse_results(stdout: &[u8], limit: usize) -> Vec<String> {
    stdout
        .split(|byte| *byte == b'\n')
        .filter(|line| !line.is_empty())
        .filter_map(|line| std::str::from_utf8(line).ok())
        // `take` also handles `limit == 0`, which a post-push length check cannot.
        .take(limit)
        .map(str::to_owned)
        .collect()
}
174+
175+
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Command as StdCommand;
    use tempfile::tempdir;

    /// Returns `true` when `rg --version` can be launched and exits successfully.
    /// The ripgrep-backed tests return early when it does not.
    fn rg_available() -> bool {
        match StdCommand::new("rg").arg("--version").output() {
            Ok(output) => output.status.success(),
            Err(_) => false,
        }
    }

    /// Writes each `(file name, contents)` pair into `dir`, panicking on I/O failure.
    fn write_fixtures(dir: &Path, files: &[(&str, &str)]) {
        for (name, contents) in files {
            std::fs::write(dir.join(name), contents).unwrap();
        }
    }

    #[test]
    fn parses_basic_results() {
        let expected = vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()];
        let parsed = parse_results(b"/tmp/file_a.rs\n/tmp/file_b.rs\n", 10);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_truncates_after_limit() {
        let expected = vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()];
        let parsed = parse_results(b"/tmp/file_a.rs\n/tmp/file_b.rs\n/tmp/file_c.rs\n", 2);
        assert_eq!(parsed, expected);
    }

    #[tokio::test]
    async fn run_search_returns_results() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_fixtures(
            root,
            &[
                ("match_one.txt", "alpha beta gamma"),
                ("match_two.txt", "alpha delta"),
                ("other.txt", "omega"),
            ],
        );

        let found = run_rg_search("alpha", None, root, 10, root).await?;
        assert_eq!(found.len(), 2);
        for expected in ["match_one.txt", "match_two.txt"] {
            assert!(found.iter().any(|path| path.ends_with(expected)));
        }
        Ok(())
    }

    #[tokio::test]
    async fn run_search_with_glob_filter() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_fixtures(
            root,
            &[
                ("match_one.rs", "alpha beta gamma"),
                ("match_two.txt", "alpha delta"),
            ],
        );

        let found = run_rg_search("alpha", Some("*.rs"), root, 10, root).await?;
        assert_eq!(found.len(), 1);
        assert!(found.iter().all(|path| path.ends_with("match_one.rs")));
        Ok(())
    }

    #[tokio::test]
    async fn run_search_respects_limit() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_fixtures(
            root,
            &[
                ("one.txt", "alpha one"),
                ("two.txt", "alpha two"),
                ("three.txt", "alpha three"),
            ],
        );

        let found = run_rg_search("alpha", None, root, 2, root).await?;
        assert_eq!(found.len(), 2);
        Ok(())
    }

    #[tokio::test]
    async fn run_search_handles_no_matches() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_fixtures(root, &[("one.txt", "omega")]);

        let found = run_rg_search("alpha", None, root, 5, root).await?;
        assert!(found.is_empty());
        Ok(())
    }
}

codex-rs/core/src/tools/handlers/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
pub mod apply_patch;
22
mod exec_stream;
3+
mod grep_files;
34
mod list_dir;
45
mod mcp;
56
mod plan;
@@ -13,6 +14,7 @@ pub use plan::PLAN_TOOL;
1314

1415
pub use apply_patch::ApplyPatchHandler;
1516
pub use exec_stream::ExecStreamHandler;
17+
pub use grep_files::GrepFilesHandler;
1618
pub use list_dir::ListDirHandler;
1719
pub use mcp::McpHandler;
1820
pub use plan::PlanHandler;

0 commit comments

Comments
 (0)