|
| 1 | +use std::path::Path; |
| 2 | +use std::time::Duration; |
| 3 | + |
| 4 | +use async_trait::async_trait; |
| 5 | +use serde::Deserialize; |
| 6 | +use tokio::process::Command; |
| 7 | +use tokio::time::timeout; |
| 8 | + |
| 9 | +use crate::function_tool::FunctionCallError; |
| 10 | +use crate::tools::context::ToolInvocation; |
| 11 | +use crate::tools::context::ToolOutput; |
| 12 | +use crate::tools::context::ToolPayload; |
| 13 | +use crate::tools::registry::ToolHandler; |
| 14 | +use crate::tools::registry::ToolKind; |
| 15 | + |
/// Tool handler backing the `grep_files` function tool.
///
/// The handler is stateless: everything it needs arrives with the
/// `ToolInvocation` passed to `ToolHandler::handle`.
pub struct GrepFilesHandler;
| 17 | + |
/// Number of results returned when the caller does not supply `limit`.
const DEFAULT_LIMIT: usize = 100;
/// Hard upper bound applied to any caller-supplied `limit`.
const MAX_LIMIT: usize = 2_000;
/// Maximum wall-clock time a single `rg` invocation may take.
const COMMAND_TIMEOUT: Duration = Duration::from_secs(30);

/// Serde default provider for `GrepFilesArgs::limit`.
///
/// Serde's `default = "..."` attribute needs a function path, so the
/// constant is exposed through this thin wrapper.
fn default_limit() -> usize {
    DEFAULT_LIMIT
}
| 25 | + |
| 26 | +#[derive(Deserialize)] |
| 27 | +struct GrepFilesArgs { |
| 28 | + pattern: String, |
| 29 | + #[serde(default)] |
| 30 | + include: Option<String>, |
| 31 | + #[serde(default)] |
| 32 | + path: Option<String>, |
| 33 | + #[serde(default = "default_limit")] |
| 34 | + limit: usize, |
| 35 | +} |
| 36 | + |
| 37 | +#[async_trait] |
| 38 | +impl ToolHandler for GrepFilesHandler { |
| 39 | + fn kind(&self) -> ToolKind { |
| 40 | + ToolKind::Function |
| 41 | + } |
| 42 | + |
| 43 | + async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> { |
| 44 | + let ToolInvocation { payload, turn, .. } = invocation; |
| 45 | + |
| 46 | + let arguments = match payload { |
| 47 | + ToolPayload::Function { arguments } => arguments, |
| 48 | + _ => { |
| 49 | + return Err(FunctionCallError::RespondToModel( |
| 50 | + "grep_files handler received unsupported payload".to_string(), |
| 51 | + )); |
| 52 | + } |
| 53 | + }; |
| 54 | + |
| 55 | + let args: GrepFilesArgs = serde_json::from_str(&arguments).map_err(|err| { |
| 56 | + FunctionCallError::RespondToModel(format!( |
| 57 | + "failed to parse function arguments: {err:?}" |
| 58 | + )) |
| 59 | + })?; |
| 60 | + |
| 61 | + let pattern = args.pattern.trim(); |
| 62 | + if pattern.is_empty() { |
| 63 | + return Err(FunctionCallError::RespondToModel( |
| 64 | + "pattern must not be empty".to_string(), |
| 65 | + )); |
| 66 | + } |
| 67 | + |
| 68 | + if args.limit == 0 { |
| 69 | + return Err(FunctionCallError::RespondToModel( |
| 70 | + "limit must be greater than zero".to_string(), |
| 71 | + )); |
| 72 | + } |
| 73 | + |
| 74 | + let limit = args.limit.min(MAX_LIMIT); |
| 75 | + let search_path = turn.resolve_path(args.path.clone()); |
| 76 | + |
| 77 | + verify_path_exists(&search_path).await?; |
| 78 | + |
| 79 | + let include = args.include.as_deref().map(str::trim).and_then(|val| { |
| 80 | + if val.is_empty() { |
| 81 | + None |
| 82 | + } else { |
| 83 | + Some(val.to_string()) |
| 84 | + } |
| 85 | + }); |
| 86 | + |
| 87 | + let search_results = |
| 88 | + run_rg_search(pattern, include.as_deref(), &search_path, limit, &turn.cwd).await?; |
| 89 | + |
| 90 | + if search_results.is_empty() { |
| 91 | + Ok(ToolOutput::Function { |
| 92 | + content: "No matches found.".to_string(), |
| 93 | + success: Some(false), |
| 94 | + }) |
| 95 | + } else { |
| 96 | + Ok(ToolOutput::Function { |
| 97 | + content: search_results.join("\n"), |
| 98 | + success: Some(true), |
| 99 | + }) |
| 100 | + } |
| 101 | + } |
| 102 | +} |
| 103 | + |
| 104 | +async fn verify_path_exists(path: &Path) -> Result<(), FunctionCallError> { |
| 105 | + tokio::fs::metadata(path).await.map_err(|err| { |
| 106 | + FunctionCallError::RespondToModel(format!("unable to access `{}`: {err}", path.display())) |
| 107 | + })?; |
| 108 | + Ok(()) |
| 109 | +} |
| 110 | + |
| 111 | +async fn run_rg_search( |
| 112 | + pattern: &str, |
| 113 | + include: Option<&str>, |
| 114 | + search_path: &Path, |
| 115 | + limit: usize, |
| 116 | + cwd: &Path, |
| 117 | +) -> Result<Vec<String>, FunctionCallError> { |
| 118 | + let mut command = Command::new("rg"); |
| 119 | + command |
| 120 | + .current_dir(cwd) |
| 121 | + .arg("--files-with-matches") |
| 122 | + .arg("--sortr=modified") |
| 123 | + .arg("--regexp") |
| 124 | + .arg(pattern) |
| 125 | + .arg("--no-messages"); |
| 126 | + |
| 127 | + if let Some(glob) = include { |
| 128 | + command.arg("--glob").arg(glob); |
| 129 | + } |
| 130 | + |
| 131 | + command.arg("--").arg(search_path); |
| 132 | + |
| 133 | + let output = timeout(COMMAND_TIMEOUT, command.output()) |
| 134 | + .await |
| 135 | + .map_err(|_| { |
| 136 | + FunctionCallError::RespondToModel("rg timed out after 30 seconds".to_string()) |
| 137 | + })? |
| 138 | + .map_err(|err| { |
| 139 | + FunctionCallError::RespondToModel(format!( |
| 140 | + "failed to launch rg: {err}. Ensure ripgrep is installed and on PATH." |
| 141 | + )) |
| 142 | + })?; |
| 143 | + |
| 144 | + match output.status.code() { |
| 145 | + Some(0) => Ok(parse_results(&output.stdout, limit)), |
| 146 | + Some(1) => Ok(Vec::new()), |
| 147 | + _ => { |
| 148 | + let stderr = String::from_utf8_lossy(&output.stderr); |
| 149 | + Err(FunctionCallError::RespondToModel(format!( |
| 150 | + "rg failed: {stderr}" |
| 151 | + ))) |
| 152 | + } |
| 153 | + } |
| 154 | +} |
| 155 | + |
/// Splits raw `rg` stdout into at most `limit` file paths.
///
/// Lines are separated by `\n`. Empty lines and lines that are not valid
/// UTF-8 are skipped and do not count toward `limit`. Output order is
/// preserved. A `limit` of zero yields an empty vector.
fn parse_results(stdout: &[u8], limit: usize) -> Vec<String> {
    stdout
        .split(|byte| *byte == b'\n')
        .filter(|line| !line.is_empty())
        .filter_map(|line| std::str::from_utf8(line).ok())
        // `take` stops pulling lines once the cap is reached and, unlike an
        // equality check after each push, also handles `limit == 0`.
        .take(limit)
        .map(str::to_owned)
        .collect()
}
| 174 | + |
#[cfg(test)]
mod tests {
    use super::*;
    use std::process::Command as StdCommand;
    use tempfile::tempdir;

    /// Reports whether an `rg` binary can be launched from `PATH`.
    ///
    /// The search tests return early when it cannot, so they pass on
    /// machines without ripgrep installed.
    fn rg_available() -> bool {
        matches!(
            StdCommand::new("rg").arg("--version").output(),
            Ok(output) if output.status.success()
        )
    }

    /// Writes every `(file name, contents)` pair into `dir`, panicking on
    /// any I/O failure.
    fn write_files(dir: &std::path::Path, files: &[(&str, &str)]) {
        for (name, contents) in files {
            std::fs::write(dir.join(name), contents).unwrap();
        }
    }

    #[test]
    fn parses_basic_results() {
        let expected = vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()];
        let parsed = parse_results(b"/tmp/file_a.rs\n/tmp/file_b.rs\n", 10);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_truncates_after_limit() {
        let expected = vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()];
        let parsed = parse_results(b"/tmp/file_a.rs\n/tmp/file_b.rs\n/tmp/file_c.rs\n", 2);
        assert_eq!(parsed, expected);
    }

    #[tokio::test]
    async fn run_search_returns_results() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_files(
            root,
            &[
                ("match_one.txt", "alpha beta gamma"),
                ("match_two.txt", "alpha delta"),
                ("other.txt", "omega"),
            ],
        );

        let found = run_rg_search("alpha", None, root, 10, root).await?;
        assert_eq!(found.len(), 2);
        for expected in &["match_one.txt", "match_two.txt"] {
            assert!(found.iter().any(|path| path.ends_with(*expected)));
        }
        Ok(())
    }

    #[tokio::test]
    async fn run_search_with_glob_filter() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_files(
            root,
            &[
                ("match_one.rs", "alpha beta gamma"),
                ("match_two.txt", "alpha delta"),
            ],
        );

        let found = run_rg_search("alpha", Some("*.rs"), root, 10, root).await?;
        assert_eq!(found.len(), 1);
        assert!(found.iter().all(|path| path.ends_with("match_one.rs")));
        Ok(())
    }

    #[tokio::test]
    async fn run_search_respects_limit() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_files(
            root,
            &[
                ("one.txt", "alpha one"),
                ("two.txt", "alpha two"),
                ("three.txt", "alpha three"),
            ],
        );

        let found = run_rg_search("alpha", None, root, 2, root).await?;
        assert_eq!(found.len(), 2);
        Ok(())
    }

    #[tokio::test]
    async fn run_search_handles_no_matches() -> anyhow::Result<()> {
        if !rg_available() {
            return Ok(());
        }
        let temp = tempdir().expect("create temp dir");
        let root = temp.path();
        write_files(root, &[("one.txt", "omega")]);

        let found = run_rg_search("alpha", None, root, 5, root).await?;
        assert!(found.is_empty());
        Ok(())
    }
}
0 commit comments