Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion refact-agent/engine/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ Installable by the end user:
- [x] Code completion with RAG
- [x] Chat with tool usage
- [x] definition() references() tools
- [x] vecdb search() with scope
- [x] vecdb search() with scope (semantic search)
- [x] regex_search() with scope (pattern matching)
- [x] @file @tree @web @definition @references @search mentions in chat
- [x] locate() uses test-time compute to find good project cross-section
- [x] Latest gpt-4o gpt-4o-mini
Expand Down
2 changes: 1 addition & 1 deletion refact-agent/engine/src/agentic/compress_trajectory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ OR (1) goal/thinking/coding/outcome (2) string according to the guidelines
Example:
[
["goal", "Rename my_function1 to my_function2"],
["thinking", "There are definition(), search() and locate() tools, all can be used to find my_function1, system prompt says I need to start with locate()."],
["thinking", "There are definition(), search(), regex_search() and locate() tools, all can be used to find my_function1, system prompt says I need to start with locate()."],
["locate(problem_statement=\"Rename my_function1 to my_function2\")", "The file my_script.py (1337 lines) has my_function1 on line 42."],
["thinking", "I can rewrite my_function1 inside my_script.py, so I'll do that."],
["update_textdoc(path=\"my_script\", old_str=\"...\", replacement=\"...\", multiple=false)", "The output of update_textdoc() has 15 lines_add and 15 lines_remove, confirming the operation."],
Expand Down
2 changes: 2 additions & 0 deletions refact-agent/engine/src/tools/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod tools_description;
pub mod tools_execute;
pub mod scope_utils;

mod tool_ast_definition;
mod tool_ast_reference;
Expand All @@ -9,6 +10,7 @@ mod tool_relevant_files;
mod tool_cat;
mod tool_rm;
mod tool_mv;
mod tool_regex_search;

mod tool_deep_thinking;

Expand Down
173 changes: 173 additions & 0 deletions refact-agent/engine/src/tools/scope_utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
use std::sync::Arc;
use tokio::sync::RwLock as ARwLock;

use crate::at_commands::at_file::{file_repair_candidates, return_one_candidate_or_a_good_error};
use crate::files_correction::{correct_to_nearest_dir_path, get_project_dirs};
use crate::global_context::GlobalContext;

/// Resolves a scope string into the list of workspace files it covers.
///
/// The scope is interpreted in this order:
///
/// 1. `"workspace"` - every path in `documents_state.workspace_files`.
/// 2. A string ending with `/` or `\` - resolved as a directory only; the
///    result is every workspace file located under that directory.
/// 3. Anything else - resolved as a file first; if no file candidate can be
///    picked, the same string is retried as a directory.
///
/// # Arguments
///
/// * `gcx` - Global context
/// * `scope` - Scope string: `"workspace"`, a directory path, or a file path
///
/// # Returns
///
/// * `Ok(Vec<String>)` - File paths to search (may be empty for a directory
///   that contains no workspace files)
///
/// # Errors
///
/// Propagates the message produced by `return_one_candidate_or_a_good_error`.
/// When a file scope also fails the directory fallback, the error of the
/// *file* lookup is reported, because that is what the caller asked for.
///
/// # Examples
///
/// ```ignore
/// let files = resolve_scope(gcx.clone(), "workspace").await?;
/// let files = resolve_scope(gcx.clone(), "src/").await?;
/// let files = resolve_scope(gcx.clone(), "src/main.rs").await?;
/// ```
pub async fn resolve_scope(
    gcx: Arc<ARwLock<GlobalContext>>,
    scope: &str,
) -> Result<Vec<String>, String> {
    // Case 1: the whole workspace, no path resolution needed.
    if scope == "workspace" {
        let all_files = gcx.read().await.documents_state.workspace_files.lock().unwrap()
            .iter()
            .map(|f| f.to_string_lossy().to_string())
            .collect::<Vec<_>>();
        return Ok(all_files);
    }

    let scope_string = scope.to_string();
    // A trailing separator means the caller explicitly asked for a directory.
    let scope_is_dir = scope.ends_with('/') || scope.ends_with('\\');

    // Case 3 (first half): try the scope as a file unless it is explicitly a
    // directory. The file error is remembered so that it can be reported if
    // the directory fallback fails as well.
    let file_err = if scope_is_dir {
        None
    } else {
        match return_one_candidate_or_a_good_error(
            gcx.clone(),
            &scope_string,
            &file_repair_candidates(gcx.clone(), &scope_string, 10, false).await,
            &get_project_dirs(gcx.clone()).await,
            false,
        ).await {
            Ok(file_path) => return Ok(vec![file_path]),
            Err(err) => Some(err),
        }
    };

    // Case 2, and the fallback of case 3: resolve the scope as a directory.
    let dir_path = return_one_candidate_or_a_good_error(
        gcx.clone(),
        &scope_string,
        &correct_to_nearest_dir_path(gcx.clone(), &scope_string, false, 10).await,
        &get_project_dirs(gcx.clone()).await,
        true,
    ).await.map_err(|dir_err| file_err.unwrap_or(dir_err))?;

    // Filter while the list is locked instead of cloning every path first.
    // `Path::starts_with` compares whole components, so "src" does not match "src2".
    let files_in_dir = gcx.read().await.documents_state.workspace_files.lock().unwrap()
        .iter()
        .filter(|f| f.starts_with(&dir_path))
        .map(|f| f.to_string_lossy().to_string())
        .collect::<Vec<_>>();
    Ok(files_in_dir)
}

/// Creates a SQL-like filter string for the given scope.
/// This is specifically for the search tool which uses SQL-like filters.
///
/// The scope is interpreted in this order:
///
/// 1. `"workspace"` - no filter (`None`).
/// 2. A string ending with `/` or `\` - resolved as a directory only and
///    turned into a `scope LIKE '<dir>%'` prefix filter.
/// 3. Anything else - resolved as a file first (`scope = "<file>"`); if no
///    file candidate can be picked, the same string is retried as a directory.
///
/// # Arguments
///
/// * `gcx` - Global context
/// * `scope` - Scope string
///
/// # Returns
///
/// * `Ok(Option<String>)` - SQL-like filter string, or None for workspace scope
///
/// # Errors
///
/// Propagates the message produced by `return_one_candidate_or_a_good_error`.
/// When a file scope also fails the directory fallback, the error of the
/// *file* lookup is reported.
pub async fn create_scope_filter(
    gcx: Arc<ARwLock<GlobalContext>>,
    scope: &str,
) -> Result<Option<String>, String> {
    if scope == "workspace" {
        return Ok(None);
    }

    let scope_string = scope.to_string();
    // A trailing separator means the caller explicitly asked for a directory.
    let scope_is_dir = scope.ends_with('/') || scope.ends_with('\\');

    // Try the scope as a file unless it is explicitly a directory; keep the
    // file error so it can be reported if the directory fallback fails too.
    let file_err = if scope_is_dir {
        None
    } else {
        match return_one_candidate_or_a_good_error(
            gcx.clone(),
            &scope_string,
            &file_repair_candidates(gcx.clone(), &scope_string, 10, false).await,
            &get_project_dirs(gcx.clone()).await,
            false,
        ).await {
            // NOTE(review): the path is embedded in a double-quoted literal
            // without escaping; a path containing `"` would break the filter.
            Ok(file_path) => return Ok(Some(format!("(scope = \"{}\")", file_path))),
            Err(err) => Some(err),
        }
    };

    let dir_path = return_one_candidate_or_a_good_error(
        gcx.clone(),
        &scope_string,
        &correct_to_nearest_dir_path(gcx.clone(), &scope_string, false, 10).await,
        &get_project_dirs(gcx.clone()).await,
        true,
    ).await.map_err(|dir_err| file_err.unwrap_or(dir_err))?;

    // The path sits inside a single-quoted literal, so an apostrophe in a
    // directory name must be doubled or it would terminate the literal early.
    // NOTE(review): `%` and `_` inside the path still act as LIKE wildcards.
    let escaped_dir = dir_path.replace('\'', "''");
    Ok(Some(format!("(scope LIKE '{}%')", escaped_dir)))
}

/// Rejects an empty scope resolution result.
///
/// # Arguments
///
/// * `files` - Files that a scope was resolved to
/// * `scope` - The scope string as given by the caller, used only in the error text
///
/// # Returns
///
/// * `Ok(Vec<String>)` - `files`, handed back untouched, when it has at least one entry
/// * `Err(String)` - `"No files found in scope: <scope>"` when `files` is empty
pub fn validate_scope_files(
    files: Vec<String>,
    scope: &str,
) -> Result<Vec<String>, String> {
    match files.len() {
        0 => Err(format!("No files found in scope: {}", scope)),
        _ => Ok(files),
    }
}
16 changes: 10 additions & 6 deletions refact-agent/engine/src/tools/tool_locate_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,19 @@ MORE_TOCHANGE = likely to change as well, as a consequence of completing the tas
USAGE = code that uses the things the task description is about
SIMILAR = code that might provide an example of how to write similar things

Your job is to use search() calls and summarize the results.
Your job is to use search() and regex_search() calls and summarize the results.

Some good ideas:

search("MyClass1") -- if MyClass1 mentioned in the task, for each symbol
search("log message 1 mentioned") -- when the task has log messages, for each message
search(" def f():\n print(\"the example function!\")") -- look for the code piece mentioned in the task
search("MyClass1") -- if MyClass1 mentioned in the task, for semantic search of each symbol
search("log message 1 mentioned") -- when the task has log messages, for semantic search of each message
search(" def f():\n print(\"the example function!\")") -- look for semantically similar code to the piece mentioned in the task
search("imaginary_call(imaginary_arguments)\nmore_calls()\n") -- you can imagine what kind of code you need to find

regex_search("MyClass1") -- if you need to find exact occurrences of a class name
regex_search("(?i)error.*not found") -- if you need to find specific error patterns
regex_search("function\\s+name\\s*\\(") -- if you need to find function declarations with specific patterns

Call any of those that make sense in parallel. Make at least two calls in parallel, pay special attention that at least one
search() call should not have a restrictive scope, because you are running the risk of getting no results at all.
"###;
Expand Down Expand Up @@ -191,13 +195,13 @@ async fn find_relevant_files_with_search(
let mut msgs = vec![];
msgs.push(ChatMessage::new("system".to_string(), LS_SYSTEM_PROMPT.to_string()));
msgs.push(ChatMessage::new("user".to_string(), user_query.to_string()));
msgs.push(ChatMessage::new("cd_instruction".to_string(), "Look at user query above. Follow the system prompt. Run several search() calls in parallel.".to_string()));
msgs.push(ChatMessage::new("cd_instruction".to_string(), "Look at user query above. Follow the system prompt. Run several search() and regex_search() calls in parallel.".to_string()));

let result = subchat(
ccx.clone(),
subchat_params.subchat_model.as_str(),
msgs,
vec!["search".to_string()],
vec!["search".to_string(), "regex_search".to_string()],
1,
subchat_params.subchat_max_new_tokens,
LS_WRAP_UP,
Expand Down
Loading