Skip to content

Commit dc2f98b

Browse files
committed
[sidecar] add a swe_bench_agent_bin
1 parent 60de39a commit dc2f98b

File tree

1 file changed

+222
-0
lines changed

1 file changed

+222
-0
lines changed
Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
use std::{path::PathBuf, sync::Arc};
2+
3+
/// This contains the binary responsible for running the agents as a farm.
4+
/// Dead simple: the input is a JSON file that names the git repository directory
5+
/// and carries the problem statement, keeping it super simple and limited.
6+
use clap::Parser;
7+
use llm_client::{
8+
clients::types::LLMType,
9+
provider::{AnthropicAPIKey, LLMProvider, LLMProviderAPIKeys},
10+
};
11+
use sidecar::{
12+
agentic::symbol::{
13+
events::{input::SymbolEventRequestId, message_event::SymbolEventMessageProperties},
14+
identifier::LLMProperties,
15+
},
16+
application::{application::Application, config::configuration::Configuration},
17+
repo::types::RepoRef,
18+
user_context::types::UserContext,
19+
};
20+
21+
pub async fn check_session_storage_path(config: Arc<Configuration>, session_id: String) -> String {
22+
let mut session_path = config.index_dir.clone();
23+
session_path = session_path.join("session");
24+
// check if the plan_storage_path_exists
25+
if tokio::fs::metadata(&session_path).await.is_err() {
26+
tokio::fs::create_dir(&session_path)
27+
.await
28+
.expect("directory creation to not fail");
29+
}
30+
session_path = session_path.join(session_id);
31+
session_path
32+
.to_str()
33+
.expect("path conversion to work on all platforms")
34+
.to_owned()
35+
}
36+
37+
/// Define the command-line arguments
38+
#[derive(Parser, Debug)]
39+
#[command(
40+
author = "skcd",
41+
version = "1.0",
42+
about = "Agent binary sidecar runner"
43+
)]
44+
struct CliArgs {
45+
/// Git directory name
46+
#[arg(long)]
47+
timeout: usize,
48+
49+
/// Endpoint URL
50+
#[arg(long)]
51+
editor_url: String,
52+
53+
/// Timeout in seconds
54+
#[arg(long)]
55+
input: PathBuf,
56+
57+
/// Anthropic api key
58+
#[arg(long, default_value = None)]
59+
anthropic_api_key: String,
60+
61+
/// OPen Router api key
62+
#[arg(long, default_value = None)]
63+
openrouter_api_key: Option<String>,
64+
65+
/// The run id for the current run
66+
#[arg(long)]
67+
run_id: String,
68+
69+
#[arg(long)]
70+
repo_name: String,
71+
72+
/// Directory to dump all the logs into
73+
#[arg(long)]
74+
log_directory: String,
75+
76+
/// Use json mode strictly
77+
#[arg(long, default_value = "true")]
78+
json_mode: bool,
79+
80+
/// Use midwit mode (aka sonnet3.5 with tool)
81+
#[arg(long, default_value = "true")]
82+
midwit_mode: bool,
83+
84+
/// Run in single trajectory but a lot of them
85+
#[arg(long, default_value = None)]
86+
single_traj_search: Option<usize>,
87+
88+
/// Maximum depth for the search tree
89+
#[arg(long, default_value = "30")]
90+
max_depth: u32,
91+
92+
/// Model name override
93+
#[arg(long)]
94+
model_name: Option<String>,
95+
}
96+
97+
/// Define the SWEbenchInstance struct for serialization
98+
#[derive(Debug, serde::Serialize, serde::Deserialize)]
99+
struct SWEbenchInstance {
100+
repo: String,
101+
instance_id: String,
102+
base_commit: String,
103+
patch: String,
104+
test_patch: String,
105+
problem_statement: String,
106+
hints_text: String,
107+
created_at: String,
108+
version: String,
109+
#[serde(rename = "FAIL_TO_PASS")]
110+
fail_to_pass: String,
111+
#[serde(rename = "PASS_TO_PASS")]
112+
pass_to_pass: String,
113+
environment_setup_commit: String,
114+
}
115+
116+
#[derive(Debug, serde::Serialize, serde::Deserialize)]
117+
struct InputParts {
118+
git_drname: String,
119+
instance: SWEbenchInstance,
120+
}
121+
122+
#[tokio::main]
123+
async fn main() -> Result<(), Box<dyn std::error::Error>> {
124+
println!("agent::start");
125+
let args = CliArgs::parse();
126+
eprintln!("run_id::{}", &args.run_id);
127+
128+
let mut configuration = Configuration::default();
129+
// we apply the edits directly over here
130+
configuration.apply_directly = true;
131+
132+
// setup the application
133+
Application::install_logging(&configuration);
134+
Application::setup_scratch_pad(&configuration).await;
135+
136+
let application = Application::initialize(configuration)
137+
.await
138+
.expect("application setup should work");
139+
let exchange_id = "0".to_owned();
140+
141+
let llm_model = if let Some(model_name) = args.model_name {
142+
LLMType::Custom(model_name)
143+
} else {
144+
LLMType::ClaudeSonnet3_7
145+
};
146+
147+
let llm_provider = LLMProperties::new(
148+
llm_model,
149+
LLMProvider::Anthropic,
150+
LLMProviderAPIKeys::Anthropic(AnthropicAPIKey::new(args.anthropic_api_key.to_owned())),
151+
);
152+
// Define context crunching LLM properties - using the same model as the main agent for now
153+
let _context_crunching_llm = Some(llm_provider.clone());
154+
let cancellation_token = tokio_util::sync::CancellationToken::new();
155+
let (sender, _receiver) = tokio::sync::mpsc::unbounded_channel();
156+
let message_properties = SymbolEventMessageProperties::new(
157+
SymbolEventRequestId::new("0".to_owned(), args.run_id.to_owned()),
158+
sender.clone(),
159+
args.editor_url.clone(),
160+
cancellation_token.clone(),
161+
llm_provider,
162+
);
163+
164+
let session_storage_path =
165+
check_session_storage_path(application.config.clone(), args.run_id.clone()).await;
166+
167+
let session_service = application.session_service.clone();
168+
169+
let input_path = args.input;
170+
let input_content = tokio::fs::read(input_path).await.expect("path content");
171+
let input_parts: InputParts =
172+
serde_json::from_slice(&input_content).expect("Parse the serde json");
173+
174+
let cloned_session_id = args.run_id.to_string();
175+
let user_message = input_parts.instance.problem_statement.clone();
176+
let cloned_working_directory = input_parts.git_drname.to_owned();
177+
let tool_box = application.tool_box.clone();
178+
let llm_broker = application.llm_broker.clone();
179+
180+
let aide_rules = Some(format!(
181+
r#"You are helping the user in the repository present in {}
182+
FOLLOW these steps to resolve the issue:
183+
1. As a first step, it might be a good idea to explore the repo to familiarize yourself with its structure.
184+
2. Create a script to reproduce the error and execute it with `python reproduce_error.py` using the execute_command (which uses bash internally), to confirm the error. You should always use `python reproduce_error.py` command exactly to run the reproduction error script.
185+
3. Edit the sourcecode of the repo to resolve the issue
186+
4. Rerun your reproduce script and confirm that the error is fixed!
187+
188+
Your thinking should be thorough and so it's fine if it's very long."#,
189+
args.repo_name,
190+
));
191+
192+
// wait for the agent to finish over here while busy looping
193+
println!("agent::tool_use::start");
194+
let _ = session_service
195+
.tool_use_agentic(
196+
cloned_session_id,
197+
session_storage_path,
198+
user_message,
199+
exchange_id,
200+
vec![],
201+
vec![],
202+
"bash".to_owned(),
203+
vec![],
204+
RepoRef::local(&cloned_working_directory).expect("repo_ref to work"),
205+
cloned_working_directory,
206+
tool_box,
207+
llm_broker,
208+
UserContext::default(),
209+
aide_rules,
210+
false,
211+
false,
212+
false,
213+
Some(args.log_directory.clone()),
214+
Some(args.repo_name.clone()),
215+
message_properties,
216+
false, // not in devtools context
217+
None, // No context crunching LLM for agent_bin
218+
)
219+
.await;
220+
println!("agent::tool_use::end");
221+
Ok(())
222+
}

0 commit comments

Comments
 (0)