Skip to content

Commit 49b30cb

Browse files
integrate cursor-cli agent
Signed-off-by: Rahul Shetty <[email protected]>
1 parent c860276 commit 49b30cb

File tree

3 files changed

+66
-26
lines changed

3 files changed

+66
-26
lines changed

src/agentready/cli/benchmark.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,12 @@
2727
default=None,
2828
help="Benchmark subset (tbench: smoketest/full)",
2929
)
30+
@click.option(
31+
"--agent",
32+
type=click.Choice(["claude-code", "cursor-cli"]),
33+
default="claude-code",
34+
help="Agent for evaluation",
35+
)
3036
@click.option(
3137
"--model",
3238
type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]),
@@ -53,7 +59,7 @@
5359
help="Skip dependency checks (for advanced users)",
5460
)
5561
def benchmark(
56-
repository, harness, subset, model, verbose, timeout, output_dir, skip_preflight
62+
repository, harness, subset, agent, model, verbose, timeout, output_dir, skip_preflight
5763
):
5864
"""Run agent coding benchmarks.
5965
@@ -81,14 +87,14 @@ def benchmark(
8187
# Route to appropriate harness
8288
if harness == "tbench":
8389
_run_tbench(
84-
repo_path, subset, model, verbose, timeout, output_dir, skip_preflight
90+
repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight
8591
)
8692
else:
8793
click.echo(f"Unknown harness: {harness}", err=True)
8894
raise click.Abort()
8995

9096

91-
def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_preflight):
97+
def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight):
9298
"""Run Terminal-Bench evaluation."""
9399
# Default subset to 'full' if not specified
94100
if subset is None:
@@ -107,6 +113,7 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
107113
click.echo("AgentReady Terminal-Bench Benchmark")
108114
click.echo(f"{'=' * 50}\n")
109115
click.echo(f"Repository: {repo_path}")
116+
click.echo(f"Agent: {agent}")
110117
click.echo(f"Model: {model}")
111118
click.echo(f"Subset: {subset} ({'1-2 tasks' if smoketest else '89 tasks'})")
112119
click.echo(f"Timeout: {timeout}s\n")
@@ -135,7 +142,11 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
135142
raise click.Abort()
136143

137144
# Validate API key BEFORE creating HarborConfig
138-
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
145+
if agent == "claude-code":
146+
api_key = os.environ.get("ANTHROPIC_API_KEY", "")
147+
elif agent == "cursor-cli":
148+
api_key = os.environ.get("CURSOR_API_KEY", "")
149+
139150
if not api_key:
140151
click.echo(
141152
"Error: ANTHROPIC_API_KEY environment variable not set.\n"
@@ -146,8 +157,8 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
146157

147158
# Create HarborConfig (will not raise ValueError now)
148159
harbor_config = HarborConfig(
149-
model=f"anthropic/{model}",
150-
agent="claude-code",
160+
model=model,
161+
agent=agent,
151162
jobs_dir=Path(tempfile.mkdtemp()),
152163
api_key=api_key,
153164
timeout=timeout,

src/agentready/services/eval_harness/harbor_config.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,21 @@
1212
ALLOWED_MODELS = {
1313
"anthropic/claude-haiku-4-5",
1414
"anthropic/claude-sonnet-4-5",
15+
"cursor/composer-1",
16+
"cursor/gpt-5.2-codex",
17+
"cursor/gpt-5.2-codex-fast",
18+
"cursor/gemini-3-pro",
19+
"cursor/opus-4.5",
20+
"cursor/sonnet-4.5",
21+
"cursor/sonnet-4.5-thinking",
22+
"cursor/gpt-5.1-high",
23+
"cursor/gemini-3-flash",
1524
}
1625

1726
# Allowed agents (excludes oracle as it's not relevant for real-world assessment)
1827
ALLOWED_AGENTS = {
1928
"claude-code",
29+
"cursor-cli",
2030
}
2131

2232

src/agentready/services/eval_harness/tbench_runner.py

Lines changed: 39 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -125,31 +125,50 @@ def _real_tbench_result(repo_path: Path, config: HarborConfig) -> TbenchResult:
125125
# Pass through current environment but ensure API key is set
126126
# Harbor's claude-code agent has MiniMax API hardcoded - override it
127127
clean_env = os.environ.copy()
128-
clean_env["ANTHROPIC_API_KEY"] = config.api_key
129-
clean_env["ANTHROPIC_AUTH_TOKEN"] = config.api_key # Harbor uses this
130-
clean_env["ANTHROPIC_BASE_URL"] = "https://api.anthropic.com" # Override MiniMax
131-
clean_env["ANTHROPIC_API_BASE"] = "https://api.anthropic.com" # Alternative var
128+
129+
# Define agent-specific environment variable configurations
130+
# Structure: (Env Key, Env Value, Is Sensitive)
131+
agent_env_configs = {
132+
"claude-code": [
133+
("ANTHROPIC_API_KEY", config.api_key, True),
134+
("ANTHROPIC_AUTH_TOKEN", config.api_key, True),
135+
("ANTHROPIC_BASE_URL", "https://api.anthropic.com", False),
136+
("ANTHROPIC_API_BASE", "https://api.anthropic.com", False),
137+
],
138+
"cursor-cli": [
139+
("CURSOR_API_KEY", config.api_key, True),
140+
],
141+
}
142+
143+
if config.agent not in agent_env_configs:
144+
raise ValueError(f"Invalid agent: {config.agent}")
145+
146+
# Set environment variables and build display/copyable lists
147+
env_vars_display = []
148+
env_vars_copyable = []
149+
150+
for var_name, var_value, is_sensitive in agent_env_configs[config.agent]:
151+
clean_env[var_name] = var_value
152+
153+
# Build display string (truncate sensitive values)
154+
if is_sensitive:
155+
display_value = f"{var_value[:20]}..."
156+
else:
157+
display_value = var_value
158+
env_vars_display.append(f"{var_name}={display_value}")
159+
160+
# Build copyable string (use variable reference for sensitive values)
161+
if is_sensitive:
162+
copyable_value = f"${var_name}"
163+
else:
164+
copyable_value = var_value
165+
env_vars_copyable.append(f"{var_name}={copyable_value}")
166+
132167
# Clear MiniMax settings if present
133168
clean_env.pop("MINIMAX_API_KEY", None)
134169

135170
# Print Harbor command for debugging and manual execution
136171
shell_cmd = " ".join(shlex.quote(arg) for arg in cmd)
137-
138-
# Prepare environment variable strings (truncate API key for security in display)
139-
env_vars_display = [
140-
f"ANTHROPIC_API_KEY={config.api_key[:20]}...", # Truncated for display
141-
f"ANTHROPIC_AUTH_TOKEN={config.api_key[:20]}...",
142-
f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
143-
f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
144-
]
145-
146-
# Full command for copy/paste (use $ANTHROPIC_API_KEY to avoid exposing key)
147-
env_vars_copyable = [
148-
"ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY",
149-
"ANTHROPIC_AUTH_TOKEN=$ANTHROPIC_API_KEY",
150-
f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
151-
f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
152-
]
153172
full_cmd_copyable = " ".join(env_vars_copyable) + " " + shell_cmd
154173

155174
print(f"\n{'=' * 70}")

0 commit comments

Comments
 (0)