integrate cursor-cli agent

rh-rahulshetty · rh-rahulshetty · commit 49b30cb9a406 · 2026-01-29T21:11:59.000+05:30
Signed-off-by: Rahul Shetty <rashetty@redhat.com>
diff --git a/src/agentready/cli/benchmark.py b/src/agentready/cli/benchmark.py
@@ -27,6 +27,12 @@
     default=None,
     help="Benchmark subset (tbench: smoketest/full)",
 )
+@click.option(
+    "--agent",
+    type=click.Choice(["claude-code", "cursor-cli"]),
+    default="claude-code",
+    help="Agent for evaluation",
+)
 @click.option(
     "--model",
     type=click.Choice(["claude-haiku-4-5", "claude-sonnet-4-5"]),
@@ -53,7 +59,7 @@
     help="Skip dependency checks (for advanced users)",
 )
 def benchmark(
-    repository, harness, subset, model, verbose, timeout, output_dir, skip_preflight
+    repository, harness, subset, agent, model, verbose, timeout, output_dir, skip_preflight
 ):
     """Run agent coding benchmarks.
 
@@ -81,14 +87,14 @@ def benchmark(
     # Route to appropriate harness
     if harness == "tbench":
         _run_tbench(
-            repo_path, subset, model, verbose, timeout, output_dir, skip_preflight
+            repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight
         )
     else:
         click.echo(f"Unknown harness: {harness}", err=True)
         raise click.Abort()
 
 
-def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_preflight):
+def _run_tbench(repo_path, subset, agent, model, verbose, timeout, output_dir, skip_preflight):
     """Run Terminal-Bench evaluation."""
     # Default subset to 'full' if not specified
     if subset is None:
@@ -107,6 +113,7 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
         click.echo("AgentReady Terminal-Bench Benchmark")
         click.echo(f"{'=' * 50}\n")
         click.echo(f"Repository: {repo_path}")
+        click.echo(f"Agent: {agent}")
         click.echo(f"Model: {model}")
         click.echo(f"Subset: {subset} ({'1-2 tasks' if smoketest else '89 tasks'})")
         click.echo(f"Timeout: {timeout}s\n")
@@ -135,7 +142,11 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
             raise click.Abort()
 
     # Validate API key BEFORE creating HarborConfig
-    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    if agent == "claude-code":
+        api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    elif agent == "cursor-cli":
+        api_key = os.environ.get("CURSOR_API_KEY", "")
+
     if not api_key:
         click.echo(
             "Error: ANTHROPIC_API_KEY environment variable not set.\n"
@@ -146,8 +157,8 @@ def _run_tbench(repo_path, subset, model, verbose, timeout, output_dir, skip_pre
 
     # Create HarborConfig (will not raise ValueError now)
     harbor_config = HarborConfig(
-        model=f"anthropic/{model}",
-        agent="claude-code",
+        model=model,
+        agent=agent,
         jobs_dir=Path(tempfile.mkdtemp()),
         api_key=api_key,
         timeout=timeout,
diff --git a/src/agentready/services/eval_harness/harbor_config.py b/src/agentready/services/eval_harness/harbor_config.py
@@ -12,11 +12,21 @@
 ALLOWED_MODELS = {
     "anthropic/claude-haiku-4-5",
     "anthropic/claude-sonnet-4-5",
+    "cursor/composer-1",
+    "cursor/gpt-5.2-codex",
+    "cursor/gpt-5.2-codex-fast",
+    "cursor/gemini-3-pro",
+    "cursor/opus-4.5",
+    "cursor/sonnet-4.5",
+    "cursor/sonnet-4.5-thinking",
+    "cursor/gpt-5.1-high",
+    "cursor/gemini-3-flash",
 }
 
 # Allowed agents (excludes oracle as it's not relevant for real-world assessment)
 ALLOWED_AGENTS = {
     "claude-code",
+    "cursor-cli",
 }
 
 
diff --git a/src/agentready/services/eval_harness/tbench_runner.py b/src/agentready/services/eval_harness/tbench_runner.py
@@ -125,31 +125,50 @@ def _real_tbench_result(repo_path: Path, config: HarborConfig) -> TbenchResult:
     # Pass through current environment but ensure API key is set
     # Harbor's claude-code agent has MiniMax API hardcoded - override it
     clean_env = os.environ.copy()
-    clean_env["ANTHROPIC_API_KEY"] = config.api_key
-    clean_env["ANTHROPIC_AUTH_TOKEN"] = config.api_key  # Harbor uses this
-    clean_env["ANTHROPIC_BASE_URL"] = "https://api.anthropic.com"  # Override MiniMax
-    clean_env["ANTHROPIC_API_BASE"] = "https://api.anthropic.com"  # Alternative var
+
+    # Define agent-specific environment variable configurations
+    # Structure: (Env Key, Env Value, Is Sensitive)
+    agent_env_configs = {
+        "claude-code": [
+            ("ANTHROPIC_API_KEY", config.api_key, True),
+            ("ANTHROPIC_AUTH_TOKEN", config.api_key, True),
+            ("ANTHROPIC_BASE_URL", "https://api.anthropic.com", False),
+            ("ANTHROPIC_API_BASE", "https://api.anthropic.com", False),
+        ],
+        "cursor-cli": [
+            ("CURSOR_API_KEY", config.api_key, True),
+        ],
+    }
+
+    if config.agent not in agent_env_configs:
+        raise ValueError(f"Invalid agent: {config.agent}")
+
+    # Set environment variables and build display/copyable lists
+    env_vars_display = []
+    env_vars_copyable = []
+
+    for var_name, var_value, is_sensitive in agent_env_configs[config.agent]:
+        clean_env[var_name] = var_value
+
+        # Build display string (truncate sensitive values)
+        if is_sensitive:
+            display_value = f"{var_value[:20]}..."
+        else:
+            display_value = var_value
+        env_vars_display.append(f"{var_name}={display_value}")
+
+        # Build copyable string (use variable reference for sensitive values)
+        if is_sensitive:
+            copyable_value = f"${var_name}"
+        else:
+            copyable_value = var_value
+        env_vars_copyable.append(f"{var_name}={copyable_value}")
+
     # Clear MiniMax settings if present
     clean_env.pop("MINIMAX_API_KEY", None)
 
     # Print Harbor command for debugging and manual execution
     shell_cmd = " ".join(shlex.quote(arg) for arg in cmd)
-
-    # Prepare environment variable strings (truncate API key for security in display)
-    env_vars_display = [
-        f"ANTHROPIC_API_KEY={config.api_key[:20]}...",  # Truncated for display
-        f"ANTHROPIC_AUTH_TOKEN={config.api_key[:20]}...",
-        f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
-        f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
-    ]
-
-    # Full command for copy/paste (use $ANTHROPIC_API_KEY to avoid exposing key)
-    env_vars_copyable = [
-        "ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY",
-        "ANTHROPIC_AUTH_TOKEN=$ANTHROPIC_API_KEY",
-        f"ANTHROPIC_BASE_URL={clean_env['ANTHROPIC_BASE_URL']}",
-        f"ANTHROPIC_API_BASE={clean_env['ANTHROPIC_API_BASE']}",
-    ]
     full_cmd_copyable = " ".join(env_vars_copyable) + " " + shell_cmd
 
     print(f"\n{'=' * 70}")