Skip to content

Commit 9c0e0b4

Browse files
committed
feat: add mcp_prompt to agent configuration for custom MCP tool guidance and simplify the default MCP tools prompt
1 parent a17ac5c commit 9c0e0b4

File tree

5 files changed

+27
-20
lines changed

5 files changed

+27
-20
lines changed

docs/ROAD-TO-V2-Overhaul.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ Tasks updated in this pass:
6464

6565
The intent here was high-signal evaluation: remove "mind-reading" requirements, but do not turn tasks into copy-paste exercises.
6666

67+
## What changed in MCP tools mode
68+
69+
The `--use-mcp-tools` prompt was rewritten to be minimal and neutral. The previous prompt included workflow coaching (read files, search code, don't guess) that could inflate scores independently of actual MCP tool usage. The new prompt is a brief two-sentence note: it tells the agent that MCP tools are available and nudges it to use them proactively, without teaching problem-solving strategy. This makes with-vs-without comparisons fairer.
70+
71+
A new `mcp_prompt` config field was added to `AgentConfig`, allowing per-agent MCP tool guidance (e.g., telling Gemini to use `@web` search). When `--use-mcp-tools` is set and `mcp_prompt` is non-empty, its text is appended to the prompt under an `AGENT-SPECIFIC TOOLS:` header; without the flag, or when the field is empty, nothing is added.
72+
6773
## Compatibility and comparing old runs
6874

6975
`1.7.x` is intentionally not identical to `v1.6.1` behavior. If you are comparing against historical leaderboard-era runs, use legacy mode:

internal/cli/eval.go

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,17 +1107,17 @@ func runTaskWithAgent(ctx context.Context, r *runner.Runner, t *task.Task, agent
11071107
return result
11081108
}
11091109

1110-
// Build agent command
1111-
prompt := buildAgentPrompt(t, evalUseMCPTools)
1112-
result.PromptChars = utf8.RuneCountInString(prompt)
1113-
11141110
// Get agent configuration
11151111
agentCfg := cfg.GetAgent(agent)
11161112
if agentCfg == nil {
11171113
result.Error = fmt.Sprintf("unknown agent: %s", agent)
11181114
return result
11191115
}
11201116

1117+
// Build agent command
1118+
prompt := buildAgentPrompt(t, evalUseMCPTools, agentCfg.MCPPrompt)
1119+
result.PromptChars = utf8.RuneCountInString(prompt)
1120+
11211121
agentTimeout := time.Duration(timeout) * time.Second
11221122
if agentTimeout <= 0 {
11231123
agentTimeout = 600 * time.Second
@@ -1453,7 +1453,7 @@ func toolchainInfo(lang task.Language) string {
14531453
}
14541454
}
14551455

1456-
func buildAgentPrompt(t *task.Task, useMCPTools bool) string {
1456+
func buildAgentPrompt(t *task.Task, useMCPTools bool, mcpPrompt string) string {
14571457
stubFiles := make([]string, 0, len(t.Files.Stub))
14581458
for _, f := range t.Files.Stub {
14591459
stubFiles = append(stubFiles, task.StripTxtExtension(f))
@@ -1506,13 +1506,10 @@ RULES:
15061506
prompt += `
15071507
15081508
MCP TOOLS:
1509-
You have access to MCP (Model Context Protocol) tools. Use them proactively:
1510-
- Use file reading tools to examine stub files and test files thoroughly
1511-
- Use code search tools to find patterns, helper functions, or related implementations
1512-
- Use any available analysis tools to understand the codebase structure
1513-
- Prefer using tools to gather context over making assumptions
1514-
1515-
Do NOT guess at implementation details that tools can help you discover.`
1509+
You have access to MCP tools. Carefully assess what they do and how they can be used as effectively as possible, then use them as proactively as you can wherever and whenever most suitable.`
1510+
if mcpPrompt != "" {
1511+
prompt += "\n\nAGENT-SPECIFIC TOOLS:\n" + mcpPrompt
1512+
}
15161513
}
15171514

15181515
return prompt

internal/cli/eval_prompt_test.go

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func TestBuildAgentPromptIncludesKeyInfo(t *testing.T) {
3030
},
3131
}
3232

33-
prompt := buildAgentPrompt(tt, false)
33+
prompt := buildAgentPrompt(tt, false, "")
3434

3535
for _, s := range []string{
3636
"Description: " + tt.Description,
@@ -66,19 +66,16 @@ func TestBuildAgentPromptWithMCPTools(t *testing.T) {
6666
}
6767

6868
// Test without MCP tools
69-
promptWithoutMCP := buildAgentPrompt(tt, false)
69+
promptWithoutMCP := buildAgentPrompt(tt, false, "")
7070
if strings.Contains(promptWithoutMCP, "MCP TOOLS:") {
7171
t.Fatalf("prompt without MCP tools should not contain MCP section\n\nPrompt:\n%s", promptWithoutMCP)
7272
}
7373

7474
// Test with MCP tools
75-
promptWithMCP := buildAgentPrompt(tt, true)
75+
promptWithMCP := buildAgentPrompt(tt, true, "")
7676
for _, s := range []string{
7777
"MCP TOOLS:",
78-
"Model Context Protocol",
79-
"file reading tools",
80-
"code search tools",
81-
"Do NOT guess at implementation details",
78+
"proactively as you can",
8279
} {
8380
if !strings.Contains(promptWithMCP, s) {
8481
t.Fatalf("prompt with MCP tools missing %q\n\nPrompt:\n%s", s, promptWithMCP)
@@ -128,7 +125,7 @@ func TestBuildAgentPromptIncludesToolchainInfo(t *testing.T) {
128125
},
129126
}
130127

131-
prompt := buildAgentPrompt(tt, false)
128+
prompt := buildAgentPrompt(tt, false, "")
132129
wantLine := "- Toolchain: " + tc.want
133130
if !strings.Contains(prompt, wantLine) {
134131
t.Fatalf("prompt missing %q\n\nPrompt:\n%s", wantLine, prompt)

internal/config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ type AgentConfig struct {
2020
ReasoningFlagPosition string `toml:"reasoning_flag_position"` // "before" or "after" {prompt} in args (default: "before")
2121
Env map[string]string `toml:"env"` // Environment variables
2222
DefaultTimeout int `toml:"default_timeout"` // Per-agent minimum timeout in seconds (overrides harness default if larger)
23+
MCPPrompt string `toml:"mcp_prompt,omitempty"` // Agent-specific MCP tool guidance (appended when --use-mcp-tools is set)
2324
}
2425

2526
// DefaultAgents provides built-in configurations for popular coding agents.

sanity.toml.example

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,9 @@ auto_pull = true # automatically pull images if missing
6363
# [agents.special-agent]
6464
# command = "special"
6565
# args = ["run", "{prompt}", "--verbose", "--no-confirm"]
66+
#
67+
# Example: Agent with custom MCP tool guidance (appended when --use-mcp-tools is set)
68+
# [agents.gemini]
69+
# command = "gemini"
70+
# args = ["--yolo", "{prompt}"]
71+
# mcp_prompt = "Use @web search to look up API docs when uncertain."

0 commit comments

Comments
 (0)