feat: add mcp_prompt to agent configuration for custom MCP tool guidance and simplify the default MCP tools prompt

lemon07r · lemon07r · commit 9c0e0b4a727b · 2026-02-21T02:31:16.000-05:00
diff --git a/docs/ROAD-TO-V2-Overhaul.md b/docs/ROAD-TO-V2-Overhaul.md
@@ -64,6 +64,12 @@ Tasks updated in this pass:
 
 The intent here was high-signal evaluation: remove "mind-reading" requirements, but do not turn tasks into copy-paste exercises.
 
+## What changed in MCP tools mode
+
+The `--use-mcp-tools` prompt was rewritten to be minimal and neutral. The previous prompt included workflow coaching (read files, search code, don't guess) that could inflate scores independently of actual MCP tool usage. The new prompt is a short, two-sentence nudge telling the agent to assess its MCP tools and use them proactively, without teaching problem-solving strategy. This makes with-vs-without comparisons fairer.
+
+A new `mcp_prompt` config field was added to `AgentConfig`, allowing per-agent MCP tool guidance (e.g., telling Gemini to use `@web` search). When the field is non-empty, its text is appended to the prompt under an `AGENT-SPECIFIC TOOLS:` header, but only when `--use-mcp-tools` is set.
+
 ## Compatibility and comparing old runs
 
 `1.7.x` is intentionally not identical to `v1.6.1` behavior. If you are comparing against historical leaderboard-era runs, use legacy mode:
diff --git a/internal/cli/eval.go b/internal/cli/eval.go
@@ -1107,17 +1107,17 @@ func runTaskWithAgent(ctx context.Context, r *runner.Runner, t *task.Task, agent
 		return result
 	}
 
-	// Build agent command
-	prompt := buildAgentPrompt(t, evalUseMCPTools)
-	result.PromptChars = utf8.RuneCountInString(prompt)
-
 	// Get agent configuration
 	agentCfg := cfg.GetAgent(agent)
 	if agentCfg == nil {
 		result.Error = fmt.Sprintf("unknown agent: %s", agent)
 		return result
 	}
 
+	// Build agent command
+	prompt := buildAgentPrompt(t, evalUseMCPTools, agentCfg.MCPPrompt)
+	result.PromptChars = utf8.RuneCountInString(prompt)
+
 	agentTimeout := time.Duration(timeout) * time.Second
 	if agentTimeout <= 0 {
 		agentTimeout = 600 * time.Second
@@ -1453,7 +1453,7 @@ func toolchainInfo(lang task.Language) string {
 	}
 }
 
-func buildAgentPrompt(t *task.Task, useMCPTools bool) string {
+func buildAgentPrompt(t *task.Task, useMCPTools bool, mcpPrompt string) string {
 	stubFiles := make([]string, 0, len(t.Files.Stub))
 	for _, f := range t.Files.Stub {
 		stubFiles = append(stubFiles, task.StripTxtExtension(f))
@@ -1506,13 +1506,10 @@ RULES:
 		prompt += `
 
 MCP TOOLS:
-You have access to MCP (Model Context Protocol) tools. Use them proactively:
-- Use file reading tools to examine stub files and test files thoroughly
-- Use code search tools to find patterns, helper functions, or related implementations
-- Use any available analysis tools to understand the codebase structure
-- Prefer using tools to gather context over making assumptions
-
-Do NOT guess at implementation details that tools can help you discover.`
+You have access to MCP tools. Carefully assess what they do and how they can be used as effectively as possible, then use them as proactively as you can wherever and whenever most suitable.`
+		if mcpPrompt != "" {
+			prompt += "\n\nAGENT-SPECIFIC TOOLS:\n" + mcpPrompt
+		}
 	}
 
 	return prompt
diff --git a/internal/cli/eval_prompt_test.go b/internal/cli/eval_prompt_test.go
@@ -30,7 +30,7 @@ func TestBuildAgentPromptIncludesKeyInfo(t *testing.T) {
 		},
 	}
 
-	prompt := buildAgentPrompt(tt, false)
+	prompt := buildAgentPrompt(tt, false, "")
 
 	for _, s := range []string{
 		"Description: " + tt.Description,
@@ -66,19 +66,16 @@ func TestBuildAgentPromptWithMCPTools(t *testing.T) {
 	}
 
 	// Test without MCP tools
-	promptWithoutMCP := buildAgentPrompt(tt, false)
+	promptWithoutMCP := buildAgentPrompt(tt, false, "")
 	if strings.Contains(promptWithoutMCP, "MCP TOOLS:") {
 		t.Fatalf("prompt without MCP tools should not contain MCP section\n\nPrompt:\n%s", promptWithoutMCP)
 	}
 
 	// Test with MCP tools
-	promptWithMCP := buildAgentPrompt(tt, true)
+	promptWithMCP := buildAgentPrompt(tt, true, "")
 	for _, s := range []string{
 		"MCP TOOLS:",
-		"Model Context Protocol",
-		"file reading tools",
-		"code search tools",
-		"Do NOT guess at implementation details",
+  "proactively as you can",
 	} {
 		if !strings.Contains(promptWithMCP, s) {
 			t.Fatalf("prompt with MCP tools missing %q\n\nPrompt:\n%s", s, promptWithMCP)
@@ -128,7 +125,7 @@ func TestBuildAgentPromptIncludesToolchainInfo(t *testing.T) {
 				},
 			}
 
-			prompt := buildAgentPrompt(tt, false)
+			prompt := buildAgentPrompt(tt, false, "")
 			wantLine := "- Toolchain: " + tc.want
 			if !strings.Contains(prompt, wantLine) {
 				t.Fatalf("prompt missing %q\n\nPrompt:\n%s", wantLine, prompt)
diff --git a/internal/config/config.go b/internal/config/config.go
@@ -20,6 +20,7 @@ type AgentConfig struct {
 	ReasoningFlagPosition string            `toml:"reasoning_flag_position"` // "before" or "after" {prompt} in args (default: "before")
 	Env                   map[string]string `toml:"env"`                     // Environment variables
 	DefaultTimeout        int               `toml:"default_timeout"`         // Per-agent minimum timeout in seconds (overrides harness default if larger)
+	MCPPrompt             string            `toml:"mcp_prompt,omitempty"`    // Agent-specific MCP tool guidance (appended when --use-mcp-tools is set)
 }
 
 // DefaultAgents provides built-in configurations for popular coding agents.
diff --git a/sanity.toml.example b/sanity.toml.example
@@ -63,3 +63,9 @@ auto_pull = true            # automatically pull images if missing
 # [agents.special-agent]
 # command = "special"
 # args = ["run", "{prompt}", "--verbose", "--no-confirm"]
+#
+# Example: Agent with custom MCP tool guidance (appended when --use-mcp-tools is set)
+# [agents.gemini]
+# command = "gemini"
+# args = ["--yolo", "{prompt}"]
+# mcp_prompt = "Use @web search to look up API docs when uncertain."

Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@ type AgentConfig struct {`
`20`	`20`	ReasoningFlagPosition string `toml:"reasoning_flag_position"` // "before" or "after" {prompt} in args (default: "before")
`21`	`21`	Env map[string]string `toml:"env"` // Environment variables
`22`	`22`	DefaultTimeout int `toml:"default_timeout"` // Per-agent minimum timeout in seconds (overrides harness default if larger)
	`23`	+ MCPPrompt string `toml:"mcp_prompt,omitempty"` // Agent-specific MCP tool guidance (appended when --use-mcp-tools is set)
`23`	`24`	`}`
`24`	`25`
`25`	`26`	`// DefaultAgents provides built-in configurations for popular coding agents.`