Skip to content

Commit 87f629e

Browse files
committed
feat: introduce agent skills support with new CLI flags, configuration, and prompt modifications.
1 parent 59056a2 commit 87f629e

File tree

6 files changed: 44 additions (+44) and 14 deletions (-14)

6 files changed: 44 additions (+44) and 14 deletions (-14)

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ pubspec.lock
120120
**/zig-out/
121121
**/.zig-cache/
122122

123+
# Agent Skills artifacts
124+
skills/
125+
skills-lock.json
126+
123127
# Pampax semantic search index
124128
.pampa/
125129
pampa.codemap.json

internal/cli/batch.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ type BatchDefaults struct {
2828
Parallel int `toml:"parallel"`
2929
KeepWorkspaces bool `toml:"keep_workspaces"`
3030
UseMCPTools bool `toml:"use_mcp_tools"`
31+
UseSkills bool `toml:"use_skills"`
3132
DisableMCP bool `toml:"disable_mcp"`
3233
NoSandbox bool `toml:"no_sandbox"`
3334
Legacy bool `toml:"legacy"`
@@ -89,6 +90,7 @@ The TOML file supports defaults that apply to all runs, with per-run overrides.`
8990
Parallel: defaults.Parallel,
9091
KeepWorkspaces: defaults.KeepWorkspaces,
9192
UseMCPTools: defaults.UseMCPTools,
93+
UseSkills: defaults.UseSkills,
9294
DisableMCP: defaults.DisableMCP,
9395
NoSandbox: defaults.NoSandbox,
9496
Legacy: defaults.Legacy,

internal/cli/eval.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ var (
4747
evalParallel int
4848
evalDryRun bool
4949
evalUseMCPTools bool
50+
evalUseSkills bool
5051
evalDisableMCP bool
5152
evalNoSandbox bool
5253
evalLegacy bool
@@ -293,6 +294,7 @@ type EvalSummary struct {
293294
ByTier map[string]EvalAggregate `json:"by_tier,omitempty"`
294295
ByDifficulty map[string]EvalAggregate `json:"by_difficulty,omitempty"`
295296
UseMCPTools bool `json:"use_mcp_tools"`
297+
UseSkills bool `json:"use_skills"`
296298
DisableMCP bool `json:"disable_mcp"`
297299
Sandbox bool `json:"sandbox"`
298300
Legacy bool `json:"legacy"`
@@ -326,6 +328,7 @@ type SharedConfig struct {
326328
Parallel int
327329
KeepWorkspaces bool
328330
UseMCPTools bool
331+
UseSkills bool
329332
DisableMCP bool
330333
NoSandbox bool
331334
Legacy bool
@@ -344,6 +347,7 @@ type RunConfig struct {
344347
Timeout int `json:"timeout"`
345348
Parallel int `json:"parallel"`
346349
UseMCPTools bool `json:"use_mcp_tools"`
350+
UseSkills bool `json:"use_skills"`
347351
DisableMCP bool `json:"disable_mcp"`
348352
NoSandbox bool `json:"no_sandbox"`
349353
Legacy bool `json:"legacy"`
@@ -406,8 +410,8 @@ Examples:
406410
shared := SharedConfig{
407411
Tier: evalTier, Difficulty: evalDifficulty, Lang: evalLang,
408412
Tasks: evalTasks, Timeout: evalTimeout, Parallel: evalParallel,
409-
KeepWorkspaces: evalKeepWorkspaces, UseMCPTools: evalUseMCPTools,
410-
DisableMCP: evalDisableMCP, NoSandbox: evalNoSandbox,
413+
KeepWorkspaces: evalKeepWorkspaces, UseMCPTools: evalUseMCPTools,
414+
UseSkills: evalUseSkills, DisableMCP: evalDisableMCP, NoSandbox: evalNoSandbox,
411415
Legacy: evalLegacy, DryRun: evalDryRun,
412416
}
413417

@@ -439,8 +443,8 @@ Examples:
439443
shared = SharedConfig{
440444
Tier: evalTier, Difficulty: evalDifficulty, Lang: evalLang,
441445
Tasks: evalTasks, Timeout: evalTimeout, Parallel: evalParallel,
442-
KeepWorkspaces: evalKeepWorkspaces, UseMCPTools: evalUseMCPTools,
443-
DisableMCP: evalDisableMCP, NoSandbox: evalNoSandbox,
446+
KeepWorkspaces: evalKeepWorkspaces, UseMCPTools: evalUseMCPTools,
447+
UseSkills: evalUseSkills, DisableMCP: evalDisableMCP, NoSandbox: evalNoSandbox,
444448
Legacy: evalLegacy, DryRun: evalDryRun,
445449
}
446450

@@ -775,6 +779,7 @@ func evalRunSingle( //nolint:gocognit,gocyclo,maintidx
775779
evalModel = spec.Model
776780
evalReasoning = spec.Reasoning
777781
evalUseMCPTools = shared.UseMCPTools
782+
evalUseSkills = shared.UseSkills
778783
evalDisableMCP = shared.DisableMCP
779784
evalLegacy = shared.Legacy
780785
evalKeepWorkspaces = shared.KeepWorkspaces
@@ -1279,6 +1284,7 @@ func evalRunSingle( //nolint:gocognit,gocyclo,maintidx
12791284
ByTier: finalize(byTier),
12801285
ByDifficulty: finalize(byDifficulty),
12811286
UseMCPTools: shared.UseMCPTools,
1287+
UseSkills: shared.UseSkills,
12821288
DisableMCP: shared.DisableMCP,
12831289
Sandbox: evalSandboxActive,
12841290
Legacy: shared.Legacy,
@@ -1408,7 +1414,7 @@ func runTaskWithAgent(ctx context.Context, r *runner.Runner, t *task.Task, agent
14081414
}
14091415

14101416
// Build agent command
1411-
prompt := buildAgentPrompt(t, evalUseMCPTools, agentCfg.MCPPrompt)
1417+
prompt := buildAgentPrompt(t, evalUseMCPTools, evalUseSkills, agentCfg.MCPPrompt)
14121418
result.PromptChars = utf8.RuneCountInString(prompt)
14131419
agentTimeout := resolveAgentTimeout(timeout, agentCfg.DefaultTimeout, t.AgentTimeout)
14141420

@@ -2095,7 +2101,7 @@ func toolchainInfo(lang task.Language) string {
20952101
}
20962102
}
20972103

2098-
func buildAgentPrompt(t *task.Task, useMCPTools bool, mcpPrompt string) string {
2104+
func buildAgentPrompt(t *task.Task, useMCPTools, useSkills bool, mcpPrompt string) string {
20992105
stubFiles := make([]string, 0, len(t.Files.Stub))
21002106
for _, f := range t.Files.Stub {
21012107
stubFiles = append(stubFiles, task.StripTxtExtension(f))
@@ -2112,6 +2118,9 @@ func buildAgentPrompt(t *task.Task, useMCPTools bool, mcpPrompt string) string {
21122118
mcpEnvironmentLine := ""
21132119
mcpImportantLine := ""
21142120
mcpRuleLine := ""
2121+
skillsEnvironmentLine := ""
2122+
skillsImportantLine := ""
2123+
skillsRuleLine := ""
21152124
taskInstructions := `1. Read the stub file(s) (function signatures with panic()/todo!/Unimplemented placeholders).
21162125
2. Read the visible test file(s) to understand expected behavior and edge cases.
21172126
3. Implement the stub file(s), replacing placeholders with working code.
@@ -2128,6 +2137,11 @@ func buildAgentPrompt(t *task.Task, useMCPTools bool, mcpPrompt string) string {
21282137
mcpImportantLine = "\n- Prefer your MCP server tools over built-in alternatives if both can accomplish the same step or objective."
21292138
mcpRuleLine = "\n- You MUST actively use your MCP server tools to assist you with your work. Do NOT ignore them. Make your first MCP server tool call before writing any code."
21302139
}
2140+
if useSkills {
2141+
skillsEnvironmentLine = "\n- You have access to Agent Skills. Check your available skills and read their documentation before starting work."
2142+
skillsImportantLine = "\n- Prefer your Agent Skills over manual alternatives if both can accomplish the same step or objective."
2143+
skillsRuleLine = "\n- You MUST actively use your Agent Skills to assist you with your work. Do NOT ignore them. Review your available skills before writing any code."
2144+
}
21312145

21322146
prompt := fmt.Sprintf(`You are solving a coding task called "%s".
21332147
@@ -2145,23 +2159,23 @@ ENVIRONMENT:
21452159
- Final validation runs automatically in a Docker container.
21462160
- Toolchain: %s
21472161
- You may run local tests/commands in the workspace while iterating.
2148-
- Toolchains are preinstalled; extra installs are optional.%s
2162+
- Toolchains are preinstalled; extra installs are optional.%s%s
21492163
21502164
YOUR TASK:
21512165
%s
21522166
21532167
IMPORTANT:
2154-
- There may be hidden tests that check additional edge cases for the same public API.%s
2168+
- There may be hidden tests that check additional edge cases for the same public API.%s%s
21552169
21562170
RULES:
21572171
- ONLY edit the stub/solution source file(s).
21582172
- Do NOT modify test files or support files.
21592173
- You may add new helper source files if needed.
21602174
- Evaluation fails if you modify protected files.
2161-
- Do NOT navigate to parent directories or read files outside the workspace.%s`,
2175+
- Do NOT navigate to parent directories or read files outside the workspace.%s%s`,
21622176
t.Name, t.Language, t.Tier, t.Difficulty, t.Description,
21632177
strings.Join(stubFiles, ", "), strings.Join(testFiles, ", "),
2164-
toolchainInfo(t.Language), mcpEnvironmentLine, taskInstructions, mcpImportantLine, mcpRuleLine)
2178+
toolchainInfo(t.Language), mcpEnvironmentLine, skillsEnvironmentLine, taskInstructions, mcpImportantLine, skillsImportantLine, mcpRuleLine, skillsRuleLine)
21652179

21662180
return prompt
21672181
}
@@ -3137,6 +3151,7 @@ type LeaderboardSubmission struct {
31373151
Timeout int `json:"timeout"`
31383152
Parallel int `json:"parallel"`
31393153
UseMCPTools bool `json:"use_mcp_tools"`
3154+
UseSkills bool `json:"use_skills"`
31403155
DisableMCP bool `json:"disable_mcp"`
31413156
Sandbox bool `json:"sandbox"`
31423157
Legacy bool `json:"legacy"`
@@ -3181,6 +3196,7 @@ func generateLeaderboardSubmission(summary EvalSummary, attestation *EvalAttesta
31813196
Timeout: summary.Timeout,
31823197
Parallel: summary.Parallel,
31833198
UseMCPTools: summary.UseMCPTools,
3199+
UseSkills: summary.UseSkills,
31843200
DisableMCP: summary.DisableMCP,
31853201
Sandbox: summary.Sandbox,
31863202
Legacy: summary.Legacy,
@@ -3252,6 +3268,9 @@ func writeReportSummary(sb *strings.Builder, summary EvalSummary) {
32523268
if summary.UseMCPTools {
32533269
sb.WriteString("| MCP Tools Mode | Yes |\n")
32543270
}
3271+
if summary.UseSkills {
3272+
sb.WriteString("| Skills Mode | Yes |\n")
3273+
}
32553274
if summary.DisableMCP {
32563275
sb.WriteString("| MCP Disabled | Yes |\n")
32573276
}
@@ -3814,6 +3833,7 @@ func saveRunConfig(outputDir string, allTasks []*task.Task) error {
38143833
Timeout: evalTimeout,
38153834
Parallel: evalParallel,
38163835
UseMCPTools: evalUseMCPTools,
3836+
UseSkills: evalUseSkills,
38173837
DisableMCP: evalDisableMCP,
38183838
NoSandbox: evalNoSandbox,
38193839
Legacy: evalLegacy,
@@ -3857,6 +3877,7 @@ func applyRunConfig(runCfg *RunConfig) {
38573877
evalTimeout = runCfg.Timeout
38583878
evalParallel = runCfg.Parallel
38593879
evalUseMCPTools = runCfg.UseMCPTools
3880+
evalUseSkills = runCfg.UseSkills
38603881
evalDisableMCP = runCfg.DisableMCP
38613882
evalNoSandbox = runCfg.NoSandbox
38623883
evalLegacy = runCfg.Legacy
@@ -4113,6 +4134,7 @@ func init() {
41134134
evalCmd.Flags().BoolVar(&evalKeepWorkspaces, "keep-workspaces", false, "keep workspace directories after evaluation")
41144135
evalCmd.Flags().BoolVar(&evalDryRun, "dry-run", false, "show what tasks would be run without executing")
41154136
evalCmd.Flags().BoolVar(&evalUseMCPTools, "use-mcp-tools", false, "inject MCP tool usage instructions into agent prompt")
4137+
evalCmd.Flags().BoolVar(&evalUseSkills, "use-skills", false, "inject Agent Skills usage instructions into agent prompt")
41164138
evalCmd.Flags().BoolVar(&evalDisableMCP, "disable-mcp", false, "disable MCP tools for agents that support it (currently: opencode)")
41174139
evalCmd.Flags().BoolVar(&evalNoSandbox, "no-sandbox", false, "disable bubblewrap sandbox for agent processes")
41184140
evalCmd.Flags().BoolVar(&evalLegacy, "legacy", false, "expose hidden tests to agent during workspace init (pre-v1.6.0 behavior)")

internal/cli/eval_multi.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ func restoreSharedConfigGlobals(shared SharedConfig) {
383383
evalParallel = shared.Parallel
384384
evalKeepWorkspaces = shared.KeepWorkspaces
385385
evalUseMCPTools = shared.UseMCPTools
386+
evalUseSkills = shared.UseSkills
386387
evalDisableMCP = shared.DisableMCP
387388
evalNoSandbox = shared.NoSandbox
388389
evalLegacy = shared.Legacy

internal/cli/eval_prompt_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func TestBuildAgentPromptIncludesKeyInfo(t *testing.T) {
3131
},
3232
}
3333

34-
prompt := buildAgentPrompt(tt, false, "")
34+
prompt := buildAgentPrompt(tt, false, false, "")
3535

3636
for _, s := range []string{
3737
"Description: " + tt.Description,
@@ -76,7 +76,7 @@ func TestBuildAgentPromptWithMCPTools(t *testing.T) {
7676
}
7777

7878
// Test without MCP tools
79-
promptWithoutMCP := buildAgentPrompt(tt, false, "")
79+
promptWithoutMCP := buildAgentPrompt(tt, false, false, "")
8080
for _, forbidden := range []string{
8181
"You have access to MCP server tools. Review what is available to you before starting work.",
8282
"1. Use your MCP server tools to help complete your task(s) wherever and whenever applicable.",
@@ -91,7 +91,7 @@ func TestBuildAgentPromptWithMCPTools(t *testing.T) {
9191
}
9292

9393
// Test with MCP tools
94-
promptWithMCP := buildAgentPrompt(tt, true, "agent-specific text should not appear")
94+
promptWithMCP := buildAgentPrompt(tt, true, false, "agent-specific text should not appear")
9595
for _, s := range []string{
9696
"- You have access to MCP server tools. Review what is available to you before starting work.",
9797
"1. Use your MCP server tools to help complete your task(s) wherever and whenever applicable.",
@@ -157,7 +157,7 @@ func TestBuildAgentPromptIncludesToolchainInfo(t *testing.T) {
157157
},
158158
}
159159

160-
prompt := buildAgentPrompt(tt, false, "")
160+
prompt := buildAgentPrompt(tt, false, false, "")
161161
wantLine := "- Toolchain: " + tc.want
162162
if !strings.Contains(prompt, wantLine) {
163163
t.Fatalf("prompt missing %q\n\nPrompt:\n%s", wantLine, prompt)

internal/config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ var Default = Config{
243243
".opencode/bin",
244244
".bun/bin",
245245
".npm-global",
246+
".agents",
246247
},
247248
},
248249
}

0 commit comments

Comments
 (0)