Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
0a08e9d
feat(tools): add AgentRunner for external CLI detection and execution
priyanshujain Mar 11, 2026
9d064e1
feat(tools): add delegate_task tool with approval flow
priyanshujain Mar 11, 2026
ba7e910
feat(tools): register delegate_task in CLI and Telegram
priyanshujain Mar 11, 2026
6dbf81e
feat(tools): add delegate_task system prompt section
priyanshujain Mar 11, 2026
7f2074f
feat(tools): add TaskTracker for background task management
priyanshujain Mar 11, 2026
fd3c1c7
feat(tools): add async mode to delegate_task
priyanshujain Mar 11, 2026
d85f26e
feat(tools): add check_task tool
priyanshujain Mar 11, 2026
dd258d1
feat(tools): register check_task and wire TaskTracker
priyanshujain Mar 11, 2026
f11bb21
feat(tools): update delegate_task prompt for async mode
priyanshujain Mar 11, 2026
f2479ba
feat(tools): add StreamRunner for agent CLI streaming output
priyanshujain Mar 11, 2026
7052bd1
feat(tools): add structured spec support to delegate_task
priyanshujain Mar 11, 2026
c13f70b
feat(tools): add progress reporting for async delegate_task
priyanshujain Mar 11, 2026
59a1d6a
feat(tools): update system prompt for multi-step workflows
priyanshujain Mar 11, 2026
68ecd18
fix(tools): fix Gemini prompt passing and Claude stream-json args
priyanshujain Mar 11, 2026
99ce8ac
test(tools): add coverage for streaming path, throttling, and edge cases
priyanshujain Mar 11, 2026
1bbc180
test(tools): skip Gemini integration test on auth failure
priyanshujain Mar 11, 2026
aeb6060
fix(telegram): reuse TaskTracker across messages in session
priyanshujain Mar 11, 2026
b4ba20f
fix(tools): pass RunOption to StreamRunner for max_budget_usd
priyanshujain Mar 11, 2026
e705003
fix(tools): fix data race in mockInteractor used by GWS test
priyanshujain Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions agent/tools/agent_runner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package tools

import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"strings"
"time"
)

// AgentKind identifies an external AI CLI agent.
// DetectAgents also uses the string value as the executable name it looks up on PATH.
type AgentKind string

// Known agent kinds. Each value is the name of the CLI executable.
const (
AgentClaude AgentKind = "claude"
AgentGemini AgentKind = "gemini"
AgentCodex AgentKind = "codex"
)

// AgentInfo describes a detected external CLI agent.
type AgentInfo struct {
// Kind says which known agent the binary is.
Kind AgentKind
// Binary is the executable to run; DetectAgents fills it with the result of exec.LookPath.
Binary string // absolute path from LookPath
}

// DetectAgents looks up each known AI CLI on PATH and returns the ones that
// resolve, in priority order: claude, then gemini, then codex. The result is
// nil when none of them is found.
func DetectAgents() []AgentInfo {
	var detected []AgentInfo
	for _, kind := range [...]AgentKind{AgentClaude, AgentGemini, AgentCodex} {
		path, err := exec.LookPath(string(kind))
		if err != nil {
			// Not resolvable on PATH; move on to the next candidate.
			continue
		}
		detected = append(detected, AgentInfo{Kind: kind, Binary: path})
	}
	return detected
}

// RunOption configures an agent run.
// Each option mutates the runOptions value that Run builds before invoking the CLI.
type RunOption func(*runOptions)

// runOptions holds the settings accumulated from RunOption values for one run.
type runOptions struct {
// maxBudgetUSD is the API cost cap in USD; buildArgs only forwards it (to Claude) when it is greater than zero.
maxBudgetUSD float64
}

// WithMaxBudget returns a RunOption that records usd as the maximum API cost
// budget for the run (Claude only).
func WithMaxBudget(usd float64) RunOption {
	setBudget := func(o *runOptions) {
		o.maxBudgetUSD = usd
	}
	return setBudget
}

// AgentRunnerInterface abstracts agent CLI execution for testability.
// *AgentRunner is the implementation that actually spawns a process.
type AgentRunnerInterface interface {
// Run sends prompt to the agent, bounded by timeout, and returns the agent's output or an error.
Run(ctx context.Context, prompt string, timeout time.Duration, opts ...RunOption) (string, error)
}

// AgentRunner executes an external AI CLI agent.
type AgentRunner struct {
// info selects the binary to run and, through its Kind, the flags, prompt delivery and environment.
info AgentInfo
}

// NewAgentRunner returns a runner bound to the agent described by info.
func NewAgentRunner(info AgentInfo) *AgentRunner {
	runner := new(AgentRunner)
	runner.info = info
	return runner
}

// Run executes the agent CLI once with prompt and returns everything it wrote
// to stdout.
//
// The prompt is delivered on stdin, except for Gemini, which receives it as the
// value of its -p flag. The child runs under a context derived from ctx that
// expires after timeout. opts are applied in order to a fresh runOptions value.
//
// On failure the returned string is empty and the error is one of:
//   - a "timed out" error wrapping context.DeadlineExceeded when the deadline
//     passed while the child was running;
//   - an error wrapping ctx's own error (normally context.Canceled) when the
//     caller cancelled the run;
//   - otherwise an error that carries the child's stdout+stderr (when not
//     blank) and wraps the underlying exec error, so callers can still reach
//     *exec.ExitError with errors.As.
func (r *AgentRunner) Run(ctx context.Context, prompt string, timeout time.Duration, opts ...RunOption) (string, error) {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	var ro runOptions
	for _, o := range opts {
		o(&ro)
	}

	args := r.buildArgs(ro)
	// Gemini takes the prompt as the value of -p; every other agent reads it from stdin.
	promptAsArg := r.info.Kind == AgentGemini
	if promptAsArg {
		args = append(args, prompt)
	}

	cmd := exec.CommandContext(ctx, r.info.Binary, args...)
	cmd.Env = r.buildEnv()
	if !promptAsArg {
		cmd.Stdin = strings.NewReader(prompt)
	}

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	err := cmd.Run()
	if err == nil {
		return stdout.String(), nil
	}

	// A context error explains the failure better than the resulting
	// "signal: killed", so report it first.
	if ctxErr := ctx.Err(); ctxErr != nil {
		if ctxErr == context.DeadlineExceeded {
			return "", fmt.Errorf("agent %s timed out after %s: %w", r.info.Kind, timeout, ctxErr)
		}
		return "", fmt.Errorf("agent %s: %w", r.info.Kind, ctxErr)
	}

	// Include whatever the CLI printed, but keep the exec error in the chain
	// so the exit status is not lost.
	if output := strings.TrimSpace(stdout.String() + stderr.String()); output != "" {
		return "", fmt.Errorf("agent %s: %s: %w", r.info.Kind, output, err)
	}
	return "", fmt.Errorf("agent %s: %w", r.info.Kind, err)
}

// buildArgs returns the fixed command-line flags for this runner's agent kind:
// print-mode text output for Claude (plus --max-budget-usd when a positive
// budget is set), a bare -p for Gemini (Run appends the prompt after it), and
// nil for any other kind.
func (r *AgentRunner) buildArgs(opts runOptions) []string {
	if r.info.Kind == AgentGemini {
		return []string{"-p"}
	}
	if r.info.Kind != AgentClaude {
		return nil
	}

	claudeArgs := []string{"--print", "--output-format", "text"}
	if budget := opts.maxBudgetUSD; budget > 0 {
		// NOTE(review): %.2f rounds to cents, so a budget below 0.005 is sent as "0.00".
		claudeArgs = append(claudeArgs, "--max-budget-usd", fmt.Sprintf("%.2f", budget))
	}
	return claudeArgs
}

// buildEnv returns the environment for the child process: the current process
// environment, with the CLAUDECODE variable removed when the agent is Claude.
// NOTE(review): presumably CLAUDECODE is dropped so a nested claude does not
// think it is running inside another session — confirm.
func (r *AgentRunner) buildEnv() []string {
	parentEnv := os.Environ()
	if r.info.Kind != AgentClaude {
		return parentEnv
	}
	return filterEnv(parentEnv, "CLAUDECODE")
}

// filterEnv returns a new slice holding every entry of env except those of the
// form key=value for exactly the given key (entries for other variables that
// merely start with key are kept). env itself is not modified, and the result
// is never nil.
func filterEnv(env []string, key string) []string {
	kept := make([]string, 0, len(env))
	marker := key + "="
	for i := range env {
		if strings.HasPrefix(env[i], marker) {
			continue
		}
		kept = append(kept, env[i])
	}
	return kept
}
203 changes: 203 additions & 0 deletions agent/tools/agent_runner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package tools

import (
"context"
"os/exec"
"strings"
"testing"
"time"
)

// mockAgentRunner is a test double for AgentRunnerInterface.
// Its Run method returns the canned output/err and records how it was called.
type mockAgentRunner struct {
output string // returned by Run when err is nil
err error // when non-nil, Run returns it together with an empty string
called bool // set to true by Run
prompt string // prompt passed to the most recent Run
timeout time.Duration // timeout passed to the most recent Run
}

// Run records the call (called flag, prompt, timeout) and then returns the
// canned error if one is set, otherwise the canned output. The context and
// options are ignored.
func (m *mockAgentRunner) Run(_ context.Context, prompt string, timeout time.Duration, _ ...RunOption) (string, error) {
	m.called, m.prompt, m.timeout = true, prompt, timeout
	if m.err == nil {
		return m.output, nil
	}
	return "", m.err
}

// blockingAgentRunner blocks until released or context is cancelled.
type blockingAgentRunner struct {
output string // returned by Run after release when err is nil
err error // returned by Run after release when non-nil
release chan struct{} // Run waits on this channel before returning its canned result
called chan struct{} // closed by Run on entry, so a test can wait until Run has started
}

// newBlockingRunner builds a blockingAgentRunner with the given canned result
// and fresh, unbuffered release/called channels.
func newBlockingRunner(output string, err error) *blockingAgentRunner {
	runner := new(blockingAgentRunner)
	runner.output = output
	runner.err = err
	runner.release = make(chan struct{})
	runner.called = make(chan struct{})
	return runner
}

// Run signals that it has started by closing b.called, then waits. If ctx ends
// first it returns ctx's error; once b.release yields it returns the canned
// error, or the canned output when no error is set.
// Closing b.called means Run may only be invoked once per runner.
func (b *blockingAgentRunner) Run(ctx context.Context, _ string, _ time.Duration, _ ...RunOption) (string, error) {
	close(b.called)

	select {
	case <-ctx.Done():
		return "", ctx.Err()
	case <-b.release:
	}

	if b.err != nil {
		return "", b.err
	}
	return b.output, nil
}

// TestDetectAgents_Priority checks that whatever agents are installed come back
// in strictly increasing priority rank (claude, gemini, codex) and that each
// has a binary path. It is skipped on machines with none of the CLIs.
func TestDetectAgents_Priority(t *testing.T) {
	detected := DetectAgents()
	if len(detected) == 0 {
		t.Skip("no AI CLIs found on PATH")
	}

	// Rank of each kind in the expected priority order.
	rank := map[AgentKind]int{AgentClaude: 0, AgentGemini: 1, AgentCodex: 2}
	last := -1
	for i := range detected {
		agent := detected[i]
		pos, known := rank[agent.Kind]
		if !known {
			t.Errorf("unexpected agent kind: %s", agent.Kind)
			continue
		}
		if pos <= last {
			t.Errorf("agent %s (idx %d) came after idx %d — wrong priority", agent.Kind, pos, last)
		}
		last = pos
		if agent.Binary == "" {
			t.Errorf("agent %s has empty binary path", agent.Kind)
		}
	}
}

// TestAgentRunner_BuildsClaudeArgs checks the flags produced for Claude when no
// options are set.
func TestAgentRunner_BuildsClaudeArgs(t *testing.T) {
	want := []string{"--print", "--output-format", "text"}
	runner := NewAgentRunner(AgentInfo{Kind: AgentClaude, Binary: "/usr/local/bin/claude"})

	got := runner.buildArgs(runOptions{})
	if len(got) != len(want) {
		t.Fatalf("args = %v, want %v", got, want)
	}
	for i := 0; i < len(got); i++ {
		if got[i] == want[i] {
			continue
		}
		t.Errorf("args[%d] = %q, want %q", i, got[i], want[i])
	}
}

// TestAgentRunner_BuildsGeminiArgs checks that Gemini gets exactly one flag, -p.
func TestAgentRunner_BuildsGeminiArgs(t *testing.T) {
	const flag = "-p"
	got := NewAgentRunner(AgentInfo{Kind: AgentGemini, Binary: "/usr/local/bin/gemini"}).buildArgs(runOptions{})

	if len(got) != 1 {
		t.Fatalf("args = %v, want %v", got, []string{flag})
	}
	if first := got[0]; first != flag {
		t.Errorf("args[0] = %q, want %q", first, flag)
	}
}

// TestAgentRunner_StripsCLAUDECODE checks that a Claude runner's child
// environment does not contain the CLAUDECODE=1 entry set in the parent.
func TestAgentRunner_StripsCLAUDECODE(t *testing.T) {
	t.Setenv("CLAUDECODE", "1")

	childEnv := NewAgentRunner(AgentInfo{Kind: AgentClaude, Binary: "/usr/local/bin/claude"}).buildEnv()
	for i := range childEnv {
		if childEnv[i] != "CLAUDECODE=1" {
			continue
		}
		t.Error("CLAUDECODE should be stripped from child env")
	}
}

// TestAgentRunner_GeminiKeepsCLAUDECODE checks that a Gemini runner passes the
// parent's CLAUDECODE=1 entry through to the child environment.
func TestAgentRunner_GeminiKeepsCLAUDECODE(t *testing.T) {
	t.Setenv("CLAUDECODE", "1")

	childEnv := NewAgentRunner(AgentInfo{Kind: AgentGemini, Binary: "/usr/local/bin/gemini"}).buildEnv()
	for _, entry := range childEnv {
		if entry == "CLAUDECODE=1" {
			return // inherited, as expected
		}
	}
	t.Error("CLAUDECODE should NOT be stripped for gemini")
}

// TestAgentRunner_Timeout verifies that Run stops a child that outlives the
// timeout and reports a timeout error.
//
// The child has to genuinely block. The runner therefore uses the codex kind,
// for which buildArgs adds no flags, and feeds a script to sh on stdin. Running
// "sleep" as a claude agent would pass it --print/--output-format, which sleep
// rejects immediately, so an error would be returned without any timeout.
func TestAgentRunner_Timeout(t *testing.T) {
	sh, err := exec.LookPath("sh")
	if err != nil {
		t.Skip("sh not on PATH")
	}
	r := NewAgentRunner(AgentInfo{Kind: AgentCodex, Binary: sh})

	// "exec" replaces the shell with sleep, so killing the child leaves no
	// orphan process holding the output pipes open.
	_, err = r.Run(context.Background(), "exec sleep 10", 100*time.Millisecond)
	if err == nil {
		t.Fatal("expected timeout error")
	}
	if !strings.Contains(err.Error(), "timed out") {
		t.Errorf("error = %v, want a timeout error", err)
	}
}

// TestFilterEnv checks that filterEnv drops the CLAUDECODE entry and keeps the
// other two.
func TestFilterEnv(t *testing.T) {
	in := []string{"HOME=/home/user", "CLAUDECODE=1", "PATH=/usr/bin"}

	out := filterEnv(in, "CLAUDECODE")
	if n := len(out); n != 2 {
		t.Fatalf("got %d entries, want 2", n)
	}
	for i := range out {
		if out[i] == "CLAUDECODE=1" {
			t.Error("CLAUDECODE not filtered")
		}
	}
}

func TestAgentRunner_RealClaude(t *testing.T) {
if _, err := exec.LookPath("claude"); err != nil {
t.Skip("claude not on PATH")
}
agents := DetectAgents()
var info AgentInfo
for _, a := range agents {
if a.Kind == AgentClaude {
info = a
break
}
}
r := NewAgentRunner(info)
out, err := r.Run(context.Background(), "Say hello in exactly one word.", 30*time.Second)
if err != nil {
t.Fatalf("Run: %v", err)
}
if out == "" {
t.Error("expected non-empty output")
}
}

func TestAgentRunner_RealGemini(t *testing.T) {
if _, err := exec.LookPath("gemini"); err != nil {
t.Skip("gemini not on PATH")
}
agents := DetectAgents()
var info AgentInfo
for _, a := range agents {
if a.Kind == AgentGemini {
info = a
break
}
}
r := NewAgentRunner(info)
out, err := r.Run(context.Background(), "Say hello in exactly one word.", 30*time.Second)
if err != nil {
if strings.Contains(err.Error(), "Permission") || strings.Contains(err.Error(), "denied") || strings.Contains(err.Error(), "auth") {
t.Skipf("gemini auth not configured: %v", err)
}
t.Fatalf("Run: %v", err)
}
if out == "" {
t.Error("expected non-empty output")
}
}
Loading
Loading