Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8e1e08c
feat(tools): add three-outcome filter with FilterResult type
priyanshujain Mar 18, 2026
91ced18
feat(tools): add approval gate to BashTool for non-allowlisted commands
priyanshujain Mar 18, 2026
de24e36
feat(cli): add CLIInteractor for terminal approval prompts
priyanshujain Mar 18, 2026
282357e
feat(tools): add approval gates to file_write, file_edit, and registry
priyanshujain Mar 18, 2026
0a1214b
feat(tools): add dir_explore tool for safe directory exploration
priyanshujain Mar 18, 2026
a02ceae
feat(tools): add content_search tool for regex file searching
priyanshujain Mar 18, 2026
09f7377
feat(tools): add sandbox runtime infrastructure (Seatbelt/bwrap)
priyanshujain Mar 18, 2026
b1ddce0
feat(tools): add sandbox_exec tool for sandboxed code execution
priyanshujain Mar 18, 2026
cc04ae4
feat(tools): register new tools and update prompt/sanitize
priyanshujain Mar 18, 2026
aa9cdab
docs(safety): update for three-tier tool safety model
priyanshujain Mar 18, 2026
0b2468e
refactor(tools): clarify DefaultBlocklist as legacy fallback
priyanshujain Mar 18, 2026
b7da690
test(tools): add file_write and file_edit approval gate tests
priyanshujain Mar 18, 2026
e648239
test(tools): add bash tool tests for approval flow, filter, and edge …
priyanshujain Mar 18, 2026
eebe7a9
test(tools): add tests for file tools, sandbox_exec errors, approval …
priyanshujain Mar 18, 2026
9873d29
test(tools): add edge case tests for sandbox, content_search, dir_exp…
priyanshujain Mar 18, 2026
c0ca948
test(tools): add dir_explore, content_search, sandbox_exec to untrust…
priyanshujain Mar 18, 2026
a054f48
test(tools): add prompt conditional section and registry interactor t…
priyanshujain Mar 18, 2026
e21af7a
test(cli): add NotifyLink test for CLIInteractor
priyanshujain Mar 18, 2026
371561c
fix(tools): reject whitespace-only bash commands
priyanshujain Mar 18, 2026
fbd68c4
fix(tools): use 0600 permissions for sandbox temp code files
priyanshujain Mar 18, 2026
0340733
refactor(tools): combine duplicate file_write/file_edit extractPatter…
priyanshujain Mar 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions agent/tools/approval_rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,20 @@ func extractPattern(toolName string, input json.RawMessage) string {
return ""
}
switch toolName {
case "bash":
if cmd, ok := m["command"]; ok {
var s string
if json.Unmarshal(cmd, &s) == nil {
return firstToken(s)
}
}
case "file_write", "file_edit":
if p, ok := m["path"]; ok {
var s string
if json.Unmarshal(p, &s) == nil {
return s
}
}
case "slack_send", "slack_read_channel":
if ch, ok := m["channel"]; ok {
var s string
Expand Down
56 changes: 56 additions & 0 deletions agent/tools/approval_rules_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,59 @@ func TestExtractPattern_GWSMissingCommand(t *testing.T) {
t.Errorf("pattern = %q, want gws_execute (fallback)", p)
}
}

// TestExtractPattern_BashCommand verifies that a multi-word bash command
// produces its first token as the approval pattern.
func TestExtractPattern_BashCommand(t *testing.T) {
	// Marshalling a map of plain strings cannot fail, so the error is dropped.
	payload, _ := json.Marshal(map[string]string{"command": "curl example.com"})

	got := extractPattern("bash", payload)
	if got != "curl" {
		t.Errorf("pattern = %q, want curl", got)
	}
}

// TestExtractPattern_BashSingleWord verifies that a bash command consisting
// of a single word is returned unchanged as the approval pattern.
func TestExtractPattern_BashSingleWord(t *testing.T) {
	// Marshalling a map of plain strings cannot fail, so the error is dropped.
	payload, _ := json.Marshal(map[string]string{"command": "ls"})

	got := extractPattern("bash", payload)
	if got != "ls" {
		t.Errorf("pattern = %q, want ls", got)
	}
}

// TestExtractPattern_FileWrite verifies that file_write input yields the
// value of its "path" field as the approval pattern.
func TestExtractPattern_FileWrite(t *testing.T) {
	fields := map[string]string{
		"path":    "/tmp/test.txt",
		"content": "hello",
	}
	// Marshalling a map of plain strings cannot fail, so the error is dropped.
	payload, _ := json.Marshal(fields)

	got := extractPattern("file_write", payload)
	if got != "/tmp/test.txt" {
		t.Errorf("pattern = %q, want /tmp/test.txt", got)
	}
}

// TestExtractPattern_FileEdit verifies that file_edit input yields the
// value of its "path" field as the approval pattern.
func TestExtractPattern_FileEdit(t *testing.T) {
	fields := map[string]string{
		"path":       "/tmp/test.txt",
		"old_string": "a",
		"new_string": "b",
	}
	// Marshalling a map of plain strings cannot fail, so the error is dropped.
	payload, _ := json.Marshal(fields)

	got := extractPattern("file_edit", payload)
	if got != "/tmp/test.txt" {
		t.Errorf("pattern = %q, want /tmp/test.txt", got)
	}
}

// TestApprovalRuleSet_WildcardPattern verifies that a rule whose Pattern is
// the empty string matches input for its tool.
func TestApprovalRuleSet_WildcardPattern(t *testing.T) {
	rules := NewApprovalRuleSet()
	rules.Add(ApprovalRule{ToolName: "bash", Pattern: ""})

	// Marshalling a map of plain strings cannot fail, so the error is dropped.
	payload, _ := json.Marshal(map[string]string{"command": "anything"})

	if matched := rules.Matches("bash", payload); !matched {
		t.Error("empty pattern should match any input")
	}
}

// TestApprovalRuleSet_DuplicateRulePrevention records the same slack_send
// approval autoApproveThreshold*3 times and verifies that exactly one
// matching rule ends up in the set.
func TestApprovalRuleSet_DuplicateRulePrevention(t *testing.T) {
	rules := NewApprovalRuleSet()
	// Marshalling a map of plain strings cannot fail, so the error is dropped.
	payload, _ := json.Marshal(map[string]string{"channel": "#general"})

	for remaining := autoApproveThreshold * 3; remaining > 0; remaining-- {
		rules.RecordApproval("slack_send", payload)
	}

	// Count matching rules while holding the set's mutex; the lock is
	// released before any failure is reported.
	matching := func() int {
		rules.mu.Lock()
		defer rules.mu.Unlock()

		total := 0
		for _, rule := range rules.rules {
			if rule.ToolName != "slack_send" || rule.Pattern != "#general" {
				continue
			}
			total++
		}
		return total
	}()

	if matching != 1 {
		t.Errorf("expected 1 rule, got %d (duplicate prevention failed)", matching)
	}
}
43 changes: 37 additions & 6 deletions agent/tools/bash.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ const defaultBashTimeout = 30 * time.Second

// BashTool executes shell commands.
type BashTool struct {
timeout time.Duration
filter *CommandFilter
workDir string
timeout time.Duration
filter *CommandFilter
workDir string
interactor Interactor
approvalRules *ApprovalRuleSet
}

// BashOption configures a BashTool.
Expand All @@ -32,6 +34,16 @@ func WithWorkDir(dir string) BashOption {
return func(b *BashTool) { b.workDir = dir }
}

// WithInteractor returns a BashOption that stores i on the BashTool as the
// interactor used for approval prompts.
func WithInteractor(i Interactor) BashOption {
	return func(tool *BashTool) {
		tool.interactor = i
	}
}

// WithApprovalRuleSet returns a BashOption that stores rules on the BashTool
// as the approval rule set used for session-scoped auto-approve.
func WithApprovalRuleSet(rules *ApprovalRuleSet) BashOption {
	return func(tool *BashTool) {
		tool.approvalRules = rules
	}
}

// NewBashTool creates a new bash tool with the given timeout and options.
func NewBashTool(timeout time.Duration, opts ...BashOption) *BashTool {
if timeout == 0 {
Expand Down Expand Up @@ -68,6 +80,7 @@ func (b *BashTool) Execute(ctx context.Context, input json.RawMessage) (string,
if err := json.Unmarshal(input, &in); err != nil {
return "", fmt.Errorf("parse input: %w", err)
}
in.Command = strings.TrimSpace(in.Command)
if in.Command == "" {
return "", fmt.Errorf("command is required")
}
Expand All @@ -76,14 +89,32 @@ func (b *BashTool) Execute(ctx context.Context, input json.RawMessage) (string,
return "", fmt.Errorf("gws commands must use the gws_execute tool, not bash")
}

if err := b.filter.Check(in.Command); err != nil {
return "", fmt.Errorf("command blocked: %w", err)
filterResult, filterErr := b.filter.CheckWithResult(in.Command)
switch filterResult {
case FilterDeny:
if filterErr != nil {
return "", fmt.Errorf("command blocked: %w", filterErr)
}
return "", fmt.Errorf("command blocked")
case FilterPrompt:
if b.interactor == nil {
return "", fmt.Errorf("command blocked: no interactor for approval")
}
return GuardedAction(ctx, b.interactor, RiskMedium,
"Run: "+in.Command,
func() (string, error) { return b.runCommand(ctx, in.Command) },
WithApprovalRules(b.approvalRules, "bash", input),
)
}

return b.runCommand(ctx, in.Command)
}

func (b *BashTool) runCommand(ctx context.Context, command string) (string, error) {
ctx, cancel := context.WithTimeout(ctx, b.timeout)
defer cancel()

cmd := exec.CommandContext(ctx, "bash", "-c", in.Command)
cmd := exec.CommandContext(ctx, "bash", "-c", command)
if b.workDir != "" {
cmd.Dir = b.workDir
}
Expand Down
104 changes: 77 additions & 27 deletions agent/tools/bash_filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,20 @@ import (
"strings"
)

// FilterResult is the three-way outcome of a command filter check.
// Its zero value is FilterAllow.
type FilterResult int

// Outcomes a command filter check can report.
const (
	// FilterAllow: the command is on the allowlist and may run freely.
	FilterAllow FilterResult = iota
	// FilterDeny: the command is hard blocked.
	FilterDeny
	// FilterPrompt: the command is not on the allowlist; ask the user.
	FilterPrompt
)

// CommandFilter validates shell commands against an allowlist or blocklist.
type CommandFilter struct {
allowed []string // if set, only these prefixes pass
blocked []string // if set, these prefixes are rejected
allowed []string // if set, only these prefixes pass
blocked []string // if set, these prefixes are rejected
softAllow bool // if true, non-matching returns FilterPrompt instead of FilterDeny
}

// NewAllowlistFilter creates a filter that only permits commands
Expand All @@ -19,34 +29,61 @@ func NewAllowlistFilter(prefixes []string) *CommandFilter {
return &CommandFilter{allowed: prefixes}
}

// NewSoftAllowlistFilter builds an allowlist filter in soft mode: commands
// on the allowlist are auto-allowed, and anything else yields FilterPrompt
// rather than FilterDeny. It is meant for interactive mode, where unknown
// commands should be approved by the user. The prefixes slice is stored
// as-is, without copying.
func NewSoftAllowlistFilter(prefixes []string) *CommandFilter {
	filter := new(CommandFilter)
	filter.allowed = prefixes
	filter.softAllow = true
	return filter
}

// NewBlocklistFilter builds a filter that rejects commands containing a
// blocked name. In blocklist mode every whitespace-separated token of a
// segment is examined, and a token is blocked when its base name equals one
// of the given entries. The prefixes slice is stored as-is, without copying.
func NewBlocklistFilter(prefixes []string) *CommandFilter {
	filter := new(CommandFilter)
	filter.blocked = prefixes
	return filter
}

// Check validates the given command string. It splits on shell
// operators (|, &&, ;, ||) and checks each segment. It also
// detects command substitution via $() and backticks.
func (f *CommandFilter) Check(command string) error {
// CheckWithResult validates the given command string and returns a FilterResult
// indicating whether to allow, deny, or prompt the user.
func (f *CommandFilter) CheckWithResult(command string) (FilterResult, error) {
if f == nil {
return nil
return FilterAllow, nil
}

segments := splitShellSegments(command)
for _, seg := range segments {
if err := f.checkSegment(seg); err != nil {
return err
result, err := f.checkSegmentResult(seg)
if err != nil || result != FilterAllow {
return result, err
}
}

// Check inside $() and backtick substitutions.
for _, sub := range extractSubstitutions(command) {
if err := f.Check(sub); err != nil {
return fmt.Errorf("in command substitution: %w", err)
result, err := f.CheckWithResult(sub)
if err != nil {
return result, fmt.Errorf("in command substitution: %w", err)
}
if result != FilterAllow {
return result, nil
}
}

return FilterAllow, nil
}

// Check reports whether command may run without further approval. It
// delegates to CheckWithResult and collapses the three-way outcome into an
// error:
//   - any error from CheckWithResult is returned unchanged;
//   - FilterDeny without an error becomes "command not permitted";
//   - FilterPrompt becomes "command requires approval";
//   - otherwise nil is returned.
func (f *CommandFilter) Check(command string) error {
	outcome, err := f.CheckWithResult(command)
	if err != nil {
		return err
	}

	switch outcome {
	case FilterDeny:
		return fmt.Errorf("command not permitted")
	case FilterPrompt:
		return fmt.Errorf("command requires approval")
	default:
		return nil
	}
}

Expand All @@ -55,41 +92,43 @@ func basename(token string) string {
return filepath.Base(token)
}

// checkSegment validates a single command segment.
// Allowlist: only the first token must match.
// Blocklist: every token is checked to catch wrappers like "env curl".
func (f *CommandFilter) checkSegment(seg string) error {
// checkSegmentResult validates a single command segment and returns a FilterResult.
func (f *CommandFilter) checkSegmentResult(seg string) (FilterResult, error) {
fields := strings.Fields(strings.TrimSpace(seg))
if len(fields) == 0 {
return nil
return FilterAllow, nil
}
if len(f.allowed) > 0 {
return f.checkToken(fields[0])
return f.checkTokenResult(fields[0])
}
for _, tok := range fields {
if err := f.checkToken(tok); err != nil {
return err
result, err := f.checkTokenResult(tok)
if err != nil || result != FilterAllow {
return result, err
}
}
return nil
return FilterAllow, nil
}

func (f *CommandFilter) checkToken(token string) error {
func (f *CommandFilter) checkTokenResult(token string) (FilterResult, error) {
base := basename(token)
if len(f.allowed) > 0 {
for _, prefix := range f.allowed {
if base == prefix {
return nil
return FilterAllow, nil
}
}
return fmt.Errorf("command %q not in allowlist", token)
if f.softAllow {
return FilterPrompt, nil
}
return FilterDeny, fmt.Errorf("command %q not in allowlist", token)
}
for _, prefix := range f.blocked {
if base == prefix {
return fmt.Errorf("command %q is blocked", token)
return FilterDeny, fmt.Errorf("command %q is blocked", token)
}
}
return nil
return FilterAllow, nil
}

// splitShellSegments splits a command on |, &&, ;, and || operators.
Expand Down Expand Up @@ -139,7 +178,18 @@ func extractSubstitutions(cmd string) []string {
return subs
}

// DefaultBlocklist is the default set of blocked commands for interactive mode.
// InteractiveAllowlist lists the command names that are auto-allowed in
// interactive mode. A command that is not on this list requires user
// approval (FilterPrompt).
//
// NOTE(review): some entries can launch other programs or write files by
// themselves (for example find with -exec, or the sqlite3 shell) — confirm
// that auto-allowing them is intended.
var InteractiveAllowlist = []string{
	// Project and database command-line tools.
	"obk",
	"sqlite3",

	// File inspection and text processing.
	"ls",
	"cat",
	"head",
	"tail",
	"wc",
	"sort",
	"uniq",
	"diff",

	// Searching.
	"find",
	"grep",
	"rg",

	// Dates and plain output.
	"date",
	"cal",
	"echo",
	"printf",

	// Version control and miscellaneous read-oriented helpers.
	"git",
	"tree",
	"file",
	"stat",
	"jq",
	"which",
}

// DefaultBlocklist is the legacy blocklist used when no Interactor is provided
// (e.g. subagents). Interactive mode now uses InteractiveAllowlist instead.
var DefaultBlocklist = []string{
// Network
"curl", "wget", "nc", "ncat", "nmap",
Expand Down
Loading
Loading