diff --git a/cmd/odek/dispatch.go b/cmd/odek/dispatch.go index 95c9db2..58a6af2 100644 --- a/cmd/odek/dispatch.go +++ b/cmd/odek/dispatch.go @@ -53,6 +53,8 @@ func dispatch(args []string) int { return cliExit(telegramCmd(rest)) case "schedule": return cliExit(scheduleCmd(rest)) + case "memory": + return cliExit(memoryCmd(rest)) default: fmt.Fprintf(os.Stderr, "odek: unknown command %q\n", cmd) printUsage() diff --git a/cmd/odek/main.go b/cmd/odek/main.go index e0e5690..e7e5d0b 100644 --- a/cmd/odek/main.go +++ b/cmd/odek/main.go @@ -452,6 +452,7 @@ func printUsage() { odek mcp [--sandbox] odek telegram odek schedule + odek memory > odek version Commands: @@ -477,6 +478,10 @@ Commands: Subcommands: list, add, rm, enable, disable, run, next, daemon The daemon (or the Telegram bot) fires jobs and delivers results to stdout, a log, or a Telegram chat. + memory Review and promote past-session memory episodes + list: show episodes excluded from recall (untrusted) + promote : approve one so it can be recalled. + Human-gated on purpose — not available to the agent. init Create a config file (default: ./odek.json) version Print version and exit diff --git a/cmd/odek/memory_cmd.go b/cmd/odek/memory_cmd.go new file mode 100644 index 0000000..e011417 --- /dev/null +++ b/cmd/odek/memory_cmd.go @@ -0,0 +1,66 @@ +package main + +import ( + "fmt" + "os" + "strings" + + "github.com/BackendStack21/odek/internal/memory" +) + +// memoryCmd handles `odek memory [args]`. +// +// This is the human-gated surface for the episode-memory trust control. +// Episodes whose originating session touched external content (web/http/MCP/ +// audio, or reads of sensitive paths) are stored but excluded from recall +// until a human promotes them. Promotion lives HERE — on the CLI — and is +// deliberately NOT exposed as an agent tool, so a prompt-injected agent cannot +// approve its own poisoned memory. +func memoryCmd(args []string) error { + if len(args) == 0 { + fmt.Fprintf(os.Stderr, "Usage: odek memory [args]\n") + return nil + } + + dir := expandHome("~/.odek/memory") + store := memory.NewEpisodeStore(dir, nil) + + sub := args[0] + subArgs := args[1:] + + switch sub { + case "list", "ls", "pending": + pending, err := store.PendingReview() + if err != nil { + return err + } + if len(pending) == 0 { + fmt.Println("No episodes pending review — all stored episodes are recallable.") + return nil + } + fmt.Printf("%d episode(s) pending review (excluded from recall until promoted):\n\n", len(pending)) + for _, ep := range pending { + fmt.Printf("• %s (%d turns, %s)\n", ep.SessionID, ep.Turns, ep.CreatedAt.Format("2006-01-02 15:04")) + if len(ep.Provenance.Sources) > 0 { + fmt.Printf(" sources: %s\n", strings.Join(ep.Provenance.Sources, ", ")) + } + fmt.Printf(" %s\n\n", ep.Summary) + } + fmt.Println("Review the summary above, then promote with: odek memory promote ") + return nil + + case "promote": + if len(subArgs) == 0 { + return fmt.Errorf("usage: odek memory promote ") + } + id := subArgs[0] + if err := store.Promote(id); err != nil { + return err + } + fmt.Printf("odek: promoted episode %q — it can now be recalled into future sessions\n", id) + return nil + + default: + return fmt.Errorf("unknown memory subcommand %q (expected: list, promote)", sub) + } +} diff --git a/cmd/odek/memory_cmd_test.go b/cmd/odek/memory_cmd_test.go new file mode 100644 index 0000000..8d9b4a9 --- /dev/null +++ b/cmd/odek/memory_cmd_test.go @@ -0,0 +1,53 @@ +package main + +import ( + "path/filepath" + "testing" + + "github.com/BackendStack21/odek/internal/memory" +) + +// TestMemoryCmd_ListAndPromote exercises the human-gated promote path end to +// end through the CLI command: a seeded untrusted episode is pending, the +// command promotes it, and the approval is persisted to the on-disk index. +func TestMemoryCmd_ListAndPromote(t *testing.T) { + home := setupTestHome(t) + dir := filepath.Join(home, ".odek", "memory") + + es := memory.NewEpisodeStore(dir, nil) + if err := es.WriteWithProvenance("20260108-web", "researched a library", 5, + memory.EpisodeProvenance{Untrusted: true, Sources: []string{"browser"}}); err != nil { + t.Fatalf("seed: %v", err) + } + + if err := memoryCmd([]string{"list"}); err != nil { + t.Fatalf("memory list: %v", err) + } + if err := memoryCmd([]string{"promote", "20260108-web"}); err != nil { + t.Fatalf("memory promote: %v", err) + } + + fresh := memory.NewEpisodeStore(dir, nil) + idx, err := fresh.ReadIndex() + if err != nil { + t.Fatalf("read index: %v", err) + } + if len(idx) != 1 || !idx[0].Provenance.UserApproved { + t.Errorf("episode not approved after promote: %+v", idx) + } + + if err := memoryCmd([]string{"promote", "does-not-exist"}); err == nil { + t.Error("promoting an unknown id should error") + } + if err := memoryCmd([]string{"bogus"}); err == nil { + t.Error("unknown subcommand should error") + } +} + +// TestMemoryCmd_ListEmpty: list on a clean home must not error. +func TestMemoryCmd_ListEmpty(t *testing.T) { + setupTestHome(t) + if err := memoryCmd([]string{"list"}); err != nil { + t.Fatalf("memory list on empty home: %v", err) + } +} diff --git a/docs/CONFIG.md b/docs/CONFIG.md index ec4d8ff..e9dcf6b 100644 --- a/docs/CONFIG.md +++ b/docs/CONFIG.md @@ -200,7 +200,8 @@ The `memory` section controls the persistent memory system (see [docs/MEMORY.md] "llm_extract": true, "llm_consolidate": true, "merge_threshold": 0.7, - "add_threshold": 0.3 + "add_threshold": 0.3, + "auto_approve_episodes": false } } ``` @@ -213,12 +214,13 @@ The `memory` section controls the persistent memory system (see [docs/MEMORY.md] | `buffer_lines` | 20 | Max turn summaries in session buffer | | `buffer_enabled` | true | Enable the turn-level buffer | | `merge_on_write` | true | Use go-vector RP similarity to auto-merge related entries | -| `extract_on_end` | true | Extract durable facts via LLM at session end (≥3 turns) | +| `extract_on_end` | true | At session end (≥3 turns), extract a narrative episode summary via LLM for later recall | | `llm_search` | true | Use LLM to rank episode search results by relevance | | `llm_extract` | true | Use LLM for end-of-session fact extraction | | `llm_consolidate` | true | Use LLM to merge related fact entries | | `merge_threshold` | 0.7 | go-vector cosine threshold for auto-merge (0.0–1.0) | | `add_threshold` | 0.3 | go-vector cosine threshold for auto-add (0.0–1.0) | +| `auto_approve_episodes` | false | **Security trade-off.** When true, untrusted episodes (sessions that touched web/MCP/out-of-workspace content) are auto-approved at session end so they are recalled without a manual `odek memory promote`. Leaving it `false` keeps the human review gate (recommended). | ## Sub-agent configuration diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 3eeb34c..e25e4c5 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -100,11 +100,25 @@ Both: `internal/memory` tracks `EpisodeProvenance{Untrusted, Sources, UserApproved}` for every episode. An episode derived from a session that ingested untrusted content is **stored on disk for audit but never auto-replayed** into future sessions. This stops a single successful injection from becoming a persistent backdoor through the episode pipeline. -To use a tainted episode anyway, the user must explicitly promote it (set `UserApproved=true`). +Taint is decided per tool call by `memory.ToolCallTaints` (the single source of truth, shared with skills): + +- **Always untrusted:** `browser`, `http_batch`, `transcribe` (network / opaque-audio content), `session_search` (recall of prior-session transcripts, which may carry earlier-injected text), and any MCP tool (`server__tool`). +- **Path-reading tools** (`read_file`, `search_files`, `multi_grep`, `batch_read`, `json_query`, `head_tail`, `count_lines`, `checksum`, `word_count`, `sort`, `tr`, `diff`, `file_info`, `glob`, `tree`, `base64`) taint when **any** of their path arguments resolves **outside the workspace trust zone** — the workspace dir, the sandbox `/workspace` mount, or `~/.odek`. Reads confined to the workspace stay trusted, so ordinary coding sessions remain recallable; reads of anything else (system/credential paths, home files, sibling repos) taint. The check is a workspace-containment allowlist rather than a sensitive-path denylist, and it resolves symlinks (so e.g. `/etc` → `/private/etc` on macOS cannot disguise an escape). A malformed argument string is treated conservatively as untrusted. When adding a new file-reading tool, add it to `PathReadingTools`. + +To use a tainted episode anyway, the user explicitly promotes it (sets `UserApproved=true`) from the CLI: + +``` +odek memory list # episodes excluded from recall, with their sources +odek memory promote # approve one after reviewing its summary +``` + +Promotion is **CLI-only and human-gated** — it is deliberately *not* exposed as an agent tool, so a prompt-injected agent cannot self-approve its own poisoned memory. + +**Opt-out of the gate (`memory.auto_approve_episodes`, default `false`).** Operators who accept the risk (e.g. a fully sandboxed, single-tenant deployment) can set `auto_approve_episodes: true` to have untrusted episodes stamped `AutoApproved` at session end so they are recalled without a manual promote. This **disables the persistence-injection protection** for episodes — a single successful injection can then influence future sessions automatically — so it is off by default and should stay off in any environment exposed to untrusted input. The on-disk record still keeps `Untrusted=true` and `Sources`, and uses a distinct `AutoApproved` flag (never `UserApproved`) so the audit trail shows the approval was automatic. ### 6. Skill provenance gate -`internal/skills` carries the same provenance model. Skills auto-saved from sessions that touched `browser` / `http_batch` / `read_file` / `search_files` / `multi_grep` / `transcribe` / any MCP tool are tagged with `Provenance.Untrusted=true` and `NeedsReview=true`. The skill loader pins those skills to the Lazy set regardless of their `auto_load` flag. +`internal/skills` carries the same provenance model and shares the exact taint decision (`memory.ToolCallTaints`). Skills auto-saved from sessions that crossed the trust boundary — `browser` / `http_batch` / `transcribe` / any MCP tool, or a `read_file` / `search_files` / `multi_grep` of a **sensitive** path — are tagged with `Provenance.Untrusted=true` and `NeedsReview=true`. The skill loader pins those skills to the Lazy set regardless of their `auto_load` flag. After reviewing the skill body, promote it: diff --git a/internal/config/loader.go b/internal/config/loader.go index 85b8068..e63742b 100644 --- a/internal/config/loader.go +++ b/internal/config/loader.go @@ -649,22 +649,22 @@ func LoadConfig(cli CLIFlags) ResolvedConfig { MaxIter: cfg.MaxIter, System: cfg.System, - SandboxImage: cfg.SandboxImage, // empty = resolve at call site (Dockerfile.odek or alpine:latest) - SandboxNetwork: ifZero(cfg.SandboxNetwork, DefaultSandboxNetwork), - SandboxMemory: cfg.SandboxMemory, - SandboxCPUs: cfg.SandboxCPUs, - SandboxUser: cfg.SandboxUser, - SandboxEnv: cfg.SandboxEnv, - SandboxVolumes: cfg.SandboxVolumes, - Skills: resolveSkills(cfg.Skills), - Dangerous: resolveDangerous(cfg.Dangerous), - Memory: resolveMemory(cfg.Memory), - MCPServers: cfg.MCPServers, - Telegram: resolveTelegram(cfg.Telegram), - Transcription: resolveTranscription(cfg.Transcription), - Schedules: resolveSchedules(cfg.Schedules), - InteractionMode: ifZero(cfg.InteractionMode, "engaging"), - ToolProgress: ifZero(cfg.ToolProgress, "all"), + SandboxImage: cfg.SandboxImage, // empty = resolve at call site (Dockerfile.odek or alpine:latest) + SandboxNetwork: ifZero(cfg.SandboxNetwork, DefaultSandboxNetwork), + SandboxMemory: cfg.SandboxMemory, + SandboxCPUs: cfg.SandboxCPUs, + SandboxUser: cfg.SandboxUser, + SandboxEnv: cfg.SandboxEnv, + SandboxVolumes: cfg.SandboxVolumes, + Skills: resolveSkills(cfg.Skills), + Dangerous: resolveDangerous(cfg.Dangerous), + Memory: resolveMemory(cfg.Memory), + MCPServers: cfg.MCPServers, + Telegram: resolveTelegram(cfg.Telegram), + Transcription: resolveTranscription(cfg.Transcription), + Schedules: resolveSchedules(cfg.Schedules), + InteractionMode: ifZero(cfg.InteractionMode, "engaging"), + ToolProgress: ifZero(cfg.ToolProgress, "all"), } // MaxConcurrency: default to 3 if not set @@ -850,6 +850,9 @@ func resolveMemory(cfg *memory.MemoryConfig) memory.MemoryConfig { if cfg.MinTurnsForExtraction > 0 { def.MinTurnsForExtraction = cfg.MinTurnsForExtraction } + if cfg.AutoApproveEpisodes != nil { + def.AutoApproveEpisodes = cfg.AutoApproveEpisodes + } return def } diff --git a/internal/danger/classifier.go b/internal/danger/classifier.go index bb32ebc..5b73e62 100644 --- a/internal/danger/classifier.go +++ b/internal/danger/classifier.go @@ -145,6 +145,15 @@ func ClassifyPath(path string) RiskClass { } abs = filepath.Clean(abs) + // macOS canonicalizes /etc, /var, and /tmp as symlinks under /private. + // Strip the /private prefix so the sensitivity checks below match + // consistently — e.g. /private/etc/master.passwd must classify the same + // as /etc/master.passwd (system_write), and /private/var/folders/... must + // still resolve to the temp dir (local_write). + if strings.HasPrefix(abs, "/private/") { + abs = strings.TrimPrefix(abs, "/private") + } + for _, prefix := range []string{"/boot", "/dev", "/proc", "/sys", "/mnt", "/media"} { if strings.HasPrefix(abs, prefix) { return Destructive diff --git a/internal/memory/episodes.go b/internal/memory/episodes.go index 7834c3f..5fe0837 100644 --- a/internal/memory/episodes.go +++ b/internal/memory/episodes.go @@ -217,7 +217,7 @@ func (e *EpisodeStore) Search(query string, limit int) ([]EpisodeMeta, error) { // EpisodeProvenance exists to close. filtered := idx[:0:len(idx)] for _, ep := range idx { - if ep.Provenance.Untrusted && !ep.Provenance.UserApproved { + if ep.Provenance.Untrusted && !ep.Provenance.UserApproved && !ep.Provenance.AutoApproved { continue } filtered = append(filtered, ep) @@ -244,6 +244,59 @@ func (e *EpisodeStore) Search(query string, limit int) ([]EpisodeMeta, error) { return ranked, nil } +// ── Promotion (human-gated escape hatch) ────────────────────────────── + +// Promote marks a tainted episode as user-approved so it can be replayed +// into future sessions. This is the human-gated escape hatch for episodes +// whose originating session legitimately touched external content. It is +// intentionally NOT exposed to the agent (only via `odek memory promote`) so +// that a prompt-injected agent cannot self-approve poisoned memory. +// +// Returns an error if the session is unknown or already approved. +func (e *EpisodeStore) Promote(sessionID string) error { + if err := session.ValidateSessionID(sessionID); err != nil { + return fmt.Errorf("memory: episodes promote: %w", err) + } + e.mu.Lock() + defer e.mu.Unlock() + + idx, err := e.ReadIndex() + if err != nil { + return err + } + found := false + for i := range idx { + if idx[i].SessionID == sessionID { + found = true + if idx[i].Provenance.UserApproved { + return fmt.Errorf("memory: episode %q is already approved", sessionID) + } + idx[i].Provenance.UserApproved = true + } + } + if !found { + return fmt.Errorf("memory: episode %q not found", sessionID) + } + return e.writeIndex(idx) +} + +// PendingReview returns the episodes that are untrusted and not yet +// user-approved — the ones currently excluded from recall that a user may +// want to promote. Ordered newest-first (as ReadIndex returns them). +func (e *EpisodeStore) PendingReview() ([]EpisodeMeta, error) { + idx, err := e.ReadIndex() + if err != nil { + return nil, err + } + var pending []EpisodeMeta + for _, ep := range idx { + if ep.Provenance.Untrusted && !ep.Provenance.UserApproved && !ep.Provenance.AutoApproved { + pending = append(pending, ep) + } + } + return pending, nil +} + // ── Index helpers ───────────────────────────────────────────────────── // addToIndex appends an entry to the index and writes it. diff --git a/internal/memory/episodes_promote_test.go b/internal/memory/episodes_promote_test.go new file mode 100644 index 0000000..406ba9b --- /dev/null +++ b/internal/memory/episodes_promote_test.go @@ -0,0 +1,195 @@ +package memory + +import "testing" + +// ── Auto-approve episode learnings (opt-in, off by default) ─────────── + +// TestEpisode_AutoApprovedIsRecallable: an episode stamped AutoApproved is +// recalled by Search and is not listed as pending — same effect as a human +// promote, but recorded as automatic. +func TestEpisode_AutoApprovedIsRecallable(t *testing.T) { + es := NewEpisodeStore(t.TempDir(), nil) + if err := es.WriteWithProvenance("20260301-auto", "auto-approved external research", 5, + EpisodeProvenance{Untrusted: true, AutoApproved: true, Sources: []string{"browser"}}); err != nil { + t.Fatalf("write: %v", err) + } + res, err := es.Search("any", 10) + if err != nil { + t.Fatalf("search: %v", err) + } + if len(res) != 1 || res[0].SessionID != "20260301-auto" { + t.Errorf("auto-approved episode should be recalled, got %v", res) + } + pending, _ := es.PendingReview() + if len(pending) != 0 { + t.Errorf("auto-approved episode should not be pending, got %v", pending) + } +} + +// TestDefaultMemoryConfig_AutoApproveOff: the secure default is false. +func TestDefaultMemoryConfig_AutoApproveOff(t *testing.T) { + d := DefaultMemoryConfig() + if d.AutoApproveEpisodes == nil || *d.AutoApproveEpisodes { + t.Errorf("AutoApproveEpisodes default should be false, got %v", d.AutoApproveEpisodes) + } +} + +// TestOnSessionEnd_AutoApproveStamping: with the flag on, an untrusted episode +// extracted at session end is stamped AutoApproved (not UserApproved) and is +// recallable; with the flag off (default) it stays pending/excluded. +func TestOnSessionEnd_AutoApproveStamping(t *testing.T) { + llm := &mockLLM{responses: map[string]string{"Summarize": "researched a library online"}} + msgs := []string{"user: hi", "assistant: ok", "user: go", "assistant: done"} + prov := EpisodeProvenance{Untrusted: true, Sources: []string{"browser"}} + + // Flag ON → auto-approved + recallable. + on := DefaultMemoryConfig() + on.AutoApproveEpisodes = boolPtr(true) + mOn := NewMemoryManager(t.TempDir(), llm, on) + mOn.OnSessionEndWithProvenance("20260303-on", 5, msgs, prov) + + idx, err := mOn.episodes.ReadIndex() + if err != nil || len(idx) != 1 { + t.Fatalf("expected 1 episode, got %v err=%v", idx, err) + } + p := idx[0].Provenance + if !p.Untrusted || !p.AutoApproved || p.UserApproved { + t.Errorf("flag-on episode provenance = %+v; want Untrusted+AutoApproved, not UserApproved", p) + } + if res, _ := mOn.SearchEpisodes("any", 10); len(res) != 1 { + t.Errorf("flag-on episode should be recallable, got %v", res) + } + if pend, _ := mOn.PendingReviewEpisodes(); len(pend) != 0 { + t.Errorf("flag-on episode should not be pending, got %v", pend) + } + + // Flag OFF (default) → stays untrusted, excluded, pending. + mOff := NewMemoryManager(t.TempDir(), llm, DefaultMemoryConfig()) + mOff.OnSessionEndWithProvenance("20260304-off", 5, msgs, prov) + if res, _ := mOff.SearchEpisodes("any", 10); len(res) != 0 { + t.Errorf("flag-off untrusted episode must be excluded from recall, got %v", res) + } + if pend, _ := mOff.PendingReviewEpisodes(); len(pend) != 1 { + t.Errorf("flag-off untrusted episode should be pending, got %v", pend) + } +} + +// TestEpisode_PromoteMakesRecallable is the escape-hatch test: a tainted +// episode is excluded from recall and listed as pending; after Promote it is +// recallable and no longer pending. +func TestEpisode_PromoteMakesRecallable(t *testing.T) { + es := NewEpisodeStore(t.TempDir(), nil) + if err := es.WriteWithProvenance("20260105-web", "researched a library", 5, + EpisodeProvenance{Untrusted: true, Sources: []string{"browser"}}); err != nil { + t.Fatalf("write: %v", err) + } + + // Pending before promote; excluded from Search. + pending, err := es.PendingReview() + if err != nil { + t.Fatalf("pending: %v", err) + } + if len(pending) != 1 || pending[0].SessionID != "20260105-web" { + t.Fatalf("PendingReview = %v, want the tainted episode", pending) + } + res, _ := es.Search("any", 10) + for _, ep := range res { + if ep.SessionID == "20260105-web" { + t.Fatal("tainted episode should be excluded from Search before promote") + } + } + + // Promote it. + if err := es.Promote("20260105-web"); err != nil { + t.Fatalf("promote: %v", err) + } + + // Now recallable and no longer pending. + pending, _ = es.PendingReview() + if len(pending) != 0 { + t.Errorf("PendingReview after promote = %v, want empty", pending) + } + res, _ = es.Search("any", 10) + var saw bool + for _, ep := range res { + if ep.SessionID == "20260105-web" { + saw = true + } + } + if !saw { + t.Errorf("promoted episode should be returned by Search, got %v", res) + } +} + +// TestEpisode_PromotePersists verifies the approval survives a fresh store +// (i.e. it was written back to the on-disk index). +func TestEpisode_PromotePersists(t *testing.T) { + dir := t.TempDir() + es := NewEpisodeStore(dir, nil) + if err := es.WriteWithProvenance("20260106-x", "s", 5, + EpisodeProvenance{Untrusted: true, Sources: []string{"http_batch"}}); err != nil { + t.Fatalf("write: %v", err) + } + if err := es.Promote("20260106-x"); err != nil { + t.Fatalf("promote: %v", err) + } + + fresh := NewEpisodeStore(dir, nil) + idx, err := fresh.ReadIndex() + if err != nil { + t.Fatalf("read index: %v", err) + } + if len(idx) != 1 || !idx[0].Provenance.UserApproved { + t.Errorf("UserApproved did not persist: %+v", idx) + } +} + +// TestMemoryManager_PromoteAndPending covers the manager-level wrappers +// (PromoteEpisode / PendingReviewEpisodes), including the disabled-memory guard. +func TestMemoryManager_PromoteAndPending(t *testing.T) { + dir := t.TempDir() + m := NewMemoryManager(dir, nil, MemoryConfig{Enabled: boolPtr(true)}) + if err := m.episodes.WriteWithProvenance("20260201-web", "researched X", 5, + EpisodeProvenance{Untrusted: true, Sources: []string{"browser"}}); err != nil { + t.Fatalf("seed: %v", err) + } + + pending, err := m.PendingReviewEpisodes() + if err != nil || len(pending) != 1 { + t.Fatalf("PendingReviewEpisodes = %v, err=%v; want 1", pending, err) + } + if err := m.PromoteEpisode("20260201-web"); err != nil { + t.Fatalf("PromoteEpisode: %v", err) + } + pending, _ = m.PendingReviewEpisodes() + if len(pending) != 0 { + t.Errorf("after promote, pending = %v, want empty", pending) + } + + // Disabled memory: both wrappers must error rather than touch the store. + off := NewMemoryManager(dir, nil, MemoryConfig{Enabled: boolPtr(false)}) + if err := off.PromoteEpisode("20260201-web"); err == nil { + t.Error("PromoteEpisode on disabled memory should error") + } + if _, err := off.PendingReviewEpisodes(); err == nil { + t.Error("PendingReviewEpisodes on disabled memory should error") + } +} + +func TestEpisode_PromoteErrors(t *testing.T) { + es := NewEpisodeStore(t.TempDir(), nil) + if err := es.WriteWithProvenance("20260107-a", "s", 5, + EpisodeProvenance{Untrusted: true}); err != nil { + t.Fatalf("write: %v", err) + } + + if err := es.Promote("does-not-exist"); err == nil { + t.Error("promoting an unknown episode should error") + } + if err := es.Promote("20260107-a"); err != nil { + t.Fatalf("first promote: %v", err) + } + if err := es.Promote("20260107-a"); err == nil { + t.Error("promoting an already-approved episode should error") + } +} diff --git a/internal/memory/memory.go b/internal/memory/memory.go index 1f7ed9f..01cf326 100644 --- a/internal/memory/memory.go +++ b/internal/memory/memory.go @@ -37,6 +37,13 @@ type MemoryConfig struct { MergeThreshold float32 `json:"merge_threshold,omitempty"` AddThreshold float32 `json:"add_threshold,omitempty"` MinTurnsForExtraction int `json:"min_turns_for_extraction,omitempty"` + + // AutoApproveEpisodes, when true, stamps untrusted episodes as approved at + // session-end so they are recalled without a manual `odek memory promote`. + // SECURITY: this is the opt-in escape valve that trades the human review + // gate for convenience — a session that ingested external/untrusted content + // can then influence future sessions automatically. Off (false) by default. + AutoApproveEpisodes *bool `json:"auto_approve_episodes,omitempty"` } // BoolPtr returns a pointer to a bool value. @@ -60,6 +67,7 @@ func DefaultMemoryConfig() MemoryConfig { MergeThreshold: MergeThreshold, AddThreshold: AddThreshold, MinTurnsForExtraction: defaultMinTurnsForExtraction, + AutoApproveEpisodes: boolPtr(false), // secure default — human gate stays on } } @@ -134,6 +142,9 @@ func NewMemoryManager(memoryDir string, llc LLMClient, cfg MemoryConfig) *Memory if cfg.MinTurnsForExtraction > 0 { def.MinTurnsForExtraction = cfg.MinTurnsForExtraction } + if cfg.AutoApproveEpisodes != nil { + def.AutoApproveEpisodes = cfg.AutoApproveEpisodes + } cfg = def factsDir := memoryDir @@ -490,6 +501,14 @@ func (m *MemoryManager) OnSessionEndWithProvenance(sessionID string, turns int, return } + // Opt-in auto-approval: stamp untrusted episodes as approved so they are + // recalled without a manual `odek memory promote`. Off by default; the + // audit record keeps Untrusted + Sources so it stays clear the content was + // external and the approval was automatic (AutoApproved, not UserApproved). + if prov.Untrusted && m.cfg.AutoApproveEpisodes != nil && *m.cfg.AutoApproveEpisodes { + prov.AutoApproved = true + } + // Write as episode, preserving the provenance signal. m.episodes.WriteIfEnoughWithProvenance(sessionID, extraction, turns, prov) } @@ -499,10 +518,31 @@ func (m *MemoryManager) SearchEpisodes(query string, limit int) ([]EpisodeMeta, return m.episodes.Search(query, limit) } -// FormatEpisodeContext searches episodes with a recency-based ranker -// (no LLM — safe for per-turn use without recursion risk) and returns -// formatted context to inject as a system message. Returns empty string -// if no episodes found or memory is disabled. +// PromoteEpisode marks a tainted episode as user-approved so it can be +// recalled into future sessions. Human-gated escape hatch — see +// EpisodeStore.Promote. +func (m *MemoryManager) PromoteEpisode(sessionID string) error { + if m.cfg.Enabled == nil || !*m.cfg.Enabled { + return fmt.Errorf("memory: disabled") + } + return m.episodes.Promote(sessionID) +} + +// PendingReviewEpisodes lists episodes that are untrusted and not yet +// user-approved (currently excluded from recall). +func (m *MemoryManager) PendingReviewEpisodes() ([]EpisodeMeta, error) { + if m.cfg.Enabled == nil || !*m.cfg.Enabled { + return nil, fmt.Errorf("memory: disabled") + } + return m.episodes.PendingReview() +} + +// FormatEpisodeContext searches past episodes for ones relevant to the +// current task and returns formatted context to inject as a system message. +// Ranking uses the episode store's configured RankStrategy (the LLM ranker by +// default, RP similarity otherwise — see NewMemoryManager). Untrusted, +// unpromoted episodes are excluded by Search. Returns empty string if no +// episodes are found or memory is disabled. func (m *MemoryManager) FormatEpisodeContext(query string) string { if m.cfg.Enabled == nil || !*m.cfg.Enabled { return "" diff --git a/internal/memory/provenance.go b/internal/memory/provenance.go index 82caa02..de0df1a 100644 --- a/internal/memory/provenance.go +++ b/internal/memory/provenance.go @@ -1,6 +1,9 @@ package memory import ( + "encoding/json" + "os" + "path/filepath" "strings" "github.com/BackendStack21/odek/internal/llm" @@ -10,49 +13,210 @@ import ( // produced an episode. The default zero value means trusted. // // An untrusted episode is one whose originating session ingested -// content from outside the agent's trust boundary (fetched pages, files -// outside the working directory, MCP tool output, audio transcription). -// Such episodes are stored on disk for audit but are NEVER auto- -// replayed into future sessions — they must be explicitly promoted -// (UserApproved=true) by the user first. This stops a one-shot prompt -// injection from becoming a persistent backdoor. +// content from outside the agent's trust boundary (fetched pages, MCP +// tool output, audio transcription, prior-session recall, or reads of +// files outside the workspace). Such episodes are stored on disk for +// audit but are NEVER auto-replayed into future sessions — they must be +// explicitly promoted (UserApproved=true) by the user first (see +// `odek memory promote`). This stops a one-shot prompt injection from +// becoming a persistent backdoor. +// +// AutoApproved is the opt-in escape valve: when the operator sets +// memory.auto_approve_episodes=true, untrusted episodes are stamped +// AutoApproved at creation so they are recalled without a manual promote. +// It is kept distinct from UserApproved so the audit trail still shows the +// approval was automatic (policy) rather than a human decision; Untrusted and +// Sources remain recorded either way. type EpisodeProvenance struct { Untrusted bool `json:"untrusted,omitempty"` Sources []string `json:"sources,omitempty"` UserApproved bool `json:"user_approved,omitempty"` + AutoApproved bool `json:"auto_approved,omitempty"` +} + +// AlwaysExternalTools are tools whose RESULT content originates outside the +// agent's trust boundary regardless of their arguments: network fetches, +// opaque transcribed audio, and recall of prior-session transcripts (which may +// themselves carry previously-injected content). +var AlwaysExternalTools = map[string]bool{ + "browser": true, + "http_batch": true, + "transcribe": true, + "session_search": true, } -// UntrustedToolNames is the canonical set of tools whose results come from -// outside the agent's trust boundary. Any tool whose output reaches the -// model wrapped in belongs here. +// PathReadingTools are tools that read filesystem content (or structure) into +// the transcript. They taint the episode only when one of their path arguments +// resolves OUTSIDE the workspace (see pathReadEscapes) — reads confined to the +// workspace, or to odek's own ~/.odek state, stay trusted so ordinary coding +// sessions remain recallable. // -// This is the single source of truth — skills/selfimprove.go imports it -// to derive skill provenance from the same definition. -var UntrustedToolNames = map[string]bool{ - "browser": true, - "http_batch": true, - "transcribe": true, - "read_file": true, // conservative: covers /etc/, $HOME/.ssh, etc. +// This must list every tool that surfaces file contents/structure to the +// model. A tool missing here would let an injected agent read a secret into a +// TRUSTED, recallable episode; when adding a new file-reading tool, add it +// here too. +var PathReadingTools = map[string]bool{ + "read_file": true, "search_files": true, "multi_grep": true, + "batch_read": true, + "json_query": true, + "head_tail": true, + "count_lines": true, + "checksum": true, + "word_count": true, + "sort": true, + "tr": true, + "diff": true, + "file_info": true, + "glob": true, + "tree": true, + "base64": true, +} + +// UntrustedToolNames is the union of the two categories above. It is retained +// as the canonical "these tools can produce untrusted content" set for +// external references and documentation. The actual per-call decision is made +// by ToolCallTaints, which is argument-aware for the path-reading tools. +var UntrustedToolNames = func() map[string]bool { + m := make(map[string]bool, len(AlwaysExternalTools)+len(PathReadingTools)) + for k := range AlwaysExternalTools { + m[k] = true + } + for k := range PathReadingTools { + m[k] = true + } + return m +}() + +// ToolCallTaints reports whether a single recorded tool call crossed the +// agent's trust boundary. It is the single source of truth shared by episode +// (memory) and skill (skills) provenance so the two stay in lockstep. +// +// - MCP adapter calls (name contains "__") always taint — third-party servers +// return arbitrary text. +// - AlwaysExternalTools always taint, regardless of arguments. +// - PathReadingTools taint only when one of their path arguments resolves +// OUTSIDE the workspace trust zone (workspace dir, the sandbox /workspace +// mount, or ~/.odek). Symlinks are resolved so e.g. /etc → /private/etc on +// macOS cannot disguise an escape. A malformed argument string taints +// conservatively; absent/empty paths default to the workspace (trusted). +// - Everything else (shell, patch, write_file, …) is trusted. +func ToolCallTaints(name, argsJSON string) bool { + if strings.Contains(name, "__") { + return true + } + if AlwaysExternalTools[name] { + return true + } + if PathReadingTools[name] { + return pathReadEscapes(argsJSON) + } + return false +} + +// pathReadEscapes extracts every filesystem path argument from a path-reading +// tool call and reports whether any of them resolves outside the workspace +// trust zone. The known path-bearing argument shapes across odek's file tools +// are: "path", "path_a"/"path_b" (diff), and a "files":[{"path":…}] array +// (batch_read, head_tail, count_lines, checksum, word_count, sort). +func pathReadEscapes(argsJSON string) bool { + var a struct { + Path string `json:"path"` + PathA string `json:"path_a"` + PathB string `json:"path_b"` + Files []struct { + Path string `json:"path"` + } `json:"files"` + } + if err := json.Unmarshal([]byte(argsJSON), &a); err != nil { + return true // can't determine the paths → assume the worst + } + + roots := trustedRoots() + candidates := []string{a.Path, a.PathA, a.PathB} + for _, f := range a.Files { + candidates = append(candidates, f.Path) + } + + sawPath := false + for _, p := range candidates { + if strings.TrimSpace(p) == "" { + continue + } + sawPath = true + if pathOutsideRoots(p, roots) { + return true + } + } + // No path argument at all → the tool defaulted to the workspace (trusted). + _ = sawPath + return false +} + +// trustedRoots returns the set of directory prefixes within which a file read +// is considered inside the trust boundary: the current workspace (process cwd +// at session-end), the conventional sandbox mount "/workspace", and odek's own +// ~/.odek state directory. Each is included both as-is and symlink-resolved. +func trustedRoots() []string { + var roots []string + add := func(p string) { + if p == "" { + return + } + c := filepath.Clean(p) + roots = append(roots, c) + if r, err := filepath.EvalSymlinks(c); err == nil && r != c { + roots = append(roots, r) + } + } + if cwd, err := os.Getwd(); err == nil { + add(cwd) + } + add("/workspace") // sandbox mount point (see internal/sandbox) + if home, err := os.UserHomeDir(); err == nil { + add(filepath.Join(home, ".odek")) + } + return roots +} + +// pathOutsideRoots reports whether p resolves outside every trusted root. +// The path is checked both as filepath.Abs(p) and symlink-resolved, so a +// symlinked sensitive path (e.g. /etc → /private/etc) cannot evade detection. +func pathOutsideRoots(p string, roots []string) bool { + abs, err := filepath.Abs(p) + if err != nil { + return true // unresolvable → conservative + } + abs = filepath.Clean(abs) + cands := []string{abs} + if r, err := filepath.EvalSymlinks(abs); err == nil && r != abs { + cands = append(cands, r) + } + for _, c := range cands { + for _, root := range roots { + if c == root || strings.HasPrefix(c, root+string(filepath.Separator)) { + return false // inside a trusted root + } + } + } + return true } // DeriveProvenance walks a session's structured messages and returns // the provenance an episode derived from those messages should carry. -// A message qualifies as untrusted if it contains a tool call whose -// name is in untrustedToolNames OR follows the MCP adapter naming -// convention (server__tool). +// A message taints the episode if it contains a tool call that crossed +// the trust boundary per ToolCallTaints. func DeriveProvenance(messages []llm.Message) EpisodeProvenance { prov := EpisodeProvenance{} seen := make(map[string]bool) for _, m := range messages { for _, tc := range m.ToolCalls { - name := tc.Function.Name - tainted := UntrustedToolNames[name] || strings.Contains(name, "__") - if !tainted { + if !ToolCallTaints(tc.Function.Name, tc.Function.Arguments) { continue } prov.Untrusted = true + name := tc.Function.Name if !seen[name] { seen[name] = true prov.Sources = append(prov.Sources, name) diff --git a/internal/memory/provenance_test.go b/internal/memory/provenance_test.go index b3ec997..92bf881 100644 --- a/internal/memory/provenance_test.go +++ b/internal/memory/provenance_test.go @@ -1,6 +1,8 @@ package memory import ( + "os" + "path/filepath" "testing" "github.com/BackendStack21/odek/internal/llm" @@ -15,6 +17,18 @@ func toolMsg(name string) llm.Message { } } +// toolMsgArgs builds an assistant message with one tool call carrying the +// given raw JSON arguments string (as recorded on real sessions). +func toolMsgArgs(name, argsJSON string) llm.Message { + tc := llm.ToolCall{} + tc.Function.Name = name + tc.Function.Arguments = argsJSON + return llm.Message{ + Role: "assistant", + ToolCalls: []llm.ToolCall{tc}, + } +} + func TestDeriveProvenance_Empty(t *testing.T) { prov := DeriveProvenance(nil) if prov.Untrusted { @@ -46,10 +60,113 @@ func TestDeriveProvenance_MCPAdapterTaints(t *testing.T) { } } -func TestDeriveProvenance_ReadFileTaints(t *testing.T) { - prov := DeriveProvenance([]llm.Message{toolMsg("read_file")}) +// ── Path-aware taint (the fix) ──────────────────────────────────────── + +// A read confined to the workspace (relative path) must NOT taint — this is +// the headline behavior change that makes episodes from normal coding +// sessions recallable again. +func TestDeriveProvenance_ReadFileWorkspaceTrusted(t *testing.T) { + for _, p := range []string{"internal/x.go", "./README.md", "cmd/odek/main.go"} { + prov := DeriveProvenance([]llm.Message{ + toolMsg("shell"), + toolMsgArgs("read_file", `{"path":"`+p+`"}`), + }) + if prov.Untrusted { + t.Errorf("read_file %q is in-workspace and must be trusted, got %+v", p, prov) + } + } +} + +// search_files / multi_grep with no path default to the workspace → trusted. +func TestDeriveProvenance_SearchDefaultPathTrusted(t *testing.T) { + msgs := []llm.Message{ + toolMsgArgs("search_files", `{"pattern":"TODO","file_glob":"*.go"}`), + toolMsgArgs("multi_grep", `{"patterns":["a","b"]}`), + } + prov := DeriveProvenance(msgs) + if prov.Untrusted { + t.Errorf("workspace-default search should be trusted, got %+v", prov) + } +} + +// A read of a sensitive system path still taints — the original concern the +// provenance control exists for. +func TestDeriveProvenance_ReadFileSensitivePathTaints(t *testing.T) { + prov := DeriveProvenance([]llm.Message{ + toolMsgArgs("read_file", `{"path":"/etc/passwd"}`), + }) if !prov.Untrusted { - t.Errorf("read_file should taint (conservative — file may be outside CWD), got %+v", prov) + t.Fatalf("/etc/passwd read must taint, got %+v", prov) + } + if len(prov.Sources) != 1 || prov.Sources[0] != "read_file" { + t.Errorf("Sources = %v, want [read_file]", prov.Sources) + } +} + +// Home credential dirs (resolved absolutely) still taint. +func TestDeriveProvenance_ReadFileHomeSecretTaints(t *testing.T) { + home, err := os.UserHomeDir() + if err != nil || home == "" { + t.Skip("no home dir") + } + secret := filepath.Join(home, ".ssh", "id_rsa") + prov := DeriveProvenance([]llm.Message{ + toolMsgArgs("read_file", `{"path":"`+secret+`"}`), + }) + if !prov.Untrusted { + t.Errorf("%s read must taint, got %+v", secret, prov) + } +} + +// Malformed / empty argument strings are treated conservatively (taint), +// since we cannot tell what path was touched. +func TestDeriveProvenance_ReadFileMalformedArgsTaints(t *testing.T) { + for _, args := range []string{"", "not json", "{"} { + prov := DeriveProvenance([]llm.Message{toolMsgArgs("read_file", args)}) + if !prov.Untrusted { + t.Errorf("malformed read_file args %q should conservatively taint, got %+v", args, prov) + } + } +} + +// Network / audio tools always taint regardless of arguments. +func TestDeriveProvenance_AlwaysExternalToolsTaint(t *testing.T) { + for _, name := range []string{"http_batch", "transcribe"} { + prov := DeriveProvenance([]llm.Message{toolMsgArgs(name, `{"path":"internal/x.go"}`)}) + if !prov.Untrusted { + t.Errorf("%s must always taint, got %+v", name, prov) + } + } +} + +// ToolCallTaints is the shared predicate used by both memory and skills. +func TestToolCallTaints(t *testing.T) { + cases := []struct { + name, args string + want bool + }{ + {"shell", `{"command":"ls"}`, false}, + {"write_file", `{"path":"/etc/x"}`, false}, // not a read tool + {"read_file", `{"path":"internal/x.go"}`, false}, + {"read_file", `{"path":"/etc/shadow"}`, true}, + {"read_file", `{"path":"/private/etc/master.passwd"}`, true}, // macOS /private symlink + {"read_file", ``, true}, + {"search_files", `{"pattern":"x"}`, false}, + {"read_file", `{"path":"/workspace/foo.go"}`, false}, // sandbox mount is trusted + // Broadened file-reading tool coverage (was the D-01 bypass): + {"batch_read", `{"files":[{"path":"/etc/shadow"}]}`, true}, + {"batch_read", `{"files":[{"path":"internal/x.go"}]}`, false}, + {"json_query", `{"path":"/etc/passwd"}`, true}, + {"diff", `{"path_a":"/etc/hosts","path_b":"a.txt"}`, true}, // any path escaping taints + {"count_lines", `{"files":[{"path":"go.mod"}]}`, false}, + {"session_search", `{"query":"password"}`, true}, // recall of prior transcripts + {"browser", `{"url":"https://x"}`, true}, + {"github__list_issues", `{}`, true}, + } + for _, c := range cases { + if got := ToolCallTaints(c.name, c.args); got != c.want { + t.Errorf("ToolCallTaints(%q,%q) = %v, want %v", c.name, c.args, got, c.want) + } } } diff --git a/internal/skills/provenance_test.go b/internal/skills/provenance_test.go index 2fe222d..8076eba 100644 --- a/internal/skills/provenance_test.go +++ b/internal/skills/provenance_test.go @@ -13,6 +13,44 @@ func msgWithTool(name string) LlmMessage { } } +func msgWithToolArgs(name, argsJSON string) LlmMessage { + tc := LlmToolCall{} + tc.Function.Name = name + tc.Function.Arguments = argsJSON + return LlmMessage{ + Role: "assistant", + ToolCalls: []LlmToolCall{tc}, + } +} + +// Mirrors the memory fix: a skill learned from a session that only read +// workspace files must NOT be flagged for review. This also guards the +// llm.Message → LlmMessage conversion: if Arguments were dropped upstream, +// the path tool would read as empty-args and conservatively taint, failing +// this test. +func TestDeriveProvenance_WorkspaceReadTrusted(t *testing.T) { + msgs := []LlmMessage{ + msgWithTool("shell"), + msgWithToolArgs("read_file", `{"path":"internal/x.go"}`), + msgWithToolArgs("search_files", `{"pattern":"TODO"}`), + } + prov := DeriveProvenance(msgs) + if prov.Untrusted || prov.NeedsReview { + t.Errorf("workspace-only session should yield a trusted skill, got %+v", prov) + } +} + +func TestDeriveProvenance_SensitiveReadTaints(t *testing.T) { + msgs := []LlmMessage{msgWithToolArgs("read_file", `{"path":"/etc/passwd"}`)} + prov := DeriveProvenance(msgs) + if !prov.Untrusted || !prov.NeedsReview { + t.Fatalf("reading /etc/passwd should taint the skill, got %+v", prov) + } + if len(prov.Sources) != 1 || prov.Sources[0] != "read_file" { + t.Errorf("Sources = %v, want [read_file]", prov.Sources) + } +} + func TestDeriveProvenance_EmptyIsTrusted(t *testing.T) { prov := DeriveProvenance(nil) if prov.Untrusted { diff --git a/internal/skills/selfimprove.go b/internal/skills/selfimprove.go index 64c0557..f686652 100644 --- a/internal/skills/selfimprove.go +++ b/internal/skills/selfimprove.go @@ -600,32 +600,29 @@ func PassesQualityGate(s SkillSuggestion) bool { // ── Provenance derivation ───────────────────────────────────────────── -// untrustedToolNames is the canonical set defined in internal/memory. -// Referencing the same map ensures both packages stay in sync. -var untrustedToolNames = memory.UntrustedToolNames - // DeriveProvenance scans the session's tool calls and returns the trust // signals appropriate for any skill derived from it. A skill is marked // Untrusted (with NeedsReview = true) if any of the messages involved -// a tool whose output is sourced from external content the agent -// ingested. The sources list records which tools triggered the flag so -// the user can review what to inspect. +// a tool call that crossed the agent's trust boundary. The sources list +// records which tools triggered the flag so the user can review what to +// inspect. // -// We additionally treat any MCP tool call (name contains "__" — the -// adapter naming convention) as untrusted, since MCP servers return -// arbitrary text from third-party processes. +// The per-call decision is delegated to memory.ToolCallTaints — the single +// source of truth shared with episode provenance. That keeps the two systems +// in lockstep and makes both argument-aware: path-scoped reads (read_file, +// search_files, multi_grep) only taint when they touch a sensitive path, while +// network/MCP/audio tools always taint. func DeriveProvenance(messages []LlmMessage) SkillProvenance { prov := SkillProvenance{} seen := make(map[string]bool) for _, m := range messages { for _, tc := range m.ToolCalls { - name := tc.Function.Name - tainted := untrustedToolNames[name] || strings.Contains(name, "__") - if !tainted { + if !memory.ToolCallTaints(tc.Function.Name, tc.Function.Arguments) { continue } prov.Untrusted = true prov.NeedsReview = true + name := tc.Function.Name if !seen[name] { seen[name] = true prov.Sources = append(prov.Sources, name)