BackendStack21 · jkyberneees · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/cmd/odek/dispatch.go b/cmd/odek/dispatch.go
@@ -53,6 +53,8 @@ func dispatch(args []string) int {
 		return cliExit(telegramCmd(rest))
 	case "schedule":
 		return cliExit(scheduleCmd(rest))
+	case "memory":
+		return cliExit(memoryCmd(rest))
 	default:
 		fmt.Fprintf(os.Stderr, "odek: unknown command %q\n", cmd)
 		printUsage()

diff --git a/cmd/odek/main.go b/cmd/odek/main.go
@@ -452,6 +452,7 @@ func printUsage() {
   odek mcp [--sandbox]
   odek telegram
   odek schedule <list|add|rm|enable|disable|run|next|daemon>
+  odek memory <list|promote <session_id>>
   odek version
 
 Commands:
@@ -477,6 +478,10 @@ Commands:
                        Subcommands: list, add, rm, enable, disable, run, next, daemon
                        The daemon (or the Telegram bot) fires jobs and delivers
                        results to stdout, a log, or a Telegram chat.
+  memory              Review and promote past-session memory episodes
+                       list: show episodes excluded from recall (untrusted)
+                       promote <session_id>: approve one so it can be recalled.
+                       Human-gated on purpose — not available to the agent.
   init                Create a config file (default: ./odek.json)
   version             Print version and exit
 

diff --git a/cmd/odek/memory_cmd.go b/cmd/odek/memory_cmd.go
@@ -0,0 +1,66 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/BackendStack21/odek/internal/memory"
+)
+
+// memoryCmd implements `odek memory <list|promote> [args]`.
+//
+// It is the human-only review surface for episode-memory trust. An episode
+// whose session touched external content (web/http/MCP/audio) or read paths
+// outside the workspace trust zone is stored but withheld from recall until
+// it is approved. Manual approval is offered only here, on the CLI, and never
+// as an agent tool, so a prompt-injected agent cannot approve its own
+// poisoned memory.
+//
+// With no arguments it prints usage to stderr and returns nil; a missing
+// session id or an unknown subcommand is returned as an error.
+func memoryCmd(args []string) error {
+	if len(args) < 1 {
+		fmt.Fprintf(os.Stderr, "Usage: odek memory <list|promote> [args]\n")
+		return nil
+	}
+	verb, rest := args[0], args[1:]
+	episodes := memory.NewEpisodeStore(expandHome("~/.odek/memory"), nil)
+
+	switch verb {
+	case "promote":
+		if len(rest) < 1 {
+			return fmt.Errorf("usage: odek memory promote <session_id>")
+		}
+		sessionID := rest[0]
+		if err := episodes.Promote(sessionID); err != nil {
+			return err
+		}
+		fmt.Printf("odek: promoted episode %q — it can now be recalled into future sessions\n", sessionID)
+		return nil
+
+	case "list", "ls", "pending":
+		queue, err := episodes.PendingReview()
+		if err != nil {
+			return err
+		}
+		if len(queue) < 1 {
+			fmt.Println("No episodes pending review — all stored episodes are recallable.")
+			return nil
+		}
+		fmt.Printf("%d episode(s) pending review (excluded from recall until promoted):\n\n", len(queue))
+		for i := range queue {
+			item := queue[i]
+			when := item.CreatedAt.Format("2006-01-02 15:04")
+			fmt.Printf("• %s  (%d turns, %s)\n", item.SessionID, item.Turns, when)
+			if srcs := item.Provenance.Sources; len(srcs) != 0 {
+				fmt.Printf("    sources: %s\n", strings.Join(srcs, ", "))
+			}
+			fmt.Printf("    %s\n\n", item.Summary)
+		}
+		fmt.Println("Review the summary above, then promote with:  odek memory promote <session_id>")
+		return nil
+	}
+
+	return fmt.Errorf("unknown memory subcommand %q (expected: list, promote)", verb)
+}
diff --git a/cmd/odek/memory_cmd_test.go b/cmd/odek/memory_cmd_test.go
@@ -0,0 +1,53 @@
+package main
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/BackendStack21/odek/internal/memory"
+)
+
+// TestMemoryCmd_ListAndPromote drives the human-gated promote path end to end
+// through the CLI command: a seeded untrusted episode is listed, the command
+// promotes it, and the approval is read back from the on-disk index. It then
+// checks that every misuse of the command is reported as an error, including
+// promoting the same episode a second time.
+func TestMemoryCmd_ListAndPromote(t *testing.T) {
+	home := setupTestHome(t)
+	dir := filepath.Join(home, ".odek", "memory")
+
+	// Seed one untrusted, not-yet-approved episode for the command to act on.
+	es := memory.NewEpisodeStore(dir, nil)
+	if err := es.WriteWithProvenance("20260108-web", "researched a library", 5,
+		memory.EpisodeProvenance{Untrusted: true, Sources: []string{"browser"}}); err != nil {
+		t.Fatalf("seed: %v", err)
+	}
+
+	if err := memoryCmd([]string{"list"}); err != nil {
+		t.Fatalf("memory list: %v", err)
+	}
+	if err := memoryCmd([]string{"promote", "20260108-web"}); err != nil {
+		t.Fatalf("memory promote: %v", err)
+	}
+
+	// Open a second store on the same directory so the assertion observes
+	// what was persisted rather than any state held by the first handle.
+	fresh := memory.NewEpisodeStore(dir, nil)
+	idx, err := fresh.ReadIndex()
+	if err != nil {
+		t.Fatalf("read index: %v", err)
+	}
+	if len(idx) != 1 || !idx[0].Provenance.UserApproved {
+		t.Errorf("episode not approved after promote: %+v", idx)
+	}
+
+	// Each of these invocations must be rejected with a non-nil error.
+	rejected := []struct {
+		name string
+		args []string
+	}{
+		{"promoting an already-approved id", []string{"promote", "20260108-web"}},
+		{"promoting an unknown id", []string{"promote", "does-not-exist"}},
+		{"promote without a session id", []string{"promote"}},
+		{"unknown subcommand", []string{"bogus"}},
+	}
+	for _, tc := range rejected {
+		if err := memoryCmd(tc.args); err == nil {
+			t.Errorf("%s should error (args %q)", tc.name, tc.args)
+		}
+	}
+}
+
+// TestMemoryCmd_ListEmpty checks that `list` succeeds, rather than erroring,
+// when it runs against a clean test home.
+func TestMemoryCmd_ListEmpty(t *testing.T) {
+	setupTestHome(t)
+	err := memoryCmd([]string{"list"})
+	if err != nil {
+		t.Fatalf("memory list on empty home: %v", err)
+	}
+}
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
@@ -200,7 +200,8 @@ The `memory` section controls the persistent memory system (see [docs/MEMORY.md]
     "llm_extract": true,
     "llm_consolidate": true,
     "merge_threshold": 0.7,
-    "add_threshold": 0.3
+    "add_threshold": 0.3,
+    "auto_approve_episodes": false
   }
 }
 ```
@@ -213,12 +214,13 @@ The `memory` section controls the persistent memory system (see [docs/MEMORY.md]
 | `buffer_lines` | 20 | Max turn summaries in session buffer |
 | `buffer_enabled` | true | Enable the turn-level buffer |
 | `merge_on_write` | true | Use go-vector RP similarity to auto-merge related entries |
-| `extract_on_end` | true | Extract durable facts via LLM at session end (≥3 turns) |
+| `extract_on_end` | true | At session end (≥3 turns), extract a narrative episode summary via LLM for later recall |
 | `llm_search` | true | Use LLM to rank episode search results by relevance |
 | `llm_extract` | true | Use LLM for end-of-session fact extraction |
 | `llm_consolidate` | true | Use LLM to merge related fact entries |
 | `merge_threshold` | 0.7 | go-vector cosine threshold for auto-merge (0.0–1.0) |
 | `add_threshold` | 0.3 | go-vector cosine threshold for auto-add (0.0–1.0) |
+| `auto_approve_episodes` | false | **Security trade-off.** When true, untrusted episodes (sessions that touched web/MCP/out-of-workspace content) are auto-approved at session end so they are recalled without a manual `odek memory promote`. Leaving it `false` keeps the human review gate (recommended). |
 
 ## Sub-agent configuration
 

diff --git a/docs/SECURITY.md b/docs/SECURITY.md
@@ -100,11 +100,25 @@ Both:
 
 `internal/memory` tracks `EpisodeProvenance{Untrusted, Sources, UserApproved}` for every episode. An episode derived from a session that ingested untrusted content is **stored on disk for audit but never auto-replayed** into future sessions. This stops a single successful injection from becoming a persistent backdoor through the episode pipeline.
 
-To use a tainted episode anyway, the user must explicitly promote it (set `UserApproved=true`).
+Taint is decided per tool call by `memory.ToolCallTaints` (the single source of truth, shared with skills):
+
+- **Always untrusted:** `browser`, `http_batch`, `transcribe` (network / opaque-audio content), `session_search` (recall of prior-session transcripts, which may carry earlier-injected text), and any MCP tool (`server__tool`).
+- **Path-reading tools** (`read_file`, `search_files`, `multi_grep`, `batch_read`, `json_query`, `head_tail`, `count_lines`, `checksum`, `word_count`, `sort`, `tr`, `diff`, `file_info`, `glob`, `tree`, `base64`) taint when **any** of their path arguments resolves **outside the workspace trust zone** — the workspace dir, the sandbox `/workspace` mount, or `~/.odek`. Reads confined to the workspace stay trusted, so ordinary coding sessions remain recallable; reads of anything else (system/credential paths, home files, sibling repos) taint. The check is a workspace-containment allowlist rather than a sensitive-path denylist, and it resolves symlinks (so e.g. `/etc` → `/private/etc` on macOS cannot disguise an escape). A malformed argument string is treated conservatively as untrusted. When adding a new file-reading tool, add it to `PathReadingTools`.
+
+To use a tainted episode anyway, the user explicitly promotes it (sets `UserApproved=true`) from the CLI:
+
+```
+odek memory list                    # episodes excluded from recall, with their sources
+odek memory promote <session_id>    # approve one after reviewing its summary
+```
+
+Promotion is **CLI-only and human-gated** — it is deliberately *not* exposed as an agent tool, so a prompt-injected agent cannot self-approve its own poisoned memory.
+
+**Opt-out of the gate (`memory.auto_approve_episodes`, default `false`).** Operators who accept the risk (e.g. a fully sandboxed, single-tenant deployment) can set `auto_approve_episodes: true` to have untrusted episodes stamped `AutoApproved` at session end so they are recalled without a manual promote. This **disables the persistence-injection protection** for episodes — a single successful injection can then influence future sessions automatically — so it is off by default and should stay off in any environment exposed to untrusted input. The on-disk record still keeps `Untrusted=true` and `Sources`, and uses a distinct `AutoApproved` flag (never `UserApproved`) so the audit trail shows the approval was automatic.
 
 ### 6. Skill provenance gate
 
-`internal/skills` carries the same provenance model. Skills auto-saved from sessions that touched `browser` / `http_batch` / `read_file` / `search_files` / `multi_grep` / `transcribe` / any MCP tool are tagged with `Provenance.Untrusted=true` and `NeedsReview=true`. The skill loader pins those skills to the Lazy set regardless of their `auto_load` flag.
+`internal/skills` carries the same provenance model and shares the exact taint decision (`memory.ToolCallTaints`). Skills auto-saved from sessions that crossed the trust boundary — `browser` / `http_batch` / `transcribe` / `session_search` / any MCP tool, or a path-reading tool (such as `read_file` / `search_files` / `multi_grep`) whose path resolves **outside the workspace trust zone** — are tagged with `Provenance.Untrusted=true` and `NeedsReview=true`. The skill loader pins those skills to the Lazy set regardless of their `auto_load` flag.
 
 After reviewing the skill body, promote it:
 

diff --git a/internal/config/loader.go b/internal/config/loader.go
@@ -649,22 +649,22 @@ func LoadConfig(cli CLIFlags) ResolvedConfig {
 		MaxIter:  cfg.MaxIter,
 		System:   cfg.System,
 
-		SandboxImage:        cfg.SandboxImage, // empty = resolve at call site (Dockerfile.odek or alpine:latest)
-		SandboxNetwork:      ifZero(cfg.SandboxNetwork, DefaultSandboxNetwork),
-		SandboxMemory:       cfg.SandboxMemory,
-		SandboxCPUs:         cfg.SandboxCPUs,
-		SandboxUser:         cfg.SandboxUser,
-		SandboxEnv:          cfg.SandboxEnv,
-		SandboxVolumes:      cfg.SandboxVolumes,
-		Skills:              resolveSkills(cfg.Skills),
-		Dangerous:           resolveDangerous(cfg.Dangerous),
-		Memory:              resolveMemory(cfg.Memory),
-		MCPServers:          cfg.MCPServers,
-		Telegram:            resolveTelegram(cfg.Telegram),
-		Transcription:       resolveTranscription(cfg.Transcription),
-		Schedules:           resolveSchedules(cfg.Schedules),
-		InteractionMode:     ifZero(cfg.InteractionMode, "engaging"),
-		ToolProgress:        ifZero(cfg.ToolProgress, "all"),
+		SandboxImage:    cfg.SandboxImage, // empty = resolve at call site (Dockerfile.odek or alpine:latest)
+		SandboxNetwork:  ifZero(cfg.SandboxNetwork, DefaultSandboxNetwork),
+		SandboxMemory:   cfg.SandboxMemory,
+		SandboxCPUs:     cfg.SandboxCPUs,
+		SandboxUser:     cfg.SandboxUser,
+		SandboxEnv:      cfg.SandboxEnv,
+		SandboxVolumes:  cfg.SandboxVolumes,
+		Skills:          resolveSkills(cfg.Skills),
+		Dangerous:       resolveDangerous(cfg.Dangerous),
+		Memory:          resolveMemory(cfg.Memory),
+		MCPServers:      cfg.MCPServers,
+		Telegram:        resolveTelegram(cfg.Telegram),
+		Transcription:   resolveTranscription(cfg.Transcription),
+		Schedules:       resolveSchedules(cfg.Schedules),
+		InteractionMode: ifZero(cfg.InteractionMode, "engaging"),
+		ToolProgress:    ifZero(cfg.ToolProgress, "all"),
 	}
 
 	// MaxConcurrency: default to 3 if not set
@@ -850,6 +850,9 @@ func resolveMemory(cfg *memory.MemoryConfig) memory.MemoryConfig {
 	if cfg.MinTurnsForExtraction > 0 {
 		def.MinTurnsForExtraction = cfg.MinTurnsForExtraction
 	}
+	if cfg.AutoApproveEpisodes != nil {
+		def.AutoApproveEpisodes = cfg.AutoApproveEpisodes
+	}
 	return def
 }
 

diff --git a/internal/danger/classifier.go b/internal/danger/classifier.go
@@ -145,6 +145,15 @@ func ClassifyPath(path string) RiskClass {
 	}
 	abs = filepath.Clean(abs)
 
+	// macOS canonicalizes /etc, /var, and /tmp as symlinks under /private.
+	// Strip the /private prefix so the sensitivity checks below match
+	// consistently — e.g. /private/etc/master.passwd must classify the same
+	// as /etc/master.passwd (system_write), and /private/var/folders/... must
+	// still resolve to the temp dir (local_write).
+	if strings.HasPrefix(abs, "/private/") {
+		abs = strings.TrimPrefix(abs, "/private")
+	}
+
 	for _, prefix := range []string{"/boot", "/dev", "/proc", "/sys", "/mnt", "/media"} {
 		if strings.HasPrefix(abs, prefix) {
 			return Destructive

diff --git a/internal/memory/episodes.go b/internal/memory/episodes.go
@@ -217,7 +217,7 @@ func (e *EpisodeStore) Search(query string, limit int) ([]EpisodeMeta, error) {
 	// EpisodeProvenance exists to close.
 	filtered := idx[:0:len(idx)]
 	for _, ep := range idx {
-		if ep.Provenance.Untrusted && !ep.Provenance.UserApproved {
+		if ep.Provenance.Untrusted && !ep.Provenance.UserApproved && !ep.Provenance.AutoApproved {
 			continue
 		}
 		filtered = append(filtered, ep)
@@ -244,6 +244,59 @@ func (e *EpisodeStore) Search(query string, limit int) ([]EpisodeMeta, error) {
 	return ranked, nil
 }
 
+// ── Promotion (human-gated escape hatch) ──────────────────────────────
+
+// Promote flags the episode recorded for sessionID as user-approved, which
+// lifts the recall filter applied to untrusted episodes. It is the
+// human-gated escape hatch for sessions that legitimately touched external
+// content and is intentionally reachable only through `odek memory promote`,
+// never through an agent tool, so a prompt-injected agent cannot approve its
+// own poisoned memory.
+//
+// It returns an error when sessionID fails validation, when the index cannot
+// be read or written, or when the episode is unknown or already approved.
+func (e *EpisodeStore) Promote(sessionID string) error {
+	err := session.ValidateSessionID(sessionID)
+	if err != nil {
+		return fmt.Errorf("memory: episodes promote: %w", err)
+	}
+
+	// Hold the store lock across the whole read-modify-write of the index.
+	e.mu.Lock()
+	defer e.mu.Unlock()
+
+	entries, err := e.ReadIndex()
+	if err != nil {
+		return err
+	}
+	matched := 0
+	for i := range entries {
+		entry := &entries[i]
+		if entry.SessionID != sessionID {
+			continue
+		}
+		// Refuse before anything is written, so the index stays untouched.
+		if entry.Provenance.UserApproved {
+			return fmt.Errorf("memory: episode %q is already approved", sessionID)
+		}
+		entry.Provenance.UserApproved = true
+		matched++
+	}
+	if matched == 0 {
+		return fmt.Errorf("memory: episode %q not found", sessionID)
+	}
+	return e.writeIndex(entries)
+}
+
+// PendingReview lists the episodes currently withheld from recall: those
+// marked untrusted that carry neither a user approval nor an automatic
+// approval. These are the candidates a user may want to promote. Entries keep
+// the order in which ReadIndex returns them (described as newest-first), and
+// the result is nil when nothing is pending.
+func (e *EpisodeStore) PendingReview() ([]EpisodeMeta, error) {
+	entries, err := e.ReadIndex()
+	if err != nil {
+		return nil, err
+	}
+	var pending []EpisodeMeta
+	for i := range entries {
+		prov := entries[i].Provenance
+		// Skip anything trusted or already approved by either route.
+		if !prov.Untrusted || prov.UserApproved || prov.AutoApproved {
+			continue
+		}
+		pending = append(pending, entries[i])
+	}
+	return pending, nil
+}
+
 // ── Index helpers ─────────────────────────────────────────────────────
 
 // addToIndex appends an entry to the index and writes it.