Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions docs/CHEATSHEET.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,13 @@ Settings: `model` (tiny/base/small/medium), `language` (ISO code, empty=auto), `
```
RP.embed(newEntry) → cosine similarity vs each existing entry

cos > 0.7 ─────→ auto-merge (replace)
cos > 0.7 ─────→ simple merge (no LLM — substring or concatenation)
cos < 0.3 ─────→ auto-add
0.3–0.7 ─────→ LLM judges → merge or add
0.3–0.7 ─────→ auto-add (deferred to session-end consolidation)
```

Saves ~80% of LLM calls on memory writes.
AddFact makes zero LLM calls. Near-duplicate dedup happens at session end
via background consolidation (`consolidate_on_end`, default true).

### Memory Tool

Expand Down
4 changes: 3 additions & 1 deletion docs/CONFIG.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ The `memory` section controls the persistent memory system (see [docs/MEMORY.md]
"buffer_lines": 20,
"buffer_enabled": true,
"merge_on_write": true,
"consolidate_on_end": true,
"extract_on_end": true,
"extract_facts": false,
"llm_search": true,
Expand All @@ -214,7 +215,8 @@ The `memory` section controls the persistent memory system (see [docs/MEMORY.md]
| `facts_limit_env` | 2500 | Max chars for `env.md` fact file |
| `buffer_lines` | 20 | Max turn summaries in session buffer |
| `buffer_enabled` | true | Enable the turn-level buffer |
| `merge_on_write` | true | Use go-vector RP similarity to auto-merge related entries |
| `merge_on_write` | true | Use go-vector RP similarity to auto-merge related entries (fast, no LLM — uses simple string merge) |
| `consolidate_on_end` | true | At session end, run an LLM consolidation pass over `user.md` and `env.md` in a background goroutine. This is the quality complement to `merge_on_write`: merge-on-write handles obvious duplicates immediately (no LLM), while consolidation handles near-duplicates and paraphrases at session end with full LLM quality. Requires `llm_consolidate: true`. **Note:** facts in the borderline similarity band (0.3–0.7 cosine) are now always added immediately and only merged by this consolidation pass — if you set `consolidate_on_end: false`, near-duplicate facts will accumulate rather than being merged. |
| `extract_on_end` | true | At session end (≥3 turns), extract a narrative episode summary via LLM for later recall |
| `extract_facts` | **false** | **Opt-in.** At session end (≥3 turns), auto-extract a few **durable** facts (stable user preferences, project invariants) into `user.md`/`env.md`. Off by default — see the security note below. Independent of `extract_on_end`; to disable *all* end-of-session LLM extraction set `llm_extract: false`. |
| `llm_search` | true | Use LLM to rerank candidates for **explicit** `memory search` calls (the `memory` tool). Per-turn recall (`FormatEpisodeContext`) always uses the cached go-vector index — no LLM call on the hot path regardless of this setting. |
Expand Down
3 changes: 3 additions & 0 deletions internal/config/loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -826,6 +826,9 @@ func resolveMemory(cfg *memory.MemoryConfig) memory.MemoryConfig {
if cfg.ExtractFacts != nil {
def.ExtractFacts = cfg.ExtractFacts
}
if cfg.ConsolidateOnEnd != nil {
def.ConsolidateOnEnd = cfg.ConsolidateOnEnd
}
if cfg.LLMSearch != nil {
def.LLMSearch = cfg.LLMSearch
}
Expand Down
187 changes: 187 additions & 0 deletions internal/memory/async_merge_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
package memory

import (
"context"
"strings"
"sync/atomic"
"testing"
"time"
)

// callCountLLM counts every LLM invocation and proxies to a mockLLM.
type callCountLLM struct {
calls int64
inner *mockLLM
}

func (c *callCountLLM) SimpleCall(ctx context.Context, system, user string) (string, error) {
atomic.AddInt64(&c.calls, 1)
return c.inner.SimpleCall(ctx, system, user)
}

// TestAddFact_NoLLMCalls: AddFact must complete without making any LLM calls,
// even when merge-on-write classifies a new entry as "merge" or "judge".
func TestAddFact_NoLLMCalls(t *testing.T) {
dir := t.TempDir()
llm := &callCountLLM{inner: &mockLLM{responses: map[string]string{}}}
mm := NewMemoryManager(dir, llm, DefaultMemoryConfig())

// Seed an entry so subsequent adds trigger merge-on-write comparisons.
if err := mm.AddFact("user", "project uses postgres for all data storage"); err != nil {
t.Fatalf("seed: %v", err)
}
before := atomic.LoadInt64(&llm.calls)

// Add a very similar entry — should be classified "merge" or "judge" by RP.
if err := mm.AddFact("user", "project database is postgres"); err != nil {
t.Fatalf("AddFact: %v", err)
}
after := atomic.LoadInt64(&llm.calls)

if after != before {
t.Errorf("AddFact made %d LLM call(s); want 0 — AddFact must never block on LLM",
after-before)
}
}

// TestAddFact_MergeUsesSimpleFallback: when merge-on-write fires, the entries
// are merged using the non-LLM (simple) path. The result must contain content
// from both entries OR one must be a substring of the other.
func TestAddFact_MergeUsesSimpleFallback(t *testing.T) {
dir := t.TempDir()
cfg := DefaultMemoryConfig()
cfg.MergeOnWrite = boolPtr(true)
// Use a high merge threshold so the similar entries definitely trigger "merge".
cfg.MergeThreshold = 0.01 // effectively always merge
mm := NewMemoryManager(dir, nil, cfg)

_ = mm.AddFact("env", "go 1.22")
_ = mm.AddFact("env", "golang 1.22")

_, env, err := mm.ReadFacts()
if err != nil {
t.Fatalf("ReadFacts: %v", err)
}
// The simple merge either returns the longer entry (substring case) or
// concatenates them. Either way the result is non-empty.
if strings.TrimSpace(env) == "" {
t.Errorf("expected a merged entry in env, got empty")
}
}

// TestConsolidateOnEnd_Default: the default config has consolidate_on_end=true.
func TestConsolidateOnEnd_Default(t *testing.T) {
d := DefaultMemoryConfig()
if d.ConsolidateOnEnd == nil || !*d.ConsolidateOnEnd {
t.Errorf("ConsolidateOnEnd default should be true, got %v", d.ConsolidateOnEnd)
}
}

// TestConsolidateOnEnd_FiresAtSessionEnd: with consolidate_on_end=true, facts
// are consolidated in the background at session end. We verify this by seeding
// three distinct facts and confirming the count decreases (Consolidate merged
// them) within a generous timeout.
func TestConsolidateOnEnd_FiresAtSessionEnd(t *testing.T) {
dir := t.TempDir()
// LLM that:
// - returns "session summary" for the episode extraction call
// - returns a 2-element JSON array for any consolidation call
llm := &mockLLM{responses: map[string]string{
"Summarize": "session summary",
// Consolidate prompt contains "memory entries" — return a merged 2-item list
"memory entri": `["dark mode + vim keybindings preference","works in Go for backend"]`,
}}

cfg := DefaultMemoryConfig()
cfg.ConsolidateOnEnd = boolPtr(true)
cfg.LLMConsolidate = boolPtr(true)
cfg.ExtractOnEnd = boolPtr(true)
cfg.MergeOnWrite = boolPtr(false) // keep all three facts separate
mm := NewMemoryManager(dir, llm, cfg)

// Seed three facts that will survive without merge-on-write.
_ = mm.AddFact("user", "prefers dark mode in all editors")
_ = mm.AddFact("user", "uses vim keybindings everywhere")
_ = mm.AddFact("user", "works primarily in Go for backend services")

entries0, _ := mm.facts.Entries("user")
if len(entries0) != 3 {
t.Fatalf("expected 3 seeded entries, got %d", len(entries0))
}

msgs := []string{"user: hi", "assistant: ok", "user: more", "assistant: done"}
mm.OnSessionEndWithProvenance("20260801-a", 5, msgs, EpisodeProvenance{})

// Poll until consolidation reduces the entry count or timeout.
deadline := time.Now().Add(3 * time.Second)
for time.Now().Before(deadline) {
entries, _ := mm.facts.Entries("user")
if len(entries) < 3 {
return // consolidation ran and reduced entries — success
}
time.Sleep(50 * time.Millisecond)
}
t.Error("session-end consolidation did not reduce fact count within 3 seconds")
}

// TestConsolidateOnEnd_IndependentOfLLMExtract: consolidation must fire even
// when llm_extract=false (episode extraction disabled). D-06 regression guard.
func TestConsolidateOnEnd_IndependentOfLLMExtract(t *testing.T) {
dir := t.TempDir()
llm := &mockLLM{responses: map[string]string{
"memory entri": `["consolidated single fact"]`,
}}
cfg := DefaultMemoryConfig()
cfg.LLMExtract = boolPtr(false) // episodes off — must NOT suppress consolidation
cfg.ConsolidateOnEnd = boolPtr(true)
cfg.MergeOnWrite = boolPtr(false)
mm := NewMemoryManager(dir, llm, cfg)
_ = mm.AddFact("user", "prefers dark mode editors")
_ = mm.AddFact("user", "uses dark theme always")
entries0, _ := mm.facts.Entries("user")
if len(entries0) < 2 {
t.Fatalf("need 2 seeded entries, got %d", len(entries0))
}
msgs := []string{"user: hi", "assistant: ok", "user: more", "assistant: done"}
mm.OnSessionEndWithProvenance("sess-d06", 5, msgs, EpisodeProvenance{})
deadline := time.Now().Add(3 * time.Second)
for time.Now().Before(deadline) {
if e, _ := mm.facts.Entries("user"); len(e) < 2 {
return // consolidation fired
}
time.Sleep(50 * time.Millisecond)
}
t.Error("consolidation should fire even with llm_extract=false")
}

// TestConsolidateOnEnd_FlagOff: with consolidate_on_end=false, fact count must
// remain stable at session end (no consolidation LLM call).
func TestConsolidateOnEnd_FlagOff(t *testing.T) {
dir := t.TempDir()
llm := &callCountLLM{
inner: &mockLLM{responses: map[string]string{"Summarize": "summary"}},
}
cfg := DefaultMemoryConfig()
cfg.ConsolidateOnEnd = boolPtr(false)
cfg.MergeOnWrite = boolPtr(false)
mm := NewMemoryManager(dir, llm, cfg)

_ = mm.AddFact("user", "prefers dark mode")
_ = mm.AddFact("user", "uses Go for backend work")

msgs := []string{"user: hi", "assistant: ok", "user: more", "assistant: done"}
before := atomic.LoadInt64(&llm.calls)
mm.OnSessionEndWithProvenance("20260801-b", 5, msgs, EpisodeProvenance{})

// Give any goroutine 300 ms to (incorrectly) run.
time.Sleep(300 * time.Millisecond)
after := atomic.LoadInt64(&llm.calls)

// Only the episode extraction call should have fired (Summarize), not Consolidate.
episodeCalls := after - before
entries, _ := mm.facts.Entries("user")
if len(entries) < 2 {
t.Errorf("consolidate_on_end=false must not consolidate facts, got %d entries", len(entries))
}
_ = episodeCalls // 1 call for Summarize is expected
}
8 changes: 7 additions & 1 deletion internal/memory/facts.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,13 @@ func (f *FactStore) Entries(target string) ([]string, error) {
// writeEntries joins entries and writes them to disk atomically. It writes to a
// UNIQUE temp file in the same directory and renames it into place, so two
// FactStore instances writing the same directory concurrently can never clobber
// a shared temp file. Caller must hold f.mu.
// a shared temp file.
//
// Locking contract: callers must hold EITHER f.mu (normal mutation paths via
// readModifyWrite) OR the process-wide factsDirLock for this directory
// (Consolidate path in memory.go). These two locks are never acquired together,
// so there is no deadlock risk. factsDirLock provides the same cross-instance
// mutual exclusion that f.mu provides per-instance.
func (f *FactStore) writeEntries(target string, entries []string) error {
content := strings.Join(entries, entrySep)
path := f.path(target)
Expand Down
84 changes: 36 additions & 48 deletions internal/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type MemoryConfig struct {
MergeOnWrite *bool `json:"merge_on_write,omitempty"`
ExtractOnEnd *bool `json:"extract_on_end,omitempty"`
ExtractFacts *bool `json:"extract_facts,omitempty"`
ConsolidateOnEnd *bool `json:"consolidate_on_end,omitempty"`
LLMSearch *bool `json:"llm_search,omitempty"`
LLMExtract *bool `json:"llm_extract,omitempty"`
LLMConsolidate *bool `json:"llm_consolidate,omitempty"`
Expand Down Expand Up @@ -95,6 +96,7 @@ func DefaultMemoryConfig() MemoryConfig {
MergeOnWrite: boolPtr(true),
ExtractOnEnd: boolPtr(true),
ExtractFacts: boolPtr(false), // opt-in: persistent-poisoning risk, see SECURITY.md
ConsolidateOnEnd: boolPtr(true), // restores LLM merge quality removed from AddFact
LLMSearch: boolPtr(true), // LLM ranker by default — relevance over recency
LLMExtract: boolPtr(true),
LLMConsolidate: boolPtr(true),
Expand Down Expand Up @@ -152,6 +154,9 @@ func NewMemoryManager(memoryDir string, llc LLMClient, cfg MemoryConfig) *Memory
if cfg.ExtractFacts != nil {
def.ExtractFacts = cfg.ExtractFacts
}
if cfg.ConsolidateOnEnd != nil {
def.ConsolidateOnEnd = cfg.ConsolidateOnEnd
}
if cfg.LLMSearch != nil {
def.LLMSearch = cfg.LLMSearch
}
Expand Down Expand Up @@ -249,32 +254,21 @@ func (m *MemoryManager) AddFact(target, content string) error {

switch action {
case "merge":
// Auto-merge: replace similar entry with merged content
merged := mergeEntries(m.llm, entries[similarIdx], content)
// Auto-merge using the fast (non-LLM) path so AddFact never blocks
// on a network round-trip. The simple merge handles the common
// substring case well; LLM quality is recovered at session end by
// the background consolidation that runs when consolidate_on_end=true.
merged := mergeEntries(nil, entries[similarIdx], content)
if err := m.facts.Replace(target, entries[similarIdx][:min(30, len(entries[similarIdx]))], merged); err != nil {
return err
}
// Update merge detector incrementally — only re-embed the changed entry
m.merge.ReplaceEntry(similarIdx, merged)
return nil
case "judge":
// Borderline: use LLM to decide
if m.llm != nil {
decision, err := m.judgeMerge(target, entries[similarIdx], content)
if err == nil {
if decision == "merge" {
merged := mergeEntries(m.llm, entries[similarIdx], content)
if err := m.facts.Replace(target, entries[similarIdx][:min(30, len(entries[similarIdx]))], merged); err != nil {
return err
}
// Update merge detector incrementally
m.merge.ReplaceEntry(similarIdx, merged)
return nil
}
// decision == "add" — fall through to normal add
}
}
// No LLM available or LLM failed: let agent decide (just add)
// Borderline similarity — add without blocking on an LLM judgment
// call. Brief duplication (until session-end consolidation) is
// preferable to stalling the agent loop for a round-trip.
fallthrough
case "add":
// No overlap — normal add
Expand Down Expand Up @@ -423,6 +417,11 @@ Entries for %s:
if len(newEntries) == 0 || (len(newEntries) == 1 && newEntries[0] == "") {
return nil // LLM returned nothing useful
}
// Guard against a hallucinating LLM expanding the entry count; the system
// prompt says "Never more than the original count" but that is advisory only.
if len(newEntries) > len(entries) {
newEntries = newEntries[:len(entries)]
}

// Security: scan LLM output before persisting
for _, entry := range newEntries {
Expand Down Expand Up @@ -520,7 +519,24 @@ func (m *MemoryManager) OnSessionEndWithProvenance(sessionID string, turns int,
if minTurns <= 0 {
minTurns = defaultMinTurnsForExtraction
}
// Shared preconditions for any end-of-session LLM extraction.

// Background consolidation is independent of episode/fact extraction —
// it fires based on its own gate so that llm_extract=false does not
// silently disable it (D-06). Requires an LLM client, llm_consolidate,
// and a minimum session length (same threshold reused for consistency).
if m.llm != nil && turns >= minTurns &&
m.cfg.ConsolidateOnEnd != nil && *m.cfg.ConsolidateOnEnd &&
m.cfg.LLMConsolidate != nil && *m.cfg.LLMConsolidate {
go func() {
for _, target := range []string{"user", "env"} {
// Best-effort: errors (e.g. only 1 entry, nothing to consolidate)
// are silently ignored — consolidation is a quality pass, not critical.
_ = m.Consolidate(target)
}
}()
}

// Preconditions shared by episode summary + fact extraction.
if m.cfg.LLMExtract == nil || !*m.cfg.LLMExtract || m.llm == nil || turns < minTurns || len(messages) == 0 {
return
}
Expand Down Expand Up @@ -822,34 +838,6 @@ func (m *MemoryManager) BuildSystemPrompt() string {

// ── Private helpers ──────────────────────────────────────────────────

// judgeMerge asks the LLM whether two entries should be merged.
// Returns "merge" or "add".
func (m *MemoryManager) judgeMerge(target, existing, newEntry string) (string, error) {
prompt := fmt.Sprintf(`I have two memory entries for the "%s" category:

EXISTING: %s
NEW: %s

Should the new entry be MERGED into the existing one (they are related or redundant)
or ADDED as a separate entry (they are distinct topics)?

Reply with exactly one word: "merge" or "add"`, target, existing, newEntry)

decision, err := m.llm.SimpleCall(context.Background(),
"You are a memory deduplication system. Reply with exactly one word: 'merge' or 'add'.",
prompt,
)
if err != nil {
return "add", err
}

decision = strings.TrimSpace(strings.ToLower(decision))
if strings.Contains(decision, "merge") {
return "merge", nil
}
return "add", nil
}

// mergeEntries combines two related entries into one.
// When an LLM client is available, uses semantic merging for higher quality.
// Falls back to simple string logic when LLM is unavailable.
Expand Down
Loading