Skip to content

Commit 6674fed

Browse files
fix: preserve newlines and show metadata in auto-recall (#602)
* fix: preserve newlines and show metadata in auto-recall injection Two changes to improve auto-recall context quality: 1. sanitizeForContext: replace newlines with literal \n instead of collapsing to spaces. Preserves paragraph structure and meaning, especially important for non-Latin scripts (Hebrew, CJK) where line breaks carry semantic weight. 2. Auto-recall line format: show folder, date, and source from entry metadata instead of category:scope. Users store rich metadata via memory-pro import — the recall display should surface it. Before: - [other:global] all text on one line no structure After: - [Goals] 2024-05-30 (apple_notes) text with\npreserved structure Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * fix: use folder name as display category for 'other' entries in auto-recall prefix - Check r.entry.category === "other" (raw stored value) instead of displayCategory, since parseSmartMetadata always enriches "other" to a semantic category via reverseMapLegacyCategory — making displayCategory === "other" unreachable. - Retain tierPrefix and scope in prefix (restore what PR originally removed). - Append date and source suffix only when available. - Apple Notes import with folder "Goals" now renders as [Goals:global] instead of [other:global] or [patterns:global]. - Entries without folder metadata are unaffected — canonical prefix preserved. - Add 3 tests covering: folder override for "other" entries, no override for non-"other" entries, and tier prefix presence for entries with tier metadata. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * feat: add recallPrefix.categoryField config for custom category display Adds an optional recallPrefix.categoryField plugin config that lets users specify which raw metadata field to use as the category label in auto-recall prefix lines, instead of the built-in category. 
When set, the value of metadata[categoryField] replaces the built-in category in the [category:scope] prefix — falling back to displayCategory when the field is absent on an entry. This makes it easy to surface meaningful grouping labels from import-based workflows (e.g. Apple Notes folder names, Notion notebooks, Obsidian collections) without hardcoding any field names in core logic. Default behavior (categoryField unset) is unchanged — built-in category is used for all entries, so existing users see no difference. Example config: recallPrefix: { categoryField: "folder" } // entry with metadata.folder = "Goals" → prefix: [W][Goals:global] // entry without metadata.folder → prefix: [W][preferences:global] Adds 3 tests covering: field present, field absent (fallback), and no config (default behavior unchanged). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 66d64dd commit 6674fed

File tree

2 files changed

+229
-2
lines changed

2 files changed

+229
-2
lines changed

index.ts

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,26 @@ interface PluginConfig {
225225
skipLowValue?: boolean;
226226
maxExtractionsPerHour?: number;
227227
};
228+
recallPrefix?: {
229+
/**
230+
* Metadata field to use as the category label in auto-recall prefix lines.
231+
* When set, the value of `metadata[categoryField]` replaces the built-in
232+
* category in the `[category:scope]` prefix — if the field is present on
233+
* the entry. Falls back to the built-in category when the field is absent.
234+
*
235+
* Useful for import-based workflows where entries carry a meaningful
236+
* grouping label in a custom metadata field (e.g. "folder" for Apple Notes
237+
* imports, "notebook" for Notion, "collection" for Obsidian).
238+
*
239+
* Default: unset — built-in category is used for all entries.
240+
*
241+
* @example
242+
* recallPrefix: { categoryField: "folder" }
243+
* // Entry with metadata.folder = "Goals" → prefix: [W][Goals:global]
244+
* // Entry without metadata.folder → prefix: [W][preferences:global]
245+
*/
246+
categoryField?: string;
247+
};
228248
}
229249

230250
type ReflectionThinkLevel = "off" | "minimal" | "low" | "medium" | "high";
@@ -1357,7 +1377,7 @@ export function detectCategory(
13571377

13581378
function sanitizeForContext(text: string): string {
13591379
return text
1360-
.replace(/[\r\n]+/g, " ")
1380+
.replace(/[\r\n]+/g, "\\n")
13611381
.replace(/<\/?[a-zA-Z][^>]*>/g, "")
13621382
.replace(/</g, "\uFF1C")
13631383
.replace(/>/g, "\uFF1E")
@@ -2399,7 +2419,34 @@ const memoryLanceDBProPlugin = {
23992419
const summary = sanitizeForContext(contentText).slice(0, effectivePerItemMaxChars);
24002420
return {
24012421
id: r.entry.id,
2402-
prefix: `${tierPrefix}[${displayCategory}:${r.entry.scope}]`,
2422+
prefix: (() => {
2423+
// If recallPrefix.categoryField is configured, read that field directly
2424+
// from the raw metadata JSON and use it as the category label when present.
2425+
// Falls back to displayCategory when the field is missing, empty, or not a string, or when the metadata JSON cannot be parsed.
2426+
// Reading from raw JSON (not metaObj) avoids relying on parseSmartMetadata
2427+
// passing through unknown fields.
2428+
const categoryFieldName = config.recallPrefix?.categoryField;
2429+
let effectiveCategory = displayCategory;
2430+
if (categoryFieldName) {
2431+
try {
2432+
const rawMeta: Record<string, unknown> = r.entry.metadata
2433+
? (JSON.parse(r.entry.metadata) as Record<string, unknown>)
2434+
: {};
2435+
const fieldValue = rawMeta[categoryFieldName];
2436+
if (typeof fieldValue === "string" && fieldValue) {
2437+
effectiveCategory = fieldValue;
2438+
}
2439+
} catch {
2440+
// malformed metadata — keep displayCategory
2441+
}
2442+
}
2443+
const base = `${tierPrefix}[${effectiveCategory}:${r.entry.scope}]`;
2444+
const parts: string[] = [base];
2445+
if (r.entry.timestamp)
2446+
parts.push(new Date(r.entry.timestamp).toISOString().slice(0, 10));
2447+
if (metaObj.source) parts.push(`(${metaObj.source})`);
2448+
return parts.join(" ");
2449+
})(),
24032450
summary,
24042451
chars: summary.length,
24052452
meta: metaObj,
@@ -4045,6 +4092,15 @@ export function parsePluginConfig(value: unknown): PluginConfig {
40454092
: 30,
40464093
}
40474094
: { skipLowValue: false, maxExtractionsPerHour: 30 },
4095+
recallPrefix:
4096+
typeof cfg.recallPrefix === "object" && cfg.recallPrefix !== null
4097+
? {
4098+
categoryField:
4099+
typeof (cfg.recallPrefix as Record<string, unknown>).categoryField === "string"
4100+
? ((cfg.recallPrefix as Record<string, unknown>).categoryField as string)
4101+
: undefined,
4102+
}
4103+
: undefined,
40484104
};
40494105
}
40504106

test/recall-text-cleanup.test.mjs

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -924,5 +924,176 @@ describe("recall text cleanup", () => {
924924
assert.equal(res.details.memories.length, 3);
925925
assert.match(res.content[0].text, //);
926926
});
927+
928+
// --- PR #602: recall prefix format tests ---
929+
930+
function makeAutoRecallHarness(workspaceDir, mockResults, extraConfig = {}) {
931+
const retrieverMod = jiti("../src/retriever.js");
932+
retrieverMod.createRetriever = function mockCreateRetriever() {
933+
return {
934+
async retrieve() { return mockResults; },
935+
getConfig() { return { mode: "hybrid" }; },
936+
setAccessTracker() {},
937+
setStatsCollector() {},
938+
};
939+
};
940+
const embedderMod = jiti("../src/embedder.js");
941+
embedderMod.createEmbedder = function mockCreateEmbedder() {
942+
return {
943+
async embedQuery() { return new Float32Array(384).fill(0); },
944+
async embedPassage() { return new Float32Array(384).fill(0); },
945+
};
946+
};
947+
const harness = createPluginApiHarness({
948+
resolveRoot: workspaceDir,
949+
pluginConfig: {
950+
dbPath: path.join(workspaceDir, "db"),
951+
embedding: { apiKey: "test-api-key" },
952+
smartExtraction: false,
953+
autoCapture: false,
954+
autoRecall: true,
955+
autoRecallMinLength: 1,
956+
selfImprovement: { enabled: false, beforeResetNote: false, ensureLearningFiles: false },
957+
...extraConfig,
958+
},
959+
});
960+
memoryLanceDBProPlugin.register(harness.api);
961+
const [{ handler: autoRecallHook }] = harness.eventHandlers.get("before_prompt_build") || [];
962+
return autoRecallHook;
963+
}
964+
965+
it("uses configured categoryField as display category when field is present in metadata", async () => {
966+
const ts = new Date("2024-05-30T00:00:00.000Z").getTime();
967+
const hook = makeAutoRecallHarness(workspaceDir, [
968+
{
969+
entry: {
970+
id: "apple-1",
971+
text: "reach revenue goal of $1M ARR by end of 2025",
972+
category: "other",
973+
scope: "global",
974+
importance: 0.8,
975+
timestamp: ts,
976+
metadata: JSON.stringify({ folder: "Goals", source: "manual" }),
977+
},
978+
score: 0.9,
979+
sources: { vector: { score: 0.9, rank: 1 } },
980+
},
981+
], { recallPrefix: { categoryField: "folder" } });
982+
983+
const output = await hook(
984+
{ prompt: "What are my goals?" },
985+
{ sessionId: "apple-prefix-test", sessionKey: "agent:main:session:apple-prefix-test", agentId: "main" },
986+
);
987+
988+
assert.ok(output, "expected recall output");
989+
// metadata.folder replaces the built-in category in the prefix
990+
assert.match(output.prependContext, /\[Goals:/);
991+
assert.doesNotMatch(output.prependContext, /\[other:/);
992+
// Date is appended from timestamp
993+
assert.match(output.prependContext, /2024-05-30/);
994+
// Source suffix is present
995+
assert.match(output.prependContext, /\(manual\)/);
996+
});
997+
998+
it("falls back to built-in category when categoryField is configured but absent from metadata", async () => {
999+
const hook = makeAutoRecallHarness(workspaceDir, [
1000+
{
1001+
entry: {
1002+
id: "plain-1",
1003+
text: "prefer short commit messages",
1004+
category: "preference",
1005+
scope: "global",
1006+
importance: 0.7,
1007+
timestamp: Date.now(),
1008+
},
1009+
score: 0.85,
1010+
sources: { vector: { score: 0.85, rank: 1 } },
1011+
},
1012+
], { recallPrefix: { categoryField: "folder" } });
1013+
1014+
const output = await hook(
1015+
{ prompt: "What are my preferences?" },
1016+
{ sessionId: "no-folder-test", sessionKey: "agent:main:session:no-folder-test", agentId: "main" },
1017+
);
1018+
1019+
assert.ok(output, "expected recall output");
1020+
assert.match(output.prependContext, /prefer short commit messages/);
1021+
// Falls back to built-in category (parseSmartMetadata maps "preference" → "preferences")
1022+
assert.match(output.prependContext, /\[preferences:global\]/);
1023+
assert.doesNotMatch(output.prependContext, /\[Goals:/);
1024+
});
1025+
1026+
it("uses built-in category unchanged when recallPrefix.categoryField is not configured", async () => {
1027+
const hook = makeAutoRecallHarness(workspaceDir, [
1028+
{
1029+
entry: {
1030+
id: "default-1",
1031+
text: "prefer short commit messages",
1032+
category: "preference",
1033+
scope: "global",
1034+
importance: 0.7,
1035+
timestamp: Date.now(),
1036+
metadata: JSON.stringify({ folder: "Preferences", source: "manual" }),
1037+
},
1038+
score: 0.85,
1039+
sources: { vector: { score: 0.85, rank: 1 } },
1040+
},
1041+
]); // no recallPrefix config
1042+
1043+
const output = await hook(
1044+
{ prompt: "What are my preferences?" },
1045+
{ sessionId: "default-prefix-test", sessionKey: "agent:main:session:default-prefix-test", agentId: "main" },
1046+
);
1047+
1048+
assert.ok(output, "expected recall output");
1049+
assert.match(output.prependContext, /prefer short commit messages/);
1050+
// No categoryField configured — folder is ignored, built-in category used
1051+
assert.match(output.prependContext, /\[preferences:global\]/);
1052+
assert.doesNotMatch(output.prependContext, /\[Preferences:/);
1053+
});
1054+
1055+
it("includes tier prefix in recall line when tier metadata is present", async () => {
1056+
const hook = makeAutoRecallHarness(workspaceDir, [
1057+
{
1058+
entry: {
1059+
id: "tiered-1",
1060+
text: "always use absolute imports",
1061+
category: "fact",
1062+
scope: "global",
1063+
importance: 0.9,
1064+
timestamp: Date.now(),
1065+
metadata: JSON.stringify({ tier: "l1" }),
1066+
},
1067+
score: 0.88,
1068+
sources: { vector: { score: 0.88, rank: 1 } },
1069+
},
1070+
{
1071+
entry: {
1072+
id: "tiered-2",
1073+
text: "prefer TypeScript strict mode",
1074+
category: "preference",
1075+
scope: "global",
1076+
importance: 0.85,
1077+
timestamp: Date.now(),
1078+
metadata: JSON.stringify({ tier: "l2" }),
1079+
},
1080+
score: 0.82,
1081+
sources: { vector: { score: 0.82, rank: 2 } },
1082+
},
1083+
]);
1084+
1085+
const output = await hook(
1086+
{ prompt: "What are my coding preferences?" },
1087+
{ sessionId: "tier-prefix-test", sessionKey: "agent:main:session:tier-prefix-test", agentId: "main" },
1088+
);
1089+
1090+
assert.ok(output, "expected recall output");
1091+
// Both entries should have a tier prefix (first char of tier, uppercased, in brackets)
1092+
const lines = output.prependContext.split("\n").filter((l) => l.startsWith("- ["));
1093+
assert.ok(lines.length >= 2, "expected at least 2 recall lines");
1094+
for (const line of lines) {
1095+
assert.match(line, /^- \[[A-Z]\]\[/, "recall line should start with tier prefix [X][");
1096+
}
1097+
});
9271098
});
9281099

0 commit comments

Comments
 (0)