Skip to content
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 198 additions & 1 deletion cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

import type { Command } from "commander";
import { readFileSync } from "node:fs";
import { readFileSync, type Dirent } from "node:fs";
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
import { homedir } from "node:os";
import path from "node:path";
Expand Down Expand Up @@ -1036,6 +1036,203 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
}
});

/**
 * import-markdown: Import memories from Markdown memory files into the plugin store.
 * Targets MEMORY.md and memory/YYYY-MM-DD.md files found in OpenClaw workspaces.
 *
 * Scans `<openclaw-home>/workspace/<name>/MEMORY.md` and
 * `<openclaw-home>/workspace/<name>/memory/YYYY-MM-DD*.md`, plus (when no
 * workspace filter is given) the flat `<openclaw-home>/workspace/memory/`
 * directory. Each Markdown bullet line ("- ", "* ", "+ ") becomes one stored
 * memory entry.
 */
memory
  .command("import-markdown [workspace-glob]")
  .description("Import memories from Markdown files (MEMORY.md, memory/YYYY-MM-DD.md) into the plugin store")
  .option("--dry-run", "Show what would be imported without importing")
  .option("--scope <scope>", "Import into specific scope (default: global)")
  .option(
    "--openclaw-home <path>",
    "OpenClaw home directory (default: ~/.openclaw)",
  )
  .option(
    "--dedup",
    "Skip entries already in store (scope-aware exact match, requires store.bm25Search)",
  )
  .option(
    "--min-text-length <n>",
    "Minimum text length to import (default: 5)",
    "5",
  )
  .option(
    "--importance <n>",
    "Importance score for imported entries, 0.0-1.0 (default: 0.7)",
    "0.7",
  )
  .action(async (workspaceGlob, options) => {
    const openclawHome = options.openclawHome
      ? path.resolve(options.openclawHome)
      : path.join(homedir(), ".openclaw");

    const workspaceDir = path.join(openclawHome, "workspace");
    let imported = 0;
    let skipped = 0;
    let foundFiles = 0;

    // Embedding is mandatory for real imports; bail out early with a clear
    // message rather than failing per-entry later.
    if (!context.embedder) {
      console.error(
        "import-markdown requires an embedder. Use via plugin CLI or ensure embedder is configured.",
      );
      process.exit(1);
    }

    // Infer workspace scope from openclaw.json agents list
    // (flat memory/ files have no per-file metadata, so we derive scope from config)
    const fsPromises = await import("node:fs/promises");
    let workspaceScope = ""; // empty = no scope override for nested workspaces
    try {
      const configPath = path.join(openclawHome, "openclaw.json");
      const configContent = await fsPromises.readFile(configPath, "utf-8");
      const config = JSON.parse(configContent);
      const agentsList: Array<{ id?: string; workspace?: string }> = config?.agents?.list ?? [];
      const matched = agentsList.find((a) => {
        if (!a.workspace) return false;
        return path.normalize(a.workspace) === workspaceDir;
      });
      if (matched?.id) {
        workspaceScope = matched.id;
      }
    } catch { /* use default */ }

    // Scan workspace directories
    let workspaceEntries: Dirent[];
    try {
      workspaceEntries = await fsPromises.readdir(workspaceDir, { withFileTypes: true });
    } catch {
      console.error(`Failed to read workspace directory: ${workspaceDir}`);
      process.exit(1);
    }

    // Collect all markdown files to scan
    const mdFiles: Array<{ filePath: string; scope: string }> = [];

    for (const entry of workspaceEntries) {
      if (!entry.isDirectory()) continue;
      // Substring match against the directory name (not a real glob).
      if (workspaceGlob && !entry.name.includes(workspaceGlob)) continue;

      const workspacePath = path.join(workspaceDir, entry.name);

      // MEMORY.md
      const memoryMd = path.join(workspacePath, "MEMORY.md");
      try {
        await fsPromises.stat(memoryMd);
        mdFiles.push({ filePath: memoryMd, scope: entry.name });
      } catch { /* not found */ }

      // memory/ directory — only date-prefixed .md files are imported.
      const memoryDir = path.join(workspacePath, "memory");
      try {
        const stats = await fsPromises.stat(memoryDir);
        if (stats.isDirectory()) {
          const files = await fsPromises.readdir(memoryDir);
          for (const f of files) {
            if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
              mdFiles.push({ filePath: path.join(memoryDir, f), scope: entry.name });
            }
          }
        }
      } catch { /* not found */ }
    }

    // Also scan the flat `workspace/memory/` directory directly under workspace root
    // (not inside any workspace subdirectory — supports James's actual structure).
    // This scan runs regardless of whether nested workspace mdFiles were found,
    // so flat memory is always reachable even when all nested workspaces are empty.
    // Skip if a specific workspace was requested (workspaceGlob), to avoid importing
    // root flat memory when the user meant to import only one workspace.
    if (!workspaceGlob) {
      const flatMemoryDir = path.join(workspaceDir, "memory");
      try {
        const stats = await fsPromises.stat(flatMemoryDir);
        if (stats.isDirectory()) {
          const files = await fsPromises.readdir(flatMemoryDir);
          for (const f of files) {
            if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
              mdFiles.push({ filePath: path.join(flatMemoryDir, f), scope: workspaceScope || "shared" });
            }
          }
        }
      } catch { /* not found */ }
    }

    if (mdFiles.length === 0) {
      console.log("No Markdown memory files found.");
      return;
    }

    const targetScope = options.scope || "global";
    // Guard against non-numeric CLI input: NaN comparisons are always false,
    // so an unguarded NaN min-length would silently disable the length filter.
    const minTextLengthRaw = Number.parseInt(options.minTextLength ?? "5", 10);
    const minTextLength = Number.isNaN(minTextLengthRaw) ? 5 : minTextLengthRaw;
    // Fall back to the default on NaN and clamp to the documented 0.0-1.0 range.
    const importanceRaw = Number.parseFloat(options.importance ?? "0.7");
    const importanceDefault = Number.isNaN(importanceRaw)
      ? 0.7
      : Math.min(1, Math.max(0, importanceRaw));
    const dedupEnabled = !!options.dedup;

    // Parse each file for memory entries (lines starting with "- ")
    for (const { filePath, scope } of mdFiles) {
      foundFiles++;
      let content = await fsPromises.readFile(filePath, "utf-8");
      // Strip UTF-8 BOM (e.g. from Windows Notepad-saved files)
      content = content.replace(/^\uFEFF/, "");
      // Normalize line endings: handle both CRLF (\r\n) and LF (\n)
      const lines = content.split(/\r?\n/);

      for (const line of lines) {
        // Skip non-memory lines
        // Supports: "- text", "* text", "+ text" (standard Markdown bullet formats)
        if (!/^[-*+]\s/.test(line)) continue;
        const text = line.slice(2).trim();
        if (text.length < minTextLength) { skipped++; continue; }

        // ── Deduplication check (scope-aware exact match) ───────────────────
        // Run even in dry-run so --dry-run --dedup reports accurate counts
        // NOTE(review): only the single top BM25 hit is compared; an exact
        // duplicate ranked second would slip through — confirm acceptable.
        if (dedupEnabled) {
          try {
            const existing = await context.store.bm25Search(text, 1, [targetScope]);
            if (existing.length > 0 && existing[0].entry.text === text) {
              skipped++;
              if (!options.dryRun) {
                console.log(` [skip] already imported: ${text.slice(0, 60)}${text.length > 60 ? "..." : ""}`);
              }
              continue;
            }
          } catch {
            // bm25Search not available on this store implementation; proceed with import
          }
        }

        if (options.dryRun) {
          console.log(` [dry-run] would import: ${text.slice(0, 80)}${text.length > 80 ? "..." : ""}`);
          imported++;
          continue;
        }

        try {
          const vector = await context.embedder!.embedPassage(text);
          await context.store.store({
            text,
            vector,
            importance: importanceDefault,
            category: "other",
            scope: targetScope,
            metadata: JSON.stringify({ importedFrom: filePath, sourceScope: scope }),
          });
          imported++;
        } catch (err) {
          // Per-entry failures are non-fatal: log and keep importing the rest.
          console.warn(` Failed to import: ${text.slice(0, 60)}... — ${err}`);
          skipped++;
        }
      }
    }

    if (options.dryRun) {
      console.log(`\nDRY RUN — found ${foundFiles} files, ${imported} entries would be imported, ${skipped} skipped${dedupEnabled ? " [dedup enabled]" : ""}`);
    } else {
      console.log(`\nImport complete: ${imported} imported, ${skipped} skipped (scanned ${foundFiles} files)${dedupEnabled ? " [dedup enabled]" : ""}`);
    }
  });

// Re-embed an existing LanceDB into the current target DB (A/B testing)
memory
.command("reembed")
Expand Down
Loading
Loading