Skip to content
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 198 additions & 1 deletion cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

import type { Command } from "commander";
import { readFileSync } from "node:fs";
import { readFileSync, type Dirent } from "node:fs";
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
import { homedir } from "node:os";
import path from "node:path";
Expand Down Expand Up @@ -1036,6 +1036,203 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
}
});

/**
 * import-markdown: Import memories from Markdown memory files into the plugin store.
 * Targets MEMORY.md and memory/YYYY-MM-DD.md files found in OpenClaw workspaces.
 *
 * Scans `<openclaw-home>/workspace/<name>/MEMORY.md` and
 * `<openclaw-home>/workspace/<name>/memory/YYYY-MM-DD*.md`, plus (when no
 * workspace filter is given) the flat `<openclaw-home>/workspace/memory/`
 * directory. Each Markdown bullet line ("- ", "* ", "+ ") becomes one stored
 * memory entry.
 */
memory
  .command("import-markdown [workspace-glob]")
  .description("Import memories from Markdown files (MEMORY.md, memory/YYYY-MM-DD.md) into the plugin store")
  .option("--dry-run", "Show what would be imported without importing")
  .option("--scope <scope>", "Import into specific scope (default: global)")
  .option(
    "--openclaw-home <path>",
    "OpenClaw home directory (default: ~/.openclaw)",
  )
  .option(
    "--dedup",
    "Skip entries already in store (scope-aware exact match, requires store.bm25Search)",
  )
  .option(
    "--min-text-length <n>",
    "Minimum text length to import (default: 5)",
    "5",
  )
  .option(
    "--importance <n>",
    "Importance score for imported entries, 0.0-1.0 (default: 0.7)",
    "0.7",
  )
  .action(async (workspaceGlob, options) => {
    const openclawHome = options.openclawHome
      ? path.resolve(options.openclawHome)
      : path.join(homedir(), ".openclaw");

    const workspaceDir = path.join(openclawHome, "workspace");
    let imported = 0;
    let skipped = 0;
    let foundFiles = 0;

    // Embedding is mandatory for real imports; bail out early with a clear
    // message rather than failing per-entry later.
    if (!context.embedder) {
      console.error(
        "import-markdown requires an embedder. Use via plugin CLI or ensure embedder is configured.",
      );
      process.exit(1);
    }

    // Infer workspace scope from openclaw.json agents list
    // (flat memory/ files have no per-file metadata, so we derive scope from config)
    const fsPromises = await import("node:fs/promises");
    let workspaceScope = ""; // empty = no scope override for nested workspaces
    try {
      const configPath = path.join(openclawHome, "openclaw.json");
      const configContent = await fsPromises.readFile(configPath, "utf-8");
      const config = JSON.parse(configContent);
      const agentsList: Array<{ id?: string; workspace?: string }> = config?.agents?.list ?? [];
      const matched = agentsList.find((a) => {
        if (!a.workspace) return false;
        return path.normalize(a.workspace) === workspaceDir;
      });
      if (matched?.id) {
        workspaceScope = matched.id;
      }
    } catch { /* use default */ }

    // Scan workspace directories
    let workspaceEntries: Dirent[];
    try {
      workspaceEntries = await fsPromises.readdir(workspaceDir, { withFileTypes: true });
    } catch {
      console.error(`Failed to read workspace directory: ${workspaceDir}`);
      process.exit(1);
    }

    // Collect all markdown files to scan
    const mdFiles: Array<{ filePath: string; scope: string }> = [];

    for (const entry of workspaceEntries) {
      if (!entry.isDirectory()) continue;
      // Substring match against the directory name (not a real glob).
      if (workspaceGlob && !entry.name.includes(workspaceGlob)) continue;

      const workspacePath = path.join(workspaceDir, entry.name);

      // MEMORY.md
      const memoryMd = path.join(workspacePath, "MEMORY.md");
      try {
        await fsPromises.stat(memoryMd);
        mdFiles.push({ filePath: memoryMd, scope: entry.name });
      } catch { /* not found */ }

      // memory/ directory — only date-prefixed .md files are imported.
      const memoryDir = path.join(workspacePath, "memory");
      try {
        const stats = await fsPromises.stat(memoryDir);
        if (stats.isDirectory()) {
          const files = await fsPromises.readdir(memoryDir);
          for (const f of files) {
            if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
              mdFiles.push({ filePath: path.join(memoryDir, f), scope: entry.name });
            }
          }
        }
      } catch { /* not found */ }
    }

    // Also scan the flat `workspace/memory/` directory directly under workspace root
    // (not inside any workspace subdirectory — supports James's actual structure).
    // This scan runs regardless of whether nested workspace mdFiles were found,
    // so flat memory is always reachable even when all nested workspaces are empty.
    // Skip if a specific workspace was requested (workspaceGlob), to avoid importing
    // root flat memory when the user meant to import only one workspace.
    if (!workspaceGlob) {
      const flatMemoryDir = path.join(workspaceDir, "memory");
      try {
        const stats = await fsPromises.stat(flatMemoryDir);
        if (stats.isDirectory()) {
          const files = await fsPromises.readdir(flatMemoryDir);
          for (const f of files) {
            if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
              mdFiles.push({ filePath: path.join(flatMemoryDir, f), scope: workspaceScope || "shared" });
            }
          }
        }
      } catch { /* not found */ }
    }

    if (mdFiles.length === 0) {
      console.log("No Markdown memory files found.");
      return;
    }

    const targetScope = options.scope || "global";
    // Guard against non-numeric CLI input: NaN comparisons are always false,
    // so an unguarded NaN min-length would silently disable the length filter.
    const minTextLengthRaw = Number.parseInt(options.minTextLength ?? "5", 10);
    const minTextLength = Number.isNaN(minTextLengthRaw) ? 5 : minTextLengthRaw;
    // Fall back to the default on NaN and clamp to the documented 0.0-1.0 range.
    const importanceRaw = Number.parseFloat(options.importance ?? "0.7");
    const importanceDefault = Number.isNaN(importanceRaw)
      ? 0.7
      : Math.min(1, Math.max(0, importanceRaw));
    const dedupEnabled = !!options.dedup;

    // Parse each file for memory entries (lines starting with "- ")
    for (const { filePath, scope } of mdFiles) {
      foundFiles++;
      let content = await fsPromises.readFile(filePath, "utf-8");
      // Strip UTF-8 BOM (e.g. from Windows Notepad-saved files)
      content = content.replace(/^\uFEFF/, "");
      // Normalize line endings: handle both CRLF (\r\n) and LF (\n)
      const lines = content.split(/\r?\n/);

      for (const line of lines) {
        // Skip non-memory lines
        // Supports: "- text", "* text", "+ text" (standard Markdown bullet formats)
        if (!/^[-*+]\s/.test(line)) continue;
        const text = line.slice(2).trim();
        if (text.length < minTextLength) { skipped++; continue; }

        // ── Deduplication check (scope-aware exact match) ───────────────────
        // Run even in dry-run so --dry-run --dedup reports accurate counts
        // NOTE(review): only the single top BM25 hit is compared; an exact
        // duplicate ranked second would slip through — confirm acceptable.
        if (dedupEnabled) {
          try {
            const existing = await context.store.bm25Search(text, 1, [targetScope]);
            if (existing.length > 0 && existing[0].entry.text === text) {
              skipped++;
              if (!options.dryRun) {
                console.log(` [skip] already imported: ${text.slice(0, 60)}${text.length > 60 ? "..." : ""}`);
              }
              continue;
            }
          } catch {
            // bm25Search not available on this store implementation; proceed with import
          }
        }

        if (options.dryRun) {
          console.log(` [dry-run] would import: ${text.slice(0, 80)}${text.length > 80 ? "..." : ""}`);
          imported++;
          continue;
        }

        try {
          const vector = await context.embedder!.embedPassage(text);
          await context.store.store({
            text,
            vector,
            importance: importanceDefault,
            category: "other",
            scope: targetScope,
            metadata: JSON.stringify({ importedFrom: filePath, sourceScope: scope }),
          });
          imported++;
        } catch (err) {
          // Per-entry failures are non-fatal: log and keep importing the rest.
          console.warn(` Failed to import: ${text.slice(0, 60)}... — ${err}`);
          skipped++;
        }
      }
    }

    if (options.dryRun) {
      console.log(`\nDRY RUN — found ${foundFiles} files, ${imported} entries would be imported, ${skipped} skipped${dedupEnabled ? " [dedup enabled]" : ""}`);
    } else {
      console.log(`\nImport complete: ${imported} imported, ${skipped} skipped (scanned ${foundFiles} files)${dedupEnabled ? " [dedup enabled]" : ""}`);
    }
  });

// Re-embed an existing LanceDB into the current target DB (A/B testing)
memory
.command("reembed")
Expand Down
Loading
Loading