activeloopai · kaghni · May 18, 2026 · May 15, 2026 · May 18, 2026 · May 18, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -78,6 +78,18 @@ jobs:
         # — first CI run after the openclaw skillify wiring landed).
         run: npm run build
 
+      - name: Audit openclaw bundle against ClawHub static-scan rules
+        # PR-time gate mirroring release.yml's audit step. Catches a
+        # flagged bundle BEFORE the PR merges instead of only at release
+        # time (where it'd correctly abort the publish, but the bad code
+        # would already be in main). `--criticals-only` blocks on
+        # critical findings only; warns are advisory (the openclaw
+        # skillify-worker's `readFileSync` + `fetch` warn is irreducible
+        # without splitting the worker into multiple shipped files).
+        # If this step trips, run `npm run audit:openclaw` locally and
+        # see scripts/audit-openclaw-bundle.mjs for the rule details.
+        run: npm run audit:openclaw -- --criticals-only
+
       - name: Run tests with coverage
         # Per-file 80% thresholds for PR #60 files are declared in
         # vitest.config.ts under `coverage.thresholds`. Vitest exits non-zero

diff --git a/openclaw/src/index.ts b/openclaw/src/index.ts
@@ -67,7 +67,8 @@ import { homedir, tmpdir } from "node:os";
 import {
   existsSync as fsExists, mkdirSync as fsMkdir, openSync as fsOpen,
   closeSync as fsClose, writeFileSync as fsWriteFile, constants as fsConstants,
-  readFileSync as fsReadFile, renameSync as fsRename,
+  readFileSync as fsReadFile, renameSync as fsRename, unlinkSync as fsUnlink,
+  statSync as fsStat,
 } from "node:fs";
 import { createHash } from "node:crypto";
 // node:child_process is stubbed in the main openclaw bundle (see esbuild.config.mjs
@@ -374,6 +375,15 @@ let captureEnabled = true;
 const capturedCounts = new Map<string, number>();
 const fallbackSessionId = crypto.randomUUID();
 
+// Per-runtime dedup of skillify worker spawns. Without this, every
+// agent_end after the previous worker exits re-acquires the on-disk
+// lock and spawns a fresh worker, which does one watermark-check SQL
+// round-trip and exits — wasted Node cold-start + DB I/O across a long
+// session. Single-spawn-per-session-per-runtime matches what the
+// non-openclaw agents already do via `tryAcquireWorkerLock` semantics
+// in src/skillify/state.ts. See #100.
+const skillifySpawnedFor = new Set<string>();
+
 // --- Skillify worker spawn (mirror of src/skillify/spawn-skillify-worker.ts) ---
 //
 // OpenClaw can't import the shared skillify TS modules — its bundle is
@@ -425,14 +435,61 @@ function deriveOpenclawProjectKey(channel: string): { key: string; project: stri
   return { key, project };
 }
 
+// Per-project filesystem lock guarding the skillify worker spawn.
+// Mirrors `tryAcquireWorkerLock` in src/skillify/state.ts: writes a ms
+// timestamp into the lock file when acquired, treats locks older than
+// LOCK_MAX_AGE_MS as stale (abnormal worker death, kernel kill, OOM —
+// the worker's `finally`-release didn't run), unlinks and re-acquires.
+// Without this, a single crashed worker halts mining for that
+// project_key permanently until manual cleanup. See #110.
+//
+// Empty pre-existing locks (from earlier code that wrote no payload)
+// parse as NaN and are treated as immediately stale — clean migration
+// on first patched run.
+const LOCK_MAX_AGE_MS = 10 * 60 * 1000; // 10 min, generous vs typical
+                                        // worker run (<30s + buffer)
+
 function tryAcquireOpenclawSkillifyLock(projectKey: string): boolean {
   try {
     migrateOpenclawSkillifyLegacyStateDir();
     fsMkdir(OPENCLAW_SKILLIFY_STATE_DIR, { recursive: true });
     const lockPath = joinPath(OPENCLAW_SKILLIFY_STATE_DIR, `${projectKey}.worker.lock`);
-    const fd = fsOpen(lockPath, fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY);
-    fsClose(fd);
-    return true;
+    const acquire = (): boolean => {
+      const fd = fsOpen(lockPath, fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY);
+      try {
+        fsWriteFile(fd, String(Date.now()));
+      } finally {
+        fsClose(fd);
+      }
+      return true;
+    };
+    try {
+      return acquire();
+    } catch {
+      // O_EXCL failed → lock file already exists. Check staleness.
+      // There's a brief window between O_CREAT|O_EXCL and the timestamp
+      // write where a racing caller can see an empty body. Don't treat
+      // empty/NaN as immediately stale (CodeRabbit on #172) — fall back
+      // to the file's mtime to decide. If the FILE is fresh, the
+      // competitor is mid-write and we should yield; if the file is
+      // older than LOCK_MAX_AGE_MS, the previous holder crashed without
+      // writing the timestamp (or the disk lost it), and we can recycle.
+      try {
+        const body = fsReadFile(lockPath, "utf-8");
+        const ts = Number.parseInt(body.trim(), 10);
+        const ageByBody = Number.isFinite(ts) ? Date.now() - ts : Number.POSITIVE_INFINITY;
+        let ageByMtime = 0;
+        try { ageByMtime = Date.now() - fsStat(lockPath).mtimeMs; } catch { ageByMtime = 0; }
+        const effectiveAge = Number.isFinite(ts) ? ageByBody : ageByMtime;
+        if (effectiveAge > LOCK_MAX_AGE_MS) {
+          try { fsUnlink(lockPath); } catch { /* race; recheck below */ }
+          try { return acquire(); } catch { return false; }
-    const acquire = (): boolean => {
-      const fd = fsOpen(lockPath, fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY);
-      try {
-        fsWriteFile(fd, String(Date.now()));
-      } finally {
-        fsClose(fd);
-      }
-      return true;
-    };
-    try {
-      return acquire();
-    } catch {
-      // O_EXCL failed → lock file already exists. Check staleness.
-      try {
-        const body = fsReadFile(lockPath, "utf-8");
-        const ts = Number.parseInt(body.trim(), 10);
-        const age = Date.now() - (Number.isFinite(ts) ? ts : 0);
-        if (!Number.isFinite(ts) || age > LOCK_MAX_AGE_MS) {
-          try { fsUnlink(lockPath); } catch { /* race; recheck below */ }
-          try { return acquire(); } catch { return false; }
+    const acquire = (): boolean => {
+      const fd = fsOpen(lockPath, fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY);
+      try {
+        fsWriteFile(fd, String(Date.now()));
+      } finally {
+        fsClose(fd);
+      }
+      return true;
+    };
+    try {
+      return acquire();
+    } catch {
+      // O_EXCL failed → lock file already exists. Check staleness.
+      try {
+        const body = fsReadFile(lockPath, "utf-8");
+        const ts = Number.parseInt(body.trim(), 10);
+        const age = Number.isFinite(ts)
+          ? Date.now() - ts
+          : Date.now() - fsStat(lockPath).mtimeMs;
+        if (age > LOCK_MAX_AGE_MS) {
+          try { fsUnlink(lockPath); } catch { /* race; recheck below */ }
+          try { return acquire(); } catch { return false; }
-    const acquire = (): boolean => {
-      const fd = fsOpen(lockPath, fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY);
-      try {
-        fsWriteFile(fd, String(Date.now()));
-      } finally {
-        fsClose(fd);
-      }
-      return true;
-    };
-    try {
-      return acquire();
-    } catch {
-      // O_EXCL failed → lock file already exists. Check staleness.
-      try {
-        const body = fsReadFile(lockPath, "utf-8");
-        const ts = Number.parseInt(body.trim(), 10);
-        const age = Date.now() - (Number.isFinite(ts) ? ts : 0);
-        if (!Number.isFinite(ts) || age > LOCK_MAX_AGE_MS) {
-          try { fsUnlink(lockPath); } catch { /* race; recheck below */ }
-          try { return acquire(); } catch { return false; }
+    const acquire = (): boolean => {
+      const fd = fsOpen(lockPath, fsConstants.O_CREAT | fsConstants.O_EXCL | fsConstants.O_WRONLY);
+      try {
+        fsWriteFile(fd, String(Date.now()));
+      } finally {
+        fsClose(fd);
+      }
+      return true;
+    };
+    try {
+      return acquire();
+    } catch {
+      // O_EXCL failed → lock file already exists. Check staleness.
+      try {
+        const body = fsReadFile(lockPath, "utf-8");
+        const ts = Number.parseInt(body.trim(), 10);
+        const age = Number.isFinite(ts)
+          ? Date.now() - ts
+          : Date.now() - fsStat(lockPath).mtimeMs;
+        if (age > LOCK_MAX_AGE_MS) {
+          try { fsUnlink(lockPath); } catch { /* race; recheck below */ }
+          try { return acquire(); } catch { return false; }
+        }
+        return false; // fresh lock held by a live worker — skip spawn
+      } catch {
+        return false; // couldn't stat/read; safer to skip than double-spawn
+      }
+    }
   } catch { return false; }
 }
 
@@ -489,26 +546,36 @@ function detectOpenclawGateAgent(): GateAgent | null {
   return null;
 }
 
-function spawnOpenclawSkillifyWorker(a: OpenclawSpawnArgs): void {
+/**
+ * Returns true when the worker was actually spawned (the caller can
+ * record the session in the per-runtime dedup set). Returns false on
+ * any "didn't spawn" outcome — missing worker, no delegate gate CLI,
+ * lock not acquired, mkdir/config write failure, or spawn() throw —
+ * so the caller can let a future agent_end retry. CodeRabbit on #172
+ * caught the previous flow that recorded the session before knowing
+ * whether spawn succeeded, suppressing retries forever within the
+ * runtime.
+ */
+function spawnOpenclawSkillifyWorker(a: OpenclawSpawnArgs): boolean {
   if (!fsExists(OPENCLAW_SKILLIFY_WORKER_PATH)) {
     a.loggerWarn?.(`skillify worker missing at ${OPENCLAW_SKILLIFY_WORKER_PATH} — reinstall openclaw plugin`);
-    return;
+    return false;
   }
   const gateAgent = detectOpenclawGateAgent();
   if (!gateAgent) {
     a.loggerWarn?.(`skillify spawn: no delegate gate CLI found on PATH (need one of: claude, codex, cursor-agent, hermes, pi). Mining skipped.`);
-    return;
+    return false;
   }
   const { key: projectKey, project } = deriveOpenclawProjectKey(a.channel);
   if (!tryAcquireOpenclawSkillifyLock(projectKey)) {
     // A worker is already running for this project — skip (next agent_end may
     // re-fire after the worker releases the lock, or the worker watermark
     // advance makes the re-fire a no-op).
-    return;
+    return false;
   }
   const tmpDir = joinPath(tmpdir(), `deeplake-skillify-openclaw-${projectKey}-${Date.now()}`);
   try { fsMkdir(tmpDir, { recursive: true, mode: 0o700 }); }
-  catch (e: any) { a.loggerWarn?.(`skillify spawn: mkdir failed: ${e?.message ?? e}`); return; }
+  catch (e: any) { a.loggerWarn?.(`skillify spawn: mkdir failed: ${e?.message ?? e}`); return false; }
   const configPath = joinPath(tmpDir, "config.json");
 
   // install: "global" — openclaw has no per-project filesystem cwd, so written
@@ -549,16 +616,18 @@ function spawnOpenclawSkillifyWorker(a: OpenclawSpawnArgs): void {
     },
   };
   try { fsWriteFile(configPath, JSON.stringify(config), { mode: 0o600 }); }
-  catch (e: any) { a.loggerWarn?.(`skillify spawn: config write failed: ${e?.message ?? e}`); return; }
+  catch (e: any) { a.loggerWarn?.(`skillify spawn: config write failed: ${e?.message ?? e}`); return false; }
 
   try {
     realSpawn(process.execPath, [OPENCLAW_SKILLIFY_WORKER_PATH, configPath], {
       detached: true,
       stdio: "ignore",
       env: { ...inheritedEnv.env, HIVEMIND_SKILLIFY_WORKER: "1", HIVEMIND_CAPTURE: "false" },
     }).unref();
+    return true;
   } catch (e: any) {
     a.loggerWarn?.(`skillify spawn: spawn failed: ${e?.message ?? e}`);
+    return false;
   }
 }
 
@@ -1258,23 +1327,43 @@ export default definePluginEntry({
           // never blocks the agent. Worker reads from the sessions table we
           // just wrote to. Non-fatal: a spawn failure here only loses one
           // mining attempt, never breaks capture.
-          try {
-            spawnOpenclawSkillifyWorker({
-              apiUrl: cfg.apiUrl,
-              token: cfg.token,
-              orgId: cfg.orgId,
-              workspaceId: cfg.workspaceId,
-              userName: cfg.userName,
-              channel: ev.channel || "openclaw",
-              sessionId: sid,
-              loggerWarn: (msg) => logger.error(`Skillify spawn: ${msg}`),
-              // Pass the same tuning dispatch the plugin populated at
-              // register-time. The worker will repopulate its own
-              // globalThis from this.
-              tuning: (globalThis as Record<string, unknown>).__hivemind_tuning__ as Record<string, string | undefined> | undefined,
-            });
-          } catch (e: any) {
-            logger.error(`Skillify spawn threw: ${e?.message ?? e}`);
+          //
+          // Per-runtime dedup (see #100): on long sessions, agent_end fires
+          // many times, and the previous worker has typically finished by
+          // the second or third turn — releasing the on-disk lock. Without
+          // this guard, every subsequent agent_end re-acquires the lock and
+          // spawns a fresh worker that does one watermark-check SQL roundtrip
+          // and exits. The on-disk lock is still authoritative across
+          // processes (e.g. multiple gateway restarts); this Set only
+          // suppresses redundant spawns within the same runtime.
+          if (!skillifySpawnedFor.has(sid)) {
+            // Only record the session as deduped on SUCCESSFUL spawn.
+            // spawnOpenclawSkillifyWorker has multiple non-exception
+            // failure paths (no delegate CLI, lock held by a fresh
+            // worker, mkdir/config write failure, spawn throw). If we
+            // add to the set before knowing the outcome, one transient
+            // failure suppresses every retry for the rest of the
+            // runtime. CodeRabbit on #172.
+            try {
+              if (spawnOpenclawSkillifyWorker({
+                apiUrl: cfg.apiUrl,
+                token: cfg.token,
+                orgId: cfg.orgId,
+                workspaceId: cfg.workspaceId,
+                userName: cfg.userName,
+                channel: ev.channel || "openclaw",
+                sessionId: sid,
+                loggerWarn: (msg) => logger.error(`Skillify spawn: ${msg}`),
+                // Pass the same tuning dispatch the plugin populated at
+                // register-time. The worker will repopulate its own
+                // globalThis from this.
+                tuning: (globalThis as Record<string, unknown>).__hivemind_tuning__ as Record<string, string | undefined> | undefined,
+              })) {
+                skillifySpawnedFor.add(sid);
+              }
+            } catch (e: any) {
+              logger.error(`Skillify spawn threw: ${e?.message ?? e}`);
+            }
           }
         } catch (err) {
           logger.error(`Auto-capture failed: ${err instanceof Error ? err.message : String(err)}`);

diff --git a/tests/claude-code/skillify-session-start-injection.test.ts b/tests/claude-code/skillify-session-start-injection.test.ts
@@ -283,8 +283,16 @@ describe("OpenClaw skillify worker (mining) wiring", () => {
     expect(src).toMatch(/OPENCLAW_SKILLIFY_WORKER_PATH\s*=\s*joinPath\(__openclaw_dirname,\s*"skillify-worker\.js"\)/);
     // HIVEMIND_SKILLIFY_WORKER=1 recursion guard set on spawn env
     expect(src).toMatch(/HIVEMIND_SKILLIFY_WORKER:\s*"1"/);
-    // agent_end hook calls it after the capture loop
-    expect(src).toMatch(/agent_end[\s\S]{0,3500}Auto-captured[\s\S]{0,500}spawnOpenclawSkillifyWorker/);
+    // Per-runtime dedup guard (#100) — explicit assertions so a future
+    // refactor that drops the Set or its has/add usage fails this test
+    // (CodeRabbit on #172).
+    expect(src).toMatch(/const skillifySpawnedFor = new Set<string>\(\)/);
+    expect(src).toMatch(/if \(!skillifySpawnedFor\.has\(sid\)\)/);
+    expect(src).toMatch(/skillifySpawnedFor\.add\(sid\)/);
+    // agent_end hook calls it after the capture loop. Distance bumped
+    // from 500→1500 to accommodate the per-runtime spawn-dedup comment
+    // block landed for #100 between `Auto-captured` and the spawn site.
+    expect(src).toMatch(/agent_end[\s\S]{0,3500}Auto-captured[\s\S]{0,1500}spawnOpenclawSkillifyWorker/);
     // install: "global" — no per-project cwd, skills land under ~/.claude/skills/
     expect(src).toMatch(/install:\s*"global"/);
   });