Merge pull request #484 from dmoliveira/chore/enforcer-investigation-refresh2

dmoliveira · web-flow · commit 734a1dc92c06 · 2026-03-13T18:26:24.000+11:00
Improve continuation enforcer observability
diff --git a/.opencode/gateway-core.config.json b/.opencode/gateway-core.config.json
@@ -11,7 +11,7 @@
     "hookModes": {
       "auto-slash-command": "assist",
       "provider-error-classifier": "assist",
-      "todo-continuation-enforcer": "shadow"
+      "todo-continuation-enforcer": "assist"
     },
     "command": "opencode",
     "model": "openai/gpt-5.1-codex-mini",
diff --git a/README.md b/README.md
@@ -174,7 +174,7 @@ Profiles:
 - `scripts/autoflow_command.py` - backend script for `/autoflow`
 - `scripts/init_deep_command.py` - backend script for `/init-deep`
 - `scripts/continuation_stop_command.py` - backend script for `/continuation-stop`
-- `scripts/opencode_session.sh` - optional wrapper to run digest on process exit
+- `scripts/opencode_session.sh` - optional wrapper to run digest on process exit and enable `MY_OPENCODE_GATEWAY_EVENT_AUDIT=1` by default with rotation; after a wrapped session, `/gateway continuation report` is the fastest check for recent `todo-continuation-enforcer` activity
 - `scripts/telemetry_command.py` - backend script for `/telemetry`
 - `scripts/post_session_command.py` - backend script for `/post-session`
 - `scripts/policy_command.py` - policy profile helper used by `/notify policy ...` and stack presets
diff --git a/docs/command-handbook.md b/docs/command-handbook.md
@@ -126,7 +126,7 @@ Use these directly in OpenCode:
 - event: `events.<type>`
 - per-event channel: `channels.<type>.sound|visual`
 
-`/notify inbox` reads the repo-local gateway event audit feed from `.opencode/gateway-events.jsonl` (or `MY_OPENCODE_GATEWAY_EVENT_AUDIT_PATH` when set). Enable gateway event auditing with `MY_OPENCODE_GATEWAY_EVENT_AUDIT=1` to populate inbox entries.
+`/notify inbox` reads the repo-local gateway event audit feed from `.opencode/gateway-events.jsonl` (or `MY_OPENCODE_GATEWAY_EVENT_AUDIT_PATH` when set). Enable gateway event auditing with `MY_OPENCODE_GATEWAY_EVENT_AUDIT=1` to populate inbox entries. After a wrapped session (one launched through `scripts/opencode_session.sh`), `/gateway continuation report` is the fastest way to check recent `todo-continuation-enforcer` audit activity.
 
 ## Session digest inside OpenCode 🧾
 
@@ -153,6 +153,9 @@ Optional environment variables:
 - `MY_OPENCODE_DIGEST_PATH` custom output path
 - `MY_OPENCODE_DIGEST_HOOK` command to run after digest is written
 - `DIGEST_REASON_ON_EXIT` custom reason label (default `exit`)
+- `MY_OPENCODE_GATEWAY_EVENT_AUDIT` audit toggle for hook diagnostics (`opencode_session.sh` defaults to `1`; set `0` to disable)
+- `MY_OPENCODE_GATEWAY_EVENT_AUDIT_MAX_BYTES` max audit file size before rotation (`opencode_session.sh` defaults to `8388608`)
+- `MY_OPENCODE_GATEWAY_EVENT_AUDIT_MAX_BACKUPS` rotated audit backup count (`opencode_session.sh` defaults to `5`)
 
 When `--run-post` is used, digest also evaluates `post_session` config and stores hook results in the digest JSON.
 
@@ -278,6 +281,7 @@ Use these directly in OpenCode:
 /gateway enable
 /gateway disable
 /gateway doctor
+/gateway continuation report --minutes 120 --limit 10 --json
 /gateway tune memory --json
 /gateway recover memory --apply --resume --compress --force-kill
 /gateway protection report --limit 20 --json
@@ -291,12 +295,15 @@ Notes:
 - `/gateway status` and `/gateway doctor` run orphan cleanup before reporting runtime loop state.
 - `/gateway status --json` now includes `mistake_ledger` so operators can see whether validation deferrals are accumulating in `.opencode/mistake-ledger.jsonl`.
 - `/gateway doctor --json` now includes `hook_diagnostics` and fails when gateway is enabled without a valid built hook surface.
+- `/gateway continuation report --json` summarizes recent `todo-continuation-enforcer` audit events so you can see reason codes, stages, and affected sessions quickly.
+- `/gateway continuation report --json` now also exposes `assistant_message_open_todo_events` so you can spot intermediate assistant replies that landed while todos were still open.
+- after a wrapped session, `/gateway continuation report` is the fastest check for recent `todo-continuation-enforcer` activity.
 - parity and naming differences vs upstream are tracked in `docs/upstream-divergence-registry.md`.
-- set `MY_OPENCODE_GATEWAY_EVENT_AUDIT=1` to write hook dispatch diagnostics to `.opencode/gateway-events.jsonl` (override path with `MY_OPENCODE_GATEWAY_EVENT_AUDIT_PATH`).
+- `scripts/opencode_session.sh` now enables `MY_OPENCODE_GATEWAY_EVENT_AUDIT=1` by default with rotation; set `MY_OPENCODE_GATEWAY_EVENT_AUDIT=0` to disable it, or set `MY_OPENCODE_GATEWAY_EVENT_AUDIT_PATH` to override the audit file path (default `.opencode/gateway-events.jsonl`).
 - set `MY_OPENCODE_GATEWAY_DISPATCH_SAMPLE_RATE=<n>` to reduce noisy dispatch audit events (`message.*`, `session.*`, transform dispatch); `1` logs every event, default is `20`.
 
 Debug and troubleshooting guidance:
-- keep gateway event audit off by default during normal work; enable it for time-boxed diagnosis windows (for example, 30-120 minutes).
+- if you launch through `scripts/opencode_session.sh`, gateway event audit is on by default; launch plain `opencode` or set `MY_OPENCODE_GATEWAY_EVENT_AUDIT=0` when you want a quiet run.
 - with audit enabled, expect small extra CPU/file-I/O overhead and log growth; this is not a direct model token-cost increase by itself.
 - after diagnosis, disable audit again to reduce background noise and disk churn.
 
@@ -404,7 +411,7 @@ This index is sourced from `opencode.json` and is used as the complete catalog r
 /devtools - Manage external productivity tools (status|doctor|install|hooks-install)
 /digest - Generate or show session digests (run|show)
 /doctor - Run diagnostics and reason-code registry export
-/gateway - Manage gateway runtime controls (status|enable|disable|doctor|tune memory|recover memory|protection)
+/gateway - Manage gateway runtime controls (status|enable|disable|doctor|continuation report|tune memory|recover memory|protection)
 /governance - Manage governance policy profiles and authorizations (status|profile|authorize|revoke|doctor)
 /health - Show repo health score and drift insights
 /hook-learning - Run hook learning loop controls (pre-command|post-command|route|metrics|doctor)
diff --git a/plugin/gateway-core/dist/hooks/long-turn-watchdog/index.d.ts b/plugin/gateway-core/dist/hooks/long-turn-watchdog/index.d.ts
@@ -3,6 +3,7 @@ export declare function createLongTurnWatchdogHook(options: {
     directory: string;
     enabled: boolean;
     warningThresholdMs: number;
+    toolCallWarningThreshold: number;
     reminderCooldownMs: number;
     maxSessionStateEntries: number;
     prefix: string;
diff --git a/plugin/gateway-core/dist/hooks/long-turn-watchdog/index.js b/plugin/gateway-core/dist/hooks/long-turn-watchdog/index.js
@@ -120,9 +120,9 @@ export function createLongTurnWatchdogHook(options) {
                     reason_code: "below_threshold",
                     session_id: sessionId,
                     elapsed_ms: elapsedMs,
+                    warning_threshold_ms: options.warningThresholdMs,
                     tool_calls_this_turn: state.toolCallsThisTurn,
                     tool_call_warning_threshold: toolCallThreshold,
-                    warning_threshold_ms: options.warningThresholdMs,
                 });
                 return;
             }
diff --git a/plugin/gateway-core/dist/hooks/session-recovery/index.js b/plugin/gateway-core/dist/hooks/session-recovery/index.js
@@ -1,6 +1,7 @@
 import { writeGatewayEventAudit } from "../../audit/event-audit.js";
 import { injectHookMessage, inspectHookMessageSafety } from "../hook-message-injector/index.js";
 import { readCombinedToolAfterOutputText } from "../shared/tool-after-output.js";
+// Returns true when event error resembles recoverable transient session failure.
 function isRecoverableError(error) {
     const candidate = error && typeof error === "object" && "message" in error
         ? String(error.message ?? "")
@@ -12,8 +13,13 @@ function isRecoverableError(error) {
         message.includes("network") ||
         message.includes("timeout"));
 }
+// Resolves session id from error event payload.
 function resolveSessionId(payload) {
-    const candidates = [payload.properties?.sessionID, payload.properties?.sessionId, payload.properties?.info?.id];
+    const candidates = [
+        payload.properties?.sessionID,
+        payload.properties?.sessionId,
+        payload.properties?.info?.id,
+    ];
     for (const value of candidates) {
         if (typeof value === "string" && value.trim()) {
             return value.trim();
@@ -30,12 +36,11 @@ function looksLikeDelegatedTaskAbort(output) {
     const state = nested?.state && typeof nested.state === "object" ? nested.state : null;
     const metadata = state?.metadata && typeof state.metadata === "object" ? state.metadata : null;
     const status = String(state?.status ?? "").trim().toLowerCase();
-    const error = `${String(state?.error ?? "")}
-${String(nested?.error ?? "")}
-${text}`.toLowerCase();
+    const error = `${String(state?.error ?? "")}\n${String(nested?.error ?? "")}\n${text}`.toLowerCase();
     const childSessionId = String(metadata?.sessionId ?? metadata?.sessionID ?? "").trim();
     return {
-        aborted: status === "error" && error.includes("tool execution aborted"),
+        aborted: status === "error" &&
+            error.includes("tool execution aborted"),
         childSessionId,
     };
 }
@@ -77,6 +82,7 @@ async function injectRecoveryMessage(args) {
     });
     return true;
 }
+// Creates session recovery hook that attempts one auto-resume per active error session.
 export function createSessionRecoveryHook(options) {
     const recoveringSessions = new Set();
     return {
diff --git a/plugin/gateway-core/dist/hooks/subagent-telemetry-timeline/index.js b/plugin/gateway-core/dist/hooks/subagent-telemetry-timeline/index.js
@@ -44,8 +44,6 @@ export function createSubagentTelemetryTimelineHook(options) {
                     reasonCode: "subagent_telemetry_child_idle_reconciled",
                     endedAt: Date.now(),
                     childRunId: link.childRunId || undefined,
-                    traceId: link.traceId || undefined,
-                    subagentType: link.subagentType || undefined,
                 }, options.maxTimelineEntries);
                 if (!record) {
                     return;
@@ -89,8 +87,6 @@ export function createSubagentTelemetryTimelineHook(options) {
                         : "subagent_telemetry_child_message_completed_reconciled",
                     endedAt: Date.now(),
                     childRunId: link.childRunId || undefined,
-                    traceId: link.traceId || undefined,
-                    subagentType: link.subagentType || undefined,
                 }, options.maxTimelineEntries);
                 if (!record) {
                     return;
@@ -124,8 +120,6 @@ export function createSubagentTelemetryTimelineHook(options) {
                             reasonCode: "subagent_telemetry_child_deleted_reconciled",
                             endedAt: Date.now(),
                             childRunId: childLink.childRunId || undefined,
-                            traceId: childLink.traceId || undefined,
-                            subagentType: childLink.subagentType || undefined,
                         }, options.maxTimelineEntries);
                         if (record) {
                             writeGatewayEventAudit(options.directory, {
diff --git a/plugin/gateway-core/dist/hooks/todo-continuation-enforcer/index.js b/plugin/gateway-core/dist/hooks/todo-continuation-enforcer/index.js
@@ -437,6 +437,69 @@ export function createTodoContinuationEnforcerHook(options) {
                 state.pendingSource = state.pendingContinuation ? "task_output" : undefined;
                 return;
             }
+            if (type === "message.updated") {
+                const eventPayload = (payload ?? {});
+                const sessionId = resolveSessionId(eventPayload);
+                if (!sessionId) {
+                    return;
+                }
+                const info = eventPayload.properties?.info;
+                if (String(info?.role ?? "").toLowerCase().trim() !== "assistant") {
+                    return;
+                }
+                const completed = Number.isFinite(Number(info?.time?.completed ?? Number.NaN));
+                const failed = info?.error !== undefined && info?.error !== null;
+                if (!completed && !failed) {
+                    return;
+                }
+                const state = getSessionState(sessionState, sessionId);
+                state.lastTraceId = resolveTraceId(eventPayload);
+                if (state.pendingTodoCount <= 0) {
+                    return;
+                }
+                const text = assistantText({
+                    info: { role: "assistant" },
+                    parts: eventPayload.output?.parts ?? eventPayload.properties?.parts,
+                });
+                if (!text) {
+                    return;
+                }
+                const shouldContinue = await resolvePendingContinuationDecision({
+                    text,
+                    continueIntentArmed: state.continueIntentArmed,
+                    source: "assistant_message",
+                    sessionId,
+                    directory: resolveDirectory(eventPayload, options.directory),
+                    traceId: state.lastTraceId,
+                    decisionRuntime: options.decisionRuntime,
+                });
+                if (!shouldContinue) {
+                    state.pendingContinuation = false;
+                    state.pendingSource = undefined;
+                    state.pendingTodoCount = 0;
+                    state.markerProbeAttempted = false;
+                    writeGatewayEventAudit(resolveDirectory(eventPayload, options.directory), {
+                        hook: "todo-continuation-enforcer",
+                        stage: "state",
+                        reason_code: "todo_continuation_assistant_message_no_pending",
+                        session_id: sessionId,
+                        trace_id: state.lastTraceId,
+                    });
+                    return;
+                }
+                state.pendingContinuation = true;
+                state.pendingSource = "assistant_message";
+                state.markerProbeAttempted = false;
+                writeGatewayEventAudit(resolveDirectory(eventPayload, options.directory), {
+                    hook: "todo-continuation-enforcer",
+                    stage: "state",
+                    reason_code: "todo_continuation_assistant_message_with_open_todos",
+                    session_id: sessionId,
+                    trace_id: state.lastTraceId,
+                    open_todo_count: state.pendingTodoCount,
+                });
+                return;
+            }
             if (type !== "session.idle") {
                 return;
             }
@@ -575,6 +638,7 @@ export function createTodoContinuationEnforcerHook(options) {
                 if (injected) {
                     state.consecutiveFailures = 0;
                     state.pendingContinuation = false;
+                    state.pendingTodoCount = 0;
                     state.pendingSource = undefined;
                     state.continueIntentArmed = false;
                     state.markerProbeAttempted = false;
diff --git a/plugin/gateway-core/src/hooks/todo-continuation-enforcer/index.ts b/plugin/gateway-core/src/hooks/todo-continuation-enforcer/index.ts
@@ -74,9 +74,31 @@ interface ChatPayload {
   }
 }
 
+interface MessageUpdatedPayload { // Payload shape the hook reads when handling a "message.updated" event.
+  directory?: string // Passed to resolveDirectory together with options.directory; presumably a per-event override — confirm.
+  properties?: {
+    sessionID?: string // Session id candidate; presumably consumed by resolveSessionId — confirm against that helper.
+    sessionId?: string // Alternate casing of the same session id field.
+    trace_id?: string // Trace id candidate; presumably consumed by resolveTraceId — confirm against that helper.
+    traceId?: string // Alternate casing of the same trace id field.
+    info?: { // Metadata of the updated message.
+      id?: string
+      role?: string // The handler only proceeds when this is "assistant" (compared lowercased and trimmed).
+      sessionID?: string
+      sessionId?: string
+      error?: unknown // Any value other than undefined/null marks the message as failed.
+      time?: { completed?: number } // A finite `completed` value marks the message as completed.
+    }
+    parts?: Array<{ type?: string; text?: string; synthetic?: boolean }> // Fallback text source, used only when output.parts is null/undefined.
+  }
+  output?: {
+    parts?: Array<{ type?: string; text?: string; synthetic?: boolean }> // Preferred source of the assistant text handed to assistantText.
+  }
+}
+
 interface SessionState {
   pendingContinuation: boolean
-  pendingSource?: "task_output" | "message_probe"
+  pendingSource?: "assistant_message" | "task_output" | "message_probe"
   pendingTodoCount: number
   lastInjectedAt: number
   consecutiveFailures: number
@@ -308,7 +330,7 @@ function hasPendingCueText(text: string, continueIntentArmed: boolean): boolean
 async function resolvePendingContinuationDecision(options: {
   text: string
   continueIntentArmed: boolean
-  source: "task_output" | "message_probe"
+  source: "assistant_message" | "task_output" | "message_probe"
   sessionId: string
   directory: string
   traceId?: string
@@ -587,6 +609,70 @@ export function createTodoContinuationEnforcerHook(options: {
         return
       }
 
+      if (type === "message.updated") {
+        const eventPayload = (payload ?? {}) as MessageUpdatedPayload
+        const sessionId = resolveSessionId(eventPayload)
+        if (!sessionId) {
+          return
+        }
+        const info = eventPayload.properties?.info
+        if (String(info?.role ?? "").toLowerCase().trim() !== "assistant") {
+          return
+        }
+        const completed = Number.isFinite(Number(info?.time?.completed ?? Number.NaN))
+        const failed = info?.error !== undefined && info?.error !== null
+        if (!completed && !failed) {
+          return
+        }
+        const state = getSessionState(sessionState, sessionId)
+        state.lastTraceId = resolveTraceId(eventPayload)
+        if (state.pendingTodoCount <= 0) {
+          return
+        }
+        const text = assistantText({
+          info: { role: "assistant" },
+          parts: eventPayload.output?.parts ?? eventPayload.properties?.parts,
+        })
+        if (!text) {
+          return
+        }
+        const shouldContinue = await resolvePendingContinuationDecision({
+          text,
+          continueIntentArmed: state.continueIntentArmed,
+          source: "assistant_message",
+          sessionId,
+          directory: resolveDirectory(eventPayload, options.directory),
+          traceId: state.lastTraceId,
+          decisionRuntime: options.decisionRuntime,
+        })
+        if (!shouldContinue) {
+          state.pendingContinuation = false
+          state.pendingSource = undefined
+          state.pendingTodoCount = 0
+          state.markerProbeAttempted = false
+          writeGatewayEventAudit(resolveDirectory(eventPayload, options.directory), {
+            hook: "todo-continuation-enforcer",
+            stage: "state",
+            reason_code: "todo_continuation_assistant_message_no_pending",
+            session_id: sessionId,
+            trace_id: state.lastTraceId,
+          })
+          return
+        }
+        state.pendingContinuation = true
+        state.pendingSource = "assistant_message"
+        state.markerProbeAttempted = false
+        writeGatewayEventAudit(resolveDirectory(eventPayload, options.directory), {
+          hook: "todo-continuation-enforcer",
+          stage: "state",
+          reason_code: "todo_continuation_assistant_message_with_open_todos",
+          session_id: sessionId,
+          trace_id: state.lastTraceId,
+          open_todo_count: state.pendingTodoCount,
+        })
+        return
+      }
+
       if (type !== "session.idle") {
         return
       }
@@ -734,6 +820,7 @@ export function createTodoContinuationEnforcerHook(options: {
         if (injected) {
           state.consecutiveFailures = 0
           state.pendingContinuation = false
+          state.pendingTodoCount = 0
           state.pendingSource = undefined
           state.continueIntentArmed = false
           state.markerProbeAttempted = false
diff --git a/scripts/gateway_command.py b/scripts/gateway_command.py
diff --git a/scripts/opencode_session.sh b/scripts/opencode_session.sh
diff --git a/scripts/selftest.py b/scripts/selftest.py