Skip to content

Commit e5e2e75

Browse files
authored
Merge pull request #496 from dmoliveira/feat/session-flow-hardening-ae
Improve session flow recovery and reduce audit noise
2 parents 58bb1ab + 03a64bb commit e5e2e75

File tree

53 files changed

+624
-840
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+624
-840
lines changed

docs/command-handbook.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -334,7 +334,7 @@ Use these directly in OpenCode:
334334
/gateway doctor
335335
/gateway watchdog status
336336
/gateway watchdog doctor
337-
/gateway watchdog set --warning-threshold-seconds 300 --tool-call-threshold 50
337+
/gateway watchdog set --warning-threshold-seconds 60 --tool-call-threshold 12 --reminder-cooldown-seconds 60
338338
/gateway watchdog disable
339339
/gateway continuation report --minutes 120 --limit 10 --json
340340
/gateway tune memory --json

plugin/gateway-core/dist/config/schema.js

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -169,9 +169,9 @@ export const DEFAULT_GATEWAY_CONFIG = {
169169
},
170170
longTurnWatchdog: {
171171
enabled: true,
172-
warningThresholdMs: 300000,
173-
toolCallWarningThreshold: 50,
174-
reminderCooldownMs: 120000,
172+
warningThresholdMs: 60000,
173+
toolCallWarningThreshold: 12,
174+
reminderCooldownMs: 60000,
175175
maxSessionStateEntries: 1024,
176176
prefix: "[Turn Watchdog]:",
177177
},

plugin/gateway-core/dist/hooks/agent-denied-tool-enforcer/index.js

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import { writeGatewayEventAudit } from "../../audit/event-audit.js";
22
import { loadAgentMetadata } from "../shared/agent-metadata.js";
33
import { resolveDelegationTraceId } from "../shared/delegation-trace.js";
4-
import { writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
4+
import { buildCompactDecisionCacheKey, writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
55
function sessionId(payload) {
66
return String(payload.input?.sessionID ?? payload.input?.sessionId ?? "").trim();
77
}
@@ -171,7 +171,11 @@ export function createAgentDeniedToolEnforcerHook(options) {
171171
R: "read_only_safe",
172172
N: "unclear",
173173
},
174-
cacheKey: `mutation:${subagentType}:${combinedText}`,
174+
cacheKey: buildCompactDecisionCacheKey({
175+
prefix: "mutation",
176+
parts: [subagentType || "none"],
177+
text: compactDecisionText(promptText, descriptionText),
178+
}),
175179
});
176180
if (mutationDecision.accepted && mutationDecision.char === "M") {
177181
writeDecisionComparisonAudit({
@@ -217,7 +221,11 @@ export function createAgentDeniedToolEnforcerHook(options) {
217221
A: "allowed_or_no_issue",
218222
N: "unclear",
219223
},
220-
cacheKey: `tool:${subagentType}:${denied.join(",")}:${combinedText}`,
224+
cacheKey: buildCompactDecisionCacheKey({
225+
prefix: "tool",
226+
parts: [subagentType || "none", denied.join(",") || "none"],
227+
text: compactDecisionText(promptText, descriptionText),
228+
}),
221229
});
222230
if (toolDecision.accepted && toolDecision.char === "D") {
223231
const suggestion = suggestAllowedTool(String(denied[0]), allowed);

plugin/gateway-core/dist/hooks/agent-model-resolver/index.js

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import { writeGatewayEventAudit } from "../../audit/event-audit.js";
22
import { loadAgentMetadata } from "../shared/agent-metadata.js";
33
import { annotateDelegationMetadata, resolveDelegationTraceId } from "../shared/delegation-trace.js";
4-
import { writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
4+
import { buildCompactDecisionCacheKey, writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
55
const MODEL_BY_CATEGORY = {
66
quick: { model: "openai/gpt-5.1-codex-mini", reasoning: "low" },
77
balanced: { model: "openai/gpt-5.3-codex", reasoning: "medium" },
@@ -387,7 +387,11 @@ export function createAgentModelResolverHook(options) {
387387
context: buildRoutingContext(String(args.prompt ?? ""), String(args.description ?? ""), originalExplicitSubagent, aiInferred.name, aiInferred.score, explicitScore),
388388
allowedChars: alphabet,
389389
decisionMeaning: buildRoutingDecisionMeaning(aiInferred.name, originalExplicitSubagent),
390-
cacheKey: `route:${originalExplicitSubagent || "none"}:${aiInferred.name}:${combinedText}`,
390+
cacheKey: buildCompactDecisionCacheKey({
391+
prefix: "route",
392+
parts: [originalExplicitSubagent || "none", aiInferred.name],
393+
text: buildRoutingContext(String(args.prompt ?? ""), String(args.description ?? ""), originalExplicitSubagent, aiInferred.name, aiInferred.score, explicitScore),
394+
}),
391395
});
392396
if (decision.accepted) {
393397
const resolvedChar = decision.char.toUpperCase();

plugin/gateway-core/dist/hooks/auto-slash-command/index.js

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,16 @@
11
import { writeGatewayEventAudit } from "../../audit/event-audit.js";
2-
import { writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
2+
import { buildCompactDecisionCacheKey, writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
33
const AUTO_SLASH_COMMAND_TAG_OPEN = "<auto-slash-command>";
44
const AUTO_SLASH_COMMAND_TAG_CLOSE = "</auto-slash-command>";
55
const SLASH_COMMAND_PATTERN = /^\/([a-zA-Z][\w-]*)\s*(.*)/;
66
const EXCLUDED_COMMANDS = new Set(["ulw-loop"]);
77
const INLINE_SLASH_TOKEN_PATTERN = /(^|\s)\/([a-zA-Z][\w-]*)\b/g;
88
const HIGH_RISK_SKIP_PATTERN = /\b(install|npm\s+install|brew\s+install|setup|configure|deploy|production)\b/i;
9+
const DETERMINISTIC_DOCTOR_PATTERN = /\b(doctor|diagnos(?:e|is|tic|tics))\b/i;
10+
const DIAGNOSTIC_CUE_PATTERN = /\b(doctor|diagnos(?:e|is|tic|tics)|health(?:\s+check)?|debug|investigat(?:e|ion)|inspect)\b/i;
11+
const ACTION_VERB_PATTERN = /\b(run|open|use|launch|start|check|perform|do|inspect|investigate|debug|review|analy[sz]e|look\s+into|tell\s+me|show\s+me|help\s+me\s+understand)\b/i;
12+
const META_DISCUSSION_SKIP_PATTERN = /\b(last session|previous session|instruction command|prompt wording|prompt text|slash doctor|auto[-\s]?slash|why did|why does|routed to|route to|activated \/doctor|triggered \/doctor|command behavior)\b/i;
13+
const INVESTIGATION_CONTEXT_PATTERN = /\b(issue|environment|state|problem|wrong|error|failure|symptom|health)\b/i;
914
const AI_AUTO_SLASH_CHAR_TO_COMMAND = {
1015
D: "/doctor",
1116
};
@@ -151,19 +156,24 @@ function detectSlash(prompt) {
151156
return { slash: explicit.raw, excludedExplicit: false };
152157
}
153158
const text = cleaned.toLowerCase();
154-
if (text.includes("doctor") || text.includes("diagnose") || text.includes("health check")) {
159+
if (!META_DISCUSSION_SKIP_PATTERN.test(text) && DETERMINISTIC_DOCTOR_PATTERN.test(text) && ACTION_VERB_PATTERN.test(text)) {
155160
return { slash: "/doctor", excludedExplicit: false };
156161
}
157162
return { slash: null, excludedExplicit: false };
158163
}
159164
function shouldSkipAiAutoSlash(prompt) {
160-
return HIGH_RISK_SKIP_PATTERN.test(prompt);
165+
const hasInvestigativeIntent = ACTION_VERB_PATTERN.test(prompt);
166+
const hasEligibleContext = DIAGNOSTIC_CUE_PATTERN.test(prompt) || INVESTIGATION_CONTEXT_PATTERN.test(prompt);
167+
return (HIGH_RISK_SKIP_PATTERN.test(prompt) ||
168+
META_DISCUSSION_SKIP_PATTERN.test(prompt) ||
169+
!hasInvestigativeIntent ||
170+
!hasEligibleContext);
161171
}
162172
function shouldSkipAutoSlash(prompt) {
163173
return HIGH_RISK_SKIP_PATTERN.test(prompt);
164174
}
165175
function buildAiSlashInstruction() {
166-
return "Classify only the sanitized user request text for diagnostics intent. D=diagnostics_or_health_check, N=not_diagnostics.";
176+
return "Classify only the sanitized user request text for explicit diagnostics intent. Return D only when the user is clearly asking to run or perform diagnostics or health checks now. Return N for meta discussion about prompts, routing, commands, past sessions, or instruction wording.";
167177
}
168178
function buildAiSlashContext(prompt) {
169179
return `request=${normalizePromptForAi(prompt) || "(empty)"}`;
@@ -210,7 +220,10 @@ export function createAutoSlashCommandHook(options) {
210220
D: "route_doctor",
211221
N: "no_slash",
212222
},
213-
cacheKey: `auto-slash:${prompt.trim().toLowerCase()}`,
223+
cacheKey: buildCompactDecisionCacheKey({
224+
prefix: "auto-slash",
225+
text: normalizePromptForAi(prompt),
226+
}),
214227
});
215228
}
216229
catch (error) {
@@ -236,27 +249,20 @@ export function createAutoSlashCommandHook(options) {
236249
deterministicValue: "none",
237250
aiValue: aiSlash ?? "none",
238251
});
252+
const shadowDeferred = options.decisionRuntime.config.mode === "shadow" && aiSlash;
239253
writeGatewayEventAudit(directory, {
240254
hook: "auto-slash-command",
241255
stage: "state",
242-
reason_code: "llm_auto_slash_decision_recorded",
256+
reason_code: shadowDeferred
257+
? "llm_auto_slash_shadow_deferred"
258+
: "llm_auto_slash_decision_recorded",
243259
session_id: sessionId,
244260
llm_decision_char: decision.char,
245261
llm_decision_meaning: decision.meaning,
246262
llm_decision_mode: options.decisionRuntime.config.mode,
247263
slash_command: aiSlash ?? undefined,
248264
});
249-
if (options.decisionRuntime.config.mode === "shadow" && aiSlash) {
250-
writeGatewayEventAudit(directory, {
251-
hook: "auto-slash-command",
252-
stage: "state",
253-
reason_code: "llm_auto_slash_shadow_deferred",
254-
session_id: sessionId,
255-
llm_decision_char: decision.char,
256-
llm_decision_meaning: decision.meaning,
257-
llm_decision_mode: options.decisionRuntime.config.mode,
258-
slash_command: aiSlash,
259-
});
265+
if (shadowDeferred) {
260266
}
261267
else {
262268
slash = aiSlash;

plugin/gateway-core/dist/hooks/context-window-monitor/index.js

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -141,34 +141,16 @@ export function createContextWindowMonitorHook(options) {
141141
});
142142
const actualUsage = totalInputTokens / actualLimit;
143143
if (actualUsage < options.warningThreshold) {
144-
writeGatewayEventAudit(directory, {
145-
hook: "context-window-monitor",
146-
stage: "skip",
147-
reason_code: "below_warning_threshold",
148-
session_id: sessionId,
149-
});
150144
return;
151145
}
152146
const hasPriorReminder = nextState.lastWarnedAtToolCall > 0;
153147
if (hasPriorReminder) {
154148
const cooldownElapsed = nextState.toolCalls - nextState.lastWarnedAtToolCall >= options.reminderCooldownToolCalls;
155149
const tokenDeltaEnough = totalInputTokens - nextState.lastWarnedTokens >= options.minTokenDeltaForReminder;
156150
if (!cooldownElapsed) {
157-
writeGatewayEventAudit(directory, {
158-
hook: "context-window-monitor",
159-
stage: "skip",
160-
reason_code: "reminder_cooldown_not_elapsed",
161-
session_id: sessionId,
162-
});
163151
return;
164152
}
165153
if (!tokenDeltaEnough) {
166-
writeGatewayEventAudit(directory, {
167-
hook: "context-window-monitor",
168-
stage: "skip",
169-
reason_code: "reminder_token_delta_too_small",
170-
session_id: sessionId,
171-
});
172154
return;
173155
}
174156
}

plugin/gateway-core/dist/hooks/continuation/index.js

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -217,11 +217,6 @@ export function createContinuationHook(options) {
217217
: options.directory;
218218
const sessionId = resolveSessionId(eventPayload);
219219
if (!sessionId) {
220-
writeGatewayEventAudit(directory, {
221-
hook: "continuation",
222-
stage: "skip",
223-
reason_code: "missing_session_id",
224-
});
225220
return;
226221
}
227222
let state = loadGatewayState(directory);
@@ -240,29 +235,12 @@ export function createContinuationHook(options) {
240235
}
241236
}
242237
if (!state || !active || active.active !== true) {
243-
writeGatewayEventAudit(directory, {
244-
hook: "continuation",
245-
stage: "skip",
246-
reason_code: "no_active_loop",
247-
});
248238
return;
249239
}
250240
if (options.stopGuard?.isStopped(sessionId)) {
251-
writeGatewayEventAudit(directory, {
252-
hook: "continuation",
253-
stage: "skip",
254-
reason_code: "stop_guard_active",
255-
session_id: sessionId,
256-
});
257241
return;
258242
}
259243
if (!sessionId || sessionId !== active.sessionId) {
260-
writeGatewayEventAudit(directory, {
261-
hook: "continuation",
262-
stage: "skip",
263-
reason_code: "session_mismatch",
264-
has_session_id: sessionId.length > 0,
265-
});
266244
return;
267245
}
268246
const client = options.client?.session;
@@ -296,13 +274,6 @@ export function createContinuationHook(options) {
296274
}
297275
state.lastUpdatedAt = nowIso();
298276
saveGatewayState(directory, state);
299-
writeGatewayEventAudit(directory, {
300-
hook: "continuation",
301-
stage: "skip",
302-
reason_code: REASON_CODES.LOOP_COMPLETION_IGNORED_INCOMPLETE_RUNTIME,
303-
session_id: sessionId,
304-
ignored_completion_cycles: ignoredCycles,
305-
});
306277
}
307278
else {
308279
active.active = false;
@@ -360,13 +331,6 @@ export function createContinuationHook(options) {
360331
directory,
361332
});
362333
if (!safety.safe) {
363-
writeGatewayEventAudit(directory, {
364-
hook: "continuation",
365-
stage: "skip",
366-
reason_code: `idle_prompt_${safety.reason}`,
367-
session_id: sessionId,
368-
iteration: active.iteration,
369-
});
370334
return;
371335
}
372336
const mode = options.keywordDetector?.modeForSession(sessionId) ?? null;

plugin/gateway-core/dist/hooks/delegation-fallback-orchestrator/index.js

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { writeGatewayEventAudit } from "../../audit/event-audit.js";
2-
import { writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
2+
import { buildCompactDecisionCacheKey, writeDecisionComparisonAudit, } from "../shared/llm-decision-runtime.js";
33
import { annotateDelegationMetadata, extractDelegationTraceId, resolveDelegationTraceId, } from "../shared/delegation-trace.js";
44
const FAILURE_REASON_BY_CHAR = {
55
U: "delegation_unknown_agent",
@@ -171,7 +171,11 @@ export function createDelegationFallbackOrchestratorHook(options) {
171171
R: "delegation_runtime_error",
172172
N: "no_match",
173173
},
174-
cacheKey: `delegation-failure:${subagentType}:${category}:${String(eventPayload.output.output ?? "").trim().toLowerCase()}`,
174+
cacheKey: buildCompactDecisionCacheKey({
175+
prefix: "delegation-failure",
176+
parts: [subagentType || "none", category || "none"],
177+
text: buildFailureContext(String(eventPayload.output.output ?? ""), String(args?.prompt ?? ""), String(args?.description ?? "")),
178+
}),
175179
});
176180
if (decision.accepted) {
177181
const aiReason = FAILURE_REASON_BY_CHAR[decision.char] ?? null;

plugin/gateway-core/dist/hooks/done-proof-enforcer/index.d.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import type { GatewayHook } from "../registry.js";
2-
import type { LlmDecisionRuntime } from "../shared/llm-decision-runtime.js";
2+
import { type LlmDecisionRuntime } from "../shared/llm-decision-runtime.js";
33
export declare function createDoneProofEnforcerHook(options: {
44
enabled: boolean;
55
requiredMarkers: string[];

plugin/gateway-core/dist/hooks/done-proof-enforcer/index.js

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import { markerCategory, validationEvidenceStatus } from "../validation-evidence-ledger/evidence.js";
2+
import { buildCompactDecisionCacheKey } from "../shared/llm-decision-runtime.js";
23
import { writeGatewayEventAudit } from "../../audit/event-audit.js";
34
import { writeDecisionComparisonAudit } from "../shared/llm-decision-runtime.js";
45
import { listToolAfterOutputTexts, readCombinedToolAfterOutputText, writeToolAfterOutputChannelText, } from "../shared/tool-after-output.js";
@@ -50,7 +51,10 @@ export function createDoneProofEnforcerHook(options) {
5051
context: buildMarkerContext(text),
5152
allowedChars: ["Y", "N"],
5253
decisionMeaning: { Y: `${marker}_present`, N: `${marker}_missing` },
53-
cacheKey: `done-proof:${marker}:${text.trim().toLowerCase()}`,
54+
cacheKey: buildCompactDecisionCacheKey({
55+
prefix: `done-proof:${marker}`,
56+
text: buildMarkerContext(text),
57+
}),
5458
});
5559
if (decision.accepted) {
5660
writeDecisionComparisonAudit({

0 commit comments

Comments
 (0)