nirukk52
diff --git a/‎.claude-skills/backend-debugging_skill/SKILL.md‎
Lines changed: 198 additions & 0 deletions b/‎.claude-skills/backend-debugging_skill/SKILL.md‎
Lines changed: 198 additions & 0 deletions
diff --git a/‎.claude-skills/webapp-testing_skill/SKILL.md‎
Lines changed: 100 additions & 2 deletions b/‎.claude-skills/webapp-testing_skill/SKILL.md‎
Lines changed: 100 additions & 2 deletions
diff --git a/‎backend/agent/engine/xstate/agent.machine.factory.ts‎
Lines changed: 9 additions & 1 deletion b/‎backend/agent/engine/xstate/agent.machine.factory.ts‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎backend/agent/nodes/terminal/Stop/node.ts‎
Lines changed: 3 additions & 23 deletions b/‎backend/agent/nodes/terminal/Stop/node.ts‎
Lines changed: 3 additions & 23 deletions
diff --git a/‎backend/graph/projector.ts‎
Lines changed: 1 addition & 1 deletion b/‎backend/graph/projector.ts‎
Lines changed: 1 addition & 1 deletion
@@ -82,3 +82,201 @@ encore test
 - Never use `console.log` (use `encore.dev/log`)
 - Always include structured context
 
+---
+
+## BUG-010 Case Study: Advanced Debugging Techniques
+
+### Diagnostic Scripts Arsenal
+
+Created during BUG-010 investigation (all in `backend/scripts/`):
+
+1. **`inspect-run.ts`** - Complete run event timeline
+   ```bash
+   bunx tsx backend/scripts/inspect-run.ts <runId>
+   # Shows: events, graph outcomes, cursor state, run record
+   ```
+
+2. **`check-agent-state.ts`** - Agent state snapshots
+   ```bash
+   bunx tsx backend/scripts/check-agent-state.ts <runId>
+   # Shows: nodeName, status, counters, budgets, timestamps
+   ```
+
+3. **`check-cursor-ordering.ts`** - Projector cursor health
+   ```bash
+   bunx tsx backend/scripts/check-cursor-ordering.ts
+   # Reveals: cursor limit issues, stuck cursors, ordering problems
+   ```
+
+4. **`find-completed-runs.ts`** / **`find-latest-run.ts`**
+   ```bash
+   bunx tsx backend/scripts/find-completed-runs.ts  # Successful runs
+   bunx tsx backend/scripts/find-latest-run.ts      # Recent runs (any status)
+   ```
+
+5. **`test-projector.ts`** - Isolated projector testing
+   ```bash
+   bunx tsx backend/scripts/test-projector.ts <runId>
+   # Tests: cursor hydration, event fetch, screen projection
+   ```
+
+### Git Forensics for Regressions
+
+**Timeline Method:**
+```bash
+# 1. Find last successful run
+bunx tsx backend/scripts/find-completed-runs.ts
+# Example: 01K9G8YXY6MG7J7875A5AM9Z4H at 2025-11-07 17:03
+
+# 2. Find first failed run
+bunx tsx backend/scripts/find-latest-run.ts
+# Example: 01K9GDQF9JQFM8A4Q5WGMARPAT at 2025-11-07 18:26
+
+# 3. Identify commits in regression window
+git log --oneline --since="Nov 7 17:00" --until="Nov 7 19:00"
+
+# 4. Examine suspect commits
+git show <commit_hash> --stat               # Files changed
+git show <commit_hash> <file_path>          # Detailed diff
+git show <commit_hash>~1:<file_path>       # Before version
+```
+
+**Binary Search Method:**
+```bash
+git bisect start
+git bisect bad HEAD                         # Current broken state
+git bisect good <last_known_good_commit>    # From timeline
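+# At each step, test the checked-out commit and mark it: git bisect good | git bisect bad
+# (or automate with: git bisect run <test_command>) until the culprit is found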
+git bisect reset                            # Exit bisect mode
+```
+
+### Database Query Analysis
+
+**Stop Node Hang (BUG-010 Example):**
+```typescript
+// PROBLEM: Query inside node execution blocks XState machine
+const rows = await db.query`SELECT COUNT(*) FROM graph_persistence_outcomes WHERE run_id = ${runId}`;
+
+// SYMPTOMS:
+// - Worker times out after 30s lease
+// - Agent state shows "running" but stuck
+// - No "agent.node.finished" event emitted
+
+// DIAGNOSIS:
+// 1. Check worker lease timeout logs
+// 2. Inspect agent state (last snapshot shows incomplete node)
+// 3. Test query in isolation (encore exec -- bunx tsx test-query.ts)
+// 4. Profile query execution time
+
+// FIX:
+// Move heavy queries OUTSIDE critical execution path
+// Use lightweight operations in terminal nodes
+```
+
+### Cursor Limit Investigation
+
+**Projector Stalling Pattern:**
+```typescript
+// SYMPTOM: Recent runs never get graph_persistence_outcomes
+// CHECK: backend/graph/projector.ts
+const CURSOR_LIMIT = 50;  // ❌ Only processes 50 oldest cursors
+
+// DIAGNOSIS:
+bunx tsx backend/scripts/check-cursor-ordering.ts
+// Output: 75 total cursors, positions 51-75 never processed
+
+// VALIDATION:
+SELECT COUNT(*) FROM graph_projection_cursors;  -- Shows 75
+SELECT * FROM graph_projection_cursors ORDER BY updated_at ASC LIMIT 50;  -- Oldest 50 (the only ones processed)
+SELECT * FROM graph_projection_cursors ORDER BY updated_at DESC LIMIT 10; -- Recent (excluded)
+
+// FIX:
+const CURSOR_LIMIT = 200;  // Scale with concurrent runs
+```
+
+### Worker State Inspection
+
+**Understanding Worker Lifecycle:**
+```bash
+# 1. Check run claim status
+SELECT processing_by, processing_started_at FROM runs WHERE run_id = '<runId>';
+
+# 2. Verify lease heartbeat
+# Watch Encore logs for "extending lease" messages
+
+# 3. Inspect final disposition
+SELECT status, stop_reason FROM runs WHERE run_id = '<runId>';
+# status=failed indicates worker crash/timeout before Stop node
+```
+
+### Phase 11: Advanced Regression Analysis (NEW)
+
+When standard phases 1-10 don't reveal the issue:
+
+1. **Compare successful vs failed run events side-by-side**
+   ```bash
+   diff <(bunx tsx backend/scripts/inspect-run.ts <good_run>) \
+        <(bunx tsx backend/scripts/inspect-run.ts <bad_run>)
+   ```
+
+2. **Identify missing events in sequence**
+   - Successful run: 19 events (includes Stop at step 6)
+   - Failed run: 15 events (stops at WaitIdle step 5)
+   - Missing events include: `agent.node.started Stop`, `agent.run.finished`
+
+3. **Trace XState machine transitions**
+   - Add logging to guards and actions in `agent.machine.factory.ts`
+   - Monitor which guards evaluate true/false
+   - Identify unexpected state transitions
+
+4. **Test node execution in isolation**
+   ```typescript
+   // scripts/test-node-isolation.ts
+   import { stop } from "../agent/nodes/terminal/Stop/node";
+   const input = { /* build input from failed run state */ };
+   const result = await stop(input);
+   console.log("Node output:", result);
+   ```
+
+### Common Backend Regression Patterns
+
+| Issue | Symptom | Investigation | Common Cause |
+|-------|---------|---------------|--------------|
+| **Cursor Limit** | Recent runs stuck at seq=1 | `check-cursor-ordering.ts` | `CURSOR_LIMIT` too low |
+| **Node Hangs** | Agent state "running" indefinitely | `check-agent-state.ts` | DB query blocks execution |
+| **Lease Timeout** | Run fails after 30s | Worker logs, database `processing_by` | Heavy sync operations |
+| **Missing Events** | Timeline incomplete | `inspect-run.ts`, compare with baseline | Event not emitted or lost |
+| **State Machine Stuck** | No transitions after event | XState logs, guard evaluation | Guard logic error |
+
+### Lesson: Avoid Heavy Operations in Critical Path
+
+**Bad Pattern (BUG-010):**
+```typescript
+export async function stop(input: StopInput) {
+  // ❌ DB query inside terminal node execution
+  const rows = await db.query`SELECT COUNT(*) ...`;
+  // If query hangs, entire machine stalls
+}
+```
+
+**Good Pattern:**
+```typescript
+export async function stop(input: StopInput) {
+  // ✅ Use pre-computed metrics from input
+  const metrics = input.finalRunMetrics;
+  // Terminal nodes must be lightweight and deterministic
+}
+```
+
+**Rationale:**
+- Terminal nodes finalize run state → must complete reliably
+- Heavy queries → post-run analytics layer
+- Critical path → optimized for reliability and latency; exact counts can be reconciled after the run
+
+---
+
+## References
+- BUG-010 RCA: `jira/bugs/BUG-010-run-page-regressions/RCA.md`
+- Diagnostic Scripts: `backend/scripts/`
+- Encore Debugging: `backend_coding_rules.mdc`
+
@@ -229,9 +229,107 @@ Reusable helpers live in `./lib/playwright-helpers.ts` (launch settings, safe cl
 
 ---
 
-## 5. References
+## 5. Regression Debugging Playbook (BUG-010 Case Study)
+
+### Systematic RCA for UI Regressions
+
+**Real Example:** Three regressions on `/run` page (Nov 2025)
+- Graph events missing
+- Screenshots not visible  
+- Stop node not executing
+
+**Investigation Flow:**
+
+1. **Visual Comparison**  
+   ```text
+   # Capture current state
+   browser_take_screenshot({ fullPage: true, filename: "current-state.png" })
+   
+   # Compare with baseline
+   # .playwright-mcp/drift-detection-with-screenshot.png (working)
+   # vs current broken state
+   ```
+
+2. **Browser MCP Diagnostics**  
+   ```text
+   browser_navigate("http://localhost:5173")
+   browser_click("Detect My First Drift")
+   browser_snapshot()  # Check UI tree for missing elements
+   browser_console_messages()  # Catch JS errors
+   browser_network_requests()  # Verify SSE streams
+   ```
+
+3. **Timeline Forensics**  
+   ```bash
+   # Find last successful run
+   bunx tsx backend/scripts/find-completed-runs.ts
+   
+   # Compare with failed run
+   bunx tsx backend/scripts/inspect-run.ts <run_id>
+   
+   # Look for missing events (e.g., Stop node at step 6)
+   ```
+
+4. **Git Forensics (Regression Window)**  
+   ```bash
+   # Identify regression window
+   git log --oneline --since="<last_success_time>" --until="<first_failure_time>"
+   
+   # Examine suspect commits
+   git show <commit_hash> --stat
+   git show <commit_hash> <specific_file>
+   ```
+
+5. **Backend State Inspection**  
+   ```bash
+   # Check agent state
+   bunx tsx backend/scripts/check-agent-state.ts <run_id>
+   
+   # Verify graph projector cursor
+   bunx tsx backend/scripts/check-cursor-ordering.ts
+   
+   # Test projector functions in isolation
+   bunx tsx backend/scripts/test-projector.ts <run_id>
+   ```
+
+6. **Root Cause Validation**  
+   - Remove suspect code changes
+   - Restart services
+   - Run fresh test
+   - Compare events sequence with baseline
+
+### Key Diagnostic Scripts Created
+- `backend/scripts/inspect-run.ts` - Full run event timeline
+- `backend/scripts/check-agent-state.ts` - Agent state snapshots
+- `backend/scripts/check-cursor-ordering.ts` - Projector cursor health
+- `backend/scripts/find-completed-runs.ts` - Identify successful runs for comparison
+- `backend/scripts/test-projector.ts` - Isolated projector function testing
+
+### Common Regression Patterns
+| Symptom | Check | Common Cause |
+|---------|-------|--------------|
+| Graph events missing | Cursor limit, projector logs | `CURSOR_LIMIT` too low, cursor stuck |
+| Screenshots not rendering | dataUrl in stream, CORS | Missing field in projection output |
+| Stop node not executing | Agent state, XState logs | Node execution error, budget exhaustion |
+| Run fails prematurely | Worker logs, lease timeout | Database query hangs, lease expired |
+
+### Evidence Collection Checklist
+- [ ] Screenshot comparison (baseline vs current)
+- [ ] Browser console logs
+- [ ] Network tab (SSE streams)
+- [ ] Backend logs (Encore dashboard)
+- [ ] Database state (run_events, outcomes, cursors)
+- [ ] Git diff of regression window
+- [ ] Agent state snapshots
+
+**See:** `jira/bugs/BUG-010-run-page-regressions/RCA.md` for complete case study
+
+---
+
+## 6. References
 - Playwright Docs: https://playwright.dev/docs/intro  
 - Encore/Svelte debugging: see `backend_coding_rules.mdc` and `frontend_engineer.mdc`  
 - Automation commands: `.cursor/commands/start-services`, `.cursor/commands/run-default-test`, `task founder:rules:check`
+- BUG-010 RCA: `jira/bugs/BUG-010-run-page-regressions/RCA.md`
 
-Use this playbook whenever you need reproducible UI testing. Playwright gives you deterministic coverage; Cursor’s tools remain on standby for exploratory analysis.
+Use this playbook whenever you need reproducible UI testing. Playwright gives you deterministic coverage; Cursor's tools remain on standby for exploratory analysis.
@@ -77,11 +77,17 @@ export class AgentMachineFactory {
         clearPendingStop: assign(() => ({ pendingStop: null } satisfies Partial<AgentMachineContext>)),
 
         // Stores the execution result and updates machine context with new state
-        storeExecutionResult: assign(({ event }) => {
+        storeExecutionResult: assign(({ event, context }) => {
           const output = "output" in event ? (event.output as RunNodeActorOutput | undefined) : undefined;
           if (!output) {
+            dependencies.logger.warn("storeExecutionResult: no output in event", { event });
             return {} satisfies Partial<AgentMachineContext>;
           }
+          dependencies.logger.info("storeExecutionResult", {
+            nodeName: output.execution.nodeName,
+            decision: output.decision.kind,
+            nextNode: output.decision.kind === "advance" ? output.decision.nextNode : null,
+          });
           return {
             agentState: output.nextState,
             latestExecution: output.execution,
@@ -321,6 +327,8 @@ export class AgentMachineFactory {
         nodeName: executionResult.execution.nodeName,
         outcome: executionResult.execution.outcome,
         decision: decision.kind,
+        nextNode: decision.kind === "advance" ? decision.nextNode : null,
+        budgetExhausted: budgetStopReason !== null,
       });
 
       return {
 
@@ -51,35 +51,15 @@ export async function stop(
     }
   });
 
-  // Query actual discovered screens from graph_persistence_outcomes
-  const discoveredScreensRows = await db.query<{ count: number }>`
-    SELECT COUNT(DISTINCT screen_id) as count
-    FROM graph_persistence_outcomes
-    WHERE run_id = ${input.runId}
-      AND outcome_kind = 'discovered'
-  `;
-
-  let actualDiscoveredScreens = 0;
-  for await (const row of discoveredScreensRows) {
-    actualDiscoveredScreens = row.count;
-  }
+  // Use metrics from input (DB query removed to fix regression)
+  const correctedMetrics = input.finalRunMetrics;
 
   logger.info("Stop node details", {
-    actualDiscoveredScreens,
-    reportedScreens: input.finalRunMetrics.uniqueScreensDiscoveredCount,
-    totalIterationsExecuted: input.finalRunMetrics.totalIterationsExecuted,
-    uniqueActionsPersistedCount: input.finalRunMetrics.uniqueActionsPersistedCount,
-    runDurationInMilliseconds: input.finalRunMetrics.runDurationInMilliseconds,
+    metrics: correctedMetrics,
     stepOrdinal: input.stepOrdinal,
     iterationOrdinalNumber: input.iterationOrdinalNumber,
   });
 
-  // Override the counter with actual database count
-  const correctedMetrics = {
-    ...input.finalRunMetrics,
-    uniqueScreensDiscoveredCount: actualDiscoveredScreens,
-  };
-
   const output: StopOutput = {
     runId: input.runId,
     confirmedTerminalDisposition: input.intendedTerminalDisposition,
 
@@ -16,7 +16,7 @@ import type {
 } from "./types";
 
 const POLL_INTERVAL_MS = 300;
-const CURSOR_LIMIT = 50;
+const CURSOR_LIMIT = 200; // Increased from 50 to handle more concurrent runs
 const HYDRATE_LIMIT = 20;
 const EVENT_BATCH_SIZE = 100;