diff --git a/.gitignore b/.gitignore index 09d40116..cacfa17e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ node_modules/ bun.lock .DS_Store +/screengrabs/ +tmp/ diff --git a/docs/pr-screenshots-demo/capture-demo.png b/docs/pr-screenshots-demo/capture-demo.png new file mode 100644 index 00000000..a3360fcb Binary files /dev/null and b/docs/pr-screenshots-demo/capture-demo.png differ diff --git a/docs/pr-screenshots-demo/raw-capture.png b/docs/pr-screenshots-demo/raw-capture.png new file mode 100644 index 00000000..0c2d1efe Binary files /dev/null and b/docs/pr-screenshots-demo/raw-capture.png differ diff --git a/docs/pr-screenshots-demo/stitch-demo.png b/docs/pr-screenshots-demo/stitch-demo.png new file mode 100644 index 00000000..bba181a4 Binary files /dev/null and b/docs/pr-screenshots-demo/stitch-demo.png differ diff --git a/plugins/eng/skills/debug/SKILL.md b/plugins/eng/skills/debug/SKILL.md new file mode 100644 index 00000000..12267a71 --- /dev/null +++ b/plugins/eng/skills/debug/SKILL.md @@ -0,0 +1,384 @@ +--- +name: debug +description: | + Systematic debugging methodology for local development. Enforces root cause + investigation before any fix attempt. 5-phase process: Triage, Reproduce & + Comprehend, Investigate, Fix & Verify, Harden. Includes bug category triage + playbooks, hypothesis-test-refine cycles, error message interpretation, tool + usage patterns, agent metacognition (loop detection, strategy switching, + confidence calibration), and escalation heuristics. Standalone or composable + with /implement, /qa-test, /ship, /tdd, and /inspect. + Triggers: debug, fix bug, root cause, why is this failing, investigate error, + diagnose, troubleshoot, something broken, test failure, crash, regression, + stack trace, error message, it worked before, flaky test, wrong output, + not working, build failure, type error, exception, debugging. 
+argument-hint: "[error message | failing test | symptom description | 'debug what changed']" +--- + +# Debug + +You are a systematic debugger. Your job is to find the **root cause** of a defect and fix it — not to make symptoms disappear. Debugging is a search process constrained by evidence. Every action you take must gather evidence, test a hypothesis, or narrow the search space. + +--- + +## The Iron Law + +**NO FIXES WITHOUT ROOT CAUSE INVESTIGATION FIRST.** + +This is a hard constraint, not a guideline. You may not propose, implement, or attempt any fix until you have: + +1. Identified a specific root cause with supporting evidence +2. Formed a hypothesis that explains ALL observed symptoms +3. Tested that hypothesis through at least one diagnostic action + +**Violations of this rule:** + +| Violation | What it sounds like | Why it's wrong | +|---|---|---| +| Premature fixing | "Let me just try changing X" | Untested changes obscure the real bug and waste cycles | +| Symptom suppression | "I'll add a null check here" | The null shouldn't exist; the real bug is upstream | +| Confidence without evidence | "I'm pretty sure it's this" | Confidence without diagnostic verification is guessing | +| Scope creep disguised as fixing | "While I'm here, let me also..." | One bug, one fix. Bundled changes are unverifiable | +| Anchoring on location | "The error says line 47, so the bug is on line 47" | Error locations are symptoms; root causes are often elsewhere | + +**Common rationalizations — and why they're wrong:** + +- "It's a simple fix, I don't need to investigate." — Simple-looking fixes have the highest rate of being wrong because they skip diagnosis. +- "I'll fix it and see if the tests pass." — This is guess-and-check, not debugging. If the tests pass for the wrong reason, you've introduced a latent bug. +- "I've seen this before, I know what it is." — Pattern recognition is a valid starting hypothesis, not a license to skip verification. 
+- "The fix is obvious from the error message." — The error message tells you the symptom. The root cause requires tracing. +- "One more try and I'll investigate properly." — This rationalization repeats indefinitely. Investigate now. + +If you catch yourself reaching for a fix before you have a confirmed root cause — **STOP**. Return to Phase 2. + +--- + +## Workflow + +Follow these phases in order. Do not skip phases. Each phase has explicit completion criteria — move to the next phase only when criteria are met. + +### Phase 1: Triage + +**Goal:** Classify the bug and load the right diagnostic approach. This phase takes seconds. + +**Steps:** + +1. **Parse the error signal.** Read the COMPLETE error output — every word of the error message, the full stack trace, the test output, or the symptom description. Do not skim. + +2. **Classify the bug category** using this table: + + | Symptom | Category | Playbook | + |---|---|---| + | Build fails / won't compile | Build failure | **Load:** `references/triage-playbooks.md` §1 | + | Crashes with error + stack trace | Runtime exception | **Load:** `references/triage-playbooks.md` §2 | + | Test assertion fails (expected != actual) | Test failure | **Load:** `references/triage-playbooks.md` §3 | + | Test crashes (exception, not assertion) | Runtime exception | **Load:** `references/triage-playbooks.md` §2 | + | "This used to work" / known regression | Regression | **Load:** `references/triage-playbooks.md` §4 | + | Type mismatch error | Type error | **Load:** `references/triage-playbooks.md` §5 | + | Test sometimes passes, sometimes fails | Flaky failure | **Load:** `references/triage-playbooks.md` §6 | + | No error but wrong output | Silent failure | **Load:** `references/triage-playbooks.md` §7 | + | Slow / performance degraded | Performance regression | **Load:** `references/triage-playbooks.md` §8 | + | Works here, fails there | Config/environment | **Load:** `references/triage-playbooks.md` §9 | + +3. 
**Identify the relevant files** from the error signal. For stack traces: extract file paths and line numbers. For test failures: identify both the test file and the code under test. For build failures: note the first error location.
+
+**Completion criteria:** You know the bug category, have loaded the relevant playbook, and have a list of files to read.
+
+---
+
+### Phase 2: Reproduce & Comprehend
+
+**Goal:** Reproduce the failure reliably and understand the code well enough to form hypotheses. If you cannot reproduce it, you cannot verify a fix.
+
+**Steps:**
+
+1. **Reproduce the failure.**
+   - Run the exact command, test, or scenario that triggers the bug.
+   - Confirm you see the same error/symptom.
+   - If the failure is intermittent: run 5-10 times to establish frequency. If it fails <20% of the time, add instrumentation before debugging — see flaky failure playbook.
+
+2. **Read the relevant code.**
+   - Read the code at the error location with 30-50 lines of context — not just the error line.
+   - Read the immediate callers and callees of the failing function.
+   - Read related tests — they encode expected behavior.
+   - Understand the data flow: what goes in, what transformations happen, what comes out.
+
+3. **Check recent changes.**
+   - `git log --oneline -10 -- <file>` — what changed recently?
+   - `git diff HEAD~5 -- <file>` — what are the actual changes?
+   - If this is a regression: identify when it last worked. This bounds your search.
+
+4. **Build a mental model.**
+   - What is this code SUPPOSED to do? (Read tests, docs, type signatures)
+   - What is it ACTUALLY doing? (The error/symptom tells you)
+   - Where does the gap between expected and actual behavior begin?
+
+**Completion criteria:** You can reproduce the failure on demand (or have documented why you can't). You understand the relevant code well enough to explain what it does. You have identified the gap between expected and actual behavior.
+ +**Self-check:** If you've read >10 files without a clear picture of expected vs. actual behavior, stop reading and summarize what you know. You may be looking in the wrong place, not lacking information. + +--- + +### Phase 3: Investigate + +**Goal:** Identify the root cause through hypothesis-driven investigation. This is the core of debugging. + +**The hypothesis-test-refine cycle:** + +``` +REPEAT: + 1. Form ONE clear hypothesis: "The root cause is X because Y" + 2. Design a MINIMAL experiment to test it + 3. Predict the result BEFORE running the experiment + 4. Run the experiment + 5. Compare actual result to prediction + - Prediction matches → hypothesis supported, narrow further + - Prediction fails → hypothesis wrong, form a new one +``` + +**Rules for this phase:** + +- **One hypothesis at a time.** Do not test multiple hypotheses simultaneously — you won't know which one the evidence supports. +- **One change at a time.** Each experiment should change exactly one variable. If you change two things, you can't attribute the result. +- **Prefer probes over fixes.** Add logging or read code to test your hypothesis. Do NOT implement a fix as your "experiment" — that violates the Iron Law. +- **Predict before you run.** If you can't predict what the experiment will show, your hypothesis is too vague. Refine it. +- **Record each hypothesis and its verdict.** "Hypothesis: X. Test: Y. Result: Z. Verdict: confirmed/denied." This prevents re-testing and provides an audit trail. 
+ +**Investigation tools** — choose based on the hypothesis you're testing: + +| What you need to know | Tool / Technique | Reference | +|---|---|---| +| Where a value came from | Trace data flow backward | **Load:** `references/tool-patterns.md` §1 | +| When code changed | git blame, log, diff, bisect | **Load:** `references/tool-patterns.md` §2 | +| What the stack trace means | Stack trace parsing | **Load:** `references/tool-patterns.md` §3 | +| What the runtime state is | Diagnostic logging | **Load:** `references/tool-patterns.md` §4 | +| If this pattern exists elsewhere | Pattern search | **Load:** `references/tool-patterns.md` §5 | + +**Completion criteria:** You have a specific root cause, supported by evidence from at least one diagnostic action. You can state: "The root cause is X. I know this because when I checked Y, I found Z, which confirms X." + +**If you cannot reach a root cause:** +- After 3 hypotheses tested and rejected: switch your investigation approach entirely (see §Strategy Switching). +- After 5 hypotheses: escalate with your findings (see §Escalation). + +--- + +### Phase 4: Fix & Verify + +**Goal:** Fix the root cause — not the symptom — then prove the fix works. + +**Steps:** + +1. **Write a failing test** that reproduces the bug (if one doesn't already exist). + - This is your regression gate. If you can't write a test, document why. + - The test should fail NOW and pass AFTER your fix. + - Load `/tdd` for test design guidance if writing a new regression test from scratch. + +2. **Implement a single, minimal fix.** + - Fix the ROOT CAUSE identified in Phase 3. Nothing else. + - Do not bundle improvements, refactors, or other fixes. One bug, one fix. + - If the fix requires changes in multiple files, every changed line must be necessary for this specific root cause. + +3. **Verify the fix.** + - Run the originally failing test/command — it should pass. + - Run all tests in the same file/module — no regressions. 
+ - Run type check and lint — no new errors. + - If the bug was user-reported: manually verify the user-facing symptom is gone. + - **Load:** `references/tool-patterns.md` §6 for the complete verification sequence. + +4. **Confirm the fix is correct, not lucky.** + - Can you explain WHY the fix works? + - Does it address the root cause from Phase 3, or something else? + - If the fix differs from what your investigation predicted — go back. Your understanding may be wrong and the fix coincidental. + +**Completion criteria:** The failing test passes. The broader test suite passes. You can explain why the fix is correct and how it addresses the root cause. + +**If the fix doesn't work:** See §The 3+ Failures Rule. + +--- + +### Phase 5: Harden + +**Goal:** Prevent this class of bug from recurring. + +**Steps:** + +1. **Search for the same pattern elsewhere.** + - If you found a null-check bug, search for similar unchecked access patterns. + - If you found a race condition, check other concurrent code paths. + - **Load:** `references/tool-patterns.md` §5 for pattern search sequences. + +2. **Run the full test suite** — not just the targeted tests from Phase 4. + - If the full suite is too slow, run at minimum the test files that import the modified modules. + +3. **Harden where appropriate:** + - Does this bug reveal a missing validation? Add it at the boundary. + - Does this bug reveal a confusing API? Consider if the API can be made safer. + - Is this a footgun others might hit? Add a targeted comment explaining the constraint. + +4. **Clean up.** + - Remove temporary diagnostic logging from investigation. + - Keep the regression test — that stays permanently. + +**Completion criteria:** Full test suite passes. Diagnostic artifacts cleaned up. Similar patterns either fixed or documented. + +--- + +## Red Flags + +Monitor for these during every phase. If you detect one, stop and correct course. 
+ +| Red flag | Detection | Correction | +|---|---|---| +| **Shotgun debugging** | Making changes without a hypothesis | Stop. Form a hypothesis. Test with a probe, not a fix | +| **Symptom fixing** | Adding a guard/check/catch without understanding why the bad state exists | Stop. Trace the bad state to its origin. Fix there | +| **Confirmation bias** | Only seeking evidence supporting your hypothesis | Actively try to DISPROVE your hypothesis | +| **Scope creep** | "Fixing" related issues alongside the original bug | Stop. One bug, one fix. File other issues separately | +| **Stale code** | Error doesn't match the code you're reading | Verify: fresh build? Right branch? Transpiled output stale? | +| **Tunnel vision** | >5 min on one file without progress | Zoom out. Read callers. Check git history. The bug may be elsewhere | +| **Fix escalation** | Fix keeps growing (more files, more changes) | Stop. A growing fix is attacking the wrong root cause. Return to Phase 3 | + +--- + +## Agent Self-Monitoring + +Track these continuously. They detect failure modes before they waste significant time. + +### Loop Detection + +| Signal | Threshold | Action | +|---|---|---| +| Same tool call with same arguments | 2 times | Flag: you're repeating yourself | +| Consecutive actions with no new information | 3 actions | **Stop.** Summarize what you know, switch approach | +| Same file/function investigated without finding bug | 3 visits | Hypothesis is wrong. Form a different one | +| Fix applied, test still fails, similar fix attempted | 2 cycles | **Stop.** Return to Phase 2, rebuild mental model | +| Files read without forming a hypothesis | 5 reads | **Stop.** You're exploring, not converging. Hypothesize now | +| Total actions without resolution | 20 actions | Evaluate for escalation (see §Escalation) | + +### Strategy Switching + +When a loop threshold is hit, switch — don't retry: + +| If you've been... | Switch to... 
| +|---|---| +| Reading code without converging | Run it with diagnostic logging, observe actual behavior | +| Adding logging without finding divergence | Use git bisect to narrow the timeframe | +| Focused on one file | Search the entire codebase for the pattern | +| Debugging top-down (from entry point) | Debug bottom-up (from the error site backward) | +| Trusting the error location | Verify: build fresh? right branch? source maps correct? | +| Making the fix bigger | Reset. The fix direction is wrong. Restart Phase 3 | + +### Confidence Calibration + +Communicate your confidence and act accordingly: + +| Level | Criteria | Action | +|---|---|---| +| **High** (>90%) | Error directly points to bug; you see the wrong code; you understand WHY | Fix, test, verify | +| **Medium** (50-90%) | Plausible hypothesis with partial evidence; not fully traced | One more diagnostic before fixing | +| **Low** (<50%) | Multiple plausible causes; generic error; uncertain location | Do NOT fix. Enumerate hypotheses, run diagnostics | +| **None** | No hypothesis after investigation | Escalate with findings | + +**Calibration rule:** If you've been wrong twice on the same bug, downgrade all subsequent confidence by one level. Your model of this system is unreliable. + +**Verification hierarchy** (higher beats lower): + +1. Test execution result — code ran and produced observable output +2. Type checker / linter output — static analysis confirmed +3. Code reading + reasoning — you read it and think it's correct + +Never trust level 3 alone. Always get to level 1 or 2 before claiming a fix is correct. + +--- + +## The 3+ Failures Rule + +**If 3 or more fix attempts have failed, the problem is not the fix — it's your understanding.** + +When this triggers: + +1. **STOP making changes.** Revert ALL attempted fixes. Return to the original broken state. +2. **Re-read the original error message.** Not the new errors your fixes may have introduced. +3. 
**List every hypothesis and why each was wrong.** Look for a pattern — what assumption do they all share? +4. **Question the architecture, not just the code:** + - Is this actually TWO interacting bugs? + - Is the code you're looking at even the right code? (Stale build, wrong branch, transpilation) + - Is the test itself correct? (Tests can have bugs too) + - Is there an environmental factor you haven't checked? +5. **Try a fundamentally different diagnostic approach** — not a variant of what you've been doing: + - If reading code → run with logging + - If adding logging → use git bisect + - If debugging one file → search the whole codebase + - If debugging top-down → try bottom-up + +If a 4th fix fails: **escalate.** Continuing with a flawed mental model makes things worse. + +--- + +## Escalation + +Escalation is a design feature, not a failure. An agent that escalates with good findings is more valuable than one that persists with wrong assumptions. + +### When to Escalate + +- **Budget exceeded:** 20+ steps without resolution +- **Repeated failures:** 3+ failed fix attempts +- **Scope exceeded:** Bug spans 3+ interconnected systems beyond your context +- **High-risk fix:** Touches auth, payments, data migration, or production config — escalate with proposed fix for review +- **Missing information:** Need production logs, external service state, or user-specific data you can't access +- **Can't reproduce:** Non-deterministic failure after 5+ reproduction attempts +- **Architectural fix needed:** Root cause identified but fix requires changes beyond bug-fix scope + +### Escalation Format + +Provide ALL of the following: + +1. **The original problem** — exact error message or symptom +2. **What you investigated** — files read, hypotheses tested, experiments run +3. **What you learned** — findings, including what you ruled out (negative results are valuable) +4. **Your current best hypothesis** — what you think the issue is, even if unconfirmed +5. 
**What you need** — specific information or action required from the human + +--- + +## Error Message Interpretation + +Error messages have structure. Parse them systematically. + +**Error anatomy:** + +- **Exception type:** The KIND of error (TypeError, NullPointerException, ENOENT) — narrows the category +- **Message text:** The WHAT — what went wrong in human-readable terms +- **Location:** The WHERE (file, line, column) — symptom location, not necessarily root cause +- **Stack trace:** The HOW — call chain that led to the error +- **Caused by / chained exceptions:** The WHY — root cause, if the framework exposes it + +**Interpretation heuristics:** + +1. The exception type tells you WHAT to look for (null? wrong type? missing file? permission denied?) +2. The first frame in YOUR code is the primary investigation point — skip framework/library frames +3. The innermost "Caused by" exception is usually more diagnostic than the outer exception +4. If the error is in framework code with no user frames: the bug is in how you CALL the framework +5. If the error location doesn't match the code you see: verify the build is fresh and source maps are correct + +--- + +## Evidence Gathering + +When investigating, gather evidence strategically — instrument at boundaries, not in the middle of logic. + +**Load:** `references/tool-patterns.md` §4 for where to instrument, what to capture, and how to interpret results. 
+ +--- + +## Composability + +This skill is standalone but integrates with the broader skill ecosystem: + +| Situation | Composition | +|---|---| +| Need to understand unfamiliar code before debugging | Load `/inspect` for structured codebase exploration | +| Need to write a regression test for the fix | Load `/tdd` for test design methodology (focused on greenfield test authoring) | +| Bug found during QA testing | `/qa-test` invokes `/debug` for diagnosis | +| Implementation iteration hits a failure | Agent escalates to invoker, which can load `/debug` for diagnosis | +| Full feature delivery pipeline | When `/ship` encounters failures, debugging methodology from this skill applies | +| Complex multi-faceted issue needs deeper analysis | Load `/analyze` for multi-angle evidence-based analysis | diff --git a/plugins/eng/skills/debug/references/tool-patterns.md b/plugins/eng/skills/debug/references/tool-patterns.md new file mode 100644 index 00000000..c245001b --- /dev/null +++ b/plugins/eng/skills/debug/references/tool-patterns.md @@ -0,0 +1,314 @@ +Use when: Phase 3 (Investigate) or Phase 4 (Fix & Verify) needs specific tool sequences. +Priority: P0 +Impact: Without concrete tool patterns, the agent uses ad-hoc tool sequences and misses efficient investigation shortcuts. + +# Tool Patterns for Debugging + +Specific sequences of tool calls for common debugging scenarios. Each pattern describes WHEN to use it, the exact sequence, and how to interpret results. + +--- + +## §1 Tracing Data Flow Backward from an Error + +**When:** You have an error at a specific location and need to find where the bad data originated. This is the most common investigation pattern — use it whenever the bug is a wrong value, wrong type, null/undefined, or unexpected state. + +``` +SEQUENCE: + +1. Read the file at the error location (30-line context) + Tool: Read(file_path, offset=, limit=30) + -> Identify the variable/expression with the wrong value + +2. 
Grep for where that variable is assigned:
+   Tool: Grep(pattern="variableName\\s*=", path="src/", glob="*.ts")
+   Tool: Grep(pattern="variableName\\s*:", path="src/", glob="*.ts") # object properties
+   Use output_mode="content" with context lines to see surrounding code
+
+3. For each assignment found, Read that location to understand the source
+
+4. If the value comes from a FUNCTION CALL:
+   Grep for the function definition:
+   Tool: Grep(pattern="function functionName|def functionName|functionName\\s*=", path="src/")
+   Read the function body, find its return statements
+   -> The return value IS the bad data's source
+
+5. If the value comes from an IMPORT:
+   Grep for the export in the source module
+   Read the exported definition
+
+6. If the value comes from a PARAMETER:
+   Grep for callers of this function:
+   Tool: Grep(pattern="functionName\\(", path="src/", glob="*.ts")
+   Read each caller to see what they pass
+
+7. REPEAT until you find the origin of the bad data
+   TERMINATION: If you've traced through 5+ assignments without finding
+   the origin, return to Phase 3 and form a hypothesis about where the
+   value diverges — you have enough context to hypothesize.
+```
+
+**Key pattern:** Search for DEFINITIONS (where a value is produced), not usages (where it's consumed). When tracing backward, you want producers, not consumers.
+
+**Optimization:** Use Grep tool parameters to reduce noise:
+- `glob="*.py"` for Python only
+- `glob="*.{ts,tsx}"` for TypeScript
+- Use `output_mode="files_with_matches"` for a first pass, then `output_mode="content"` on matches
+
+---
+
+## §2 Strategic Git for Debugging
+
+**When:** Something broke and you need to understand what changed and when. Choose the right git command for your specific question.
+
+```
+COMMAND BY QUESTION:
+
+"When did this specific line change?"
+  git blame -L <start>,<end> <file>
+  -> Shows commit hash, author, date for each line
+  -> Then: git show <commit> for full context
+
+"What changed in this file recently?"
+  git log --oneline -10 -- <file>
+  -> Last 10 commits touching this file
+  git log -p -1 -- <file>
+  -> Most recent diff for this file
+
+"What changed between working and broken states?"
+  git diff <good-commit>..<bad-commit> -- <path>
+  git diff <good-commit>..<bad-commit> --stat
+  -> Summary of which files changed and how much
+
+"Who changed the function that's breaking?"
+  git log -p -S "function_name" -- <file>
+  -> Commits where the count of "function_name" changed (pickaxe)
+  -> -S finds additions/removals; -G uses regex
+
+"Find the exact commit that introduced the bug"
+  git bisect start
+  git bisect bad HEAD
+  git bisect good <last-known-good-commit>
+  git bisect run <test-command>
+  -> Automated binary search through history
+  git bisect reset # when done
+
+"What was this code before the recent change?"
+  git show <commit>:<file>
+  -> File contents at a specific commit
+  -> Compare with current to see what changed
+
+"What recent changes might be relevant?"
+  git log --oneline --since="3 days ago" -- <path>
+  -> Scoped to recent changes in a specific area
+```
+
+**Selection heuristic:**
+
+| Question | Command |
+|---|---|
+| Who/when changed this line | `git blame` |
+| When was this function added/removed | `git log -S` |
+| Which commit broke this behavior | `git bisect` |
+| What changed between two states | `git diff` |
+| What does the old version look like | `git show <commit>:<file>` |
+
+---
+
+## §3 Stack Trace Parsing
+
+**When:** You have a stack trace and need to extract actionable information. Use this as a systematic parsing procedure, not as something to skim.
+
+```
+PARSING PROCEDURE:
+
+1. FIND THE ERROR TYPE AND MESSAGE
+   - Usually first or last line of the trace
+   - This tells you WHAT went wrong
+   - Example: "TypeError: Cannot read properties of undefined (reading 'id')"
+     -> Something is undefined when we expected an object with .id
+
+2. EXTRACT FRAMES (each line with file:line info)
+   - Separate into: YOUR code vs LIBRARY/FRAMEWORK code
+   - Your code: files in src/, app/, lib/ (not node_modules, vendor, stdlib)
+   - Note: Python tracebacks show most recent call LAST (bottom).
+ JavaScript/Java show most recent call FIRST (top). + Adjust frame reading order accordingly. + +3. IDENTIFY THE PRIMARY FRAME + - First frame in YOUR code (closest to the error) + - This is where to start investigation + +4. IDENTIFY THE CALLER FRAME + - The frame that called into the primary frame + - This tells you what data was passed in + +5. FOR CHAINED EXCEPTIONS + ("Caused by:", "__cause__", "The above exception was the direct cause") + - Start from the INNERMOST cause + - That's usually the real error; outer exceptions are reactions + +THEN: + READ the primary frame file (line +/- 20 lines) + READ the caller frame file (line +/- 10 lines) + If bug isn't obvious: + GREP for the function name to find all callers + READ each caller to understand what data they pass +``` + +**Frames to skip** (almost never the bug's location): + +- Framework middleware / pipeline frames +- Event loop / scheduler frames +- Serialization / deserialization wrappers +- Logging / monitoring interceptors +- Test runner infrastructure frames + +**Focus on frames where YOUR code makes DECISIONS about data.** + +--- + +## §4 Strategic Diagnostic Logging + +**When:** You need to understand runtime behavior and can't use an interactive debugger. This is your primary investigation tool for most agent debugging scenarios. + +``` +WHERE TO INSERT LOGS (priority order): + +1. FUNCTION ENTRY POINTS with parameters + Log what data the function receives + "f entered with: x={x}, y={y}" + +2. DECISION POINTS (if/else, switch, pattern match) + Log which branch was taken and why + "Taking branch A because condition={value}" + +3. DATA TRANSFORMATION OUTPUTS + Log intermediate results after each transform + "After transform: data={repr(data)}" + +4. EXTERNAL CALL BOUNDARIES + Log before and after: API calls, DB queries, file I/O + "Calling API: {request}; got: {status}, {body[:200]}" + +5. 
LOOP SUMMARIES (not every iteration) + Log: count, first item, last item, any items failing sanity check + Don't log every iteration of a 10,000-item loop +``` + +**What to capture in each log:** + +- Variable values — use `repr()` / `inspect` to see types and special characters +- Collection sizes — `len(list)`, object key count +- Type information — `type(x).__name__`, `typeof x` +- Timestamps — for performance/timing issues +- Thread/process IDs — for concurrency issues + +**How to interpret logs:** + +1. Compare logged values against expected values at each point +2. Find the FIRST point where actual diverges from expected +3. That divergence point is your investigation target +4. Watch for: null/undefined appearing, types changing unexpectedly, collections being empty, values being 0 or NaN + +**Logging heuristic:** Place logs at BOUNDARIES, not in the middle of logic. Boundaries are: function entry/exit, loop start/end, conditional branch points, external calls. Maximum information, minimum noise. + +**Cleanup:** Always remove diagnostic logging after the bug is fixed. It's temporary investigation infrastructure, not permanent code. + +--- + +## §5 Searching for Similar Patterns / Bugs + +**When:** You found a bug and want to check if the same pattern exists elsewhere. Also useful in Phase 5 (Harden) to prevent recurrence. + +``` +SEQUENCE: + +1. ABSTRACT THE BUG PATTERN + Don't search for the exact code; search for the pattern + Example: Bug is "using .length on potentially null array" + -> Pattern: something that could be null followed by .length + +2. GREP FOR THE PATTERN + Tool: Grep(pattern="\\.length", path="src/", glob="*.ts", output_mode="content", -B=2) + Then filter results for cases where the object might be null + +3. 
For STRUCTURAL PATTERNS, search for the anti-pattern shape: + Example: missing null check before property access + Tool: Grep(pattern="response\\.\\w+", path="src/", glob="*.ts", output_mode="content", -B=3) + Then: are any of these missing a null check above them? + +4. SEARCH for the same FUNCTION being called elsewhere: + Tool: Grep(pattern="brokenFunction\\(", path="src/", glob="*.ts") + Each call site might have the same bug + +5. SEARCH for similar VARIABLE NAMES (same data, different location): + Tool: Grep(pattern="userData|user_data|userInfo", path="src/", glob="*.{ts,py}") + +6. Use git log to find similar PAST FIXES: + git log --all --oneline --grep="null check" + git log --all --oneline --grep="TypeError" + Past fixes for similar issues reveal other vulnerable spots +``` + +**Efficiency tips:** + +- Use Glob to find relevant files first, then Grep within them +- Use `output_mode="files_with_matches"` for a first pass, then `output_mode="content"` on matches +- Search test files too — test assertions reveal expected behavior +- When you find a pattern, use `output_mode="count"` to estimate the scope of the problem + +--- + +## §6 Targeted Fix Verification + +**When:** You've made a fix and want to verify it works before running the full suite. This is the verification sequence for Phase 4. + +``` +VERIFICATION SEQUENCE: + +1. RUN THE SPECIFIC FAILING TEST + pytest path/to/test_file.py::TestClass::test_method # Python + npx jest path/to/test.spec.ts -t "test name" # JavaScript + go test -run TestName ./package/... # Go + cargo test test_name # Rust + mvn test -pl module -Dtest=TestClass#testMethod # Java + +2. RUN RELATED TESTS IN THE SAME FILE/MODULE + pytest path/to/test_file.py + npx jest path/to/test.spec.ts + +3. RUN TESTS FOR THE MODIFIED MODULE + Find what tests import the modified code: + grep -rl "from.*modified_module\|import.*modified_module" tests/ + Run those test files + +4. 
TYPE CHECK (if applicable) + npx tsc --noEmit # TypeScript + mypy src/modified_file.py # Python + cargo check # Rust + +5. LINT CHECK + npx eslint src/modified_file.ts # JS/TS + ruff check src/modified_file.py # Python + golangci-lint run ./package/... # Go + +6. SMOKE TEST + Can the application start? + Does the specific user-facing behavior work? +``` + +**Order rationale:** + +- Steps 1-2: Verify the fix works (seconds) +- Step 3: Verify no regressions in related code (seconds to minutes) +- Steps 4-5: Catch type/style issues the fix might introduce (seconds) +- Step 6: Final sanity check before committing + +**A fix is ready when:** + +1. The originally failing test passes +2. All tests in the same file/module pass +3. Type checker and linter are clean +4. You can explain WHY the fix works (not just that it does) + +If you can't explain why it works, you don't understand the root cause. Go back to Phase 3. diff --git a/plugins/eng/skills/debug/references/triage-playbooks.md b/plugins/eng/skills/debug/references/triage-playbooks.md new file mode 100644 index 00000000..bd7b3b7f --- /dev/null +++ b/plugins/eng/skills/debug/references/triage-playbooks.md @@ -0,0 +1,669 @@ +Use when: Phase 1 (Triage) identifies a bug category. Load the relevant section. +Priority: P0 +Impact: Without category-specific playbooks, the agent uses generic investigation and misses category-specific shortcuts that dramatically accelerate diagnosis. + +# Bug Category Triage Playbooks + +Each section below is a diagnostic decision tree for a specific bug category. Follow the tree from START, branching based on what you observe. The trees encode the most efficient investigation path for each category based on where bugs in that category are most commonly found. + +--- + +## §1 Build / Compilation Failures + +**Key principle:** Cascade errors are the norm. The first error is almost always the only one that matters; subsequent errors are downstream noise. 
+ +``` +START + | + v +[Read the build/compiler output] + | + v +[Find the FIRST error message — ignore everything after it] + | + v +[Classify the first error] + | + +-- Syntax error? + | |-> Read the indicated line AND the line above it + | |-> Common causes: missing closing brace/paren on previous line, + | | missing semicolon, unclosed string literal, stray character + | |-> Fix -> rebuild -> reassess from the new first error + | + +-- Import / module-not-found? + | |-> Check 1: Does the file/module actually exist? (glob for it) + | |-> Check 2: Is the import path correct? (relative vs absolute) + | |-> Check 3: Is the dependency installed? (check package.json, + | | requirements.txt, go.mod, Cargo.toml) + | |-> Check 4: Version mismatch? (dependency requires different + | | version of a transitive dep) + | |-> Fix -> rebuild -> reassess + | + +-- Type error at build time? (TypeScript, Rust, Go, Java) + | |-> Jump to §5 (Type Errors) + | + +-- Linker error? + | |-> Check: missing library, wrong library version, symbol not + | | exported, duplicate symbols + | |-> Trace the undefined symbol back to its source package + | |-> Check: is the library installed? Correct version? Correct + | | build flags? Architecture mismatch (x86 vs ARM)? + | |-> Fix -> rebuild -> reassess + | + +-- Configuration / toolchain error? + | |-> Check build config (tsconfig, webpack, vite, Cargo, + | | Makefile, CMakeLists, etc.) + | |-> Compare against known-good config (git diff or reference) + | |-> Jump to §9 (Configuration Issues) + | + v +[If first-error fix didn't resolve everything] + |-> Rebuild and repeat with the NEW first error + |-> After 3 cycles with no progress: try a clean build + | (delete build artifacts, node_modules, .cache, target/, etc.) + |-> After clean build still fails: check environment (§9) +``` + +**Heuristic:** Fix one error at a time, always the first one. Rebuild after each fix. 
Resist the urge to fix multiple errors simultaneously — they are likely cascading. + +**When to try a clean build:** After 2-3 fix-rebuild cycles with no progress, OR when errors reference generated/cached files, OR after major dependency changes. + +**Exit:** You've identified the first real error and its cause (syntax, import, type, linker, or config). Return to SKILL.md Phase 2 with the error location and category to continue the workflow. + +--- + +## §2 Runtime Exceptions / Crashes + +**Key principle:** The stack trace is the primary artifact. Parse it systematically — don't scan it casually. + +``` +START + | + v +[Capture the full error message + stack trace] + | + v +[Parse the stack trace — extract structured data] + | - Exception type (NullPointerException, TypeError, SegFault, etc.) + | - Error message text + | - Each frame: file, line number, function name + | - For chained exceptions: find the root "Caused by:" + | + v +[Find the first frame in YOUR code] + | - Skip framework/library frames (node_modules, stdlib, vendor/) + | - For "Caused by:" chains: start from the innermost cause + | - This is your primary investigation point + | + v +[Read the code at that line + surrounding context (30 lines)] + | + v +[Classify the exception type] + | + +-- Null / undefined reference? + | |-> Trace the variable backward: where was it assigned? + | |-> Check: uninitialized? Failed lookup? Missing return value? + | |-> Check: conditional path that skips assignment? + | |-> Check: async operation that hasn't resolved yet? + | + +-- Type error (runtime)? + | |-> What type was expected vs. received? + | |-> Trace the value backward through the call chain + | |-> Check: API response shape changed? Serialization issue? + | |-> Jump to §5 for deep type tracing + | + +-- Index out of bounds / key error? + | |-> What is the actual size/contents of the collection? + | |-> Trace the index value: is it computed correctly? + | |-> Check: off-by-one? 
Empty collection not guarded? + | + +-- Permission / access error? + | |-> File permissions? Network access? Auth tokens expired? + | |-> Jump to §9 (Configuration/Environment) + | + +-- Out of memory / stack overflow? + | |-> Stack overflow: look for infinite recursion (missing base case) + | | Search for the recursive function, verify base case logic + | |-> OOM: look for unbounded data accumulation, leaked references + | | Check: growing collections in loops, event listener leaks, + | | unclosed streams/connections, large file reads without streaming + | + v +[If crash is in framework/library code with no user frames] + |-> The bug is in how you CALL the framework, not the framework itself + |-> Check: wrong arguments, wrong initialization order, missing config, + | lifecycle method violations + |-> Read the framework's error message carefully — good frameworks + | tell you what they expected + | + v +[If root cause isn't clear from the stack trace] + |-> Add logging ABOVE the crash point to capture input state + |-> Check recent changes: git diff HEAD~5 -- <files-in-the-stack-trace> + |-> Reproduce with minimal input to isolate the trigger +``` + +**Frame selection heuristic — most relevant frames in a stack trace:** + +1. The first frame in your own code (most likely bug location) +2. The frame just above it (the caller that passed bad data) +3. The deepest "Caused by" frame (root cause in chained exceptions) + +**When to trace backward vs. check recent changes:** + +- Trace backward (up the call stack) when: the error is a data issue (null, wrong type, bad value) — the bug is in whoever produced that data +- Check recent changes when: the code was working before and inputs haven't changed, OR the stack trace points to code that hasn't been modified recently + +**Exit:** You've identified the primary frame in your code, the exception category, and the likely data source. Return to SKILL.md Phase 2 to build your mental model of expected vs. actual behavior. 
 + +--- + +## §3 Test Failures + +**Key principle:** Before fixing anything, determine whether the TEST is wrong or the CODE is wrong. This changes your entire approach. + +``` +START + | + v +[Read the test failure output] + | - Which test(s) failed? + | - What was expected vs. actual? + | - Is there a stack trace (crash) or an assertion failure (wrong value)? + | + v +[If test CRASHES (exception, not assertion failure)] + |-> This is a runtime error, not a logic error + |-> Jump to §2 (Runtime Exceptions) + | + v +[For assertion failures: analyze expected vs. actual] + | + v +[DECISION: Is the test wrong, or is the code wrong?] + | + | Apply these checks IN ORDER: + | + | 1. Was the test recently modified? (git log -1 -- <test-file>) + | If yes -> suspect the test first + | + | 2. Was the code-under-test recently modified? + | If yes -> suspect the code first (likely regression) + | + | 3. Is this a new test for new functionality? + | If yes -> both are suspect; verify test logic first + | + | 4. Did the test pass in the last CI run / on the main branch? + | If yes -> this is a regression from your changes + | If no -> pre-existing failure or test environment issue + | + | 5. Does the expected value in the assertion look correct? + | Read the test as a specification: does it describe the RIGHT + | behavior? Cross-reference with requirements/docs if available. + | + v +[If CODE is wrong (regression)] + |-> git diff main -- <file-under-test> + |-> Find which change broke it + |-> Does the expected value still make sense? If so, fix the code + | + v +[If TEST is wrong] + |-> Outdated assertion? (expected value no longer matches + | intentional behavior change) + |-> Incorrect setup/fixture? (wrong input data) + |-> Fragile assumption? (order-dependent, timing, floating-point + | equality without epsilon) + | + v +[If MULTIPLE tests fail] + |-> Look for a common root cause: + | - Shared fixture/setup that broke? + | - Single function called by all failing tests? 
+ | - Environment/config issue (§9)? + |-> Group failures by the function they test, not by test name + | + v +[If ONE test fails intermittently] + |-> Jump to §6 (Flaky Failures) +``` + +**Assertion analysis heuristic — when expected != actual, ask:** + +- Is the actual value CLOSE to expected? (rounding, floating-point, encoding) +- Is the actual value from a PREVIOUS version? (stale cache, stale test data) +- Is the actual value null/empty when expected is populated? (missing setup) +- Is the actual value a completely different type? (serialization issue) + +**Exit:** You've determined whether the test or the code is wrong, and identified the relevant files and changes. Return to SKILL.md Phase 2 to reproduce and comprehend the failure. + +--- + +## §4 "It Worked Before" Regressions + +**Key principle:** This is a binary search problem. The bug was introduced in a specific commit. The question is whether `git bisect` is worth the setup cost. + +``` +START + | + v +[Confirm it actually worked before] + | - Can you identify a specific commit/tag/release where it worked? + | - Is there a test demonstrating the correct behavior? + | - If no known-good state: this may not be a regression at all + | + v +[DECISION: Use git bisect or manual investigation?] 
 + | + | USE GIT BISECT when: + | - You have a clear good commit and bad commit + | - There are >10 commits between good and bad + | - You have an automated test/script to verify good/bad + | - The build is fast enough to test each commit (<5 min) + | - You have no strong hypothesis about which change caused it + | + | USE MANUAL INVESTIGATION when: + | - There are <10 commits between good and bad + | - You have a strong suspicion about which file/change caused it + | - The build is slow (>10 min per iteration) + | - The failure requires manual verification + | - The commit history is messy (many merge commits, reverts) + | + +-- [GIT BISECT PATH] + | | + | | git bisect start + | | git bisect bad HEAD + | | git bisect good <last-known-good-commit> + | | + | | [With a test script:] + | | git bisect run <test-script> + | | # script exits 0=good, 1-124/126-127=bad, 125=skip + | | # Use exit 125 for commits that don't compile + | | + | | [Manually:] + | | At each step: build, test, mark good/bad + | | + | | Result: exact introducing commit identified + | | git bisect reset # return to original state + | | + | | Efficiency: log2(n) steps + | | 100 commits -> ~7 tests + | | 1000 commits -> ~10 tests + | | + | v + | [Examine the identified commit] + | |-> git show <commit> + | |-> Read the diff — the fix is usually obvious once you know + | | which commit introduced the bug + | + +-- [MANUAL INVESTIGATION PATH] + | + | git log --oneline <good>..<bad> -- <suspect-file> + | git diff <good>..<bad> -- <suspect-file> + | + | For each suspicious commit: + | git stash && git checkout <commit> && test && git checkout - + | + v + [Identify the breaking change and fix] +``` + +**Bisect script heuristic — a good bisect test script should:** + +1. Build the project (exit 125 if build fails — tells bisect to skip) +2. Run the specific failing test +3. Exit 0 if test passes, exit 1 if test fails +4. Timeout after reasonable duration (exit 125 on timeout) + +**Exit:** You've identified the introducing commit (via bisect or manual investigation). 
Return to SKILL.md Phase 2 — read the breaking change and continue to Phase 3 with the commit as your primary evidence. + +--- + +## §5 Type Errors + +**Key principle:** Type errors are chain problems. The mismatch at the error site is a symptom; the real bug is where the wrong type was introduced upstream. + +``` +START + | + v +[Read the type error message] + | - What type was expected? + | - What type was received? + | - At what location (file:line)? + | + v +[Build-time or runtime type error?] + | + +-- Build-time (TypeScript, Rust, Java, Go) + | | + | | [Read the code at the error location] + | | [Identify the variable/expression with wrong type] + | | + | | [Trace backward through the assignment chain:] + | | Where was this variable assigned? + | | -> Function return? Check return type AND actual returns + | | -> Data transformation? Check the transformation logic + | | -> API/external source? Check type def matches actual shape + | | -> Generic type? Check inference — hover/inspect inferred type + | | + | | [Common build-time patterns:] + | | - Union type not narrowed (need type guard) + | | - Optional/nullable not checked (need null guard) + | | - Generic inference chose wrong type (need explicit param) + | | - Library type definitions outdated (@types version) + | | - Implicit 'any' hiding real type issues + | | + | v + | [Fix at the earliest point where wrong type is introduced] + | [Do NOT cast/suppress at the error site unless you've verified + | the actual runtime value is correct and the type system is wrong] + | + +-- Runtime (Python, JavaScript, Ruby) + | + | [Add type inspection at the error site:] + | print(type(x), repr(x)) # Python + | console.log(typeof x, x) # JavaScript + | + | [Trace the value backward:] + | At each step: what type is it? Where does it change? 
+ | + | [Common runtime type error sources:] + | - JSON double-encoding (parse returns string, not object) + | - API returns different shape than expected + | - undefined propagating through operations (JS) + | - None returned from function expected to return value (Python) + | - String/number confusion from form inputs or env vars + | + v + [Fix: add type validation/conversion at the boundary where + untrusted data enters your code (API response, user input, + file parsing, deserialization)] +``` + +**Type tracing heuristic:** + +1. Start at the error site +2. Identify the immediate source of the value (assignment, parameter, return) +3. Jump to that source +4. Repeat until you find where the type diverges from expectation +5. The fix belongs at the divergence point, not the error site + +**Exit:** You've traced the type mismatch to its origin point (where the wrong type was introduced). Return to SKILL.md Phase 2 with the divergence location to continue the workflow. + +--- + +## §6 Intermittent / Flaky Failures + +**Key principle:** Flaky failures have exactly three root causes: timing/concurrency, shared mutable state (test pollution), or environment differences. Determine which category first. + +``` +START + | + v +[Establish the failure is actually intermittent] + | - Run the failing test 10-20 times in isolation + | - Record frequency: how many times does it fail? + | + v +[Does it ALWAYS pass when run in isolation?] 
+ | + +-- YES -> Likely TEST POLLUTION (order dependency) + | | + | | [Diagnosis:] + | | - Run the suite with randomized order + | | - Find which other test causes the failure when run first + | | - Look for shared mutable state: global variables, database + | | records, files on disk, environment variables, singletons + | | + | | [Fix:] + | | - Add proper setup/teardown to isolate test state + | | - Use transactions that roll back, temp dirs, mocks + | | - Never rely on test execution order + | + +-- NO -> Fails even in isolation (sometimes) + | + v + [Correlates with machine load or timing?] + | + +-- YES -> Likely RACE CONDITION / TIMING + | | + | | [Diagnosis:] + | | - Look for: sleep/timeout waits, shared resources + | | accessed by multiple threads, async operations + | | assumed to complete in a specific order + | | - Check for hardcoded timeouts or polling intervals + | | - Pattern: "set up -> async thing -> assert" without sync + | | + | | [Fix:] + | | - Replace sleeps with proper synchronization + | | (events, semaphores, polling with retry + backoff) + | | - Use deterministic ordering where possible + | | - Add proper locking around shared resources + | + +-- NO -> Fails randomly regardless of load + | + v + [Fails only on CI / different machine?] 
+ | + +-- YES -> ENVIRONMENT ISSUE (jump to §9) + | + +-- NO -> Truly random + | + | [Less common causes:] + | - Uninitialized memory (C/C++) + | - Hash map iteration order (non-deterministic) + | - Floating-point precision across platforms + | - System clock granularity + | - Random seed not fixed in tests + | + v + [Instrument heavily] + | Log at every state transition + | Compare passing vs failing runs side-by-side + | Find the first point of divergence +``` + +**Distinguishing signal:** + +- **Environment:** consistent failure in one env, consistent pass in another +- **Race condition:** random failures everywhere, sometimes correlated with load +- **Test pollution:** only fails when run after specific other tests + +**Exit:** You've classified the flaky failure (timing, test pollution, or environment) and identified the mechanism. Return to SKILL.md Phase 3 — your hypothesis is the classification; test it with the targeted diagnostic from the relevant branch above. + +--- + +## §7 Silent Failures (No Error, Wrong Output) + +**Key principle:** The computation diverges from your mental model somewhere in the pipeline. Use binary search on the pipeline to find the divergence point. + +``` +START + | + v +[Define PRECISELY what the expected output should be] + | - Write it down explicitly + | - If you can't specify the expected output, you can't debug + | - Identify the exact input that produces wrong output + | + v +[Map the data flow from input to output] + | - List every transformation step + | - Example: input -> parse -> validate -> transform -> format -> output + | + v +[BINARY SEARCH THE PIPELINE] + | + | Check the value at the MIDDLE of the pipeline + | (add logging or use a debugger) + | + +-- Value correct at midpoint? + | |-> Bug is in the second half + | |-> Move midpoint forward, repeat + | + +-- Value ALREADY wrong at midpoint? 
+ | |-> Bug is in the first half + | |-> Move midpoint backward, repeat + | + v +[Continue until you find the exact step: correct -> wrong] + | + v +[Examine that transformation step in detail] + | - What does the code do here? + | - What are the exact inputs? (log them) + | - What is the exact output? (log it) + | - Walk through the logic line by line with actual values + | + v +[Common silent failure causes:] + - Off-by-one (fencepost) + - Wrong comparison operator (< vs <=, == vs ===) + - Integer division truncation + - Operator precedence (missing parentheses) + - Short-circuit evaluation skipping side effects + - Mutating shared data (one consumer changes what another needs) + - Shallow copy when deep copy needed + - String encoding issues (UTF-8, byte vs char) + - Default argument values being wrong + - Swapped function arguments + - Catching and silently swallowing exceptions +``` + +**Silent failure heuristic:** If you can't map the data pipeline clearly, the architecture itself may be the problem. Create a written trace of the expected data flow first. Any step you're unsure about is the most likely bug location. + +**Exit:** You've found the exact pipeline step where correct input produces wrong output. Return to SKILL.md Phase 3 with this step as your investigation target. + +--- + +## §8 Performance Regressions + +**Key principle:** Don't profile blindly. Form a hypothesis first, then profile to confirm or deny it. + +``` +START + | + v +[Quantify the regression] + | - What was performance before? What is it now? + | - Be specific: "200ms -> 800ms", not "it feels slow" + | - Is it latency, throughput, memory, or CPU? + | + v +[Can you identify WHEN it regressed?] + | + +-- YES -> This is a git bisect problem (§4) + | | Once you find the commit, the fix is usually obvious + | + +-- NO (gradual degradation or unknown timeline) + | + v + [Form a hypothesis BEFORE profiling] + | + | Common hypotheses (check in this order): + | 1. 
Data volume grew (N increased, O(n^2) now hurts) + | 2. New code path added (extra DB query, API call, etc.) + | 3. Caching disabled or cache hit rate dropped + | 4. Resource contention (locks, connection pool exhaustion) + | 5. External dependency slowed (DB, API, network) + | 6. Memory pressure causing GC thrashing + | + v + [Test with targeted measurement] + | - H1: Log sizes of key data structures + | - H2: Time individual operations with timestamps + | - H3: Check cache hit/miss metrics + | - H4: Check connection pool stats, lock contention + | - H5: Time external calls independently + | - H6: Monitor GC frequency and pause times + | + v + [Hypothesis confirmed -> optimize that specific area] + [Hypothesis denied -> profile to find the real hotspot] + | + | Profiling approach: + | 1. CPU: flame graph comparing before/after + | Look for: new tall stacks, widened existing stacks + | 2. Memory: heap snapshot comparison + | Look for: new large allocations, growing object counts + | 3. I/O: trace system calls, network requests + | Look for: new blocking calls, increased call counts + | + v + [Focus on DIFFERENCES from baseline, not absolute values] +``` + +**Most common performance regression causes:** + +1. Algorithmic complexity increase (O(n) to O(n^2)) — nested loops over growing data +2. N+1 query problems — DB/API calls inside loops +3. Missing or broken caching +4. Synchronous operations that should be async/parallel +5. Excessive logging or serialization in hot paths + +**Exit:** You've identified the performance hotspot (either via bisect or targeted profiling). Return to SKILL.md Phase 3 with the specific area to investigate — form a hypothesis about why that area regressed. + +--- + +## §9 Configuration / Environment Issues + +**Key principle:** Systematic comparison between working and failing environments. Every difference is a suspect until eliminated. + +``` +START + | + v +[Does the same code work in a different environment?] 
+ | - Works locally, fails on CI? + | - Works on machine A, fails on machine B? + | - Works in dev, fails in staging? + | + v +[Systematic comparison — check in this order:] + | + | LAYER 1: Runtime versions + | - Language version (node --version, python --version, etc.) + | - Package versions (diff lock files between environments) + | - OS and architecture (x86 vs ARM, Linux vs macOS) + | + | LAYER 2: Environment variables + | - Compare env vars (printenv | sort) + | - Check for: missing vars, different values, extra vars + | - Common culprits: PATH, HOME, LANG/LC_*, TZ, proxy settings + | + | LAYER 3: File system + | - Permissions (especially on CI runners) + | - Path separators (Windows vs Unix) + | - Case sensitivity (macOS default is case-insensitive) + | - Temp directory location and permissions + | - Available disk space + | + | LAYER 4: Network + | - DNS resolution + | - Proxy / firewall settings + | - TLS certificate stores + | - Available ports + | + | LAYER 5: Configuration files + | - Diff config files between environments + | - Check for: .env files, config overrides, feature flags + | - Environment-specific config not in version control + | + v +[For each difference found:] + | Make the failing environment match the working one + | for this specific variable. Test. + | If it fixes -> document the requirement + | If not -> eliminate this variable, move to next +``` + +**80% of "works on my machine" bugs are caused by:** + +1. Different dependency versions (especially transitive) +2. Missing environment variables +3. Different OS behavior (filesystem, line endings, paths) +4. Different available system resources (memory, file handles) + +**Exit:** You've identified the environmental difference that causes the failure. Return to SKILL.md Phase 4 — the fix is making the environments consistent or making the code robust to the difference. 
diff --git a/plugins/eng/skills/docs/SKILL.md b/plugins/eng/skills/docs/SKILL.md index e535a29a..fdf193a2 100644 --- a/plugins/eng/skills/docs/SKILL.md +++ b/plugins/eng/skills/docs/SKILL.md @@ -134,6 +134,7 @@ For each item in the plan: - **Choose the right doc pattern for the reader's intent.** Different readers need different shapes: conceptual overviews ("what is X and why?"), exhaustive references (scannable, precise), goal-oriented tutorials (sequential steps to accomplish something), or setup/integration guides (get connected fast). Match the pattern to what the reader is trying to do — don't write a tutorial when they need a reference, or an overview when they need steps. - **When updating an existing page, match its grain.** Read the page before editing. If other features on that page get 2–3 sentences, your addition gets 2–3 sentences — not three paragraphs because you happen to know more about this change. The developer's time and attention are precious; write exactly the level of detail they'd expect at that point in the page, no more. Don't let recency bias inflate the importance of what was just built. - Be concrete — include code examples, configuration snippets, API request/response examples where they help. + - **Screenshots for UI documentation:** When writing docs for user-facing UI features (guides, tutorials, getting started), use `/use-browser` to capture screenshots of the actual running UI rather than relying on manually provided images. This ensures screenshots are accurate, reproducible, and match the current implementation. For capturing multiple routes or before/after comparisons, use `/screengrabs` which handles batch capture, sensitive data masking, and annotation. - Prefer editing existing pages over creating new ones (reduces docs sprawl). 3. **Verify accuracy against the implementation.** Every claim in the documentation must be verifiable against the current code. 
Do not document aspirational behavior — document what the code does now. If the spec says one thing and the code does another, the docs match the code. diff --git a/plugins/eng/skills/implement/SKILL.md b/plugins/eng/skills/implement/SKILL.md index db9b240e..63647bf1 100644 --- a/plugins/eng/skills/implement/SKILL.md +++ b/plugins/eng/skills/implement/SKILL.md @@ -160,10 +160,10 @@ For stories with testable logic, also include: For stories that change UI — **if browser automation is available** (no `--no-browser` flag): ``` -"Verify in browser using dev-browser skill" +"Verify in browser using use-browser skill" ``` -Frontend stories are NOT complete until visually verified. The iteration agent will use the dev-browser skill to navigate to the page, interact with the UI, and confirm changes work. +Frontend stories are NOT complete until visually verified. The iteration agent will load the `/use-browser` skill to navigate to the page, interact with the UI, and confirm changes work. Beyond visual verification, the use-browser skill provides helpers for console error monitoring (`startConsoleCapture` / `getConsoleErrors`), network request verification (`startNetworkCapture` / `getFailedRequests`), and accessibility audits (`runAccessibilityAudit`) — use these when acceptance criteria warrant deeper verification than a visual check. **If browser is NOT available** (`--no-browser`): Omit the browser criterion. Instead, add Bash-verifiable criteria that cover the UI behavior through API responses or rendered output (e.g., "API response includes the updated status badge markup", "Server-rendered HTML contains filter dropdown with options: All, Active, Completed"). @@ -319,7 +319,7 @@ Add ability to mark tasks with different statuses. 
"Each task card shows colored status badge", "Badge colors: gray=pending, blue=in_progress, green=done", "Typecheck passes", - "Verify in browser using dev-browser skill" + "Verify in browser using use-browser skill" ], "priority": 2, "passes": false, @@ -336,7 +336,7 @@ Add ability to mark tasks with different statuses. "API returns 400 with descriptive error when status value is not in [pending, in_progress, done]", "Status update is tenant-scoped (uses existing tenant middleware)", "Typecheck passes", - "Verify in browser using dev-browser skill" + "Verify in browser using use-browser skill" ], "priority": 3, "passes": false, @@ -350,7 +350,7 @@ Add ability to mark tasks with different statuses. "Filter dropdown: All | Pending | In Progress | Done", "Filter persists in URL params", "Typecheck passes", - "Verify in browser using dev-browser skill" + "Verify in browser using use-browser skill" ], "priority": 4, "passes": false, @@ -379,7 +379,7 @@ Before writing spec.json, verify: - [ ] Each story is completable in one iteration (small enough) - [ ] Stories are ordered by dependency (schema to backend to UI) - [ ] Every story has "Typecheck passes" as criterion -- [ ] UI stories have "Verify in browser using dev-browser skill" as criterion (if browser available) or Bash-verifiable substitutes (if `--no-browser`) +- [ ] UI stories have "Verify in browser using use-browser skill" as criterion (if browser available) or Bash-verifiable substitutes (if `--no-browser`) - [ ] Acceptance criteria are verifiable and not vague; functional criteria describe observable behavior, not internal mechanisms (see /tdd) - [ ] No story depends on a later story - [ ] **`implementationContext` extracted** from SPEC.md §8, §9, §10, §6 — concise prose, not a copy-paste @@ -598,6 +598,7 @@ If the same story fails across 2 consecutive implement.sh runs with the same blo 2. **Criteria ambiguous** → rewrite criteria to be more specific 3. 
**External dependency blocking** → skip the story, set `notes` explaining the blocker 4. **Wrong implementation approach** → add guidance to `tmp/ship/progress.txt` suggesting an alternative +5. **Code defect blocking the story** (test fails, runtime error, type error the iteration agent can't resolve) → Load `/debug` to diagnose the root cause between runs. Apply the fix, then re-invoke implement.sh. After 3 consecutive failed runs on the same story, stop and consult the user. diff --git a/plugins/eng/skills/inspect/SKILL.md b/plugins/eng/skills/inspect/SKILL.md index 8e6e9387..0be9c1b3 100644 --- a/plugins/eng/skills/inspect/SKILL.md +++ b/plugins/eng/skills/inspect/SKILL.md @@ -185,6 +185,7 @@ Calibrate search depth and brief detail to the purpose: | **Implementing** | Focused | Adjacent patterns, shared utils, naming, imports, data access | L1-L3 (L2 is critical) | | **Reviewing** | Targeted | Specific convention claim — confirm or refute with evidence | L1-L2, just enough to answer the question | | **Testing** | Focused | Test utilities, setup/teardown patterns, what's mocked vs. real | L1-L2 in `__tests__/` directories | +| **Debugging** | Focused | Call chains to/from error site, recent changes, data flow through the failing path | L1-L3, tracing lens | | **Pattern search** | Varies | "Do we do X elsewhere?" — find and classify matches | Start L1, expand as needed | | **Tracing** | Deep | Follow call chains, map dependencies, identify blast radius and surface area | Entry point → follow forward/backward to system boundaries | diff --git a/plugins/eng/skills/pull-request/SKILL.md b/plugins/eng/skills/pull-request/SKILL.md index ca608548..82be057a 100644 --- a/plugins/eng/skills/pull-request/SKILL.md +++ b/plugins/eng/skills/pull-request/SKILL.md @@ -60,6 +60,8 @@ Bullet list of what changed, organized by area. Visual evidence of UI or behavioral changes. 
**Omit if no visual changes.** +When the diff touches UI files (components, pages, styles, layouts) and `/screengrabs` is available, invoke it to capture before/after screenshots of affected routes. This automates the most common visual evidence and ensures screenshots are consistent and up-to-date with the actual PR state. Add manual screenshots only for interactions or states that `/screengrabs` cannot capture (e.g., hover states, mid-animation frames, error modals triggered by specific sequences). + #### How to verify Steps a reviewer can follow to manually verify the behavior. **Omit if changes are purely internal.** diff --git a/plugins/eng/skills/qa-test/SKILL.md b/plugins/eng/skills/qa-test/SKILL.md index a283c79d..1f6b99db 100644 --- a/plugins/eng/skills/qa-test/SKILL.md +++ b/plugins/eng/skills/qa-test/SKILL.md @@ -33,6 +33,8 @@ Record what's available. If browser or desktop tools are missing, say so upfront **Probe aggressively.** Don't stop at "browser automation is available." Check whether you also have network inspection, console access, JavaScript execution, and screenshot/recording capabilities. Each expands your testing surface area. The more tools you have, the more you should use. +**Cross-skill integration:** When browser automation is available, `Load /use-browser skill` for structured testing primitives. The use-browser skill provides helpers for console monitoring, network capture, accessibility audits, video recording, performance metrics, browser state inspection, and network simulation — all designed for use during QA flows. These helpers turn "check the console for errors" into reliable, automatable verification with structured output. Reference `/use-browser` SKILL.md for the full helper table and usage patterns. + **Get the system running.** Check `AGENTS.md`, `CLAUDE.md`, or similar repo configuration files for build, run, and setup instructions. 
If the software can be started locally, start it — you cannot test user-facing behavior against a system that isn't running. If the system depends on external services, databases, or environment variables, check what's available and what you can reach. Document anything you cannot start. ### Step 2: Gather context — what are you testing? @@ -121,11 +123,19 @@ Work through each scenario. Use the strongest tool available for each. - Record a GIF of multi-step flows when it helps demonstrate the result. **With browser inspection (use alongside browser automation — not instead of):** -- **Console monitoring:** Check the browser console for errors and warnings during every UI interaction. A page that looks correct but throws JS errors is not correct. Filter for errors/exceptions after each major action. -- **Network request verification:** Monitor network requests during UI flows. Verify: correct endpoints are called, response status codes are expected (no silent 4xx/5xx), request/response payloads match what the feature requires. Flag unexpected requests or missing requests. -- **In-page assertions:** Execute JavaScript in the page to verify DOM state, computed styles, data attributes, or application state that isn't visible on screen. Use this when visual inspection alone can't confirm correctness (e.g., "is this element actually hidden via CSS, or just scrolled off-screen?"). +- **Console monitoring (non-negotiable — do this on every flow):** Start capture BEFORE navigating (`startConsoleCapture`), then check for errors after each major action (`getConsoleErrors`). A page that looks correct but throws JS errors is not correct. Filter logs for specific patterns (`getConsoleLogs` with string/RegExp/function filter) when diagnosing issues. +- **Network request verification:** Start capture BEFORE navigating (`startNetworkCapture` with URL filter like `'/api/'`). After the flow, check for failed requests (`getFailedRequests` — catches 4xx, 5xx, and connection failures). 
Verify: correct endpoints called, status codes expected, no silent failures. For specific API calls, use `waitForApiResponse` to assert status and inspect response body/JSON. +- **Browser state verification:** After mutations, verify state was persisted correctly. Check `getLocalStorage`, `getSessionStorage`, `getCookies` to confirm the UI action actually wrote expected data. Use `clearAllStorage` between test scenarios for clean-state testing. +- **In-page assertions:** Execute JavaScript in the page to verify DOM state, computed styles, data attributes, or application state that isn't visible on screen. Use `getElementBounds` for layout verification (visibility, viewport presence, computed styles). Use this when visual inspection alone can't confirm correctness (e.g., "is this element actually hidden via CSS, or just scrolled off-screen?"). - **Rendered text verification:** Extract page text to verify content rendering — especially dynamic content, interpolated values, and conditional text. +**With browser-based quality signals (when /use-browser primitives are available):** +- **Accessibility audit:** Run `runAccessibilityAudit` on each major page/view. Report WCAG violations by impact level (critical > serious > moderate). Test keyboard focus order with `checkFocusOrder` — verify tab navigation follows logical reading order, especially on new or changed UI. +- **Performance baseline:** After page load, capture `capturePerformanceMetrics` to check for obvious regressions — TTFB, FCP, LCP, CLS. You're not doing formal perf testing; you're catching "this page takes 8 seconds to load" or "layout shifts when the hero image loads." +- **Video recording:** For complex multi-step flows, record with `createVideoContext`. Attach recordings to QA results as evidence. Especially useful for flows that involve timing, animations, or state transitions that are hard to capture in a screenshot. 
+- **Responsive verification:** Run `captureResponsiveScreenshots` to sweep standard breakpoints (mobile/tablet/desktop/wide). Compare screenshots for layout breakage, clipping, or missing elements across viewports. +- **Degraded conditions:** Test with `simulateSlowNetwork` (e.g., 500ms latency) and `blockResources` (block images/fonts) to verify graceful degradation. Test `simulateOffline` if the feature has offline handling. This catches "works on fast connections, breaks on slow ones." + **With macOS desktop automation:** - Test OS-level interactions when relevant — file dialogs, clipboard, multi-app workflows. - Take screenshots for visual verification. @@ -170,8 +180,11 @@ After each scenario (or batch of related scenarios), update the `## Test plan` s | **Skipped (tool limitation)** | Leave unchecked, append: `— Skipped: ` | **When you find a bug:** -1. Can it be reproduced with a formal test? If yes — write the test first, then fix the bug, then verify both the test and manual scenario pass. -2. If it can't be a test — fix it, verify manually, document what was found and fixed in the checklist. + +First, assess: do you see the root cause, or just the symptom? + +- **Root cause is obvious** (wrong variable, missing class, off-by-one visible in the code) — fix it directly. Write a test if possible, verify, document. +- **Root cause is unclear** (unexpected behavior, cause not visible from the symptom) — load `/debug` for systematic root cause investigation before attempting a fix. QA resumes after the fix is verified. ### Step 7: Report diff --git a/plugins/eng/skills/review/SKILL.md b/plugins/eng/skills/review/SKILL.md index 5db8f45e..3fadcdce 100644 --- a/plugins/eng/skills/review/SKILL.md +++ b/plugins/eng/skills/review/SKILL.md @@ -144,7 +144,7 @@ pnpm typecheck pnpm lint ``` -If changes affect user-facing behavior, also verify the experience manually (API calls, browser testing, etc.) as appropriate. 
+If changes affect user-facing behavior, also verify the experience manually (API calls, browser testing, etc.) as appropriate. When fixing a reviewer-flagged UI issue and `/use-browser` is available, load it to verify the fix: navigate to the affected page, confirm the visual change, and check for console errors (`startConsoleCapture` / `getConsoleErrors`). Capture a screenshot as evidence — this strengthens your reply when resolving the thread. If your changes affect documented behavior — whether product-facing (user docs, API reference, guides) or internal (architecture docs, READMEs, runbooks) — update the relevant documentation files (`.md`, `.mdx`, etc.) alongside the code fix. Docs should stay accurate through the review loop, not deferred to later. diff --git a/plugins/eng/skills/ship/SKILL.md b/plugins/eng/skills/ship/SKILL.md index 07bbf62d..de11e6ca 100644 --- a/plugins/eng/skills/ship/SKILL.md +++ b/plugins/eng/skills/ship/SKILL.md @@ -230,7 +230,7 @@ Wait for `/implement` to complete. If it reports that automated execution is una #### Step 3: Post-implementation review -After implementation completes, verify that you are satisfied with the output before proceeding. You are responsible for this code — the implementation output is your starting point, not your endpoint. Do not review the output by reading every changed file yourself — delegate targeted verification to a subagent: "Does the implementation match the SPEC.md acceptance criteria? Are there gaps, dead code, or unresolved TODOs? Does every acceptance criterion have a corresponding test?" Act on the findings. Fix issues directly for small problems, or re-invoke `/implement` with specific feedback for larger rework. +After implementation completes, verify that you are satisfied with the output before proceeding. You are responsible for this code — the implementation output is your starting point, not your endpoint. 
Do not review the output by reading every changed file yourself — delegate targeted verification to a subagent: "Does the implementation match the SPEC.md acceptance criteria? Are there gaps, dead code, or unresolved TODOs? Does every acceptance criterion have a corresponding test?" Act on the findings. Fix issues directly for small, obvious problems. For issues where the root cause isn't immediately clear, load `/debug` to diagnose before fixing. For larger rework that requires re-implementing a story, re-invoke `/implement` with specific feedback. **If you made any code changes** (whether direct fixes or by re-invoking `/implement`): re-run quality gates (test suite, typecheck, lint) and verify green before proceeding. `/implement` exits green, but post-implementation fixes happen outside its loop — you own verification of your own changes. @@ -262,6 +262,8 @@ If scope calibration indicated a lightweight scope (bug fix / config change), pa After Phase 3's exit gate and before entering Phase 4. Do not update `currentPhase` until Phase 4 begins. +If the implementation includes UI changes and `/screengrabs` is available, invoke it before writing the PR body — capture screenshots of affected routes so the PR body's "Screenshots / recordings" section has visual evidence ready. `/screengrabs` supports `--pre-script` for interaction before capture (dismissing modals, navigating tabs, logging in). + Load `/pull-request` skill with the PR number and `--spec ` to write the full PR body. Implementation and testing are now complete — the body can cover approach, changes, architectural decisions, and test plan comprehensively. If no PR exists (`prNumber: null` — GitHub CLI was unavailable during draft PR creation), load `/pull-request` with `new --spec ` to create the PR and write the body in one step. Update `prNumber` in `tmp/ship/state.json`. If `gh` is still unavailable, `/pull-request` will output the body for manual use — skip Phase 5. 
@@ -394,4 +396,5 @@ These govern your behavior throughout: | `references/completion-checklist.md` | Final verification (Phase 6) | Incomplete work ships as "done" | | `/review` skill `scripts/fetch-pr-feedback.sh` | Fetching review feedback and CI/CD status (Phase 5, via /review). Canonical copies live in the `/review` skill — do not duplicate. | Agent uses wrong/deprecated `gh` commands, misses inline review comments | | `/review` skill `scripts/investigate-ci-failures.sh` | Investigating CI/CD failures with logs (Phase 5, via /review). Canonical copies live in the `/review` skill — do not duplicate. | Agent struggles to find run IDs, fetch logs, or compare with main | +| `/debug` skill | Diagnosing root cause of failures encountered during implementation (Phase 2) or testing (Phase 3) — when the cause isn't obvious from the error | Shotgun debugging: fixing symptoms without understanding root cause, wasted iteration cycles | diff --git a/plugins/eng/skills/ship/references/capability-detection.md b/plugins/eng/skills/ship/references/capability-detection.md index 60556852..feb74a27 100644 --- a/plugins/eng/skills/ship/references/capability-detection.md +++ b/plugins/eng/skills/ship/references/capability-detection.md @@ -18,7 +18,7 @@ Detect what capabilities are available before starting work. 
For each capability |---|---|---| | GitHub CLI | `gh auth status` | Skip PR creation (after Phase 3) and review (Phase 5) | | Quality gate commands | Read `package.json` `scripts` field; check for `pnpm`/`npm`/`yarn`; accept user `--test-cmd` / `--typecheck-cmd` / `--lint-cmd` overrides | Use discovered commands; halt if no typecheck AND no test command works | -| Browser automation | Check if `mcp__claude-in-chrome__*` tools are available | Substitute Bash-based testing; pass `--no-browser` to `/implement` for criteria adaptation | +| Browser automation (`/use-browser`) | Check if `mcp__claude-in-chrome__*` tools are available OR `/use-browser` skill is loadable (Playwright-based headless automation) | Substitute Bash-based testing; pass `--no-browser` to `/implement` for criteria adaptation. When available, `/use-browser` provides console monitoring, network capture, a11y audits, and screenshot helpers used by `/qa-test`, `/review`, and `/screengrabs`. | | macOS computer use | Check if `mcp__peekaboo__*` tools are available | Skip OS-level testing; document gap | | Claude CLI subprocess | Detected by `/implement` during Phase 2 execution | `/implement` handles degradation internally — if subprocess unavailable, it provides manual iteration instructions. Ship does not need to detect this. | | Docker execution (`--implement-docker`) | User passes `--implement-docker` (optionally with compose file path) | Host execution (default). When passed, forwarded to `/implement` as `--docker` in Phase 2. The skill auto-discovers the compose file if no path given. | diff --git a/plugins/eng/skills/use-browser/API_REFERENCE.md b/plugins/eng/skills/use-browser/API_REFERENCE.md new file mode 100644 index 00000000..173574ae --- /dev/null +++ b/plugins/eng/skills/use-browser/API_REFERENCE.md @@ -0,0 +1,562 @@ +# Playwright Skill - Complete API Reference + +This document contains the comprehensive Playwright API reference and advanced patterns. 
For quick-start execution patterns, see [SKILL.md](SKILL.md). + +## Table of Contents + +- [Installation & Setup](#installation--setup) +- [Core Patterns](#core-patterns) +- [Selectors & Locators](#selectors--locators) +- [Common Actions](#common-actions) +- [Waiting Strategies](#waiting-strategies) +- [Assertions](#assertions) +- [Page Object Model](#page-object-model-pom) +- [Network & API Testing](#network--api-testing) +- [Authentication & Session Management](#authentication--session-management) +- [Visual Testing](#visual-testing) +- [Mobile Testing](#mobile-testing) +- [Debugging](#debugging) +- [Performance Testing](#performance-testing) +- [Parallel Execution](#parallel-execution) +- [Data-Driven Testing](#data-driven-testing) +- [Accessibility Testing](#accessibility-testing) +- [CI/CD Integration](#cicd-integration) +- [Best Practices](#best-practices) +- [Common Patterns & Solutions](#common-patterns--solutions) +- [Troubleshooting](#troubleshooting) + +## Installation & Setup + +### Prerequisites + +Before using this skill, ensure Playwright is available: + +```bash +# Check if Playwright is installed +npm list playwright 2>/dev/null || echo "Playwright not installed" + +# Install (if needed) +cd $SKILL_DIR +npm run setup +``` + +### Basic Configuration + +Create `playwright.config.ts`: + +```typescript +import { defineConfig, devices } from '@playwright/test'; + +export default defineConfig({ + testDir: './tests', + fullyParallel: true, + forbidOnly: !!process.env.CI, + retries: process.env.CI ? 2 : 0, + workers: process.env.CI ? 
1 : undefined, + reporter: 'html', + use: { + baseURL: 'http://localhost:3000', + trace: 'on-first-retry', + screenshot: 'only-on-failure', + video: 'retain-on-failure', + }, + projects: [ + { + name: 'chromium', + use: { ...devices['Desktop Chrome'] }, + }, + ], + webServer: { + command: 'npm run start', + url: 'http://localhost:3000', + reuseExistingServer: !process.env.CI, + }, +}); +``` + +## Core Patterns + +### Basic Browser Automation + +```javascript +const { chromium } = require('playwright'); + +(async () => { + const browser = await chromium.launch({ + headless: true, + args: ['--no-sandbox', '--disable-setuid-sandbox'] + }); + + const context = await browser.newContext({ + viewport: { width: 1280, height: 720 } + }); + + const page = await context.newPage(); + + await page.goto('https://example.com', { + waitUntil: 'networkidle' + }); + + // Your automation here + + await browser.close(); +})(); +``` + +### Test Structure + +```typescript +import { test, expect } from '@playwright/test'; + +test.describe('Feature Name', () => { + test.beforeEach(async ({ page }) => { + await page.goto('/'); + }); + + test('should do something', async ({ page }) => { + const button = page.locator('button[data-testid="submit"]'); + await button.click(); + await expect(page).toHaveURL('/success'); + await expect(page.locator('.message')).toHaveText('Success!'); + }); +}); +``` + +## Selectors & Locators + +### Best Practices for Selectors + +```javascript +// PREFERRED: Data attributes (most stable) +await page.locator('[data-testid="submit-button"]').click(); + +// GOOD: Role-based selectors (accessible) +await page.getByRole('button', { name: 'Submit' }).click(); +await page.getByRole('textbox', { name: 'Email' }).fill('user@example.com'); + +// GOOD: Text content (for unique text) +await page.getByText('Sign in').click(); + +// OK: Semantic HTML +await page.locator('button[type="submit"]').click(); +await page.locator('input[name="email"]').fill('test@test.com'); + +// 
AVOID: Classes and IDs (can change frequently) +// LAST RESORT: Complex CSS/XPath +``` + +### Advanced Locator Patterns + +```javascript +// Filter and chain locators +const row = page.locator('tr').filter({ hasText: 'John Doe' }); +await row.locator('button').click(); + +// Nth element +await page.locator('button').nth(2).click(); + +// Combining conditions +await page.locator('button').and(page.locator('[disabled]')).count(); +``` + +## Common Actions + +### Form Interactions + +```javascript +// Text input +await page.getByLabel('Email').fill('user@example.com'); +await page.getByPlaceholder('Enter your name').fill('John Doe'); + +// Checkbox / Radio +await page.getByLabel('I agree').check(); +await page.getByLabel('Option 2').check(); + +// Select dropdown +await page.selectOption('select#country', 'usa'); +await page.selectOption('select#country', { label: 'United States' }); + +// File upload +await page.setInputFiles('input[type="file"]', 'path/to/file.pdf'); +``` + +### Mouse Actions + +```javascript +await page.click('button'); +await page.click('button', { button: 'right' }); +await page.dblclick('button'); +await page.hover('.menu-item'); +await page.dragAndDrop('#source', '#target'); +``` + +### Keyboard Actions + +```javascript +await page.keyboard.type('Hello World', { delay: 100 }); +await page.keyboard.press('Control+A'); +await page.keyboard.press('Enter'); +await page.keyboard.press('Tab'); +``` + +## Waiting Strategies + +### Smart Waiting + +```javascript +// Wait for element states +await page.locator('button').waitFor({ state: 'visible' }); +await page.locator('.spinner').waitFor({ state: 'hidden' }); + +// Wait for URL +await page.waitForURL('**/success'); + +// Wait for network +await page.waitForLoadState('networkidle'); + +// Wait for response +const responsePromise = page.waitForResponse('**/api/users'); +await page.click('button#load-users'); +const response = await responsePromise; + +// Custom timeout +await 
page.locator('.slow-element').waitFor({ + state: 'visible', + timeout: 10000 +}); +``` + +## Assertions + +```javascript +import { expect } from '@playwright/test'; + +// Page assertions +await expect(page).toHaveTitle('My App'); +await expect(page).toHaveURL(/.*dashboard/); + +// Element visibility +await expect(page.locator('.message')).toBeVisible(); +await expect(page.locator('button')).toBeEnabled(); + +// Text content +await expect(page.locator('h1')).toHaveText('Welcome'); +await expect(page.locator('.message')).toContainText('success'); + +// Input values +await expect(page.locator('input')).toHaveValue('test@example.com'); + +// Count +await expect(page.locator('.item')).toHaveCount(5); +``` + +## Network & API Testing + +### Intercepting Requests + +```javascript +// Mock API responses +await page.route('**/api/users', route => { + route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify([{ id: 1, name: 'John' }]) + }); +}); + +// Block resources +await page.route('**/*.{png,jpg,jpeg,gif}', route => route.abort()); +``` + +### Custom Headers via Environment Variables + +The skill supports automatic header injection via environment variables: + +```bash +# Single header (simple) +PW_HEADER_NAME=X-Automated-By PW_HEADER_VALUE=playwright-skill + +# Multiple headers (JSON) +PW_EXTRA_HEADERS='{"X-Automated-By":"playwright-skill","X-Request-ID":"123"}' +``` + +These headers are automatically applied when using: +- `helpers.createContext(browser)` - headers merged automatically +- `getContextOptionsWithHeaders(options)` - utility injected by run.js wrapper + +## Console Monitoring (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Start capturing before navigation +const consoleLogs = helpers.startConsoleCapture(page); + +await page.goto(url); + +// Get only errors (console.error + uncaught exceptions) +const errors = helpers.getConsoleErrors(consoleLogs); + +// Filter logs by string, RegExp, or 
function +const apiLogs = helpers.getConsoleLogs(consoleLogs, /api/i); +const warningsOnly = helpers.getConsoleLogs(consoleLogs, e => e.type === 'warning'); +``` + +## Network Capture (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Capture requests matching a URL pattern +const network = helpers.startNetworkCapture(page, '/api/'); + +await page.goto(url); + +// Get failed requests (4xx, 5xx, connection failures) +const failed = helpers.getFailedRequests(network); + +// Get all captured requests +const all = helpers.getCapturedRequests(network); +// Each entry: { url, method, status, statusText, resourceType, failure, timestamp } + +// Wait for a specific API response +const resp = await helpers.waitForApiResponse(page, '/api/users', { status: 200, timeout: 5000 }); +// Returns: { url, status, statusText, body, json } +``` + +## Browser State Inspection (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Read all localStorage +const allStorage = await helpers.getLocalStorage(page); + +// Read a single key +const token = await helpers.getLocalStorage(page, 'auth_token'); + +// Session storage +const session = await helpers.getSessionStorage(page); + +// Cookies (requires context, not page) +const cookies = await helpers.getCookies(page.context()); + +// Clear everything +await helpers.clearAllStorage(page); +``` + +## Video Recording (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Create context with video recording +const context = await helpers.createVideoContext(browser, { + outputDir: '/tmp/videos', + videoSize: { width: 1280, height: 720 } +}); +const page = await context.newPage(); + +// ... perform actions ... 
+ +// Video saved on close +const videoPath = await page.video().path(); +await page.close(); +``` + +## Accessibility Testing (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Run axe-core WCAG 2.0 AA audit (injects from CDN) +const audit = await helpers.runAccessibilityAudit(page); +// audit.violations - full violation details +// audit.summary - compact: [{ id, impact, description, helpUrl, nodes }] +// audit.violationCount, audit.passes + +// Custom tags/rules +const audit2 = await helpers.runAccessibilityAudit(page, { + tags: ['wcag2a', 'wcag2aa', 'wcag21aa'], + context: '#main-content' +}); + +// Keyboard focus order verification +const focus = await helpers.checkFocusOrder(page, [ + '#search-input', + '#nav-home', + '#nav-about' +]); +// Returns: [{ step, expectedSelector, actualElement, matches }] +``` + +## Performance Metrics (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Capture after page load +await page.goto(url, { waitUntil: 'networkidle' }); +const perf = await helpers.capturePerformanceMetrics(page); + +// perf.timing: { dns, tcp, ttfb, download, domInteractive, domComplete, loadEvent } +// perf.vitals: { fcp, lcp, cls } +``` + +## Network Simulation (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Slow network (500ms delay per request) +await helpers.simulateSlowNetwork(page, 500); + +// Offline mode +await helpers.simulateOffline(page.context()); + +// Block images and fonts +await helpers.blockResources(page, ['image', 'font']); +``` + +## Layout Inspection (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +const bounds = await helpers.getElementBounds(page, '.hero-banner'); +// { x, y, width, height, visible, inViewport, computedStyles: { display, visibility, ... 
} } +``` + +## Responsive Screenshots (via helpers) + +```javascript +const helpers = require('./lib/helpers'); + +// Default breakpoints: mobile, tablet, desktop, wide +const results = await helpers.captureResponsiveScreenshots(page, url); + +// Custom breakpoints +const results2 = await helpers.captureResponsiveScreenshots(page, url, [ + { name: 'small', width: 320, height: 568 }, + { name: 'medium', width: 768, height: 1024 } +], '/tmp/my-screenshots'); +``` + +## Visual Testing + +```javascript +// Full page screenshot +await page.screenshot({ path: 'screenshot.png', fullPage: true }); + +// Element screenshot +await page.locator('.chart').screenshot({ path: 'chart.png' }); +``` + +## Mobile Testing + +```javascript +const { devices } = require('playwright'); +const iPhone = devices['iPhone 12']; + +const context = await browser.newContext({ + ...iPhone, + locale: 'en-US', + permissions: ['geolocation'], + geolocation: { latitude: 37.7749, longitude: -122.4194 } +}); +``` + +## Debugging + +```bash +# Run with inspector +npx playwright test --debug + +# Headed mode +npx playwright test --headed +``` + +```javascript +// Pause execution +await page.pause(); + +// Console logs +page.on('console', msg => console.log('Browser log:', msg.text())); +page.on('pageerror', error => console.log('Page error:', error)); +``` + +## Performance Testing + +```javascript +const startTime = Date.now(); +await page.goto('https://example.com'); +const loadTime = Date.now() - startTime; +console.log(`Page loaded in ${loadTime}ms`); +``` + +## Common Patterns & Solutions + +### Handling Popups + +```javascript +const [popup] = await Promise.all([ + page.waitForEvent('popup'), + page.click('button.open-popup') +]); +await popup.waitForLoadState(); +``` + +### File Downloads + +```javascript +const [download] = await Promise.all([ + page.waitForEvent('download'), + page.click('button.download') +]); +await download.saveAs(`./downloads/${download.suggestedFilename()}`); +``` + +### 
iFrames + +```javascript +const frame = page.frameLocator('#my-iframe'); +await frame.locator('button').click(); +``` + +## CI/CD Integration + +### GitHub Actions + +```yaml +name: Playwright Tests +on: + push: + branches: [main, master] +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + - name: Install dependencies + run: npm ci + - name: Install Playwright Browsers + run: npx playwright install --with-deps + - name: Run tests + run: npx playwright test +``` + +## Best Practices + +1. **Selector Strategy** - Prefer data-testid attributes, use role-based selectors +2. **Waiting** - Use Playwright's auto-waiting, avoid hard-coded delays +3. **Error Handling** - Add proper error messages, take screenshots on failure +4. **Performance** - Run tests in parallel, reuse authentication state +5. **Docker** - Always include `--no-sandbox` and `--disable-setuid-sandbox` args + +## Troubleshooting + +1. **Element not found** - Check if element is in iframe, verify visibility +2. **Timeout errors** - Increase timeout, check network conditions +3. **Flaky tests** - Use proper waiting strategies, mock external dependencies +4. **Docker failures** - Ensure `--no-sandbox` flag and all dependencies installed diff --git a/plugins/eng/skills/use-browser/SKILL.md b/plugins/eng/skills/use-browser/SKILL.md new file mode 100644 index 00000000..d819f8df --- /dev/null +++ b/plugins/eng/skills/use-browser/SKILL.md @@ -0,0 +1,782 @@ +--- +name: use-browser +description: "Browser automation with Playwright — navigate pages, fill forms, take screenshots, test responsive design, validate UX, test login flows, check links, inspect network requests, inject JavaScript, monitor console errors, capture network traffic, record video, inspect browser state, run accessibility audits, measure performance, and simulate network conditions. Headless by default for CI/Docker. 
Use when user wants to test websites, automate browser interactions, validate web functionality, or perform browser-based testing. Triggers: playwright, browser test, browser automation, web test, screenshot, responsive test, test the page, automate browser, headless browser, UI test, console errors, console monitoring, network inspection, network capture, accessibility audit, a11y test, performance metrics, web vitals, video recording, browser state, localStorage, network simulation, offline testing." +argument-hint: "[URL or description of what to test/automate]" +--- + +**IMPORTANT - Path Resolution:** +This skill is installed via the plugin system. Before executing any commands, determine the skill directory based on where you loaded this SKILL.md file, and use that path in all commands below. Replace `$SKILL_DIR` with the actual discovered path. + +Expected plugin path: `~/.claude/plugins/marketplaces/inkeep-team-skills/plugins/eng/skills/use-browser` + +# Playwright Browser Automation + +General-purpose browser automation skill. Write custom Playwright code for any automation task and execute it via the universal executor. + +**CRITICAL WORKFLOW - Follow these steps in order:** + +1. **Auto-detect dev servers** - For localhost testing, ALWAYS run server detection FIRST: + + ```bash + cd $SKILL_DIR && node -e "require('./lib/helpers').detectDevServers().then(servers => console.log(JSON.stringify(servers)))" + ``` + + - If **1 server found**: Use it automatically, inform user + - If **multiple servers found**: Ask user which one to test + - If **no servers found**: Ask for URL or offer to help start dev server + +2. **Write scripts to /tmp** - NEVER write test files to skill directory; always use `/tmp/playwright-test-*.js` + +3. **Use headless browser by default** - Always use `headless: true` unless user specifically requests visible/headed mode. This ensures Docker/CI compatibility. + +4. 
**Parameterize URLs** - Always make URLs configurable via environment variable or constant at top of script + +## How It Works + +1. You describe what you want to test/automate +2. Auto-detect running dev servers (or ask for URL if testing external site) +3. Write custom Playwright code in `/tmp/playwright-test-*.js` (won't clutter your project) +4. Execute it via: `cd $SKILL_DIR && node run.js /tmp/playwright-test-*.js` +5. Results displayed in real-time +6. Test files auto-cleaned from /tmp by your OS + +## Setup (First Time) + +```bash +cd $SKILL_DIR +npm run setup +``` + +This installs Playwright and Chromium browser. Only needed once. + +## Execution Pattern + +**Step 1: Detect dev servers (for localhost testing)** + +```bash +cd $SKILL_DIR && node -e "require('./lib/helpers').detectDevServers().then(s => console.log(JSON.stringify(s)))" +``` + +**Step 2: Write test script to /tmp with URL parameter** + +```javascript +// /tmp/playwright-test-page.js +const { chromium } = require('playwright'); + +// Parameterized URL (detected or user-provided) +const TARGET_URL = 'http://localhost:3001'; // <-- Auto-detected or from user + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + await page.goto(TARGET_URL); + console.log('Page loaded:', await page.title()); + + await page.screenshot({ path: '/tmp/screenshot.png', fullPage: true }); + console.log('Screenshot saved to /tmp/screenshot.png'); + + await browser.close(); +})(); +``` + +**Step 3: Execute from skill directory** + +```bash +cd $SKILL_DIR && node run.js /tmp/playwright-test-page.js +``` + +## Common Patterns + +### Test a Page (Multiple Viewports) + +```javascript +// /tmp/playwright-test-responsive.js +const { chromium } = require('playwright'); + +const TARGET_URL = 'http://localhost:3001'; // Auto-detected + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + // 
Desktop test + await page.setViewportSize({ width: 1920, height: 1080 }); + await page.goto(TARGET_URL); + console.log('Desktop - Title:', await page.title()); + await page.screenshot({ path: '/tmp/desktop.png', fullPage: true }); + + // Mobile test + await page.setViewportSize({ width: 375, height: 667 }); + await page.screenshot({ path: '/tmp/mobile.png', fullPage: true }); + + await browser.close(); +})(); +``` + +### Test Login Flow + +```javascript +// /tmp/playwright-test-login.js +const { chromium } = require('playwright'); + +const TARGET_URL = 'http://localhost:3001'; // Auto-detected + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + await page.goto(`${TARGET_URL}/login`); + + await page.fill('input[name="email"]', 'test@example.com'); + await page.fill('input[name="password"]', 'password123'); + await page.click('button[type="submit"]'); + + // Wait for redirect + await page.waitForURL('**/dashboard'); + console.log('Login successful, redirected to dashboard'); + + await browser.close(); +})(); +``` + +### Fill and Submit Form + +```javascript +// /tmp/playwright-test-form.js +const { chromium } = require('playwright'); + +const TARGET_URL = 'http://localhost:3001'; // Auto-detected + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + await page.goto(`${TARGET_URL}/contact`); + + await page.fill('input[name="name"]', 'John Doe'); + await page.fill('input[name="email"]', 'john@example.com'); + await page.fill('textarea[name="message"]', 'Test message'); + await page.click('button[type="submit"]'); + + // Verify submission + await page.waitForSelector('.success-message'); + console.log('Form submitted successfully'); + + await browser.close(); +})(); +``` + +### Network Request Inspection + +```javascript +// /tmp/playwright-test-network.js +const { chromium } = require('playwright'); + +const TARGET_URL = 
'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + // Capture all API requests + const apiRequests = []; + page.on('request', request => { + if (request.url().includes('/api/')) { + apiRequests.push({ + method: request.method(), + url: request.url(), + headers: request.headers() + }); + } + }); + + page.on('response', response => { + if (response.url().includes('/api/')) { + console.log(`${response.status()} ${response.url()}`); + } + }); + + await page.goto(TARGET_URL); + await page.waitForLoadState('networkidle'); + + console.log('API requests captured:', JSON.stringify(apiRequests, null, 2)); + + await browser.close(); +})(); +``` + +### JavaScript Injection + +```javascript +// /tmp/playwright-test-js-inject.js +const { chromium } = require('playwright'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + await page.goto(TARGET_URL); + + // Inject and execute JavaScript + const result = await page.evaluate(() => { + return { + title: document.title, + links: document.querySelectorAll('a').length, + meta: Array.from(document.querySelectorAll('meta')).map(m => ({ + name: m.getAttribute('name'), + content: m.getAttribute('content') + })).filter(m => m.name), + localStorage: Object.keys(window.localStorage), + cookies: document.cookie + }; + }); + + console.log('Page analysis:', JSON.stringify(result, null, 2)); + + await browser.close(); +})(); +``` + +### Check for Broken Links + +```javascript +const { chromium } = require('playwright'); + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + await page.goto('http://localhost:3000'); + + const links = await page.locator('a[href^="http"]').all(); + const results = { working: 0, broken: [] }; + + for (const link of links) { + const href 
= await link.getAttribute('href'); + try { + const response = await page.request.head(href); + if (response.ok()) { + results.working++; + } else { + results.broken.push({ url: href, status: response.status() }); + } + } catch (e) { + results.broken.push({ url: href, error: e.message }); + } + } + + console.log(`Working links: ${results.working}`); + console.log(`Broken links:`, results.broken); + + await browser.close(); +})(); +``` + +### Take Screenshot with Error Handling + +```javascript +const { chromium } = require('playwright'); + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + try { + await page.goto('http://localhost:3000', { + waitUntil: 'networkidle', + timeout: 10000, + }); + + await page.screenshot({ + path: '/tmp/screenshot.png', + fullPage: true, + }); + + console.log('Screenshot saved to /tmp/screenshot.png'); + } catch (error) { + console.error('Error:', error.message); + } finally { + await browser.close(); + } +})(); +``` + +### Test Responsive Design + +```javascript +// /tmp/playwright-test-responsive-full.js +const { chromium } = require('playwright'); + +const TARGET_URL = 'http://localhost:3001'; // Auto-detected + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + const viewports = [ + { name: 'Desktop', width: 1920, height: 1080 }, + { name: 'Tablet', width: 768, height: 1024 }, + { name: 'Mobile', width: 375, height: 667 }, + ]; + + for (const viewport of viewports) { + console.log( + `Testing ${viewport.name} (${viewport.width}x${viewport.height})`, + ); + + await page.setViewportSize({ + width: viewport.width, + height: viewport.height, + }); + + await page.goto(TARGET_URL); + await page.waitForTimeout(1000); + + await page.screenshot({ + path: `/tmp/${viewport.name.toLowerCase()}.png`, + fullPage: true, + }); + } + + console.log('All viewports tested'); + await browser.close(); +})(); +``` + 
+### Monitor Console Errors During a Flow + +Use when verifying a UI flow doesn't produce silent JS errors. + +```javascript +// /tmp/playwright-test-console.js +const { chromium } = require('playwright'); +const helpers = require('./lib/helpers'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + // Start capturing BEFORE navigation + const consoleLogs = helpers.startConsoleCapture(page); + + await page.goto(TARGET_URL); + await page.waitForLoadState('networkidle'); + + // Interact with the page + await page.click('button.submit').catch(() => {}); + await page.waitForTimeout(1000); + + // Check for errors + const errors = helpers.getConsoleErrors(consoleLogs); + if (errors.length > 0) { + console.log(`FAIL: ${errors.length} console error(s):`); + errors.forEach(e => console.log(` [${e.type}] ${e.text}`)); + } else { + console.log('PASS: No console errors'); + } + + // Optionally filter for specific logs + const apiLogs = helpers.getConsoleLogs(consoleLogs, /api|fetch/i); + console.log(`API-related logs: ${apiLogs.length}`); + + await browser.close(); +})(); +``` + +### Verify Network Requests During UI Flow + +Use when checking that the right API calls fire with the right status codes. 
+ +```javascript +// /tmp/playwright-test-network-verify.js +const { chromium } = require('playwright'); +const helpers = require('./lib/helpers'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + // Capture only API requests + const network = helpers.startNetworkCapture(page, '/api/'); + + await page.goto(`${TARGET_URL}/dashboard`); + await page.waitForLoadState('networkidle'); + + // Check for failed API calls + const failed = helpers.getFailedRequests(network); + if (failed.length > 0) { + console.log(`FAIL: ${failed.length} failed API request(s):`); + failed.forEach(r => console.log(` ${r.method} ${r.url} -> ${r.status || r.failure}`)); + } else { + console.log('PASS: All API requests succeeded'); + } + + // Review all captured requests + const all = helpers.getCapturedRequests(network); + console.log(`Total API requests: ${all.length}`); + all.forEach(r => console.log(` ${r.status} ${r.method} ${r.url}`)); + + await browser.close(); +})(); +``` + +### Record Video of a Flow + +Use when you need a recording of multi-step browser interaction. 
+ +```javascript +// /tmp/playwright-test-video.js +const { chromium } = require('playwright'); +const helpers = require('./lib/helpers'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const context = await helpers.createVideoContext(browser, { + outputDir: '/tmp/playwright-videos' + }); + const page = await context.newPage(); + + await page.goto(TARGET_URL); + await page.click('nav a:first-child'); + await page.waitForTimeout(1000); + await page.click('button.submit').catch(() => {}); + await page.waitForTimeout(1000); + + // Video is saved when page closes + const videoPath = await page.video().path(); + await page.close(); + await context.close(); + + console.log(`Video saved: ${videoPath}`); + await browser.close(); +})(); +``` + +### Inspect Browser State After Mutation + +Use when verifying that a UI action correctly persisted data. + +```javascript +// /tmp/playwright-test-state.js +const { chromium } = require('playwright'); +const helpers = require('./lib/helpers'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext(); + const page = await context.newPage(); + + await page.goto(TARGET_URL); + + // Check state before action + const storageBefore = await helpers.getLocalStorage(page); + console.log('localStorage before:', JSON.stringify(storageBefore)); + + const cookies = await helpers.getCookies(context); + console.log('Cookies:', cookies.map(c => `${c.name}=${c.value}`)); + + // Perform some action that should change state + await page.click('button.save-preferences').catch(() => {}); + await page.waitForTimeout(500); + + // Check state after action + const storageAfter = await helpers.getLocalStorage(page); + console.log('localStorage after:', JSON.stringify(storageAfter)); + + // Clean up for next test + await helpers.clearAllStorage(page); + + await 
browser.close(); +})(); +``` + +### Capture Screenshots for Documentation + +Use when writing docs, help articles, or PR screenshots that need consistent, high-quality images of the running UI. + +```javascript +// /tmp/playwright-test-doc-screenshot.js +const { chromium } = require('playwright'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const context = await browser.newContext({ + viewport: { width: 1280, height: 720 }, + deviceScaleFactor: 2, // Retina clarity + }); + const page = await context.newPage(); + + await page.goto(`${TARGET_URL}/settings`); + await page.waitForLoadState('networkidle'); + + // Crop to the relevant section — avoid full-page captures with empty space + const section = page.locator('.api-keys-section'); + await section.screenshot({ + path: '/tmp/doc-settings-api-keys.png', + type: 'png', + }); + + // Full-page fallback when you need the whole view + await page.screenshot({ + path: '/tmp/doc-settings-full.png', + type: 'png', + fullPage: false, // Viewport-only — keep it tight + }); + + console.log('Doc screenshots saved to /tmp/doc-*.png'); + await browser.close(); +})(); +``` + +**Key settings for doc screenshots:** +- `viewport: { width: 1280, height: 720 }` — standard docs width +- `deviceScaleFactor: 2` — retina resolution for sharp text +- `type: 'png'` — lossless for UI screenshots +- Use `element.screenshot()` to crop to a specific panel instead of full-page +- Target <200KB per image — crop aggressively + +### Run Accessibility Audit + +Use when checking a page for WCAG violations. 
+ +```javascript +// /tmp/playwright-test-a11y.js +const { chromium } = require('playwright'); +const helpers = require('./lib/helpers'); + +const TARGET_URL = 'http://localhost:3001'; + +(async () => { + const browser = await chromium.launch({ headless: true }); + const page = await browser.newPage(); + + await page.goto(TARGET_URL); + await page.waitForLoadState('networkidle'); + + const audit = await helpers.runAccessibilityAudit(page); + + console.log(`Accessibility audit: ${audit.violationCount} violation(s), ${audit.passes} passes`); + + if (audit.violationCount > 0) { + console.log('\nViolations:'); + audit.summary.forEach(v => { + console.log(` [${v.impact}] ${v.id}: ${v.description} (${v.nodes} element(s))`); + console.log(` Help: ${v.helpUrl}`); + }); + } + + // Test keyboard focus order + const focusOrder = await helpers.checkFocusOrder(page, [ + 'a[href]:first-of-type', + 'nav a:nth-child(2)', + 'input[type="search"]' + ]); + focusOrder.forEach(f => { + console.log(` Tab ${f.step}: expected ${f.expectedSelector} -> ${f.matches ? 'PASS' : 'FAIL'}`); + }); + + await browser.close(); +})(); +``` + +## Inline Execution (Simple Tasks) + +For quick one-off tasks, you can execute code inline without creating files: + +```bash +# Take a quick screenshot +cd $SKILL_DIR && node run.js " +const browser = await chromium.launch({ headless: true }); +const page = await browser.newPage(); +await page.goto('http://localhost:3001'); +await page.screenshot({ path: '/tmp/quick-screenshot.png', fullPage: true }); +console.log('Screenshot saved'); +await browser.close(); +" +``` + +**When to use inline vs files:** + +- **Inline**: Quick one-off tasks (screenshot, check if element exists, get page title) +- **Files**: Complex tests, responsive design checks, anything user might want to re-run + +## Available Helpers + +All helpers live in `lib/helpers.js`. Use `const helpers = require('./lib/helpers');` in scripts. 
Organized by what you need to do: + +### Page Interaction + +| Helper | When to use | +|---|---| +| `helpers.detectDevServers()` | **CRITICAL — run first** for localhost testing. Returns array of detected server URLs. | +| `helpers.safeClick(page, selector, { retries: 3 })` | Click elements that may not be immediately visible/clickable. Auto-retries. | +| `helpers.safeType(page, selector, text)` | Type into inputs. Clears field first by default. | +| `helpers.extractTexts(page, selector)` | Get text from multiple matching elements as array. | +| `helpers.scrollPage(page, 'down', 500)` | Scroll page. Directions: `'down'`, `'up'`, `'top'`, `'bottom'`. | +| `helpers.handleCookieBanner(page)` | Dismiss common cookie consent banners. Run early — clears overlays that block interaction. | +| `helpers.authenticate(page, { username, password })` | Login flow with common field selectors. Auto-waits for redirect. | +| `helpers.extractTableData(page, 'table.results')` | Extract structured data from HTML tables (headers + rows). | +| `helpers.takeScreenshot(page, 'name')` | Save timestamped screenshot. | + +### Console Monitoring — catch silent JS errors + +| Helper | When to use | +|---|---| +| `helpers.startConsoleCapture(page)` | **Call BEFORE navigating.** Returns a collector that accumulates all console output. | +| `helpers.getConsoleErrors(collector)` | Get only error-level messages and uncaught exceptions from collector. | +| `helpers.getConsoleLogs(collector, filter?)` | Get all logs, or filter by string/RegExp/function. | + +### Network Inspection — verify API calls during UI flows + +| Helper | When to use | +|---|---| +| `helpers.startNetworkCapture(page, '/api/')` | **Call BEFORE navigating.** Captures request/response pairs. Optional URL filter. | +| `helpers.getFailedRequests(collector)` | Get 4xx, 5xx, and connection failures from collector. | +| `helpers.getCapturedRequests(collector)` | Get all captured request/response entries. 
| +| `helpers.waitForApiResponse(page, '/api/users', { status: 200 })` | Wait for a specific API call to complete. Returns `{ url, status, body, json }`. | + +### Browser State — inspect storage and cookies + +| Helper | When to use | +|---|---| +| `helpers.getLocalStorage(page)` | Get all localStorage entries. Pass a key for a single value. | +| `helpers.getSessionStorage(page)` | Get all sessionStorage entries. Pass a key for a single value. | +| `helpers.getCookies(context)` | Get all cookies from browser context. | +| `helpers.clearAllStorage(page)` | Clear localStorage + sessionStorage + cookies. Use for clean-state testing. | + +### Video Recording — record browser interactions + +| Helper | When to use | +|---|---| +| `helpers.createVideoContext(browser, { outputDir: '/tmp/videos' })` | Create a context that records video. Video saved when page/context closes. | + +### Accessibility — WCAG audits and keyboard navigation + +| Helper | When to use | +|---|---| +| `helpers.runAccessibilityAudit(page)` | Inject axe-core and run WCAG 2.0 AA audit. Returns violations with impact/description. Requires internet (CDN). | +| `helpers.checkFocusOrder(page, ['#first', '#second', '#third'])` | Tab through elements and verify focus lands on expected selectors in order. | + +### Performance Metrics — measure page speed + +| Helper | When to use | +|---|---| +| `helpers.capturePerformanceMetrics(page)` | Capture Navigation Timing (TTFB, DOM interactive) and Web Vitals (FCP, LCP, CLS). Call after page load. | + +### Responsive Screenshots — multi-viewport sweep + +| Helper | When to use | +|---|---| +| `helpers.captureResponsiveScreenshots(page, url)` | Screenshot at mobile/tablet/desktop/wide breakpoints. Custom breakpoints and output dir optional. | + +### Network Simulation — test degraded conditions + +| Helper | When to use | +|---|---| +| `helpers.simulateSlowNetwork(page, 500)` | Add artificial latency (ms) to all requests. 
|
+| `helpers.simulateOffline(context)` | Set browser to offline mode. |
+| `helpers.blockResources(page, ['image', 'font'])` | Block specific resource types (image, font, stylesheet, script, etc.). |
+
+### Layout Inspection — verify element positioning
+
+| Helper | When to use |
+|---|---|
+| `helpers.getElementBounds(page, '.selector')` | Get bounding box, visibility, viewport presence, and computed styles. |
+
+## Custom HTTP Headers
+
+Configure custom headers for all HTTP requests via environment variables. Useful for:
+
+- Identifying automated traffic to your backend
+- Getting LLM-optimized responses (e.g., plain text errors instead of styled HTML)
+- Adding authentication tokens globally
+
+### Configuration
+
+**Single header (common case):**
+
+```bash
+cd $SKILL_DIR && PW_HEADER_NAME=X-Automated-By PW_HEADER_VALUE=playwright-skill \
+  node run.js /tmp/my-script.js
+```
+
+**Multiple headers (JSON format):**
+
+```bash
+cd $SKILL_DIR && PW_EXTRA_HEADERS='{"X-Automated-By":"playwright-skill","X-Debug":"true"}' \
+  node run.js /tmp/my-script.js
+```
+
+### How It Works
+
+Headers are automatically applied when using `helpers.createContext()`:
+
+```javascript
+const context = await helpers.createContext(browser);
+const page = await context.newPage();
+// All requests from this page include your custom headers
+```
+
+For scripts using raw Playwright API, use the injected `getContextOptionsWithHeaders()`:
+
+```javascript
+const context = await browser.newContext(
+  getContextOptionsWithHeaders({ viewport: { width: 1920, height: 1080 } }),
+);
+```
+
+## Advanced Usage
+
+For comprehensive Playwright API documentation, see [API_REFERENCE.md](API_REFERENCE.md):
+
+- Selectors & Locators best practices
+- Network interception & API mocking
+- Authentication & session management
+- Visual regression testing
+- Mobile device emulation
+- Performance testing
+- Debugging techniques
+- CI/CD integration
+
+## Tips
+
+- **CRITICAL: Detect servers FIRST** -
Always run `detectDevServers()` before writing test code for localhost testing +- **Custom headers** - Use `PW_HEADER_NAME`/`PW_HEADER_VALUE` env vars to identify automated traffic to your backend +- **Use /tmp for test files** - Write to `/tmp/playwright-test-*.js`, never to skill directory or user's project +- **Parameterize URLs** - Put detected/provided URL in a `TARGET_URL` constant at the top of every script +- **DEFAULT: Headless browser** - Always use `headless: true` for Docker/CI compatibility +- **Headed mode** - Use `headless: false` when user specifically requests visible browser or is debugging locally +- **Wait strategies:** Use `waitForURL`, `waitForSelector`, `waitForLoadState` instead of fixed timeouts +- **Error handling:** Always use try-catch for robust automation +- **Console output:** Use `console.log()` to track progress and show what's happening +- **Docker:** The `--no-sandbox` flag is included by default in helpers for container compatibility + +## Troubleshooting + +**Playwright not installed:** + +```bash +cd $SKILL_DIR && npm run setup +``` + +**Module not found:** +Ensure running from skill directory via `run.js` wrapper + +**Browser doesn't launch in Docker:** +Ensure `--no-sandbox` and `--disable-setuid-sandbox` args are set (included by default in helpers) + +**Element not found:** +Add wait: `await page.waitForSelector('.element', { timeout: 10000 })` diff --git a/plugins/eng/skills/use-browser/lib/helpers.js b/plugins/eng/skills/use-browser/lib/helpers.js new file mode 100644 index 00000000..2d02a8d9 --- /dev/null +++ b/plugins/eng/skills/use-browser/lib/helpers.js @@ -0,0 +1,977 @@ +// playwright-helpers.js +// Reusable utility functions for Playwright automation + +const { chromium, firefox, webkit } = require('playwright'); + +/** + * Parse extra HTTP headers from environment variables. 
+ * Supports two formats: + * - PW_HEADER_NAME + PW_HEADER_VALUE: Single header (simple, common case) + * - PW_EXTRA_HEADERS: JSON object for multiple headers (advanced) + * Single header format takes precedence if both are set. + * @returns {Object|null} Headers object or null if none configured + */ +function getExtraHeadersFromEnv() { + const headerName = process.env.PW_HEADER_NAME; + const headerValue = process.env.PW_HEADER_VALUE; + + if (headerName && headerValue) { + return { [headerName]: headerValue }; + } + + const headersJson = process.env.PW_EXTRA_HEADERS; + if (headersJson) { + try { + const parsed = JSON.parse(headersJson); + if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) { + return parsed; + } + console.warn('PW_EXTRA_HEADERS must be a JSON object, ignoring...'); + } catch (e) { + console.warn('Failed to parse PW_EXTRA_HEADERS as JSON:', e.message); + } + } + + return null; +} + +/** + * Launch browser with standard configuration + * @param {string} browserType - 'chromium', 'firefox', or 'webkit' + * @param {Object} options - Additional launch options + */ +async function launchBrowser(browserType = 'chromium', options = {}) { + const defaultOptions = { + headless: process.env.HEADLESS !== 'false', + slowMo: process.env.SLOW_MO ? 
parseInt(process.env.SLOW_MO) : 0, + args: ['--no-sandbox', '--disable-setuid-sandbox'] + }; + + const browsers = { chromium, firefox, webkit }; + const browser = browsers[browserType]; + + if (!browser) { + throw new Error(`Invalid browser type: ${browserType}`); + } + + return await browser.launch({ ...defaultOptions, ...options }); +} + +/** + * Create a new page with viewport and user agent + * @param {Object} context - Browser context + * @param {Object} options - Page options + */ +async function createPage(context, options = {}) { + const page = await context.newPage(); + + if (options.viewport) { + await page.setViewportSize(options.viewport); + } + + if (options.userAgent) { + await page.setExtraHTTPHeaders({ + 'User-Agent': options.userAgent + }); + } + + // Set default timeout + page.setDefaultTimeout(options.timeout || 30000); + + return page; +} + +/** + * Smart wait for page to be ready + * @param {Object} page - Playwright page + * @param {Object} options - Wait options + */ +async function waitForPageReady(page, options = {}) { + const waitOptions = { + waitUntil: options.waitUntil || 'networkidle', + timeout: options.timeout || 30000 + }; + + try { + await page.waitForLoadState(waitOptions.waitUntil, { + timeout: waitOptions.timeout + }); + } catch (e) { + console.warn('Page load timeout, continuing...'); + } + + // Additional wait for dynamic content if selector provided + if (options.waitForSelector) { + await page.waitForSelector(options.waitForSelector, { + timeout: options.timeout + }); + } +} + +/** + * Safe click with retry logic + * @param {Object} page - Playwright page + * @param {string} selector - Element selector + * @param {Object} options - Click options + */ +async function safeClick(page, selector, options = {}) { + const maxRetries = options.retries || 3; + const retryDelay = options.retryDelay || 1000; + + for (let i = 0; i < maxRetries; i++) { + try { + await page.waitForSelector(selector, { + state: 'visible', + timeout: 
options.timeout || 5000
+      });
+      await page.click(selector, {
+        force: options.force || false,
+        timeout: options.timeout || 5000
+      });
+      return true;
+    } catch (e) {
+      if (i === maxRetries - 1) {
+        console.error(`Failed to click ${selector} after ${maxRetries} attempts`);
+        throw e;
+      }
+      console.log(`Retry ${i + 1}/${maxRetries} for clicking ${selector}`);
+      await page.waitForTimeout(retryDelay);
+    }
+  }
+}
+
+/**
+ * Safe text input with clear before type
+ * @param {Object} page - Playwright page
+ * @param {string} selector - Input selector
+ * @param {string} text - Text to type
+ * @param {Object} options - Type options
+ */
+async function safeType(page, selector, text, options = {}) {
+  await page.waitForSelector(selector, {
+    state: 'visible',
+    timeout: options.timeout || 10000
+  });
+
+  if (options.clear !== false) {
+    await page.fill(selector, '');
+  }
+
+  if (options.slow) {
+    await page.type(selector, text, { delay: options.delay || 100 });
+  } else {
+    await page.fill(selector, text);
+  }
+}
+
+/**
+ * Extract text from multiple elements
+ * @param {Object} page - Playwright page
+ * @param {string} selector - Elements selector
+ */
+async function extractTexts(page, selector) {
+  await page.waitForSelector(selector, { timeout: 10000 });
+  return await page.$$eval(selector, elements =>
+    elements.map(el => el.textContent?.trim()).filter(Boolean)
+  );
+}
+
+/**
+ * Take screenshot with timestamp
+ * @param {Object} page - Playwright page
+ * @param {string} name - Screenshot name
+ * @param {Object} options - Screenshot options
+ */
+async function takeScreenshot(page, name, options = {}) {
+  const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+  const filename = `${name}-${timestamp}.png`;
+
+  await page.screenshot({
+    path: filename,
+    fullPage: options.fullPage !== false,
+    ...options
+  });
+
+  console.log(`Screenshot saved: ${filename}`);
+  return filename;
+}
+
+/**
+ * Handle authentication
+ * @param {Object} page - Playwright page
+ * @param {Object} credentials - Username and password + * @param {Object} selectors - Login form selectors + */ +async function authenticate(page, credentials, selectors = {}) { + const defaultSelectors = { + username: 'input[name="username"], input[name="email"], #username, #email', + password: 'input[name="password"], #password', + submit: 'button[type="submit"], input[type="submit"], button:has-text("Login"), button:has-text("Sign in")' + }; + + const finalSelectors = { ...defaultSelectors, ...selectors }; + + await safeType(page, finalSelectors.username, credentials.username); + await safeType(page, finalSelectors.password, credentials.password); + await safeClick(page, finalSelectors.submit); + + // Wait for navigation or success indicator + await Promise.race([ + page.waitForNavigation({ waitUntil: 'networkidle' }), + page.waitForSelector(selectors.successIndicator || '.dashboard, .user-menu, .logout', { timeout: 10000 }) + ]).catch(() => { + console.log('Login might have completed without navigation'); + }); +} + +/** + * Scroll page + * @param {Object} page - Playwright page + * @param {string} direction - 'down', 'up', 'top', 'bottom' + * @param {number} distance - Pixels to scroll (for up/down) + */ +async function scrollPage(page, direction = 'down', distance = 500) { + switch (direction) { + case 'down': + await page.evaluate(d => window.scrollBy(0, d), distance); + break; + case 'up': + await page.evaluate(d => window.scrollBy(0, -d), distance); + break; + case 'top': + await page.evaluate(() => window.scrollTo(0, 0)); + break; + case 'bottom': + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); + break; + } + await page.waitForTimeout(500); // Wait for scroll animation +} + +/** + * Extract table data + * @param {Object} page - Playwright page + * @param {string} tableSelector - Table selector + */ +async function extractTableData(page, tableSelector) { + await page.waitForSelector(tableSelector); + + return await 
page.evaluate((selector) => { + const table = document.querySelector(selector); + if (!table) return null; + + const headers = Array.from(table.querySelectorAll('thead th')).map(th => + th.textContent?.trim() + ); + + const rows = Array.from(table.querySelectorAll('tbody tr')).map(tr => { + const cells = Array.from(tr.querySelectorAll('td')); + if (headers.length > 0) { + return cells.reduce((obj, cell, index) => { + obj[headers[index] || `column_${index}`] = cell.textContent?.trim(); + return obj; + }, {}); + } else { + return cells.map(cell => cell.textContent?.trim()); + } + }); + + return { headers, rows }; + }, tableSelector); +} + +/** + * Wait for and dismiss cookie banners + * @param {Object} page - Playwright page + * @param {number} timeout - Max time to wait + */ +async function handleCookieBanner(page, timeout = 3000) { + const commonSelectors = [ + 'button:has-text("Accept")', + 'button:has-text("Accept all")', + 'button:has-text("OK")', + 'button:has-text("Got it")', + 'button:has-text("I agree")', + '.cookie-accept', + '#cookie-accept', + '[data-testid="cookie-accept"]' + ]; + + for (const selector of commonSelectors) { + try { + const element = await page.waitForSelector(selector, { + timeout: timeout / commonSelectors.length, + state: 'visible' + }); + if (element) { + await element.click(); + console.log('Cookie banner dismissed'); + return true; + } + } catch (e) { + // Continue to next selector + } + } + + return false; +} + +/** + * Retry a function with exponential backoff + * @param {Function} fn - Function to retry + * @param {number} maxRetries - Maximum retry attempts + * @param {number} initialDelay - Initial delay in ms + */ +async function retryWithBackoff(fn, maxRetries = 3, initialDelay = 1000) { + let lastError; + + for (let i = 0; i < maxRetries; i++) { + try { + return await fn(); + } catch (error) { + lastError = error; + const delay = initialDelay * Math.pow(2, i); + console.log(`Attempt ${i + 1} failed, retrying in 
${delay}ms...`); + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + + throw lastError; +} + +/** + * Create browser context with common settings + * @param {Object} browser - Browser instance + * @param {Object} options - Context options + */ +async function createContext(browser, options = {}) { + const envHeaders = getExtraHeadersFromEnv(); + + // Merge environment headers with any passed in options + const mergedHeaders = { + ...envHeaders, + ...options.extraHTTPHeaders + }; + + const defaultOptions = { + viewport: { width: 1280, height: 720 }, + userAgent: options.mobile + ? 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1' + : undefined, + permissions: options.permissions || [], + geolocation: options.geolocation, + locale: options.locale || 'en-US', + timezoneId: options.timezoneId || 'America/New_York', + // Only include extraHTTPHeaders if we have any + ...(Object.keys(mergedHeaders).length > 0 && { extraHTTPHeaders: mergedHeaders }) + }; + + return await browser.newContext({ ...defaultOptions, ...options }); +} + +/** + * Detect running dev servers on common ports + * @param {Array} customPorts - Additional ports to check + * @returns {Promise} Array of detected server URLs + */ +async function detectDevServers(customPorts = []) { + const http = require('http'); + + // Common dev server ports + const commonPorts = [3000, 3001, 3002, 5173, 8080, 8000, 4200, 5000, 9000, 1234]; + const allPorts = [...new Set([...commonPorts, ...customPorts])]; + + const detectedServers = []; + + console.log('Checking for running dev servers...'); + + for (const port of allPorts) { + try { + await new Promise((resolve, reject) => { + const req = http.request({ + hostname: 'localhost', + port: port, + path: '/', + method: 'HEAD', + timeout: 500 + }, (res) => { + if (res.statusCode < 500) { + detectedServers.push(`http://localhost:${port}`); + console.log(` Found server 
on port ${port}`);
          }
          resolve();
        });

        // Treat connection errors and timeouts as "no server" — always resolve.
        req.on('error', () => resolve());
        req.on('timeout', () => {
          req.destroy();
          resolve();
        });

        req.end();
      });
    } catch (e) {
      // Port not available, continue
    }
  }

  if (detectedServers.length === 0) {
    console.log('  No dev servers detected');
  }

  return detectedServers;
}

// ---------------------------------------------------------------------------
// Console Monitoring
// ---------------------------------------------------------------------------

/**
 * Start capturing browser console output. Call BEFORE navigating to the page.
 * Returns a collector object — pass it to getConsoleErrors() or getConsoleLogs().
 * @param {Object} page - Playwright page
 * @returns {Object} Collector with .entries array
 */
function startConsoleCapture(page) {
  const collector = { entries: [] };
  // console.log/info/warn/error calls made by page scripts
  page.on('console', msg => {
    collector.entries.push({
      type: msg.type(),
      text: msg.text(),
      location: msg.location(),
      timestamp: Date.now()
    });
  });
  // Uncaught exceptions thrown in the page (recorded with type 'pageerror')
  page.on('pageerror', error => {
    collector.entries.push({
      type: 'pageerror',
      text: error.message,
      stack: error.stack,
      timestamp: Date.now()
    });
  });
  return collector;
}

/**
 * Get only error-level messages from a console collector.
 * Includes console.error() calls and uncaught page errors.
 * @param {Object} collector - From startConsoleCapture()
 * @returns {Array} Error entries
 */
function getConsoleErrors(collector) {
  return collector.entries.filter(e =>
    e.type === 'error' || e.type === 'pageerror'
  );
}

/**
 * Get console logs from a collector, optionally filtered.
 * @param {Object} collector - From startConsoleCapture()
 * @param {string|RegExp|Function} [filter] - String (includes), RegExp (test), or predicate function
 * @returns {Array} Matching entries
 */
function getConsoleLogs(collector, filter) {
  // No filter: return everything captured so far.
  if (!filter) return collector.entries;
  if (typeof filter === 'string') {
    return collector.entries.filter(e => e.text.includes(filter));
  }
  if (filter instanceof RegExp) {
    return collector.entries.filter(e => filter.test(e.text));
  }
  if (typeof filter === 'function') {
    return collector.entries.filter(filter);
  }
  // Unrecognized filter type: fall back to returning all entries.
  return collector.entries;
}

// ---------------------------------------------------------------------------
// Network Capture & Inspection
// ---------------------------------------------------------------------------

/**
 * Start capturing network requests. Call BEFORE navigating to the page.
 * Returns a collector object — pass it to getFailedRequests() or getCapturedRequests().
 * @param {Object} page - Playwright page
 * @param {string} [urlFilter] - Only capture requests whose URL contains this string (e.g. '/api/')
 * @returns {Object} Collector with .requests array
 */
function startNetworkCapture(page, urlFilter) {
  const collector = { requests: [] };

  // Completed requests. NOTE: the handler is async (the response must be
  // awaited), so entries may be appended slightly out of request order.
  page.on('requestfinished', async request => {
    const url = request.url();
    if (urlFilter && !url.includes(urlFilter)) return;

    const response = await request.response();
    collector.requests.push({
      url,
      method: request.method(),
      status: response ? response.status() : null,
      statusText: response ?
response.statusText() : null,
      resourceType: request.resourceType(),
      failure: null,
      timestamp: Date.now()
    });
  });

  // Requests that never completed (DNS failure, connection refused, aborted, ...).
  page.on('requestfailed', request => {
    const url = request.url();
    if (urlFilter && !url.includes(urlFilter)) return;

    collector.requests.push({
      url,
      method: request.method(),
      status: null,
      statusText: null,
      resourceType: request.resourceType(),
      failure: request.failure()?.errorText || 'unknown',
      timestamp: Date.now()
    });
  });

  return collector;
}

/**
 * Get failed requests (4xx, 5xx, or connection failures) from a network collector.
 * @param {Object} collector - From startNetworkCapture()
 * @returns {Array} Failed request entries
 */
function getFailedRequests(collector) {
  // Failed = never completed (r.failure set) or server answered 4xx/5xx.
  return collector.requests.filter(r =>
    r.failure || (r.status && r.status >= 400)
  );
}

/**
 * Get all captured requests from a network collector.
 * @param {Object} collector - From startNetworkCapture()
 * @returns {Array} All request entries
 */
function getCapturedRequests(collector) {
  return collector.requests;
}

/**
 * Wait for a specific API response matching a URL pattern.
 * @param {Object} page - Playwright page
 * @param {string} urlPattern - URL substring to match (e.g.
'/api/users') + * @param {Object} [options] - { timeout: 10000, status: 200 } + * @returns {Object} { url, status, statusText, body, json } + */ +async function waitForApiResponse(page, urlPattern, options = {}) { + const timeout = options.timeout || 10000; + const expectedStatus = options.status; + + const response = await page.waitForResponse( + resp => resp.url().includes(urlPattern) && + (!expectedStatus || resp.status() === expectedStatus), + { timeout } + ); + + return { + url: response.url(), + status: response.status(), + statusText: response.statusText(), + body: await response.text().catch(() => null), + json: await response.json().catch(() => null) + }; +} + +// --------------------------------------------------------------------------- +// Video Recording +// --------------------------------------------------------------------------- + +/** + * Create a browser context with video recording enabled. + * Videos are saved when the page or context is closed. + * @param {Object} browser - Browser instance + * @param {Object} [options] - { outputDir, videoSize, viewport, ...contextOptions } + * @returns {Object} Browser context with recording active + */ +async function createVideoContext(browser, options = {}) { + const envHeaders = getExtraHeadersFromEnv(); + const mergedHeaders = { ...envHeaders, ...options.extraHTTPHeaders }; + + const contextOptions = { + viewport: options.viewport || { width: 1280, height: 720 }, + recordVideo: { + dir: options.outputDir || '/tmp/playwright-videos', + size: options.videoSize || { width: 1280, height: 720 } + }, + ...(Object.keys(mergedHeaders).length > 0 && { extraHTTPHeaders: mergedHeaders }), + ...options + }; + // Remove non-context keys + delete contextOptions.outputDir; + delete contextOptions.videoSize; + + return await browser.newContext(contextOptions); +} + +// --------------------------------------------------------------------------- +// Browser State Inspection +// 
---------------------------------------------------------------------------

/**
 * Read localStorage. Pass a key to get one value, or omit for all entries.
 * @param {Object} page - Playwright page
 * @param {string} [key] - Specific key to read
 * @returns {string|Object} Single value or { key: value } map
 */
async function getLocalStorage(page, key) {
  if (key) {
    return await page.evaluate(k => window.localStorage.getItem(k), key);
  }
  // No key: snapshot every entry into a plain object.
  return await page.evaluate(() => {
    const items = {};
    for (let i = 0; i < window.localStorage.length; i++) {
      const k = window.localStorage.key(i);
      items[k] = window.localStorage.getItem(k);
    }
    return items;
  });
}

/**
 * Read sessionStorage. Pass a key to get one value, or omit for all entries.
 * @param {Object} page - Playwright page
 * @param {string} [key] - Specific key to read
 * @returns {string|Object} Single value or { key: value } map
 */
async function getSessionStorage(page, key) {
  if (key) {
    return await page.evaluate(k => window.sessionStorage.getItem(k), key);
  }
  // No key: snapshot every entry into a plain object.
  return await page.evaluate(() => {
    const items = {};
    for (let i = 0; i < window.sessionStorage.length; i++) {
      const k = window.sessionStorage.key(i);
      items[k] = window.sessionStorage.getItem(k);
    }
    return items;
  });
}

/**
 * Get all cookies from a browser context.
 * @param {Object} context - Browser context
 * @returns {Array} Array of cookie objects
 */
async function getCookies(context) {
  return await context.cookies();
}

/**
 * Clear localStorage, sessionStorage, and cookies.
 * @param {Object} page - Playwright page
 */
async function clearAllStorage(page) {
  await page.evaluate(() => {
    window.localStorage.clear();
    window.sessionStorage.clear();
  });
  // Cookies live on the context, not the page.
  await page.context().clearCookies();
}

// ---------------------------------------------------------------------------
// Accessibility
// ---------------------------------------------------------------------------

/**
 * Run an accessibility audit using axe-core (injected from CDN).
 * Returns violations, pass count, and a summary. Requires internet for first load.
 * @param {Object} page - Playwright page
 * @param {Object} [options] - { context: 'main', tags: ['wcag2a','wcag2aa'], rules: {} }
 * @returns {Object} { violations, violationCount, passes, incomplete, summary }
 */
async function runAccessibilityAudit(page, options = {}) {
  // Inject axe-core into the page (pinned to 4.9.1 on the CDN).
  await page.addScriptTag({
    url: 'https://cdnjs.cloudflare.com/ajax/libs/axe-core/4.9.1/axe.min.js'
  });

  const results = await page.evaluate(async (opts) => {
    return await window.axe.run(opts.context || document, {
      rules: opts.rules,
      tags: opts.tags || ['wcag2a', 'wcag2aa']
    });
  }, options);

  // Condense the raw axe result into counts plus a per-violation summary.
  return {
    violations: results.violations,
    violationCount: results.violations.length,
    passes: results.passes.length,
    incomplete: results.incomplete,
    summary: results.violations.map(v => ({
      id: v.id,
      impact: v.impact,
      description: v.description,
      helpUrl: v.helpUrl,
      nodes: v.nodes.length
    }))
  };
}

/**
 * Verify keyboard focus order by tabbing through elements.
 * Returns whether each Tab press landed on the expected element.
 * @param {Object} page - Playwright page
 * @param {Array} selectors - Expected focus order as CSS selectors
 * @returns {Array} [{ step, expectedSelector, actualElement, matches }]
 */
async function checkFocusOrder(page, selectors) {
  const results = [];
  // Reset focus to the document body so tabbing starts from a known place.
  await page.click('body');

  for (let i = 0; i < selectors.length; i++) {
    await page.keyboard.press('Tab');

    // Describe whatever actually received focus (for the report).
    const focused = await page.evaluate(() => {
      const el = document.activeElement;
      if (!el) return null;
      return {
        tagName: el.tagName,
        id: el.id || null,
        textContent: el.textContent?.trim()?.substring(0, 50) || null
      };
    });

    // Compare the focused element against the expected selector.
    const matches = await page.evaluate(
      (sel) => document.activeElement === document.querySelector(sel),
      selectors[i]
    );

    results.push({
      step: i + 1,
      expectedSelector: selectors[i],
      actualElement: focused,
      matches
    });
  }

  return results;
}

// ---------------------------------------------------------------------------
// Performance Metrics
// ---------------------------------------------------------------------------

/**
 * Capture page performance metrics including Navigation Timing and Web Vitals.
 * Call AFTER page has fully loaded (after networkidle or load event).
 * @param {Object} page - Playwright page
 * @returns {Object} { timing: { ttfb, domInteractive, ...
}, vitals: { fcp, lcp, cls } }
 */
async function capturePerformanceMetrics(page) {
  // Navigation Timing: phase durations (ms) derived from the navigation entry.
  const timing = await page.evaluate(() => {
    const perf = performance.getEntriesByType('navigation')[0];
    if (!perf) return null;
    return {
      dns: Math.round(perf.domainLookupEnd - perf.domainLookupStart),
      tcp: Math.round(perf.connectEnd - perf.connectStart),
      ttfb: Math.round(perf.responseStart - perf.requestStart),
      download: Math.round(perf.responseEnd - perf.responseStart),
      domInteractive: Math.round(perf.domInteractive - perf.fetchStart),
      domComplete: Math.round(perf.domComplete - perf.fetchStart),
      loadEvent: Math.round(perf.loadEventEnd - perf.fetchStart)
    };
  });

  // Web Vitals read from the performance timeline. NOTE(review): LCP and
  // layout-shift entries are typically delivered only to a PerformanceObserver;
  // getEntriesByType() may return none of them — confirm against the targeted
  // Chromium build.
  const vitals = await page.evaluate(() => {
    const result = {};
    const fcp = performance.getEntriesByName('first-contentful-paint')[0];
    if (fcp) result.fcp = Math.round(fcp.startTime);

    const lcpEntries = performance.getEntriesByType('largest-contentful-paint');
    if (lcpEntries.length > 0) {
      // The last entry is the final LCP candidate.
      result.lcp = Math.round(lcpEntries[lcpEntries.length - 1].startTime);
    }

    const clsEntries = performance.getEntriesByType('layout-shift');
    if (clsEntries.length > 0) {
      // CLS: sum of shift values not caused by recent user input, to 3 dp.
      result.cls = clsEntries
        .filter(e => !e.hadRecentInput)
        .reduce((sum, e) => sum + e.value, 0);
      result.cls = Math.round(result.cls * 1000) / 1000;
    }

    return result;
  });

  return { timing, vitals };
}

// ---------------------------------------------------------------------------
// Multi-Viewport Responsive Sweep
// ---------------------------------------------------------------------------

/**
 * Capture screenshots at multiple viewport sizes.
 * @param {Object} page - Playwright page
 * @param {string} url - URL to capture
 * @param {Array} [breakpoints] - [{ name, width, height }] defaults to mobile/tablet/desktop/wide
 * @param {string} [outputDir] - Output directory (default: /tmp/responsive-screenshots)
 * @returns {Array} [{ name, width, height, path }]
 */
async function captureResponsiveScreenshots(page, url, breakpoints, outputDir) {
  const fs = require('fs');
  const path = require('path');

  const bps = breakpoints || [
    { name: 'mobile', width: 375, height: 667 },
    { name: 'tablet', width: 768, height: 1024 },
    { name: 'desktop', width: 1280, height: 800 },
    { name: 'wide', width: 1920, height: 1080 }
  ];
  const dir = outputDir || '/tmp/responsive-screenshots';
  fs.mkdirSync(dir, { recursive: true });

  const results = [];
  for (const bp of bps) {
    await page.setViewportSize({ width: bp.width, height: bp.height });
    // Prefer a fully idle page; fall back to DOMContentLoaded when the site
    // never reaches networkidle within the timeout.
    try {
      await page.goto(url, { waitUntil: 'networkidle', timeout: 15000 });
    } catch {
      await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 });
    }
    // Give responsive layout/animations a moment to settle.
    await page.waitForTimeout(1000);

    const filePath = path.join(dir, `${bp.name}-${bp.width}x${bp.height}.png`);
    await page.screenshot({ path: filePath, fullPage: true });
    results.push({ ...bp, path: filePath });
    console.log(`  ${bp.name} (${bp.width}x${bp.height}): ${filePath}`);
  }

  return results;
}

// ---------------------------------------------------------------------------
// Network Simulation
// ---------------------------------------------------------------------------

/**
 * Add artificial latency to all network requests.
 * @param {Object} page - Playwright page
 * @param {number} [latencyMs=500] - Delay in milliseconds per request
 */
async function simulateSlowNetwork(page, latencyMs = 500) {
  // Intercept every request and hold it for latencyMs before continuing.
  await page.route('**/*', async route => {
    await new Promise(r => setTimeout(r, latencyMs));
    await route.continue();
  });
}

/**
 * Set the browser context to offline mode.
 * @param {Object} context - Browser context
 */
async function simulateOffline(context) {
  await context.setOffline(true);
}

/**
 * Block specific resource types (images, fonts, stylesheets, etc.).
 * @param {Object} page - Playwright page
 * @param {Array} [types] - Resource types to block (default: ['image','font','stylesheet'])
 */
async function blockResources(page, types = ['image', 'font', 'stylesheet']) {
  await page.route('**/*', route => {
    if (types.includes(route.request().resourceType())) {
      return route.abort();
    }
    return route.continue();
  });
}

// ---------------------------------------------------------------------------
// Layout & Visual Inspection
// ---------------------------------------------------------------------------

/**
 * Get an element's bounding box, visibility, and computed styles.
 * @param {Object} page - Playwright page
 * @param {string} selector - CSS selector
 * @returns {Object|null} { x, y, width, height, visible, inViewport, computedStyles }
 */
async function getElementBounds(page, selector) {
  // Fail fast (throws) if the element never appears.
  await page.waitForSelector(selector, { timeout: 5000 });

  return await page.evaluate((sel) => {
    const el = document.querySelector(sel);
    if (!el) return null;

    const rect = el.getBoundingClientRect();
    const styles = window.getComputedStyle(el);

    return {
      x: rect.x,
      y: rect.y,
      width: rect.width,
      height: rect.height,
      // "Visible" = not hidden by display/visibility/opacity; does not
      // account for clipping or elements stacked on top.
      visible: styles.display !== 'none' &&
               styles.visibility !== 'hidden' &&
               styles.opacity !== '0',
      // Any overlap with the viewport counts as in-viewport.
      inViewport: rect.top < window.innerHeight &&
                  rect.bottom > 0 &&
                  rect.left < window.innerWidth &&
                  rect.right > 0,
      computedStyles: {
        display: styles.display,
        visibility: styles.visibility,
        opacity: styles.opacity,
        position: styles.position,
        zIndex: styles.zIndex,
        overflow: styles.overflow
      }
    };
  }, selector);
}

module.exports = {
  // Page interaction
  launchBrowser,
  createPage,
  waitForPageReady,
  safeClick,
  safeType,
  extractTexts,
  takeScreenshot,
  authenticate,
  scrollPage,
  extractTableData,
  handleCookieBanner,
  retryWithBackoff,
  createContext,
  detectDevServers,
  getExtraHeadersFromEnv,
  // Console monitoring
  startConsoleCapture,
  getConsoleErrors,
  getConsoleLogs,
  // Network capture
  startNetworkCapture,
  getFailedRequests,
  getCapturedRequests,
  waitForApiResponse,
  // Video recording
  createVideoContext,
  // Browser state
  getLocalStorage,
  getSessionStorage,
  getCookies,
  clearAllStorage,
  // Accessibility
  runAccessibilityAudit,
  checkFocusOrder,
  // Performance
  capturePerformanceMetrics,
  // Responsive
  captureResponsiveScreenshots,
  // Network simulation
  simulateSlowNetwork,
  simulateOffline,
  blockResources,
  // Layout
  getElementBounds
};
diff --git a/plugins/eng/skills/use-browser/package-lock.json
b/plugins/eng/skills/use-browser/package-lock.json new file mode 100644 index 00000000..ea852821 --- /dev/null +++ b/plugins/eng/skills/use-browser/package-lock.json @@ -0,0 +1,63 @@ +{ + "name": "playwright-skill", + "version": "4.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "playwright-skill", + "version": "4.1.0", + "license": "MIT", + "dependencies": { + "playwright": "^1.57.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/playwright": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.58.2.tgz", + "integrity": "sha512-vA30H8Nvkq/cPBnNw4Q8TWz1EJyqgpuinBcHET0YVJVFldr8JDNiU9LaWAE1KqSkRYazuaBhTpB5ZzShOezQ6A==", + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.58.2" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.58.2", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.58.2.tgz", + "integrity": "sha512-yZkEtftgwS8CsfYo7nm0KE8jsvm6i/PTgVtB8DL726wNf6H2IMsDuxCpJj59KDaxCtSnrWan2AeDqM7JBaultg==", + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + } + } +} diff --git a/plugins/eng/skills/use-browser/package.json b/plugins/eng/skills/use-browser/package.json new file mode 100644 index 00000000..8c07fdae --- /dev/null +++ b/plugins/eng/skills/use-browser/package.json @@ -0,0 +1,25 @@ +{ + "name": "playwright-skill", + "version": 
"4.1.0", + "description": "Browser automation with Playwright for Claude Code — headless by default for Docker/CI", + "author": "lackeyjb", + "main": "run.js", + "scripts": { + "setup": "npm install && npx playwright install chromium", + "install-all-browsers": "npx playwright install chromium firefox webkit" + }, + "keywords": [ + "playwright", + "automation", + "browser-testing", + "web-automation", + "claude-skill" + ], + "dependencies": { + "playwright": "^1.57.0" + }, + "engines": { + "node": ">=14.0.0" + }, + "license": "MIT" +} diff --git a/plugins/eng/skills/use-browser/run.js b/plugins/eng/skills/use-browser/run.js new file mode 100644 index 00000000..0836913e --- /dev/null +++ b/plugins/eng/skills/use-browser/run.js @@ -0,0 +1,228 @@ +#!/usr/bin/env node +/** + * Universal Playwright Executor for Claude Code + * + * Executes Playwright automation code from: + * - File path: node run.js script.js + * - Inline code: node run.js 'await page.goto("...")' + * - Stdin: cat script.js | node run.js + * + * Ensures proper module resolution by running from skill directory. + */ + +const fs = require('fs'); +const path = require('path'); +const { execSync } = require('child_process'); + +// Change to skill directory for proper module resolution +process.chdir(__dirname); + +/** + * Check if Playwright is installed + */ +function checkPlaywrightInstalled() { + try { + require.resolve('playwright'); + return true; + } catch (e) { + return false; + } +} + +/** + * Install Playwright if missing + */ +function installPlaywright() { + console.log('Playwright not found. 
Installing...');
  try {
    // Install dependencies and the Chromium binary into the skill directory.
    execSync('npm install', { stdio: 'inherit', cwd: __dirname });
    execSync('npx playwright install chromium', { stdio: 'inherit', cwd: __dirname });
    console.log('Playwright installed successfully');
    return true;
  } catch (e) {
    console.error('Failed to install Playwright:', e.message);
    console.error('Please run manually: cd', __dirname, '&& npm run setup');
    return false;
  }
}

/**
 * Get code to execute from various sources
 */
function getCodeToExecute() {
  const args = process.argv.slice(2);

  // Case 1: File path provided
  if (args.length > 0 && fs.existsSync(args[0])) {
    const filePath = path.resolve(args[0]);
    console.log(`Executing file: ${filePath}`);
    return fs.readFileSync(filePath, 'utf8');
  }

  // Case 2: Inline code provided as argument
  if (args.length > 0) {
    console.log('Executing inline code');
    // NOTE(review): joining argv with single spaces loses the original
    // whitespace if the shell split the code across multiple arguments.
    return args.join(' ');
  }

  // Case 3: Code from stdin
  if (!process.stdin.isTTY) {
    console.log('Reading from stdin');
    // File descriptor 0 is stdin.
    return fs.readFileSync(0, 'utf8');
  }

  // No input
  console.error('No code to execute');
  console.error('Usage:');
  console.error('  node run.js script.js          # Execute file');
  console.error('  node run.js "code here"        # Execute inline');
  console.error('  cat script.js | node run.js    # Execute from stdin');
  process.exit(1);
}

/**
 * Clean up old temporary execution files from previous runs
 */
function cleanupOldTempFiles() {
  try {
    const files = fs.readdirSync(__dirname);
    const tempFiles = files.filter(f => f.startsWith('.temp-execution-') && f.endsWith('.js'));

    if (tempFiles.length > 0) {
      tempFiles.forEach(file => {
        const filePath = path.join(__dirname, file);
        try {
          fs.unlinkSync(filePath);
        } catch (e) {
          // Ignore errors - file might be in use or already deleted
        }
      });
    }
  } catch (e) {
    // Ignore directory read errors
  }
}

/**
 * Wrap code in async IIFE if not already wrapped
 */
function wrapCodeIfNeeded(code) {
  // Check if
code already has require() and async structure
  const hasRequire = code.includes('require(');
  const hasAsyncIIFE = code.includes('(async () => {') || code.includes('(async()=>{');

  // If it's already a complete script, return as-is
  if (hasRequire && hasAsyncIIFE) {
    return code;
  }

  // If it's just Playwright commands, wrap in full template
  if (!hasRequire) {
    return `
const { chromium, firefox, webkit, devices } = require('playwright');
const helpers = require('./lib/helpers');

// Extra headers from environment variables (if configured)
const __extraHeaders = helpers.getExtraHeadersFromEnv();

/**
 * Utility to merge environment headers into context options.
 * Use when creating contexts with raw Playwright API instead of helpers.createContext().
 * @param {Object} options - Context options
 * @returns {Object} Options with extraHTTPHeaders merged in
 */
function getContextOptionsWithHeaders(options = {}) {
  if (!__extraHeaders) return options;
  return {
    ...options,
    extraHTTPHeaders: {
      ...__extraHeaders,
      ...(options.extraHTTPHeaders || {})
    }
  };
}

(async () => {
  try {
    ${code}
  } catch (error) {
    console.error('Automation error:', error.message);
    if (error.stack) {
      console.error(error.stack);
    }
    process.exit(1);
  }
})();
`;
  }

  // If has require but no async wrapper
  if (!hasAsyncIIFE) {
    return `
(async () => {
  try {
    ${code}
  } catch (error) {
    console.error('Automation error:', error.message);
    if (error.stack) {
      console.error(error.stack);
    }
    process.exit(1);
  }
})();
`;
  }

  return code;
}

/**
 * Main execution
 */
async function main() {
  console.log('Playwright Skill - Universal Executor\n');

  // Clean up old temp files from previous runs
  cleanupOldTempFiles();

  // Check Playwright installation
  if (!checkPlaywrightInstalled()) {
    const installed = installPlaywright();
    if (!installed) {
      process.exit(1);
    }
  }

  // Get code to execute
  const
rawCode = getCodeToExecute();
  const code = wrapCodeIfNeeded(rawCode);

  // Create temporary file for execution
  const tempFile = path.join(__dirname, `.temp-execution-${Date.now()}.js`);

  try {
    // Write code to temp file
    fs.writeFileSync(tempFile, code, 'utf8');

    // Execute the code
    console.log('Starting automation...\n');
    // NOTE(review): require() starts the script synchronously in-process;
    // rejections inside the script's async IIFE are handled by the wrapper's
    // own try/catch, not by the catch below.
    require(tempFile);

    // Note: Temp file will be cleaned up on next run
    // This allows long-running async operations to complete safely

  } catch (error) {
    console.error('Execution failed:', error.message);
    if (error.stack) {
      console.error('\nStack trace:');
      console.error(error.stack);
    }
    process.exit(1);
  }
}

// Run main function
main().catch(error => {
  console.error('Fatal error:', error.message);
  process.exit(1);
});
diff --git a/plugins/shared/skills/analyze/SKILL.md b/plugins/shared/skills/analyze/SKILL.md
index 5a972fbf..34b97871 100644
--- a/plugins/shared/skills/analyze/SKILL.md
+++ b/plugins/shared/skills/analyze/SKILL.md
@@ -145,6 +145,7 @@ Stop expanding when additional context is unlikely to change the analysis.
- **Codebase understanding** (patterns, conventions, dependencies, blast radius) → load `/inspect` skill. It produces structured understanding faster and more reliably than ad-hoc file reading.
- **Systematic evidence gathering** (when you discover evidence gaps that need formal investigation) → load `/research` skill.
- **Domain-level surface mapping** (what does this topic touch across product and internal surfaces, how do they connect) → load `/discover` skill. It produces structured surface-area maps faster than ad-hoc enumeration. Use when the analysis involves understanding a feature's blast radius, cross-surface dependencies, or system-wide impact.
+- **Defect diagnosis** (analysis reveals something is broken or malfunctioning, not just a decision to make) → load `/debug` skill. Analysis answers "what's the situation?"; debugging answers "why is this broken?"
If the analysis subject transitions from understanding a system to diagnosing why it's failing, hand off to `/debug` rather than attempting ad-hoc diagnosis. - Use judgment — not every code mention needs a full inspection. Load a skill when the analysis would materially benefit from structured understanding, not for quick lookups. - **Subagent delegation** — subagents do not inherit your loaded skills. When you delegate work to a subagent that needs a skill, use the `general-purpose` type (it has the Skill tool). Start the subagent's prompt with `Before doing anything, load /skill-name skill`, then provide context and the task. diff --git a/plugins/shared/skills/screengrabs/SKILL.md b/plugins/shared/skills/screengrabs/SKILL.md new file mode 100644 index 00000000..b05516bb --- /dev/null +++ b/plugins/shared/skills/screengrabs/SKILL.md @@ -0,0 +1,257 @@ +--- +name: screengrabs +description: "Capture, annotate, and include screenshots in pull requests for UI changes. Use when creating or updating PRs that touch frontend components, pages, or any web-facing surface. Also use when asked to add before/after screenshots, visual diffs, or enrich PR descriptions. Triggers on: PR screenshots, before/after, visual diff, PR description, capture screenshot, PR images, enrich PR." +license: MIT +metadata: + author: "inkeep" + version: "1.1" +--- + +# Screengrabs + +Capture, redact, annotate, and embed screenshots in GitHub PRs for UI changes. + +## When to use + +- Creating/updating PRs that touch frontend components, pages, or styles +- User asks for screenshots, before/after comparisons, or PR body enrichment +- Skip for backend-only, test-only, or non-visual changes + +## Prerequisites + +These scripts require the following npm packages. 
Install them as dev dependencies in your project: + +| Package | Purpose | Install | +|---|---|---| +| `playwright` | Browser automation for screenshot capture | `npm add -D playwright` | +| `sharp` | Image annotation (labels, borders, stitching) | `npm add -D sharp` | +| `tsx` | TypeScript runner for scripts | `npm add -D tsx` | + +After installing Playwright, download browser binaries: `npx playwright install chromium` + +## Workflow + +Most screenshots require browser interaction before capture — dismissing popups, logging in, clicking tabs, scrolling to a section, or navigating through a flow. The default workflow accounts for this. + +1. **Identify affected pages** from the PR diff +2. **Plan interaction** — Load `/use-browser` skill. For each route, determine what interaction is needed before the screenshot (dismiss cookie banners, click tabs, scroll, login, etc.). Write a pre-script to `/tmp/pw-pre-.js` +3. **Capture screenshots** — run `scripts/capture.ts` with `--pre-script` +4. **Validate no sensitive data** — run `scripts/validate-sensitive.ts` +5. **Annotate** — run `scripts/annotate.ts` (labels, borders, side-by-side) +6. **Upload & embed** — update PR body with images + +**Simple captures (no interaction needed):** For static pages where goto + wait is sufficient, skip step 2 and omit `--pre-script`. Everything else stays the same. + +--- + +## Step 1: Identify Affected Pages + +Analyze the PR diff to determine which UI routes are impacted. Map changed component/page files to their corresponding URLs. If the diff only touches backend code, tests, or non-visual files, skip screenshot capture. + +--- + +## Step 2: Plan Interaction (Pre-Scripts) + +Load `/use-browser` skill for writing pre-scripts. A pre-script is a JS file that receives the Playwright `page` object and runs interaction before masking + screenshot. 
+ +### Pre-script contract + +The file must export an async function that receives `{ page, url, route }`: + +```javascript +// /tmp/pw-pre-dashboard.js +module.exports = async function({ page, url, route }) { + // Dismiss cookie banner + await page.click('button:has-text("Accept")').catch(() => {}); + + // Click the "Analytics" tab + await page.click('[data-tab="analytics"]'); + await page.waitForTimeout(500); +}; +``` + +### Common pre-script patterns + +**Dismiss popups / modals:** +```javascript +module.exports = async function({ page }) { + // Cookie banner + await page.click('button:has-text("Accept all")').catch(() => {}); + // Marketing popup + await page.click('[data-testid="close-modal"]').catch(() => {}); +}; +``` + +**Navigate through a login flow:** +```javascript +module.exports = async function({ page }) { + await page.fill('input[name="email"]', 'test@example.com'); + await page.fill('input[name="password"]', 'password123'); + await page.click('button[type="submit"]'); + await page.waitForURL('**/dashboard'); +}; +``` + +**Scroll to a specific section:** +```javascript +module.exports = async function({ page }) { + await page.locator('#pricing-section').scrollIntoViewIfNeeded(); + await page.waitForTimeout(300); +}; +``` + +**Expand collapsed content:** +```javascript +module.exports = async function({ page }) { + await page.click('button:has-text("Show more")'); + await page.waitForSelector('.expanded-content', { state: 'visible' }); +}; +``` + +**One pre-script per route** — if routes need different interaction, write separate scripts and run capture once per route. If all routes share the same interaction (e.g., dismiss the same cookie banner), one script covers all. 
 + +--- + +## Step 3: Capture Screenshots + +### Environment setup + +| Environment | Base URL | Notes | +|---|---|---| +| **Local dev** | `http://localhost:3000` (or your dev server port) | Start your dev server first | +| **Preview deployment** | Your preview URL (e.g., Vercel, Netlify, etc.) | Available after PR push | +| **Playwright server** | Connect via `--connect ws://localhost:3001` | See "Reusable server" below | + +### Capture command + +```bash +# With pre-script (default for most captures) +npx tsx scripts/capture.ts \ + --base-url http://localhost:3000 \ + --routes "/dashboard,/settings" \ + --pre-script /tmp/pw-pre-dashboard.js \ + --output-dir ./screengrabs + +# Simple capture (no interaction needed) +npx tsx scripts/capture.ts \ + --base-url http://localhost:3000 \ + --routes "/landing,/about" \ + --output-dir ./screengrabs + +# Preview deployment with pre-script +npx tsx scripts/capture.ts \ + --base-url https://your-preview-url.example.com \ + --routes "/dashboard" \ + --pre-script /tmp/pw-pre-dismiss-popups.js \ + --output-dir ./screengrabs +``` + +### All capture options + +| Option | Default | Description | +|---|---|---| +| `--base-url <url>` | *required* | Target URL (local dev or preview) | +| `--routes <paths>` | *required* | Comma-separated route paths | +| `--pre-script <file>` | — | JS file to run on page before capture (for interaction) | +| `--output-dir <dir>` | `./screengrabs` | Where to save PNGs and DOM text | +| `--viewport <WxH>` | `1280x800` | Browser viewport size | +| `--connect <ws-url>` | — | Connect to existing Playwright server | +| `--mask-selectors <selectors>` | — | Additional CSS selectors to blur | +| `--wait <ms>` | `2000` | Wait after page load before capture | +| `--full-page` | `false` | Capture full scrollable page | +| `--auth-cookie <value>` | — | Session cookie for authenticated pages | + +### Reusable Playwright server + +Start a server once, reuse across multiple captures: + +```bash +# Terminal 1: start server +npx tsx scripts/capture.ts --serve --port 3001 + +# 
Terminal 2+: connect and capture +npx tsx scripts/capture.ts \ + --connect ws://localhost:3001 --base-url http://localhost:3000 \ + --routes "/..." --pre-script /tmp/pw-pre-script.js --output-dir ./screengrabs +``` + +--- + +## Step 4: Validate Sensitive Data + +**Always run before uploading to GitHub.** + +```bash +npx tsx scripts/validate-sensitive.ts \ + --dir ./screengrabs +``` + +The script checks `.dom-text.txt` files (saved by capture) for: +- API keys (`sk-`, `sk-ant-`, `AKIA`, `sk_live_`) +- Tokens (Bearer, JWT, GitHub PATs) +- PEM private keys +- Connection strings with credentials + +Exit code 1 = sensitive data found. Re-capture with additional `--mask-selectors` or fix the source before proceeding. + +### Pre-capture masking (automatic) + +The capture script automatically masks these before taking screenshots: + +| Selector / Pattern | What it catches | +|---|---| +| `input[type="password"]` | Password fields | +| Text matching `sk-`, `Bearer`, `eyJ`, `ghp_`, PEM headers | In-page tokens/keys | + +Add more with `--mask-selectors "selector1,selector2"`. + +--- + +## Step 5: Annotate Images + +```bash +# Add "Before" label with red border +npx tsx scripts/annotate.ts \ + --input before.png --label "Before" --border "#ef4444" --output before-labeled.png + +# Add "After" label with green border +npx tsx scripts/annotate.ts \ + --input after.png --label "After" --border "#22c55e" --output after-labeled.png + +# Side-by-side comparison +npx tsx scripts/annotate.ts \ + --stitch before.png after.png --labels "Before,After" --output comparison.png +``` + +--- + +## Step 6: Upload & Embed in PR + +### Upload images to GitHub + +Images in PR markdown need permanent URLs. 
Use one of: + +**Option A — PR comment with image** (simplest): +```bash +# The image must already have a permanent URL (e.g. from a drag-and-drop +# upload on github.com); local file paths do NOT render in PR comments +gh pr comment {pr-number} --body "![Before]({uploaded-image-url})" +``` + +**Option B — Update PR body directly**: +```bash +gh pr edit {pr-number} --body "$(cat pr-body.md)" +``` + +### PR body templates + +Use the templates in [references/pr-templates.md](references/pr-templates.md) for consistent formatting. Include: + +1. **Visual Changes** section with before/after screenshots +2. **Test URLs** section with links to preview deployment pages +3. **Summary** of what changed and why + +--- + +## Additional Resources + +- [references/pr-templates.md](references/pr-templates.md) — PR body markdown templates diff --git a/plugins/shared/skills/screengrabs/references/pr-templates.md b/plugins/shared/skills/screengrabs/references/pr-templates.md new file mode 100644 index 00000000..4a8dbca0 --- /dev/null +++ b/plugins/shared/skills/screengrabs/references/pr-templates.md @@ -0,0 +1,153 @@ +# PR Body Templates + +Markdown templates for enriching PR descriptions with screenshots and preview links. + +## Template 1: Visual Changes (Before/After) + +Use for PRs that change UI appearance or behavior. + +```markdown +### Visual Changes + +| Before | After | +|--------|-------| +| ![Before - {page name}]({before-image-url}) | ![After - {page name}]({after-image-url}) | + +> Screenshots captured from {environment} +``` + +## Template 2: Visual Changes (Side-by-Side Comparison) + +Use when the before/after comparison is generated as a single stitched image. + +```markdown +### Visual Changes + +![{page name} - Before vs After]({comparison-image-url}) +``` + +## Template 3: Test URLs + +Include links to preview deployment pages for manual testing. 
+ +```markdown +### Test URLs + +Test these pages on the preview deployment: + +- [{Page name}]({preview-url}/{route}) — {what to verify} +- [{Page name}]({preview-url}/{route}) — {what to verify} +``` + +## Template 4: Combined (Recommended) + +Full PR body template with all sections. + +```markdown +### Changes + +- {Change 1} +- {Change 2} +- {Change 3} + +### Visual Changes + +| Before | After | +|--------|-------| +| ![Before - {page}]({url}) | ![After - {page}]({url}) | + +### Test URLs + +- [{Page name}]({preview-url}) — {what to test} +- [{Page name}]({preview-url}) — {what to test} + +### Test Plan + +- [ ] {Test case 1} +- [ ] {Test case 2} +``` + +## Template 5: Video Demo + +Use when a screen recording is more appropriate than static screenshots (e.g., interaction flows, animations, drag-and-drop behavior). + +```markdown +### Demo + +
+<details> +<summary>Screen recording</summary> + +https://github.com/user-attachments/assets/{video-id} + +</details>
+``` + +To upload a video: +1. Record with QuickTime or `screencapture -v recording.mov` (macOS) +2. Drag the `.mov` file into the GitHub PR comment editor +3. GitHub generates a permanent URL automatically + +## Template 6: Multiple Pages Affected + +Use when a change affects several different pages. + +```markdown +### Visual Changes + +#### {Page 1 name} +| Before | After | +|--------|-------| +| ![Before]({url}) | ![After]({url}) | + +#### {Page 2 name} +| Before | After | +|--------|-------| +| ![Before]({url}) | ![After]({url}) | + +### Test URLs + +| Page | URL | What to verify | +|------|-----|----------------| +| {Page 1} | [{link text}]({url}) | {verification steps} | +| {Page 2} | [{link text}]({url}) | {verification steps} | +``` + +## Image Upload Methods + +### Method A: Drag and drop (simplest) + +1. Edit the PR description on GitHub +2. Drag a PNG/GIF/MOV file into the text area +3. GitHub uploads it and inserts a markdown image link +4. Save + +### Method B: gh CLI comment + +```bash +# Post a comment with an image reference +gh pr comment {pr-number} --body "### Screenshot +![Description](image-url)" +``` + +### Method C: Update PR body programmatically + +```bash +# Read current PR body, append visual changes section +CURRENT_BODY=$(gh pr view {pr-number} --json body -q '.body') +NEW_BODY="${CURRENT_BODY} + +### Visual Changes +| Before | After | +|--------|-------| +| ![Before](url1) | ![After](url2) |" + +gh pr edit {pr-number} --body "$NEW_BODY" +``` + +## Notes + +- GitHub image URLs from drag-and-drop are permanent CDN links +- GitHub supports PNG, GIF, JPG, and MOV/MP4 uploads +- Maximum file size: 10MB for images, 100MB for videos (on paid plans) +- Always add descriptive alt text for accessibility +- Use `
` tags for large images or videos to keep the PR body scannable diff --git a/plugins/shared/skills/screengrabs/scripts/annotate.ts b/plugins/shared/skills/screengrabs/scripts/annotate.ts new file mode 100644 index 00000000..4fae22df --- /dev/null +++ b/plugins/shared/skills/screengrabs/scripts/annotate.ts @@ -0,0 +1,180 @@ +/** + * PR Screenshot Annotation Script + * + * Adds labels, colored borders, and creates side-by-side comparisons. + * + * Label mode: + * npx tsx annotate.ts --input before.png --label "Before" --border "#ef4444" --output labeled.png + * + * Stitch mode: + * npx tsx annotate.ts --stitch before.png after.png --labels "Before,After" --output comparison.png + */ + +import sharp from 'sharp'; + +function getArg(name: string): string | undefined { + const idx = process.argv.indexOf(`--${name}`); + return idx !== -1 && idx + 1 < process.argv.length ? process.argv[idx + 1] : undefined; +} + +function getMultiArg(name: string): string[] { + const idx = process.argv.indexOf(`--${name}`); + if (idx === -1) return []; + const values: string[] = []; + for (let i = idx + 1; i < process.argv.length; i++) { + if (process.argv[i].startsWith('--')) break; + values.push(process.argv[i]); + } + return values; +} + +function escapeXml(text: string): string { + return text + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); +} + +async function addLabel(inputPath: string, label: string, borderColor: string, outputPath: string) { + const metadata = await sharp(inputPath).metadata(); + const width = metadata.width || 800; + + const borderWidth = 3; + const labelHeight = 36; + const fontSize = 16; + const escapedLabel = escapeXml(label); + + const labelSvg = Buffer.from(` + + + + ${escapedLabel} + + + `); + + await sharp(inputPath) + .extend({ + top: labelHeight, + bottom: borderWidth, + left: borderWidth, + right: borderWidth, + background: borderColor, + }) + .composite([ + { + input: labelSvg, + top: 0, + left: borderWidth, + }, + ]) + .png() + 
.toFile(outputPath); + + console.log(`Labeled: ${outputPath}`); +} + +async function stitchImages(inputPaths: string[], labels: string[], outputPath: string) { + if (inputPaths.length !== 2) { + throw new Error('Stitch requires exactly 2 images'); + } + + const gap = 16; + const labelHeight = 36; + const fontSize = 16; + const colors = ['#ef4444', '#22c55e']; + + const images = await Promise.all( + inputPaths.map(async (p) => { + const meta = await sharp(p).metadata(); + return { path: p, width: meta.width || 800, height: meta.height || 600 }; + }) + ); + + const maxHeight = Math.max(...images.map((i) => i.height)); + const totalWidth = images.reduce((sum, i) => sum + i.width, 0) + gap; + + const labelSvgs = images.map((img, i) => { + const escapedLabel = escapeXml(labels[i] || (i === 0 ? 'Before' : 'After')); + return Buffer.from(` + + + + ${escapedLabel} + + + `); + }); + + const imageBuffers = await Promise.all(inputPaths.map((p) => sharp(p).toBuffer())); + + let xOffset = 0; + const composites: sharp.OverlayOptions[] = []; + + for (let i = 0; i < images.length; i++) { + composites.push({ + input: labelSvgs[i], + top: 0, + left: xOffset, + }); + composites.push({ + input: imageBuffers[i], + top: labelHeight, + left: xOffset, + }); + xOffset += images[i].width + gap; + } + + await sharp({ + create: { + width: totalWidth, + height: maxHeight + labelHeight, + channels: 4, + background: { r: 245, g: 245, b: 245, alpha: 1 }, + }, + }) + .composite(composites) + .png() + .toFile(outputPath); + + console.log(`Stitched: ${outputPath}`); +} + +async function main() { + const inputPath = getArg('input'); + const label = getArg('label'); + const borderColor = getArg('border') || '#6b7280'; + const outputPath = getArg('output'); + const stitchPaths = getMultiArg('stitch'); + const labelsStr = getArg('labels'); + + if (stitchPaths.length === 2 && outputPath) { + const labels = labelsStr ? 
labelsStr.split(',').map((l) => l.trim()) : ['Before', 'After']; + await stitchImages(stitchPaths, labels, outputPath); + } else if (inputPath && outputPath) { + await addLabel(inputPath, label || 'Screenshot', borderColor, outputPath); + } else { + console.error('Usage:'); + console.error( + ' Label: npx tsx annotate.ts --input --label --border --output ' + ); + console.error( + ' Stitch: npx tsx annotate.ts --stitch --labels "Before,After" --output ' + ); + process.exit(1); + } +} + +main().catch((err) => { + console.error('Annotate failed:', err); + process.exit(1); +}); diff --git a/plugins/shared/skills/screengrabs/scripts/capture.ts b/plugins/shared/skills/screengrabs/scripts/capture.ts new file mode 100644 index 00000000..ebb86f75 --- /dev/null +++ b/plugins/shared/skills/screengrabs/scripts/capture.ts @@ -0,0 +1,243 @@ +/** + * Screengrab Capture Script + * + * Captures screenshots of UI pages with automatic sensitive data masking. + * Supports local dev servers, preview deployments, and reusable Playwright servers. + * + * Usage: + * npx tsx scripts/capture.ts \ + * --base-url http://localhost:3000 \ + * --routes "/dashboard,/settings" \ + * --output-dir ./screengrabs + * + * Playwright server mode: + * npx tsx scripts/capture.ts --serve --port 3001 + */ + +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import { type Browser, chromium } from 'playwright'; + +function getArg(name: string): string | undefined { + const idx = process.argv.indexOf(`--${name}`); + return idx !== -1 && idx + 1 < process.argv.length ? process.argv[idx + 1] : undefined; +} + +function hasFlag(name: string): boolean { + return process.argv.includes(`--${name}`); +} + +/** + * Load and execute a pre-script before masking/screenshot. + * The pre-script is a JS module that exports an async function receiving { page, url, route }. + * Use for interaction that must happen before capture: dismiss popups, click tabs, scroll, fill forms, etc. 
+ */ +async function runPreScript( + scriptPath: string, + page: import('playwright').Page, + url: string, + route: string +): Promise { + const resolvedPath = path.resolve(scriptPath); + if (!fs.existsSync(resolvedPath)) { + throw new Error(`Pre-script not found: ${resolvedPath}`); + } + console.log(` Running pre-script: ${resolvedPath}`); + // Use dynamic import (works with both CJS module.exports and ESM export default) + const preScript = await import(resolvedPath); + const fn = typeof preScript.default === 'function' ? preScript.default : preScript; + if (typeof fn !== 'function') { + throw new Error(`Pre-script must export a default function (got ${typeof fn}): ${resolvedPath}`); + } + await fn({ page, url, route }); +} + +const MASKING_CSS = ` + input[type="password"] { + -webkit-text-security: disc !important; + color: transparent !important; + text-shadow: 0 0 8px rgba(0,0,0,0.5) !important; + } +`; + +const MASKING_JS = `(() => { + // Mask password inputs + document.querySelectorAll('input[type="password"]').forEach(el => { + el.value = '••••••••'; + }); + + // Walk text nodes and redact sensitive patterns + const sensitivePatterns = [ + /sk-[a-zA-Z0-9]{20,}/g, + /sk-ant-[a-zA-Z0-9-]{20,}/g, + /sk_live_[a-zA-Z0-9]{20,}/g, + /Bearer\\s+[a-zA-Z0-9._-]{20,}/g, + /gh[pos]_[a-zA-Z0-9]{36}/g, + /AKIA[A-Z0-9]{16}/g, + /eyJ[a-zA-Z0-9_-]{50,}\\.[a-zA-Z0-9_-]+\\.[a-zA-Z0-9_-]+/g, + /-----BEGIN[A-Z ]*PRIVATE KEY-----/g, + /postgresql:\\/\\/[^\\s]+:[^\\s]+@/g, + ]; + + const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null); + let node; + while (node = walker.nextNode()) { + let text = node.textContent || ''; + let changed = false; + for (const pattern of sensitivePatterns) { + pattern.lastIndex = 0; + if (pattern.test(text)) { + pattern.lastIndex = 0; + text = text.replace(pattern, '[REDACTED]'); + changed = true; + } + } + if (changed) { + node.textContent = text; + } + } +})()`; + +async function startServer(port: number) { + const 
server = await chromium.launchServer({ + port, + headless: true, + }); + console.log(`Playwright server started at: ${server.wsEndpoint()}`); + console.log('Press Ctrl+C to stop.'); + process.on('SIGINT', async () => { + await server.close(); + process.exit(0); + }); +} + +async function capture() { + const baseUrl = getArg('base-url'); + const routesStr = getArg('routes'); + const outputDir = getArg('output-dir') || './screengrabs'; + const viewport = getArg('viewport') || '1280x800'; + const connectUrl = getArg('connect'); + const extraMaskSelectors = getArg('mask-selectors'); + const waitMs = Number.parseInt(getArg('wait') || '2000', 10); + const fullPage = hasFlag('full-page'); + const authCookie = getArg('auth-cookie'); + const preScriptPath = getArg('pre-script'); + + if (!baseUrl || !routesStr) { + console.error( + 'Usage: npx tsx capture.ts --base-url --routes [options]\n' + ); + console.error('Options:'); + console.error(' --output-dir Output directory (default: ./screengrabs)'); + console.error(' --viewport Viewport size (default: 1280x800)'); + console.error(' --connect Connect to existing Playwright server'); + console.error(' --mask-selectors Additional CSS selectors to blur (comma-separated)'); + console.error(' --wait Wait after page load (default: 2000)'); + console.error(' --full-page Capture full page screenshot'); + console.error(' --auth-cookie Set session cookie for auth'); + console.error(' --pre-script JS/TS file to run on page before capture (for interaction)'); + console.error('\nServer mode:'); + console.error(' --serve Start a reusable Playwright server'); + console.error(' --port Server port (default: 3001)'); + process.exit(1); + } + + const routes = routesStr.split(',').map((r) => r.trim()); + const [vw, vh] = viewport.split('x').map(Number); + + fs.mkdirSync(outputDir, { recursive: true }); + + let fullMaskingCss = MASKING_CSS; + if (extraMaskSelectors) { + const selectors = extraMaskSelectors.split(',').map((s) => s.trim()); + 
fullMaskingCss += selectors.map((s) => `\n ${s} { filter: blur(5px) !important; }`).join(''); + } + + let browser: Browser; + let isConnected = false; + + if (connectUrl) { + console.log(`Connecting to Playwright server at ${connectUrl}`); + browser = await chromium.connect(connectUrl); + isConnected = true; + } else { + console.log('Launching browser...'); + browser = await chromium.launch({ headless: true }); + } + + try { + const context = await browser.newContext({ + viewport: { width: vw, height: vh }, + }); + + if (authCookie) { + const url = new URL(baseUrl); + await context.addCookies([ + { + name: 'session', + value: authCookie, + domain: url.hostname, + path: '/', + }, + ]); + } + + const page = await context.newPage(); + + for (const route of routes) { + const url = `${baseUrl.replace(/\/$/, '')}${route}`; + const safeName = route.replace(/^\//, '').replace(/\//g, '-') || 'index'; + + console.log(`\nCapturing: ${url}`); + + try { + await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 }); + } catch { + console.log(' networkidle timed out, proceeding with domcontentloaded...'); + await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 }); + } + + await page.waitForTimeout(waitMs); + + // Run pre-script for interaction (dismiss popups, click tabs, scroll, etc.) 
+ if (preScriptPath) { + await runPreScript(preScriptPath, page, url, route); + await page.waitForTimeout(500); + } + + await page.addStyleTag({ content: fullMaskingCss }); + await page.evaluate(MASKING_JS); + await page.waitForTimeout(500); + + const screenshotPath = path.join(outputDir, `${safeName}.png`); + await page.screenshot({ path: screenshotPath, fullPage }); + console.log(` Screenshot: ${screenshotPath}`); + + const domText = await page.evaluate(() => document.body.innerText); + const textPath = path.join(outputDir, `${safeName}.dom-text.txt`); + fs.writeFileSync(textPath, domText, 'utf-8'); + console.log(` DOM text: ${textPath}`); + } + + await context.close(); + console.log(`\nDone. ${routes.length} screenshot(s) saved to ${outputDir}`); + } finally { + if (!isConnected) { + await browser.close(); + } + } +} + +async function main() { + if (hasFlag('serve')) { + const port = Number.parseInt(getArg('port') || '3001', 10); + await startServer(port); + } else { + await capture(); + } +} + +main().catch((err) => { + console.error('Capture failed:', err); + process.exit(1); +}); diff --git a/plugins/shared/skills/screengrabs/scripts/validate-sensitive.ts b/plugins/shared/skills/screengrabs/scripts/validate-sensitive.ts new file mode 100644 index 00000000..60d155d4 --- /dev/null +++ b/plugins/shared/skills/screengrabs/scripts/validate-sensitive.ts @@ -0,0 +1,148 @@ +/** + * Pre-upload Sensitive Data Validation + * + * Scans DOM text files (produced by capture.ts) for patterns that indicate + * sensitive data may have leaked through masking. Must pass before uploading + * screenshots to GitHub. + * + * Usage: + * npx tsx validate-sensitive.ts --dir ./screengrabs + */ + +import * as fs from 'node:fs'; +import * as path from 'node:path'; + +function getArg(name: string): string | undefined { + const idx = process.argv.indexOf(`--${name}`); + return idx !== -1 && idx + 1 < process.argv.length ? 
process.argv[idx + 1] : undefined; +} + +interface SensitivePattern { + name: string; + pattern: RegExp; + severity: 'critical' | 'warning'; +} + +const SENSITIVE_PATTERNS: SensitivePattern[] = [ + // Critical — real secrets + { name: 'OpenAI API key', pattern: /sk-[a-zA-Z0-9]{20,}/g, severity: 'critical' }, + { name: 'Anthropic API key', pattern: /sk-ant-[a-zA-Z0-9-]{20,}/g, severity: 'critical' }, + { name: 'Stripe secret key', pattern: /sk_live_[a-zA-Z0-9]{20,}/g, severity: 'critical' }, + { name: 'AWS access key', pattern: /AKIA[A-Z0-9]{16}/g, severity: 'critical' }, + { name: 'GitHub PAT (classic)', pattern: /ghp_[a-zA-Z0-9]{36}/g, severity: 'critical' }, + { name: 'GitHub OAuth token', pattern: /gho_[a-zA-Z0-9]{36}/g, severity: 'critical' }, + { name: 'GitHub App token', pattern: /ghs_[a-zA-Z0-9]{36}/g, severity: 'critical' }, + { name: 'PEM private key', pattern: /-----BEGIN[A-Z ]*PRIVATE KEY-----/g, severity: 'critical' }, + { + name: 'DB connection string with password', + pattern: /postgresql:\/\/[^\s:]+:[^\s@]+@/g, + severity: 'critical', + }, + { + name: 'Bearer token (long)', + pattern: /Bearer\s+[a-zA-Z0-9._-]{40,}/g, + severity: 'critical', + }, + + // Warning — might be sensitive + { + name: 'JWT token', + pattern: /eyJ[a-zA-Z0-9_-]{30,}\.[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+/g, + severity: 'warning', + }, + { + name: 'Bearer token (short)', + pattern: /Bearer\s+[a-zA-Z0-9._-]{20,39}/g, + severity: 'warning', + }, + { + name: 'Generic secret in assignment', + pattern: /(?:secret|password|token|api_key|apikey)\s*[:=]\s*["'][^"']{8,}["']/gi, + severity: 'warning', + }, +]; + +function scanFile(filePath: string): { critical: string[]; warnings: string[] } { + const content = fs.readFileSync(filePath, 'utf-8'); + const critical: string[] = []; + const warnings: string[] = []; + + for (const { name, pattern, severity } of SENSITIVE_PATTERNS) { + pattern.lastIndex = 0; + const matches = content.match(pattern); + if (matches) { + const msg = `${name}: 
${matches.length} occurrence(s)`; + if (severity === 'critical') { + critical.push(msg); + } else { + warnings.push(msg); + } + } + } + + return { critical, warnings }; +} + +function main() { + const dir = getArg('dir') || './screengrabs'; + + if (!fs.existsSync(dir)) { + console.error(`Directory not found: ${dir}`); + process.exit(1); + } + + const textFiles = fs.readdirSync(dir).filter((f) => f.endsWith('.dom-text.txt')); + + if (textFiles.length === 0) { + console.log('No .dom-text.txt files found. Run capture.ts first.'); + process.exit(0); + } + + let hasCritical = false; + let hasWarnings = false; + + for (const file of textFiles) { + const filePath = path.join(dir, file); + const { critical, warnings } = scanFile(filePath); + + if (critical.length > 0) { + console.error(`\n\u274C CRITICAL in ${file}:`); + for (const msg of critical) { + console.error(` ${msg}`); + } + hasCritical = true; + } + + if (warnings.length > 0) { + console.warn(`\n\u26A0\uFE0F WARNING in ${file}:`); + for (const msg of warnings) { + console.warn(` ${msg}`); + } + hasWarnings = true; + } + + if (critical.length === 0 && warnings.length === 0) { + console.log(`\u2713 ${file}: clean`); + } + } + + console.log(''); + + if (hasCritical) { + console.error('\u274C Sensitive data detected. Do NOT upload these screenshots to GitHub.'); + console.error( + 'Re-capture with additional --mask-selectors or manually redact before uploading.' + ); + process.exit(1); + } + + if (hasWarnings) { + console.warn('\u26A0\uFE0F Warnings found. Review the flagged content before uploading.'); + console.warn('These may be false positives. Use judgment before proceeding.'); + process.exit(0); + } + + console.log('\u2705 All files clean. 
Safe to upload.'); +} + +main(); diff --git a/plugins/shared/skills/write-skill/scripts/validate_skill_dir.ts b/plugins/shared/skills/write-skill/scripts/validate_skill_dir.ts index 0a33d644..0272cb2a 100755 --- a/plugins/shared/skills/write-skill/scripts/validate_skill_dir.ts +++ b/plugins/shared/skills/write-skill/scripts/validate_skill_dir.ts @@ -39,25 +39,51 @@ function parseFrontmatter(text: string): { data: Frontmatter | null; warnings: s const data: Frontmatter = {}; const warnings: string[] = []; - for (const originalLine of raw.split(/\r?\n/)) { - const line = originalLine.trim(); - if (!line || line.startsWith('#')) continue; + const lines = raw.split(/\r?\n/); + let i = 0; + while (i < lines.length) { + const line = lines[i].trim(); + if (!line || line.startsWith('#')) { + i++; + continue; + } const idx = line.indexOf(':'); if (idx === -1) { warnings.push(`Unparsed frontmatter line (expected key: value): ${line}`); + i++; continue; } const key = line.slice(0, idx).trim(); let val = line.slice(idx + 1).trim(); - // Strip surrounding quotes if present - if ( - (val.startsWith('"') && val.endsWith('"') && val.length >= 2) || - (val.startsWith("'") && val.endsWith("'") && val.length >= 2) - ) { - val = val.slice(1, -1); + // Handle YAML block scalars (| or > with optional chomping indicators like |-, >+, etc.) 
+ if (/^[|>][+-]?\s*$/.test(val)) { + // Collect continuation lines: any line that is indented more than the key line, + // or is blank (blank lines are preserved in block scalars) + const blockLines: string[] = []; + i++; + while (i < lines.length) { + const nextRaw = lines[i]; + // A non-empty line that isn't indented (starts at column 0 with a non-space char) + // signals the end of the block scalar + if (nextRaw.length > 0 && nextRaw[0] !== ' ' && nextRaw[0] !== '\t') { + break; + } + blockLines.push(nextRaw); + i++; + } + val = blockLines.map((l) => l.trimStart()).join('\n').trim(); + } else { + // Strip surrounding quotes if present + if ( + (val.startsWith('"') && val.endsWith('"') && val.length >= 2) || + (val.startsWith("'") && val.endsWith("'") && val.length >= 2) + ) { + val = val.slice(1, -1); + } + i++; } data[key] = val; @@ -116,6 +142,12 @@ function findSuspiciousReferences(skillText: string): string[] { for (const p of Array.from(candidates).sort()) { if (p.startsWith('http://') || p.startsWith('https://')) continue; + // Skip prose-like "word/word/word" patterns that aren't real file paths. + // Real paths typically have: a file extension, start with . or ~, contain common + // directory names, or are multi-segment with path-like structure. + // Prose alternatives like "pass/fail/blocked" or "P0/P1/P2" lack these signals. + if (isLikelyProse(p)) continue; + const segs = p .split('/') .map((s) => s.trim()) @@ -132,6 +164,46 @@ function findSuspiciousReferences(skillText: string): string[] { return warnings; } +/** + * Heuristic: distinguish real file paths from prose alternatives like + * "pass/fail/blocked", "P0/P1/P2", "Quick/Custom/Guided", "YAML/JSON/TOML". + * + * A candidate is likely prose if ALL of these are true: + * - No segment contains a file extension (e.g., .ts, .md, .json) + * - Doesn't start with . or ~ (relative/home paths) + * - No segment matches common directory names (src, lib, references, scripts, etc.) 
+ * - Every segment is a single short word (no dots, no hyphens longer than typical filenames) + * + * Returns true if the candidate looks like prose, false if it looks like a real path. + */ +function isLikelyProse(candidate: string): boolean { + const segs = candidate.split('/').filter(Boolean); + if (segs.length === 0) return true; + + // Starts with . or ~ → almost certainly a real path + if (candidate.startsWith('.') || candidate.startsWith('~')) return false; + + // Any segment has a file extension → real path + const extRe = /\.\w{1,10}$/; + if (segs.some((s) => extRe.test(s))) return false; + + // Any segment matches common directory names → real path + const dirNames = new Set([ + 'src', 'lib', 'app', 'bin', 'dist', 'build', 'out', 'tmp', 'temp', + 'scripts', 'references', 'templates', 'assets', 'rules', 'config', + 'tests', 'test', '__tests__', 'spec', 'docs', 'node_modules', 'vendor', + 'packages', 'plugins', 'skills', 'agents', 'claude', 'reports', + ]); + if (segs.some((s) => dirNames.has(s.toLowerCase()))) return false; + + // If every segment is a short word (<=20 chars) with no dots and no hyphens, + // it's likely a prose enumeration like "pass/fail/blocked" + const isShortWord = (s: string) => s.length <= 20 && !s.includes('.') && !/^\d+$/.test(s); + if (segs.every(isShortWord)) return true; + + return false; +} + function main(): number { const args = process.argv.slice(2); if (args.length !== 1) {