Skip to content

Commit 19408ec

Browse files
NNTinclaudeflorintimbuc
authored
feat: playwright e2e tests for vscode extension (#161)
* feat: add Playwright e2e infrastructure for VS Code extension testing Adds a complete end-to-end test setup using Playwright's Electron API to validate the Pixel Agents extension inside a real VS Code instance. - e2e/playwright.config.ts — config with video, trace, 1-worker isolation - e2e/global-setup.ts — downloads VS Code via @vscode/test-electron - e2e/helpers/launch.ts — launches VS Code with isolated HOME + mock PATH - e2e/helpers/webview.ts — waits for the webview frame, clicks + Agent - e2e/fixtures/mock-claude — deterministic claude shim: creates JSONL file, logs invocation; no real Claude CLI required - e2e/tests/agent-spawn.spec.ts — first spec: click + Agent → assert mock called, JSONL session file created, terminal tab visible - e2e/tsconfig.json — strict TypeScript config for test code NPM scripts added: e2e (xvfb-run headless), e2e:headed, e2e:debug CONTRIBUTING.md updated with running instructions, artifact paths, mock docs test-results/ and playwright-report/ added to .gitignore Test passes locally in 25 s; video recording confirmed at test-results/e2e/videos/<test-name>/*.webm Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * feat: revert this commit later, for now we will include video report in artifact * test: enlarge panel for e2e webview * feat: enable tests in pipeline * feat: add debugging help for e2e test * ci: install Playwright with deps and chromium * revert: add debugging help for e2e test This reverts commit 52096c0. * fix: cross-platform e2e fixes for Playwright tests on macOS * fix: Windows e2e support, add mock-claude.cmd * feat: enable matrix CI run * fix: resolve e2e failures across CI platforms * fix: prevent Windows CI installs from failing * fix: e2e:headed broke * revert: fix: e2e:headed broke This reverts commit bcffe69. * chore: dropping support for headed * docs: dropped support for headed * fix: using cached vscode-test * fix: wrong path for cache * feat: restore and cache .vscode-test. 
First run will always say cache not found * fix: poisoned cache, retrying with new * feat: caching playwright as well * fix: flaky test step in macos ci * fix: e2e dynamic VS Code cache key, keychain cleanup * fix: e2e avoid stale row selection * fix: e2e macos display palette * ci: skip PR title check for Dependabot, restrict badge updates to main repo * ci: skip PR title check for Dependabot, restrict badge updates to main repo --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: Florin Timbuc <florin@sowild.design>
1 parent 07898b7 commit 19408ec

File tree

14 files changed

+1206
-15
lines changed

14 files changed

+1206
-15
lines changed

.github/workflows/ci.yml

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@ jobs:
7575
run: npm run lint
7676
continue-on-error: true
7777

78+
- name: Webview Tests
79+
id: webview_test
80+
if: always() && steps.install_webview.outcome == 'success'
81+
working-directory: webview-ui
82+
run: npm test
83+
continue-on-error: true
84+
7885
- name: Format Check
7986
id: format_check
8087
if: always() && steps.install_root.outcome == 'success'
@@ -118,6 +125,7 @@ jobs:
118125
TYPE_CHECK: ${{ steps.type_check.outcome }}
119126
ROOT_LINT: ${{ steps.root_lint.outcome }}
120127
WEBVIEW_LINT: ${{ steps.webview_lint.outcome }}
128+
WEBVIEW_TEST: ${{ steps.webview_test.outcome }}
121129
FORMAT_CHECK: ${{ steps.format_check.outcome }}
122130
BUILD: ${{ steps.build.outcome }}
123131
AUDIT_ROOT: ${{ steps.audit_root.outcome }}
@@ -138,6 +146,7 @@ jobs:
138146
echo "| **Type check** | $(status "$TYPE_CHECK") |"
139147
echo "| **Root lint** | $(status "$ROOT_LINT") |"
140148
echo "| **Webview lint** | $(status "$WEBVIEW_LINT") |"
149+
echo "| **Webview tests** | $(status "$WEBVIEW_TEST") |"
141150
echo "| **Format check** | $(status "$FORMAT_CHECK") |"
142151
echo "| **Build** | $(status "$BUILD") |"
143152
echo "| Audit root _(advisory)_ | $(status "$AUDIT_ROOT") |"
@@ -156,17 +165,121 @@ jobs:
156165
TYPE_CHECK: ${{ steps.type_check.outcome }}
157166
ROOT_LINT: ${{ steps.root_lint.outcome }}
158167
WEBVIEW_LINT: ${{ steps.webview_lint.outcome }}
168+
WEBVIEW_TEST: ${{ steps.webview_test.outcome }}
159169
FORMAT_CHECK: ${{ steps.format_check.outcome }}
160170
BUILD: ${{ steps.build.outcome }}
161171
run: |
162172
failed=0
163173
for step in CHECKOUT SETUP_NODE INSTALL_ROOT INSTALL_WEBVIEW \
164-
TYPE_CHECK ROOT_LINT WEBVIEW_LINT FORMAT_CHECK \
165-
BUILD; do
174+
TYPE_CHECK ROOT_LINT WEBVIEW_LINT \
175+
WEBVIEW_TEST FORMAT_CHECK BUILD; do
166176
eval "val=\$$step"
167177
if [ "$val" != "success" ]; then
168178
echo "::error::$step failed"
169179
failed=1
170180
fi
171181
done
172182
exit "$failed"
183+
184+
e2e:
185+
needs: ci
186+
strategy:
187+
fail-fast: false
188+
matrix:
189+
os: [ubuntu-latest, macos-latest, windows-latest]
190+
runs-on: ${{ matrix.os }}
191+
timeout-minutes: 15
192+
env:
193+
PLAYWRIGHT_BROWSERS_PATH: .playwright-browsers
194+
195+
steps:
196+
- name: Checkout
197+
uses: actions/checkout@v6
198+
with:
199+
fetch-depth: 0
200+
201+
- name: Setup Node
202+
uses: actions/setup-node@v6
203+
with:
204+
node-version-file: .nvmrc
205+
cache: npm
206+
cache-dependency-path: |
207+
package-lock.json
208+
webview-ui/package-lock.json
209+
210+
- name: Restore VS Code Cache
211+
id: cache_vscode_restore
212+
uses: actions/cache/restore@v4
213+
with:
214+
path: .vscode-test
215+
key: vscode-test-${{ runner.os }}-${{ hashFiles('e2e/global-setup.ts') }}-v2
216+
restore-keys: |
217+
vscode-test-${{ runner.os }}-
218+
219+
- name: Restore Playwright Cache
220+
id: cache_playwright_restore
221+
uses: actions/cache/restore@v4
222+
with:
223+
path: .playwright-browsers
224+
key: playwright-browsers-${{ runner.os }}-${{ hashFiles('package-lock.json') }}-v1
225+
restore-keys: |
226+
playwright-browsers-${{ runner.os }}-
227+
228+
- name: Install Root Dependencies
229+
run: npm ci
230+
231+
- name: Install Webview Dependencies
232+
working-directory: webview-ui
233+
run: npm ci
234+
235+
- name: Build
236+
run: node esbuild.js
237+
238+
- name: Build Webview
239+
working-directory: webview-ui
240+
run: npm run build
241+
242+
- name: Install Playwright Dependencies
243+
id: install_playwright_deps
244+
run: npx playwright install --with-deps chromium
245+
continue-on-error: true
246+
247+
- name: E2E Tests
248+
id: e2e_test
249+
if: steps.install_playwright_deps.outcome == 'success'
250+
run: npm run e2e
251+
continue-on-error: true
252+
253+
- name: Save VS Code Cache
254+
if: always() && steps.cache_vscode_restore.outputs.cache-hit != 'true' && steps.e2e_test.outcome == 'success' && hashFiles('.vscode-test/vscode-executable.txt') != ''
255+
uses: actions/cache/save@v4
256+
with:
257+
path: .vscode-test
258+
key: ${{ steps.cache_vscode_restore.outputs.cache-primary-key }}
259+
260+
- name: Save Playwright Cache
261+
if: always() && steps.cache_playwright_restore.outputs.cache-hit != 'true' && steps.install_playwright_deps.outcome == 'success' && hashFiles('.playwright-browsers/**') != ''
262+
uses: actions/cache/save@v4
263+
with:
264+
path: .playwright-browsers
265+
key: ${{ steps.cache_playwright_restore.outputs.cache-primary-key }}
266+
267+
- name: Write Step Summary
268+
if: always()
269+
shell: bash
270+
env:
271+
OS: ${{ matrix.os }}
272+
INSTALL_PLAYWRIGHT_DEPS: ${{ steps.install_playwright_deps.outcome }}
273+
E2E_TEST: ${{ steps.e2e_test.outcome }}
274+
run: |
275+
status() {
276+
if [ "$1" = "success" ]; then echo "✅ PASS"; else echo "❌ FAIL"; fi
277+
}
278+
{
279+
echo "## E2E Results ($OS)"
280+
echo
281+
echo "| Check | Result |"
282+
echo "| --- | --- |"
283+
echo "| Install Playwright deps | $(status "$INSTALL_PLAYWRIGHT_DEPS") |"
284+
echo "| E2E tests | $(status "$E2E_TEST") |"
285+
} >> "$GITHUB_STEP_SUMMARY"

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ Thumbs.db
1717
.vscode-test/
1818
/.idea
1919

20+
# E2E test artifacts
21+
test-results/
22+
playwright-report/
23+
2024
# Build artifacts
2125
*.vsix
2226
*.map

CONTRIBUTING.md

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,49 @@ These conventions are enforced by custom ESLint rules (`eslint-rules/pixel-agent
9494

9595
These rules are set to `warn` — they won't block your PR but will flag violations for cleanup.
9696

97+
## End-to-End Tests
98+
99+
The `e2e/` directory contains Playwright tests that launch a real VS Code instance with the extension loaded in development mode.
100+
101+
### Running e2e tests locally
102+
103+
```bash
104+
# Build the extension first (tests load the compiled output)
105+
npm run build
106+
107+
# Run the e2e tests
108+
npm run e2e
109+
110+
# Step-by-step debug mode
111+
npm run e2e:debug
112+
```
113+
114+
On the first run, `@vscode/test-electron` will download a stable VS Code release into `.vscode-test/` (≈200 MB). Subsequent runs reuse the cache.
115+
116+
### Artifacts
117+
118+
Test artifacts are written to `test-results/e2e/`, and the HTML report to `playwright-report/e2e/`:
119+
120+
| Path | Contents |
121+
|---|---|
122+
| `test-results/e2e/videos/<test-name>/` | `.webm` screen recording for every test |
123+
| `playwright-report/e2e/` | Playwright HTML report (`npx playwright show-report playwright-report/e2e`) |
124+
| `test-results/e2e/*.png` | Final screenshots saved on failure |
125+
126+
On failure, the test output prints the path to the video for that run.
127+
128+
### Mock claude
129+
130+
Tests never invoke the real `claude` CLI. Instead, a mock script — the bash script `e2e/fixtures/mock-claude`, or the batch script `e2e/fixtures/mock-claude.cmd` on Windows — is copied into an isolated `bin/` directory and prepended to `PATH` before VS Code starts.
131+
132+
The mock:
133+
1. Parses `--session-id <uuid>` from its arguments.
134+
2. Appends a line to `$HOME/.claude-mock/invocations.log` so tests can assert it was called.
135+
3. Creates `$HOME/.claude/projects/<project-hash>/<session-id>.jsonl` with a minimal init line so the extension's file-watcher can detect the session.
136+
4. Sleeps for 30 s (keeps the terminal alive) then exits.
137+
138+
Each test runs with an isolated `HOME` and `--user-data-dir`, so no test state leaks between runs or into your real VS Code profile.
139+
97140
## Submitting a Pull Request
98141

99142
1. Fork the repo and create a feature branch from `main`

e2e/fixtures/mock-claude

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env bash
# Mock 'claude' executable for Pixel Agents e2e tests.
#
# Behaviour:
#   1. Parses --session-id <id> from args.
#   2. Appends an invocation record to $HOME/.claude-mock/invocations.log.
#   3. Creates the expected JSONL file under $HOME/.claude/projects/<hash>/<id>.jsonl
#      using the same path-hash algorithm as agentManager.ts
#      (replace every non-[a-zA-Z0-9-] char with '-').
#   4. Writes a minimal valid JSONL line so the extension file-watcher can proceed.
#   5. Stays alive for up to 30 s (tests can kill it once assertions pass).

set -euo pipefail

# Pick up the argument that follows the --session-id flag.
SESSION_ID=""
PREV=""
for arg in "$@"; do
  if [ "$PREV" = "--session-id" ]; then
    SESSION_ID="$arg"
  fi
  PREV="$arg"
done

LOG_DIR="${HOME}/.claude-mock"
mkdir -p "$LOG_DIR"
# Use a POSIX format string rather than `date -Iseconds`: the -I flag is an
# extension that not every `date` implementation accepts, and where it is
# missing the record would silently get an empty timestamp.
TIMESTAMP="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
echo "${TIMESTAMP} session-id=${SESSION_ID} cwd=$(pwd) args=$*" >> "${LOG_DIR}/invocations.log"

if [ -n "$SESSION_ID" ]; then
  CWD="$(pwd)"
  # Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-')
  DIR_NAME="$(printf '%s' "$CWD" | tr -c 'a-zA-Z0-9-' '-')"
  PROJECT_DIR="${HOME}/.claude/projects/${DIR_NAME}"
  mkdir -p "$PROJECT_DIR"
  JSONL_FILE="${PROJECT_DIR}/${SESSION_ID}.jsonl"

  # Write a minimal system init line so the extension watcher sees the file.
  printf '{"type":"system","subtype":"init","content":"mock-claude-ready"}\n' >> "$JSONL_FILE"
fi

# Clean exit on SIGTERM/SIGINT. The handler is installed before the sleep is
# started so a signal arriving in between is still handled; `|| true` keeps
# `set -e` from turning a failed kill into a non-zero exit.
SLEEP_PID=""
trap 'if [ -n "$SLEEP_PID" ]; then kill "$SLEEP_PID" 2>/dev/null || true; fi; exit 0' SIGTERM SIGINT

# Stay alive so the VS Code terminal doesn't immediately close.
sleep 30 &
SLEEP_PID=$!

wait "$SLEEP_PID" || true

e2e/fixtures/mock-claude.cmd

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
@echo off
REM Mock 'claude' executable for Pixel Agents e2e tests (Windows).
REM
REM Behaviour:
REM   1. Parses --session-id <id> from args.
REM   2. Appends an invocation record to %HOME%\.claude-mock\invocations.log.
REM   3. Creates the expected JSONL file under %HOME%\.claude\projects\<hash>\<id>.jsonl
REM   4. Stays alive for up to 30 s (tests can kill it once assertions pass).

setlocal enabledelayedexpansion

set "SESSION_ID="
set "PREV="

REM Walk the arguments; the value following --session-id is the session id.
:parse_args
if "%~1"=="" goto done_args
if "!PREV!"=="--session-id" set "SESSION_ID=%~1"
set "PREV=%~1"
shift
goto parse_args
:done_args

REM Use HOME if set (our e2e sets it), fall back to USERPROFILE
if defined HOME (
  set "MOCK_HOME=%HOME%"
) else (
  set "MOCK_HOME=%USERPROFILE%"
)

set "LOG_DIR=%MOCK_HOME%\.claude-mock"
if not exist "%LOG_DIR%" mkdir "%LOG_DIR%"
echo %DATE% %TIME% session-id=%SESSION_ID% cwd=%CD% args=%* >> "%LOG_DIR%\invocations.log"

if "%SESSION_ID%"=="" goto stay_alive

REM Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-')
REM PowerShell reads the working directory itself (it inherits this process's
REM cwd) instead of having %CD% spliced into a quoted PowerShell string, so a
REM path containing an apostrophe cannot break the command.
for /f "delims=" %%D in ('powershell -NoProfile -Command "[regex]::Replace((Get-Location).ProviderPath, '[^a-zA-Z0-9-]', '-')"') do set "DIR_NAME=%%D"

set "PROJECT_DIR=%MOCK_HOME%\.claude\projects\%DIR_NAME%"
if not exist "%PROJECT_DIR%" mkdir "%PROJECT_DIR%"

REM Write a minimal system init line so the extension watcher sees the file.
set "JSONL_FILE=%PROJECT_DIR%\%SESSION_ID%.jsonl"
echo {"type":"system","subtype":"init","content":"mock-claude-ready"} >> "%JSONL_FILE%"

:stay_alive
REM Stay alive so the VS Code terminal doesn't immediately close.
REM ping -n 31 sends 31 echo requests roughly one second apart (about 30 s); it
REM is used as the sleep because the timeout command requires a console.
ping -n 31 127.0.0.1 > nul 2>&1

e2e/global-setup.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import { downloadAndUnzipVSCode } from '@vscode/test-electron';
2+
import fs from 'fs';
3+
import path from 'path';
4+
5+
export const VSCODE_CACHE_DIR = path.join(__dirname, '../.vscode-test');
6+
export const VSCODE_PATH_FILE = path.join(VSCODE_CACHE_DIR, 'vscode-executable.txt');
7+
8+
/**
 * Global setup for the e2e suite.
 *
 * Downloads the stable VS Code build into {@link VSCODE_CACHE_DIR} via
 * `downloadAndUnzipVSCode`, then writes the resulting executable path to
 * {@link VSCODE_PATH_FILE} so later code can read it back from disk.
 *
 * @returns A promise that resolves once the path file has been written.
 */
export default async function globalSetup(): Promise<void> {
  console.log('[e2e] Ensuring VS Code is downloaded...');

  const downloadOptions = {
    version: 'stable',
    cachePath: VSCODE_CACHE_DIR,
  };
  const executablePath = await downloadAndUnzipVSCode(downloadOptions);
  console.log(`[e2e] VS Code executable: ${executablePath}`);

  // Make sure the cache directory exists before writing the path file into it.
  fs.mkdirSync(VSCODE_CACHE_DIR, { recursive: true });
  fs.writeFileSync(VSCODE_PATH_FILE, executablePath, 'utf8');
}

0 commit comments

Comments
 (0)