From 09c344c51613f06c0c7b435e3aa6ebce6c76d77b Mon Sep 17 00:00:00 2001 From: NNTin Date: Wed, 18 Mar 2026 19:30:04 +0100 Subject: [PATCH 01/27] feat: add Playwright e2e infrastructure for VS Code extension testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a complete end-to-end test setup using Playwright's Electron API to validate the Pixel Agents extension inside a real VS Code instance. - e2e/playwright.config.ts — config with video, trace, 1-worker isolation - e2e/global-setup.ts — downloads VS Code via @vscode/test-electron - e2e/helpers/launch.ts — launches VS Code with isolated HOME + mock PATH - e2e/helpers/webview.ts — waits for the webview frame, clicks + Agent - e2e/fixtures/mock-claude — deterministic claude shim: creates JSONL file, logs invocation; no real Claude CLI required - e2e/tests/agent-spawn.spec.ts — first spec: click + Agent → assert mock called, JSONL session file created, terminal tab visible - e2e/tsconfig.json — strict TypeScript config for test code NPM scripts added: e2e (xvfb-run headless), e2e:headed, e2e:debug CONTRIBUTING.md updated with running instructions, artifact paths, mock docs test-results/ and playwright-report/ added to .gitignore Test passes locally in 25 s; video recording confirmed at test-results/e2e/videos//*.webm Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 4 + CONTRIBUTING.md | 46 +++++ e2e/fixtures/mock-claude | 47 +++++ e2e/global-setup.ts | 18 ++ e2e/helpers/launch.ts | 122 +++++++++++ e2e/helpers/webview.ts | 69 +++++++ e2e/playwright.config.ts | 31 +++ e2e/tests/agent-spawn.spec.ts | 114 +++++++++++ e2e/tsconfig.json | 16 ++ package-lock.json | 369 ++++++++++++++++++++++++++++++++++ package.json | 9 +- 11 files changed, 844 insertions(+), 1 deletion(-) create mode 100755 e2e/fixtures/mock-claude create mode 100644 e2e/global-setup.ts create mode 100644 e2e/helpers/launch.ts create mode 100644 e2e/helpers/webview.ts create mode 100644 
e2e/playwright.config.ts create mode 100644 e2e/tests/agent-spawn.spec.ts create mode 100644 e2e/tsconfig.json diff --git a/.gitignore b/.gitignore index 30ab455b..66b1f150 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,10 @@ Thumbs.db .vscode-test/ /.idea +# E2E test artifacts +test-results/ +playwright-report/ + # Build artifacts *.vsix *.map diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9c1487a..24e9e9bf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -94,6 +94,52 @@ These conventions are enforced by custom ESLint rules (`eslint-rules/pixel-agent These rules are set to `warn` — they won't block your PR but will flag violations for cleanup. +## End-to-End Tests + +The `e2e/` directory contains Playwright tests that launch a real VS Code instance with the extension loaded in development mode. + +### Running e2e tests locally + +```bash +# Build the extension first (tests load the compiled output) +npm run build + +# Headless (default — uses xvfb-run on Linux) +npm run e2e + +# Headed (shows the VS Code window) +npm run e2e:headed + +# Step-by-step debug mode +npm run e2e:debug +``` + +On the first run, `@vscode/test-electron` will download a stable VS Code release into `.vscode-test/` (≈200 MB). Subsequent runs reuse the cache. + +### Artifacts + +All test artifacts are written to `test-results/e2e/`: + +| Path | Contents | +|---|---| +| `test-results/e2e/videos/<test-name>/` | `.webm` screen recording for every test | +| `test-results/e2e/html/` | Playwright HTML report (`npx playwright show-report test-results/e2e/html`) | +| `test-results/e2e/*.png` | Final screenshots saved on failure | + +On failure, the test output prints the path to the video for that run. + +### Mock claude + +Tests never invoke the real `claude` CLI. Instead, a bash script at `e2e/fixtures/mock-claude` is copied into an isolated `bin/` directory and prepended to `PATH` before VS Code starts. + +The mock: +1. Parses `--session-id <id>` from its arguments. +2. 
Appends a line to `$HOME/.claude-mock/invocations.log` so tests can assert it was called. +3. Creates `$HOME/.claude/projects/<hash>/<session-id>.jsonl` with a minimal init line so the extension's file-watcher can detect the session. +4. Sleeps for 30 s (keeps the terminal alive) then exits. + +Each test runs with an isolated `HOME` and `--user-data-dir`, so no test state leaks between runs or into your real VS Code profile. + ## Submitting a Pull Request 1. Fork the repo and create a feature branch from `main` diff --git a/e2e/fixtures/mock-claude b/e2e/fixtures/mock-claude new file mode 100755 index 00000000..a1db3a99 --- /dev/null +++ b/e2e/fixtures/mock-claude @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Mock 'claude' executable for Pixel Agents e2e tests. +# +# Behaviour: +# 1. Parses --session-id <id> from args. +# 2. Appends an invocation record to $HOME/.claude-mock/invocations.log. +# 3. Creates the expected JSONL file under $HOME/.claude/projects/<hash>/<session-id>.jsonl +# using the same path-hash algorithm as agentManager.ts +# (replace every non-[a-zA-Z0-9-] char with '-'). +# 4. Writes a minimal valid JSONL line so the extension file-watcher can proceed. +# 5. Stays alive for up to 30 s (tests can kill it once assertions pass). + +set -euo pipefail + +SESSION_ID="" +PREV="" +for arg in "$@"; do + if [ "$PREV" = "--session-id" ]; then + SESSION_ID="$arg" + fi + PREV="$arg" +done + +LOG_DIR="${HOME}/.claude-mock" +mkdir -p "$LOG_DIR" +echo "$(date -Iseconds) session-id=${SESSION_ID} cwd=$(pwd) args=$*" >> "${LOG_DIR}/invocations.log" + +if [ -n "$SESSION_ID" ]; then + CWD="$(pwd)" + # Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-') (one '-' per char; no -s squeeze) + DIR_NAME="$(printf '%s' "$CWD" | tr -c 'a-zA-Z0-9-' '-')" + PROJECT_DIR="${HOME}/.claude/projects/${DIR_NAME}" + mkdir -p "$PROJECT_DIR" + JSONL_FILE="${PROJECT_DIR}/${SESSION_ID}.jsonl" + + # Write a minimal system init line so the extension watcher sees the file. 
+ printf '{"type":"system","subtype":"init","content":"mock-claude-ready"}\n' >> "$JSONL_FILE" +fi + +# Stay alive so the VS Code terminal doesn't immediately close. +sleep 30 & +SLEEP_PID=$! + +# Clean exit on SIGTERM/SIGINT. +trap 'kill $SLEEP_PID 2>/dev/null; exit 0' SIGTERM SIGINT + +wait $SLEEP_PID || true diff --git a/e2e/global-setup.ts b/e2e/global-setup.ts new file mode 100644 index 00000000..d71dcd7e --- /dev/null +++ b/e2e/global-setup.ts @@ -0,0 +1,18 @@ +import { downloadAndUnzipVSCode } from '@vscode/test-electron'; +import fs from 'fs'; +import path from 'path'; + +export const VSCODE_CACHE_DIR = path.join(__dirname, '../.vscode-test'); +export const VSCODE_PATH_FILE = path.join(VSCODE_CACHE_DIR, 'vscode-executable.txt'); + +export default async function globalSetup(): Promise<void> { + console.log('[e2e] Ensuring VS Code is downloaded...'); + const vscodePath = await downloadAndUnzipVSCode({ + version: 'stable', + cachePath: VSCODE_CACHE_DIR, + }); + console.log(`[e2e] VS Code executable: ${vscodePath}`); + + fs.mkdirSync(VSCODE_CACHE_DIR, { recursive: true }); + fs.writeFileSync(VSCODE_PATH_FILE, vscodePath, 'utf8'); +} diff --git a/e2e/helpers/launch.ts b/e2e/helpers/launch.ts new file mode 100644 index 00000000..407c23f3 --- /dev/null +++ b/e2e/helpers/launch.ts @@ -0,0 +1,122 @@ +import { _electron as electron } from '@playwright/test'; +import type { ElectronApplication, Page } from '@playwright/test'; +import fs from 'fs'; +import os from 'os'; +import path from 'path'; + +const REPO_ROOT = path.join(__dirname, '../..'); +const VSCODE_PATH_FILE = path.join(REPO_ROOT, '.vscode-test/vscode-executable.txt'); +const MOCK_CLAUDE_PATH = path.join(REPO_ROOT, 'e2e/fixtures/mock-claude'); +const ARTIFACTS_DIR = path.join(REPO_ROOT, 'test-results/e2e'); + +export interface VSCodeSession { + app: ElectronApplication; + window: Page; + /** Isolated HOME directory for this test session. */ + tmpHome: string; + /** Workspace directory opened in VS Code. 
*/ + workspaceDir: string; + /** Path to the mock invocations log. */ + mockLogFile: string; + cleanup: () => Promise<void>; +} + +/** + * Launch VS Code with the Pixel Agents extension loaded in development mode. + * + * Uses an isolated temp HOME and injects the mock `claude` binary at the + * front of PATH so no real Claude CLI is needed. + */ +export async function launchVSCode(testTitle: string): Promise<VSCodeSession> { + const vscodePath = fs.readFileSync(VSCODE_PATH_FILE, 'utf8').trim(); + + // --- Isolated temp directories --- + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'pixel-e2e-')); + const tmpHome = path.join(tmpBase, 'home'); + const workspaceDir = path.join(tmpBase, 'workspace'); + const userDataDir = path.join(tmpBase, 'userdata'); + const mockBinDir = path.join(tmpBase, 'bin'); + + fs.mkdirSync(tmpHome, { recursive: true }); + fs.mkdirSync(workspaceDir, { recursive: true }); + fs.mkdirSync(userDataDir, { recursive: true }); + fs.mkdirSync(mockBinDir, { recursive: true }); + + // Copy mock-claude into an isolated bin dir under the name 'claude' + const mockDest = path.join(mockBinDir, 'claude'); + fs.copyFileSync(MOCK_CLAUDE_PATH, mockDest); + fs.chmodSync(mockDest, 0o755); + + const mockLogFile = path.join(tmpHome, '.claude-mock', 'invocations.log'); + + // --- Video output dir --- + const safeTitle = testTitle.replace(/[^a-z0-9]+/gi, '-').toLowerCase(); + const videoDir = path.join(ARTIFACTS_DIR, 'videos', safeTitle); + fs.mkdirSync(videoDir, { recursive: true }); + + // --- Environment for VS Code process --- + const env: Record<string, string> = { + ...process.env as Record<string, string>, + HOME: tmpHome, + // Prepend mock bin so 'claude' resolves to our mock + PATH: `${mockBinDir}:${process.env['PATH'] ?? 
'/usr/local/bin:/usr/bin:/bin'}`, + // Prevent VS Code from trying to talk to real accounts / telemetry + VSCODE_TELEMETRY_DISABLED: '1', + }; + + // --- VS Code launch args --- + const args = [ + // Load our extension in dev mode (this overrides the installed version) + `--extensionDevelopmentPath=${REPO_ROOT}`, + // Disable all other extensions so tests are isolated + '--disable-extensions', + // Isolated user-data (settings, state, etc.) + `--user-data-dir=${userDataDir}`, + // Skip interactive prompts + '--disable-workspace-trust', + '--skip-release-notes', + '--skip-welcome', + '--no-sandbox', + // Open the workspace folder + workspaceDir, + ]; + + const app = await electron.launch({ + executablePath: vscodePath, + args, + env, + cwd: workspaceDir, + recordVideo: { + dir: videoDir, + size: { width: 1280, height: 800 }, + }, + timeout: 60_000, + }); + + // Get the main VS Code window + const window = await app.firstWindow(); + await window.waitForLoadState('domcontentloaded'); + + const cleanup = async (): Promise<void> => { + try { + await app.close(); + } catch { + // ignore close errors + } + try { + fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch { + // ignore cleanup errors + } + }; + + return { app, window, tmpHome, workspaceDir, mockLogFile, cleanup }; +} + +/** + * Wait for VS Code's workbench to be fully ready before interacting. 
+ */ +export async function waitForWorkbench(window: Page): Promise<void> { + // VS Code renders a div.monaco-workbench when the shell is ready + await window.waitForSelector('.monaco-workbench', { timeout: 60_000 }); +} diff --git a/e2e/helpers/webview.ts b/e2e/helpers/webview.ts new file mode 100644 index 00000000..2617c58f --- /dev/null +++ b/e2e/helpers/webview.ts @@ -0,0 +1,69 @@ +import type { Frame, Page } from '@playwright/test'; +import { expect } from '@playwright/test'; + +const WEBVIEW_TIMEOUT_MS = 30_000; +const PANEL_OPEN_TIMEOUT_MS = 15_000; + +/** + * Open the Pixel Agents panel via the Command Palette and wait for the + * "Pixel Agents: Show Panel" command to execute. + */ +export async function openPixelAgentsPanel(window: Page): Promise<void> { + // Open command palette (Ctrl+Shift+P / F1) + await window.keyboard.press('F1'); + await window.waitForSelector('.quick-input-widget', { timeout: PANEL_OPEN_TIMEOUT_MS }); + + // Type the command + await window.keyboard.type('Pixel Agents: Show Panel'); + await window.waitForSelector('.quick-input-list .monaco-list-row', { + timeout: PANEL_OPEN_TIMEOUT_MS, + }); + await window.keyboard.press('Enter'); + + // Wait for the panel container to appear + await window.waitForSelector('[id="workbench.panel.bottom"]', { + timeout: PANEL_OPEN_TIMEOUT_MS, + }).catch(() => { + // Panel might not use this id; just continue + }); +} + +/** + * Find and return the Pixel Agents webview frame. + * + * VS Code renders WebviewViewProvider content in an