pablodelucca · florintimbuc · Mar 24, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -75,6 +75,13 @@ jobs:
         run: npm run lint
         continue-on-error: true
 
+      # Run the webview-ui test suite. Only attempted when the webview install
+      # step succeeded; continue-on-error lets the remaining checks run, and the
+      # recorded outcome (steps.webview_test.outcome) is read by the summary and
+      # by the final pass/fail loop later in this job.
+      - name: Webview Tests
+        id: webview_test
+        if: always() && steps.install_webview.outcome == 'success'
+        working-directory: webview-ui
+        run: npm test
+        continue-on-error: true
+
       - name: Format Check
         id: format_check
         if: always() && steps.install_root.outcome == 'success'
@@ -118,6 +125,7 @@ jobs:
           TYPE_CHECK: ${{ steps.type_check.outcome }}
           ROOT_LINT: ${{ steps.root_lint.outcome }}
           WEBVIEW_LINT: ${{ steps.webview_lint.outcome }}
+          WEBVIEW_TEST: ${{ steps.webview_test.outcome }}
           FORMAT_CHECK: ${{ steps.format_check.outcome }}
           BUILD: ${{ steps.build.outcome }}
           AUDIT_ROOT: ${{ steps.audit_root.outcome }}
@@ -138,6 +146,7 @@ jobs:
             echo "| **Type check** | $(status "$TYPE_CHECK") |"
             echo "| **Root lint** | $(status "$ROOT_LINT") |"
             echo "| **Webview lint** | $(status "$WEBVIEW_LINT") |"
+            echo "| **Webview tests** | $(status "$WEBVIEW_TEST") |"
             echo "| **Format check** | $(status "$FORMAT_CHECK") |"
             echo "| **Build** | $(status "$BUILD") |"
             echo "| Audit root _(advisory)_ | $(status "$AUDIT_ROOT") |"
@@ -156,17 +165,121 @@ jobs:
           TYPE_CHECK: ${{ steps.type_check.outcome }}
           ROOT_LINT: ${{ steps.root_lint.outcome }}
           WEBVIEW_LINT: ${{ steps.webview_lint.outcome }}
+          WEBVIEW_TEST: ${{ steps.webview_test.outcome }}
           FORMAT_CHECK: ${{ steps.format_check.outcome }}
           BUILD: ${{ steps.build.outcome }}
         run: |
           failed=0
           for step in CHECKOUT SETUP_NODE INSTALL_ROOT INSTALL_WEBVIEW \
-                      TYPE_CHECK ROOT_LINT WEBVIEW_LINT FORMAT_CHECK \
-                      BUILD; do
+                      TYPE_CHECK ROOT_LINT WEBVIEW_LINT \
+                      WEBVIEW_TEST FORMAT_CHECK BUILD; do
             eval "val=\$$step"
             if [ "$val" != "success" ]; then
               echo "::error::$step failed"
               failed=1
             fi
           done
           exit "$failed"
+
+  # End-to-end tests: once the `ci` job has passed, build the extension and the
+  # webview, then run `npm run e2e` on Linux, macOS and Windows.
+  # Unlike the `ci` job, this job has no final gate step, so the install and
+  # test steps must fail the job themselves (no continue-on-error on them).
+  e2e:
+    needs: ci
+    strategy:
+      # Let every OS finish so a failure on one platform does not cancel the others.
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 15
+    env:
+      # Same directory that the Playwright cache restore/save steps use as `path`.
+      PLAYWRIGHT_BROWSERS_PATH: .playwright-browsers
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Setup Node
+        uses: actions/setup-node@v6
+        with:
+          node-version-file: .nvmrc
+          cache: npm
+          cache-dependency-path: |
+            package-lock.json
+            webview-ui/package-lock.json
+
+      # Restore only; the matching save happens in an explicit step further down.
+      - name: Restore VS Code Cache
+        id: cache_vscode_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: .vscode-test
+          key: vscode-test-${{ runner.os }}-${{ hashFiles('e2e/global-setup.ts') }}-v2
+          restore-keys: |
+            vscode-test-${{ runner.os }}-
+
+      # Restore only; the matching save happens in an explicit step further down.
+      - name: Restore Playwright Cache
+        id: cache_playwright_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: .playwright-browsers
+          key: playwright-browsers-${{ runner.os }}-${{ hashFiles('package-lock.json') }}-v1
+          restore-keys: |
+            playwright-browsers-${{ runner.os }}-
+
+      - name: Install Root Dependencies
+        run: npm ci
+
+      - name: Install Webview Dependencies
+        working-directory: webview-ui
+        run: npm ci
+
+      - name: Build
+        run: node esbuild.js
+
+      - name: Build Webview
+        working-directory: webview-ui
+        run: npm run build
+
+      # No continue-on-error here: a broken browser install must fail the job.
+      # The `always()` steps below still run and report the outcome.
+      - name: Install Playwright Dependencies
+        id: install_playwright_deps
+        run: npx playwright install --with-deps chromium
+
+      # No continue-on-error here either: with it set and no gate step, a failing
+      # e2e suite would leave this job green. If e2e is meant to be advisory,
+      # restore it and label the row as advisory in the summary instead.
+      - name: E2E Tests
+        id: e2e_test
+        if: steps.install_playwright_deps.outcome == 'success'
+        run: npm run e2e
+
+      # Save only on an exact-key miss, after a passing e2e run, and only when the
+      # executable marker file exists. always() keeps the condition evaluated
+      # even when an earlier step failed.
+      - name: Save VS Code Cache
+        if: always() && steps.cache_vscode_restore.outputs.cache-hit != 'true' && steps.e2e_test.outcome == 'success' && hashFiles('.vscode-test/vscode-executable.txt') != ''
+        uses: actions/cache/save@v4
+        with:
+          path: .vscode-test
+          key: ${{ steps.cache_vscode_restore.outputs.cache-primary-key }}
+
+      # Save only on an exact-key miss, after a successful browser install, and
+      # only when the browsers directory is non-empty.
+      - name: Save Playwright Cache
+        if: always() && steps.cache_playwright_restore.outputs.cache-hit != 'true' && steps.install_playwright_deps.outcome == 'success' && hashFiles('.playwright-browsers/**') != ''
+        uses: actions/cache/save@v4
+        with:
+          path: .playwright-browsers
+          key: ${{ steps.cache_playwright_restore.outputs.cache-primary-key }}
+
+      # Always publish a per-OS result table; any outcome other than `success`
+      # (including `skipped`) is rendered as FAIL.
+      - name: Write Step Summary
+        if: always()
+        shell: bash
+        env:
+          OS: ${{ matrix.os }}
+          INSTALL_PLAYWRIGHT_DEPS: ${{ steps.install_playwright_deps.outcome }}
+          E2E_TEST: ${{ steps.e2e_test.outcome }}
+        run: |
+          status() {
+            if [ "$1" = "success" ]; then echo "✅ PASS"; else echo "❌ FAIL"; fi
+          }
+          {
+            echo "## E2E Results ($OS)"
+            echo
+            echo "| Check | Result |"
+            echo "| --- | --- |"
+            echo "| Install Playwright deps | $(status "$INSTALL_PLAYWRIGHT_DEPS") |"
+            echo "| E2E tests | $(status "$E2E_TEST") |"
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml
@@ -10,6 +10,7 @@ permissions:
 jobs:
   check:
     runs-on: ubuntu-latest
+    if: ${{ github.actor != 'dependabot[bot]' }}
     steps:
       - uses: amannn/action-semantic-pull-request@v6
         env:

diff --git a/.github/workflows/update-badges.yml b/.github/workflows/update-badges.yml
@@ -8,6 +8,7 @@ on:
 jobs:
   update-badges:
     runs-on: ubuntu-latest
+    if: ${{ github.repository == 'pablodelucca/pixel-agents' }}
 
     steps:
       - name: Fetch VS Code Marketplace stats

diff --git a/.gitignore b/.gitignore
@@ -17,6 +17,10 @@ Thumbs.db
 .vscode-test/
 /.idea
 
+# E2E test artifacts
+test-results/
+playwright-report/
+
 # Build artifacts
 *.vsix
 *.map

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -94,6 +94,49 @@ These conventions are enforced by custom ESLint rules (`eslint-rules/pixel-agent
 
 These rules are set to `warn` — they won't block your PR but will flag violations for cleanup.
 
+## End-to-End Tests
+
+The `e2e/` directory contains Playwright tests that launch a real VS Code instance with the extension loaded in development mode.
+
+### Running e2e tests locally
+
+```bash
+# Build the extension first (tests load the compiled output)
+npm run build
+
+# Run the e2e tests
+npm run e2e
+
+# Step-by-step debug mode
+npm run e2e:debug
+```
+
+On the first run, `@vscode/test-electron` will download a stable VS Code release into `.vscode-test/` (≈200 MB). Subsequent runs reuse the cache.
+
+### Artifacts
+
+Test artifacts are written to `test-results/e2e/`, and the HTML report to `playwright-report/e2e/`:
+
+| Path | Contents |
+|---|---|
+| `test-results/e2e/videos/<test-name>/` | `.webm` screen recording for every test |
+| `playwright-report/e2e/` | Playwright HTML report (`npx playwright show-report playwright-report/e2e`) |
+| `test-results/e2e/*.png` | Final screenshots saved on failure |
+
+On failure, the test output prints the path to the video for that run.
+
+### Mock claude
+
+Tests never invoke the real `claude` CLI. Instead, a bash script at `e2e/fixtures/mock-claude` (with a batch-file counterpart, `e2e/fixtures/mock-claude.cmd`, for Windows) is copied into an isolated `bin/` directory and prepended to `PATH` before VS Code starts.
+
+The mock:
+1. Parses `--session-id <uuid>` from its arguments.
+2. Appends a line to `$HOME/.claude-mock/invocations.log` so tests can assert it was called.
+3. Creates `$HOME/.claude/projects/<project-hash>/<session-id>.jsonl` with a minimal init line so the extension's file-watcher can detect the session.
+4. Stays alive for up to 30 s (keeping the terminal open), then exits.
+
+Each test runs with an isolated `HOME` and `--user-data-dir`, so no test state leaks between runs or into your real VS Code profile.
+
 ## Submitting a Pull Request
 
 1. Fork the repo and create a feature branch from `main`

diff --git a/e2e/fixtures/mock-claude b/e2e/fixtures/mock-claude
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+# Mock 'claude' executable for Pixel Agents e2e tests.
+#
+# Behaviour:
+#   1. Parses --session-id <id> from args.
+#   2. Appends an invocation record to $HOME/.claude-mock/invocations.log.
+#   3. Creates the expected JSONL file under $HOME/.claude/projects/<hash>/<id>.jsonl
+#      using the same path-hash algorithm as agentManager.ts
+#      (replace every non-[a-zA-Z0-9-] char with '-').
+#   4. Writes a minimal valid JSONL line so the extension file-watcher can proceed.
+#   5. Stays alive for up to 30 s (tests can kill it once assertions pass).
+
+set -euo pipefail
+
+SESSION_ID=""
+PREV=""
+for arg in "$@"; do
+  if [ "$PREV" = "--session-id" ]; then
+    SESSION_ID="$arg"
+  fi
+  PREV="$arg"
+done
+
+LOG_DIR="${HOME}/.claude-mock"
+mkdir -p "$LOG_DIR"
+echo "$(date -Iseconds) session-id=${SESSION_ID} cwd=$(pwd) args=$*" >> "${LOG_DIR}/invocations.log"
+
+if [ -n "$SESSION_ID" ]; then
+  CWD="$(pwd)"
+  # Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-')
+  DIR_NAME="$(printf '%s' "$CWD" | tr -c 'a-zA-Z0-9-' '-')"
+  PROJECT_DIR="${HOME}/.claude/projects/${DIR_NAME}"
+  mkdir -p "$PROJECT_DIR"
+  JSONL_FILE="${PROJECT_DIR}/${SESSION_ID}.jsonl"
+
+  # Write a minimal system init line so the extension watcher sees the file.
+  printf '{"type":"system","subtype":"init","content":"mock-claude-ready"}\n' >> "$JSONL_FILE"
+fi
+
+# Stay alive so the VS Code terminal doesn't immediately close.
+sleep 30 &
+SLEEP_PID=$!
+
+# Clean exit on SIGTERM/SIGINT.
+trap 'kill $SLEEP_PID 2>/dev/null; exit 0' SIGTERM SIGINT
+
+wait $SLEEP_PID || true
diff --git a/e2e/fixtures/mock-claude.cmd b/e2e/fixtures/mock-claude.cmd
@@ -0,0 +1,49 @@
+@echo off
+REM Mock 'claude' executable for Pixel Agents e2e tests (Windows).
+REM
+REM Behaviour:
+REM   1. Parses --session-id <id> from args.
+REM   2. Appends an invocation record to <home>\.claude-mock\invocations.log,
+REM      where <home> is %HOME% when defined and %USERPROFILE% otherwise.
+REM   3. Appends a minimal init line to <home>\.claude\projects\<hash>\<id>.jsonl;
+REM      <hash> is the current directory with every char outside [a-zA-Z0-9-]
+REM      replaced by '-'.
+REM   4. Stays alive for about 30 s (tests can kill it once assertions pass).
+
+setlocal enabledelayedexpansion
+
+set "SESSION_ID="
+set "PREV="
+
+REM Walk the arguments; the one following --session-id is the session id.
+:parse_args
+if "%~1"=="" goto done_args
+if "!PREV!"=="--session-id" set "SESSION_ID=%~1"
+set "PREV=%~1"
+shift
+goto parse_args
+:done_args
+
+REM Use HOME if set (our e2e sets it), fall back to USERPROFILE
+if defined HOME (
+  set "MOCK_HOME=%HOME%"
+) else (
+  set "MOCK_HOME=%USERPROFILE%"
+)
+
+set "LOG_DIR=%MOCK_HOME%\.claude-mock"
+if not exist "%LOG_DIR%" mkdir "%LOG_DIR%"
+echo %DATE% %TIME% session-id=%SESSION_ID% cwd=%CD% args=%* >> "%LOG_DIR%\invocations.log"
+
+if "%SESSION_ID%"=="" goto stay_alive
+
+REM Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-')
+REM The path is handed to PowerShell through an environment variable instead of
+REM being spliced into the command text, so a directory name that contains a
+REM single quote (e.g. C:\Users\O'Brien) cannot terminate the PowerShell string
+REM literal and leave DIR_NAME unset.
+set "MOCK_CWD=%CD%"
+for /f "delims=" %%D in ('powershell -NoProfile -Command "[regex]::Replace($env:MOCK_CWD, '[^a-zA-Z0-9-]', '-')"') do set "DIR_NAME=%%D"
+
+set "PROJECT_DIR=%MOCK_HOME%\.claude\projects\%DIR_NAME%"
+if not exist "%PROJECT_DIR%" mkdir "%PROJECT_DIR%"
+
+set "JSONL_FILE=%PROJECT_DIR%\%SESSION_ID%.jsonl"
+echo {"type":"system","subtype":"init","content":"mock-claude-ready"} >> "%JSONL_FILE%"
+
+:stay_alive
+REM Stay alive so the VS Code terminal doesn't immediately close.
+REM ping is used as the delay because the timeout command requires a console;
+REM 31 echo requests to localhost, sent about one second apart, take roughly 30 s.
+ping -n 31 127.0.0.1 > nul 2>&1
diff --git a/e2e/global-setup.ts b/e2e/global-setup.ts
@@ -0,0 +1,18 @@
+import { downloadAndUnzipVSCode } from '@vscode/test-electron';
+import fs from 'fs';
+import path from 'path';
+
+// `.vscode-test` directory next to this file's parent folder; globalSetup passes
+// it to downloadAndUnzipVSCode as the cache path.
+export const VSCODE_CACHE_DIR = path.join(__dirname, '../.vscode-test');
+// Text file inside the cache directory; globalSetup writes the path of the
+// VS Code executable into it.
+export const VSCODE_PATH_FILE = path.join(VSCODE_CACHE_DIR, 'vscode-executable.txt');
+
+/**
+ * Global setup for the e2e run: ensures the stable VS Code build is available
+ * in VSCODE_CACHE_DIR and records the executable's path in VSCODE_PATH_FILE.
+ */
+export default async function globalSetup(): Promise<void> {
+  console.log('[e2e] Ensuring VS Code is downloaded...');
+
+  const executablePath = await downloadAndUnzipVSCode({
+    cachePath: VSCODE_CACHE_DIR,
+    version: 'stable',
+  });
+  console.log(`[e2e] VS Code executable: ${executablePath}`);
+
+  // Make sure the cache directory exists before writing the marker file into it.
+  fs.mkdirSync(VSCODE_CACHE_DIR, { recursive: true });
+  fs.writeFileSync(VSCODE_PATH_FILE, executablePath, 'utf8');
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -10,6 +10,7 @@ permissions: @@
     jobs:
       check:
         runs-on: ubuntu-latest
+        if: ${{ github.actor != 'dependabot[bot]' }}
         steps:
           - uses: amannn/action-semantic-pull-request@v6
             env:
@@ Expand Down @@