link-assistant
diff --git a/‎.github/workflows/e2e.yml‎
Lines changed: 103 additions & 0 deletions b/‎.github/workflows/e2e.yml‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎changelog.d/20260101_213120_fix_inference_bug.md‎
Lines changed: 8 additions & 0 deletions b/‎changelog.d/20260101_213120_fix_inference_bug.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎wasm/src/lib.rs‎
Lines changed: 2 additions & 8 deletions b/‎wasm/src/lib.rs‎
Lines changed: 2 additions & 8 deletions
diff --git a/‎web/e2e/inference.spec.ts‎
Lines changed: 160 additions & 0 deletions b/‎web/e2e/inference.spec.ts‎
Lines changed: 160 additions & 0 deletions
@@ -0,0 +1,103 @@
+# End-to-end browser tests: builds the WASM package and the web app, then runs
+# the Playwright suite in Chromium.
+name: E2E Tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+# A newer run for the same ref cancels the run that is still in progress.
+concurrency:
+  group: e2e-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  e2e-tests:
+    name: Browser E2E Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: wasm32-unknown-unknown
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20.x'
+
+      # Restored before any cargo command runs so that both the wasm-pack
+      # install and the WASM build can reuse it. The wasm-pack binary is cached
+      # too, otherwise it is recompiled from source on every run.
+      - name: Cache cargo registry
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/bin/wasm-pack
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+            wasm/target
+          key: ${{ runner.os }}-cargo-wasm-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-cargo-wasm-
+
+      # Skip the from-source build when the cache already provided the binary.
+      - name: Install wasm-pack
+        run: command -v wasm-pack >/dev/null 2>&1 || cargo install wasm-pack
+
+      # `npm ci` removes node_modules before installing, so cache npm's
+      # download cache instead of web/node_modules.
+      - name: Cache npm dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.npm
+          key: ${{ runner.os }}-npm-${{ hashFiles('web/package-lock.json') }}
+          restore-keys: |
+            ${{ runner.os }}-npm-
+
+      - name: Build WASM package
+        env:
+          RUSTFLAGS: '--cfg getrandom_backend="wasm_js" -C target-feature=+bulk-memory,+mutable-globals,+simd128'
+        run: |
+          cd wasm
+          wasm-pack build --target web --out-dir ../web/src/pkg
+
+      # `npm ci` installs exactly what package-lock.json pins and fails if the
+      # lockfile and package.json disagree, keeping CI runs reproducible.
+      - name: Install npm dependencies
+        run: |
+          cd web
+          npm ci
+
+      - name: Install Playwright browsers
+        run: |
+          cd web
+          npx playwright install chromium --with-deps
+
+      - name: Build web application
+        run: |
+          cd web
+          npm run build
+
+      - name: Run E2E tests
+        run: |
+          cd web
+          npm run test:e2e
+        env:
+          CI: true
+
+      # Uploaded on success and failure alike so the HTML report is always available.
+      - name: Upload Playwright report
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: web/playwright-report/
+          retention-days: 7
+
+      # Raw test results are only uploaded when an earlier step failed.
+      - name: Upload test results
+        uses: actions/upload-artifact@v4
+        if: failure()
+        with:
+          name: test-results
+          path: web/test-results/
+          retention-days: 7
@@ -63,6 +63,11 @@ web/src/pkg/
 .npm
 npm-debug.log*
 
+# Playwright
+web/playwright-report/
+web/test-results/
+web/playwright/.cache/
+
 # WASM build artifacts
 wasm/pkg/
 wasm/target/
 
@@ -0,0 +1,8 @@
+### Fixed
+
+- Fixed browser inference "Repeat penalty failed: unexpected rank" error that occurred when generating text. The bug was caused by incorrectly attempting to index into the logits tensor after the Llama model's forward pass, which already extracts the last position internally.
+
+### Added
+
+- Added Playwright e2e tests to verify browser inference works correctly
+- Added GitHub Actions workflow for running e2e tests on PRs and main branch
@@ -235,18 +235,12 @@ pub async fn generate(
             .map_err(|e| JsValue::from_str(&format!("Forward pass failed: {}", e)))?;
 
         // Get logits for next token prediction
+        // The Llama model already extracts the last position internally,
+        // so the output shape is [batch_size, vocab_size], not [batch_size, seq_len, vocab_size]
         let logits = logits
             .squeeze(0)
             .map_err(|e| JsValue::from_str(&format!("Squeeze failed: {}", e)))?;
 
-        let seq_len = logits
-            .dim(0)
-            .map_err(|e| JsValue::from_str(&format!("Failed to get dim: {}", e)))?;
-
-        let logits = logits
-            .get(seq_len - 1)
-            .map_err(|e| JsValue::from_str(&format!("Get logits failed: {}", e)))?;
-
         // Apply repeat penalty
         let logits = if params.repeat_penalty != 1.0 {
             let start_at = all_tokens.len().saturating_sub(params.repeat_last_n);
 
@@ -0,0 +1,160 @@
+import { test, expect } from '@playwright/test';
+
+/**
+ * E2E tests for SmolLM2 browser inference.
+ *
+ * These tests verify that the WASM-based language model can:
+ * 1. Load successfully in the browser
+ * 2. Generate text responses without errors
+ * 3. Stream tokens back to the UI
+ *
+ * Note: These tests require significant time due to:
+ * - Model download (~270MB)
+ * - WASM compilation
+ * - Inference computation
+ */
+
+test.describe('SmolLM2 Browser Inference', () => {
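+  // Run tests serially: each model test below loads the model itself on its own page,
+  // so running them in parallel would repeat that heavy work concurrently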
+  test.describe.configure({ mode: 'serial' });
+
+  // Smoke check of the landing page before any model is loaded.
+  test('should display initial UI correctly', async ({ page }) => {
+    await page.goto('/');
+
+    // Header: title plus tagline
+    const title = page.getByRole('heading', { name: 'SmolLM2 in Browser' });
+    const tagline = page.getByText(
+      'AI language model running entirely on your device via WebAssembly'
+    );
+    await expect(title).toBeVisible();
+    await expect(tagline).toBeVisible();
+
+    // The load button must be both shown and clickable
+    const loadButton = page.getByRole('button', { name: /Load Model/i });
+    await expect(loadButton).toBeVisible();
+    await expect(loadButton).toBeEnabled();
+
+    // Greeting message shown before the user has typed anything
+    const greeting = page.getByText(
+      /Hello! I'm SmolLM2, a small language model running entirely in your browser/
+    );
+    await expect(greeting).toBeVisible();
+
+    // Footer privacy note
+    const privacyNote = page.getByText(/No data sent to servers/);
+    await expect(privacyNote).toBeVisible();
+
+    // Initial status line - the worker reports 'Worker initialized' on startup
+    const workerStatus = page.getByText('Worker initialized');
+    await expect(workerStatus).toBeVisible();
+  });
+
+  // Loads the model through the UI and verifies the page reaches the ready state.
+  test('should load model successfully', async ({ page }) => {
+    await page.goto('/');
+
+    const loadButton = page.getByRole('button', { name: /Load Model/i });
+
+    // Verify initial state
+    await expect(loadButton).toBeVisible();
+
+    // Click load button
+    await loadButton.click();
+
+    // Should show loading status
+    await expect(page.getByText(/Initializing|Downloading|Loading/i)).toBeVisible({
+      timeout: 10000,
+    });
+
+    // Wait for model to be ready (this can take several minutes)
+    await expect(page.getByText('Model ready')).toBeVisible({
+      timeout: 5 * 60 * 1000, // 5 minutes
+    });
+
+    // Load button should be gone
+    await expect(loadButton).not.toBeVisible();
+
+    // Message input should accept text. The editor is filled with `fill()` in the
+    // generation tests, i.e. it is a contenteditable element rather than a native
+    // control, and `toBeEnabled()` always passes for such elements.
+    // `toBeEditable()` checks what this step actually cares about.
+    await expect(page.locator('.cs-message-input__content-editor')).toBeEditable();
+  });
+
+  // Regression test for the "Repeat penalty failed: unexpected rank" error:
+  // loads the model, sends one message and checks generation finishes cleanly.
+  test('should generate text response without errors', async ({ page }) => {
+    await page.goto('/');
+
+    // Listen for console errors from the start
+    const consoleErrors: string[] = [];
+    page.on('console', (msg) => {
+      if (msg.type() === 'error') {
+        consoleErrors.push(msg.text());
+      }
+    });
+
+    // Load the model first
+    await page.getByRole('button', { name: /Load Model/i }).click();
+    await expect(page.getByText('Model ready')).toBeVisible({
+      timeout: 5 * 60 * 1000,
+    });
+
+    // Send a message
+    const messageInput = page.locator('.cs-message-input__content-editor');
+    await messageInput.fill('Hello');
+    await messageInput.press('Enter');
+
+    // Should show user message. The match must be exact: the initial greeting
+    // ("Hello! I'm SmolLM2, ...") also contains "Hello", so a substring match
+    // would already pass before anything was sent.
+    await expect(page.getByText('Hello', { exact: true }).first()).toBeVisible();
+
+    // Wait for generation to complete (typing indicator should appear then disappear)
+    // The response should appear within 2 minutes
+    const thinkingIndicator = page.getByText('SmolLM2 is thinking...');
+    await expect(thinkingIndicator).toBeVisible({ timeout: 10000 });
+
+    // Wait for typing indicator to disappear (generation complete)
+    await expect(thinkingIndicator).not.toBeVisible({
+      timeout: 2 * 60 * 1000,
+    });
+
+    // Check for the critical error that was reported in issue #5
+    const repeatPenaltyError = consoleErrors.find((e) =>
+      e.includes('Repeat penalty failed: unexpected rank')
+    );
+    expect(repeatPenaltyError).toBeUndefined();
+
+    // There should be no error status
+    await expect(page.getByText(/Error:/i)).not.toBeVisible();
+
+    // Status should still be "Model ready" (not error state)
+    await expect(page.getByText('Model ready')).toBeVisible();
+  });
+
+  // Sends a prompt and verifies that a second incoming (AI) message appears.
+  test('should stream tokens to the UI', async ({ page }) => {
+    const modelLoadTimeout = 5 * 60 * 1000;
+    const generationTimeout = 2 * 60 * 1000;
+
+    await page.goto('/');
+
+    // The model has to be loaded before a prompt can be sent
+    const loadButton = page.getByRole('button', { name: /Load Model/i });
+    await loadButton.click();
+    await expect(page.getByText('Model ready')).toBeVisible({
+      timeout: modelLoadTimeout,
+    });
+
+    // Type the prompt and submit it with Enter
+    const editor = page.locator('.cs-message-input__content-editor');
+    await editor.fill('Count from 1 to 5');
+    await editor.press('Enter');
+
+    // The typing indicator shows up while generating and goes away when done
+    const thinkingIndicator = page.getByText('SmolLM2 is thinking...');
+    await expect(thinkingIndicator).toBeVisible({ timeout: 10000 });
+    await expect(thinkingIndicator).not.toBeVisible({
+      timeout: generationTimeout,
+    });
+
+    // Two incoming messages are expected: the initial greeting and the new response
+    const incomingMessages = page.locator('[class*="cs-message--incoming"]');
+    await expect(incomingMessages).toHaveCount(2, { timeout: 5000 });
+  });
+});
+
+test.describe('Error Handling', () => {
+  // Starts a model load and checks that the page shows progress and stays usable.
+  // It does not wait for the load to finish.
+  test('should handle model loading gracefully', async ({ page }) => {
+    await page.goto('/');
+
+    // Click load button
+    await page.getByRole('button', { name: /Load Model/i }).click();
+
+    // Should not crash immediately. Accept the same set of loading statuses as
+    // the model-loading test in this file, so both agree on what counts as
+    // "loading has started".
+    await expect(page.getByText(/Initializing|Downloading|Loading/i)).toBeVisible({
+      timeout: 10000,
+    });
+
+    // Page should remain responsive
+    await expect(page.getByRole('heading', { name: 'SmolLM2 in Browser' })).toBeVisible();
+  });
+});