AIToolsLab · monicaz010101 · Nov 11, 2025 · Nov 17, 2025 · Nov 18, 2025 · Nov 25, 2025
diff --git a/.github/workflows/playwright.yml b/.github/workflows/playwright.yml
@@ -1,19 +1,11 @@
-name: Playwright Tests
+name: Playwright Visual Regression Tests
 on:
-  # push:
-  #   branches: [ main ]
-  # pull_request:
-  #   branches: [ main ]
-  workflow_dispatch:  # Enables manual triggering only
-    inputs:
-      update-snapshots:
-        description: 'Update visual snapshots'
-        required: false
-        type: boolean
-        default: false
+  workflow_dispatch:  # Manual trigger only
+
 defaults:
   run:
     working-directory: frontend
+
 jobs:
   test:
     timeout-minutes: 60
@@ -31,19 +23,10 @@ jobs:
       run: npm run build
     - name: Run Playwright tests
       run: npx playwright test
-      # Playwright will automatically start the frontend via webServer config
-      # API calls are mocked in the test file with page.route()
     - uses: actions/upload-artifact@v4
       if: ${{ !cancelled() }}
       with:
         name: playwright-report
         path: frontend/playwright-report/
         retention-days: 30
-    - name: Upload updated snapshots
-      if: github.event.inputs.update-snapshots == 'true'
-      uses: actions/upload-artifact@v4
-      with:
-        name: updated-snapshots
-        path: frontend/tests/**/*-snapshots/**
-        retention-days: 30
 
diff --git a/README.md b/README.md
@@ -43,4 +43,5 @@ We follow specific GitHub conventions to keep our project organized and maintain
 For detailed conventions on branch naming, commit messages, PR process, and issue management, see [CONTRIBUTING.md](CONTRIBUTING.md).
 
 # Running visual regression tests
-Run manually via Actions tab. To update baselines after UI changes, re-run with "Update visual snapshots" checked and commit the downloaded artifact as new baseline images.
+
+The visual regression tests capture screenshots and ensure UI consistency of the application. See [VISUAL_REGRESSION.md](VISUAL_REGRESSION.md) for detailed instructions on running tests and updating baseline images.
diff --git a/VISUAL_REGRESSION.md b/VISUAL_REGRESSION.md
@@ -0,0 +1,50 @@
+# Visual Regression Testing Guide
+
+This project uses Playwright to run visual regression tests that capture screenshots and compare them against baseline images. Currently, we have tests set up only for the demo page of the application.
+
+## Running the tests
+
+1. Navigate to the **Actions** tab in GitHub
+2. Select **Playwright Visual Regression Tests** workflow
+3. Click **Run workflow** button and choose the branch you wish to run tests on
+4. The workflow will run tests against all browsers (Chromium, Firefox, WebKit)
+
+## Understanding test results
+
+- **✅ Pass**: No visual changes detected - your PR is ready for merge
+- **❌ Fail**: Visual differences detected - review the changes
+
+## Reviewing visual differences
+
+When tests fail:
+
+1. Go to the failed workflow run
+2. Download the **playwright-report** artifact
+3. Extract the artifact and open `index.html` in a browser
+4. Review the visual comparison showing:
+   - Expected (baseline) image
+   - Actual (current) image
+   - Diff highlighting the changes
+
+## Updating baseline images
+
+If the UI changes are **intentional** and you want to update the baselines:
+
+1. From the Playwright report, download the actual images for each browser
+2. Replace the existing baseline images in `frontend/tests/demo-page-visual.spec.ts-snapshots/`
+3. Rename downloaded images to match existing baseline names:
+   - `demo-page-chromium-linux.png`
+   - `demo-page-firefox-linux.png`
+   - `demo-page-webkit-linux.png`
+4. Commit and push the updated baseline images
+5. Re-run the visual regression test to verify it passes
+
+## Baseline image locations
+
+Current baseline images are stored in:
+```text
+frontend/tests/demo-page-visual.spec.ts-snapshots/
+├── demo-page-chromium-linux.png
+├── demo-page-firefox-linux.png
+└── demo-page-webkit-linux.png
+```
diff --git a/frontend/src/editor/editor.module.css b/frontend/src/editor/editor.module.css
@@ -1,7 +1,7 @@
 /* Controls CSS for editor in all the pages (editor, demo, study) */
 .editorContainer {
 	margin: 20px;
-	background: #fff;
+	background: #ffffff;
 	color: #000;
 	position: relative;
 	line-height: 20px;

diff --git a/frontend/tests/demo-page-visual.spec.ts b/frontend/tests/demo-page-visual.spec.ts
@@ -1,13 +1,9 @@
 import { test, expect } from '@playwright/test';
+import { setupMockBackend } from './mockBackend';
 
 test('demo page - visual regression', async ({ page }) => {
-  // Intercept API calls and return mocked responses
-  await page.route('/api/**', async route => {
-    await route.fulfill({
-      status: 200,
-      body: JSON.stringify({ message: 'mocked' })
-    });
-  });
+  // Setup mock backend with actual API structure
+  await setupMockBackend(page);
 
   await page.goto('/');
 

diff --git a/frontend/tests/demo-page-visual.spec.ts-snapshots/demo-page-chromium-linux.png b/frontend/tests/demo-page-visual.spec.ts-snapshots/demo-page-chromium-linux.png
diff --git a/frontend/tests/demo-page-visual.spec.ts-snapshots/demo-page-firefox-linux.png b/frontend/tests/demo-page-visual.spec.ts-snapshots/demo-page-firefox-linux.png
diff --git a/frontend/tests/demo-page-visual.spec.ts-snapshots/demo-page-webkit-linux.png b/frontend/tests/demo-page-visual.spec.ts-snapshots/demo-page-webkit-linux.png
diff --git a/frontend/tests/draft-flows.spec.ts b/frontend/tests/draft-flows.spec.ts
@@ -0,0 +1,145 @@
+import { test, expect } from '@playwright/test';
+import { setupMockBackend } from './mockBackend';
+
+test.describe('Draft component - Main flows', () => {
+  test.beforeEach(async ({ page }) => {
+    // Install the mocked /api/get_suggestion route before the page loads
+    await setupMockBackend(page);
+
+    // Load the app root; the tests below look for the draft UI inside its #editor-frame iframe
+    await page.goto('/');
+
+    // Treat the banner containing "Thoughtful" as the signal that the page has rendered
+    await expect(page.getByRole('banner')).toContainText('Thoughtful');
+  });
+
+  test('should display three generation option buttons', async ({ page }) => {
+    // The generation buttons are rendered inside the editor iframe
+    const editorFrame = page.frameLocator('#editor-frame');
+    // Each button is looked up by its exact title attribute
+    const buttonTitles = [
+      'Examples of what you could write next:',
+      'Possible questions your reader might have:',
+      'Advice for your next words:',
+    ];
+    for (const title of buttonTitles) {
+      await expect(editorFrame.locator(`button[title="${title}"]`)).toBeVisible();
+    }
+  });
+
+  test('should generate and display example sentences when clicking example button', async ({ page }) => {
+    const editorFrame = page.frameLocator('#editor-frame');
+
+    // Request example sentences via the button identified by its title
+    await editorFrame.locator('button[title="Examples of what you could write next:"]').click();
+
+    // First suggestion gets an explicit 5s budget; the remaining ones use the default timeout
+    await expect(editorFrame.getByText('First example suggestion')).toBeVisible({ timeout: 5000 });
+    for (const text of ['Second example suggestion', 'Third example suggestion']) {
+      await expect(editorFrame.getByText(text)).toBeVisible();
+    }
+  });
+
+  test('should generate and display reader perspective when clicking reader button', async ({ page }) => {
+    const editorFrame = page.frameLocator('#editor-frame');
+
+    // Request reader questions via the button identified by its title
+    await editorFrame.locator('button[title="Possible questions your reader might have:"]').click();
+
+    // First suggestion gets an explicit 5s budget; the remaining ones use the default timeout
+    await expect(editorFrame.getByText('First reader perspective')).toBeVisible({ timeout: 5000 });
+    for (const text of ['Second reader perspective', 'Third reader perspective']) {
+      await expect(editorFrame.getByText(text)).toBeVisible();
+    }
+  });
+
+  test('should generate and display advice when clicking advice button', async ({ page }) => {
+    const editorFrame = page.frameLocator('#editor-frame');
+
+    // Request advice via the button identified by its title
+    await editorFrame.locator('button[title="Advice for your next words:"]').click();
+
+    // First suggestion gets an explicit 5s budget; the remaining ones use the default timeout
+    await expect(editorFrame.getByText('First piece of advice')).toBeVisible({ timeout: 5000 });
+    for (const text of ['Second piece of advice', 'Third piece of advice']) {
+      await expect(editorFrame.getByText(text)).toBeVisible();
+    }
+  });
+
+  test('should delete suggestion when clicking delete button', async ({ page }) => {
+    const frame = page.frameLocator('#editor-frame');
+
+    // Each scenario pairs a generation button title with the first suggestion
+    // text expected to appear after clicking it.
+    const scenarios = [
+      {
+        buttonTitle: 'Examples of what you could write next:',
+        firstSuggestion: 'First example suggestion',
+      },
+      {
+        buttonTitle: 'Possible questions your reader might have:',
+        firstSuggestion: 'First reader perspective',
+      },
+      {
+        buttonTitle: 'Advice for your next words:',
+        firstSuggestion: 'First piece of advice',
+      },
+    ];
+
+    // Run the scenarios one after another on the same page: generate, then delete
+    for (const { buttonTitle, firstSuggestion } of scenarios) {
+      const suggestion = frame.getByText(firstSuggestion);
+
+      // Generate the suggestion and wait for it to appear
+      await frame.locator(`button[title="${buttonTitle}"]`).click();
+      await expect(suggestion).toBeVisible({ timeout: 5000 });
+
+      // Click the first delete button and confirm the suggestion is gone
+      await frame.locator('button[aria-label="Delete saved item"]').first().click();
+      await expect(suggestion).not.toBeVisible({ timeout: 2000 });
+    }
+  });
+
+  test('should disable buttons during loading', async ({ page }) => {
+    const frame = page.frameLocator('#editor-frame');
+    const exampleButton = frame.locator('button[title="Examples of what you could write next:"]');
+    const readerButton = frame.locator('button[title="Possible questions your reader might have:"]');
+    const adviceButton = frame.locator('button[title="Advice for your next words:"]');
+
+    // Delay the response so the loading state is observable; this later route overrides beforeEach's mock
+    await page.route('**/api/get_suggestion*', async (route) => {
+      await page.waitForTimeout(1000); // simulate network delay
+      await route.fulfill({
+        status: 200,
+        contentType: 'application/json',
+        body: JSON.stringify({
+          result: '- First example suggestion\n\n- Second example suggestion\n\n- Third example suggestion'
+        }),
+      });
+    });
+
+    // Click the button to trigger request
+    await exampleButton.click();
+
+    // While the delayed request is pending, all three buttons must be disabled
+    await expect(exampleButton).toBeDisabled();
+    await expect(readerButton).toBeDisabled();
+    await expect(adviceButton).toBeDisabled();
+
+    // Wait for the UI to render the first suggestion
+    await expect(frame.getByText('First example suggestion')).toBeVisible();
+
+    // Verify buttons are enabled again
+    await expect(exampleButton).toBeEnabled();
+    await expect(readerButton).toBeEnabled();
+    await expect(adviceButton).toBeEnabled();
+  });
+
+  test('should display empty state message when no suggestions generated', async ({ page }) => {
+    // No generation button has been clicked yet, so the placeholder prompt should be showing
+    const editorFrame = page.frameLocator('#editor-frame');
+    const emptyStateMessage = editorFrame.getByText('Click the button above to generate a suggestion.');
+    await expect(emptyStateMessage).toBeVisible();
+  });
+
+});
diff --git a/frontend/tests/mockBackend.ts b/frontend/tests/mockBackend.ts
@@ -0,0 +1,50 @@
+import { Page } from '@playwright/test';
+
+/**
+ * Mock backend API responses that match the actual FastAPI backend structure
+ */
+
+export interface GenerationResult {
+  generation_type: string; // the mock echoes the request's `gtype`, or 'unknown' when it is absent
+  result: string; // suggestion text; the mock returns '- ' bullet items separated by blank lines
+  extra_data: Record<string, any>; // the mock always sends an empty object
+}
+
+/**
+ * Setup mock backend for /api/get_suggestion endpoint
+ * Matches the actual backend API structure from server.py and nlp.py
+ */
+export async function setupMockBackend(page: Page) {
+  // Canned `result` strings, keyed by the `gtype` field of the request body
+  const cannedResults = new Map<string, string>([
+    [
+      'example_sentences',
+      '- First example suggestion\n\n- Second example suggestion\n\n- Third example suggestion',
+    ],
+    [
+      'analysis_readerPerspective',
+      '- First reader perspective\n\n- Second reader perspective\n\n- Third reader perspective',
+    ],
+    [
+      'proposal_advice',
+      '- First piece of advice\n\n- Second piece of advice\n\n- Third piece of advice',
+    ],
+  ]);
+
+  // Mock /api/get_suggestion
+  await page.route('**/api/get_suggestion', async (route) => {
+    const gtype = route.request().postDataJSON()?.gtype;
+    // A missing or unrecognised gtype yields an empty result string
+    const payload: GenerationResult = {
+      generation_type: gtype || 'unknown',
+      result: cannedResults.get(gtype) ?? '',
+      extra_data: {},
+    };
+
+    await route.fulfill({
+      status: 200,
+      contentType: 'application/json',
+      body: JSON.stringify(payload),
+    });
+  });
+}
diff --git a/sandbox/ideas/vis-spec.txt b/sandbox/ideas/vis-spec.txt
@@ -0,0 +1,5 @@
+Text editor with sidebar that has an AI conversation about the document. When the writer sends a message, the current state of the document (including what text is around the cursor) is also sent in the conversation (maybe the user chat message and the document text are given in separate XML-ish tags for the LLM). The AI is given tools to show visualizations in ASCII art, Mermaid, or plain Markdown (e.g., for a hierarchical outline or presentation slides). To start, the user presses a button to request that the AI suggest visualizations of the document content that could help them allocate their attention in their writing. The AI then responds with brief descriptions of 3 or more possible visualizations that it could make, and maybe some clarifying questions about the rhetorical situation if needed. Then the writer responds with the visualization they want (one of those or something different). The AI then generates that visualization.
+
+The visualizations should include references to specific parts of the document. When the user clicks on that part of the visualization, the corresponding part of the document should highlight. When the user clicks in the document, the part of the visualization with a document reference closest to the text that's near the cursor should be highlighted, so the visualization is interactive and two way.
+
+The AI should be very careful to ground its observations and visualizations in the specific content of the document, not generic things.
diff --git a/sandbox/ideas/visualization-sys-msg.txt b/sandbox/ideas/visualization-sys-msg.txt
@@ -0,0 +1,9 @@
+We are powering a tool that is designed to help people write thoughtfully, with full cognitive engagement in their work, thinking about their complete rhetorical situation.
+
+The user is currently in a "visualization" part of the tool, where the tool promises to help the writer visualize their document to help them understand what points they are making, what their current structure is, what are the concepts and relationships in their document, and many other possible visualizations. The appropriate visualization will depend on the document, the writer, and the context. The writer may not have provided us with all necessary context; we should ask for additional details as needed.
+
+The user will provide a document that they're working on. For our initial response, we will list 3 or more possible visualizations that we could make for them, each with an example of a specific relationship or insight that they might observe if they request that visualization. We will wait for the user to request a visualization (which might differ from what we suggested). Then we will provide the requested visualization. We are allowed to generate Mermaid diagrams using ```mermaid fenced code blocks, ASCII art in ```pre code blocks (which will be displayed monospace), or plain Markdown (for outlines or whatever), no code block needed in that case.
+
+We should reference specific parts of the document as much as possible. Within the body of the visualization or conversation, add a reference using Markdown numbered footnote syntax [^3]. Wait until the end of the response to include all of the footnote bodies. For each footnote body, include a verbatim quote from the document (without quotation marks) that is long enough to uniquely identify the referenced part of the document, but max of one line.
+
+When generating a visualization, it is critical that we remain faithful to the document provided. If we ever realize that we've deviated from the document text, even slightly, we must include a remark to that effect in [square brackets] as soon as possible after the deviation.