@@ -79,44 +79,79 @@ kill $(cat {OUTPUT_DIR}/dh-serve.pid) 2>/dev/null
7979
8080 Analyze the dashboard script from Phase 1. Identify all `@ui.component` decorated functions, the component tree, and every interactive element.
8181
82- Write a Playwright TypeScript test to: {OUTPUT_DIR}/playwright-test.ts
83-
84- The test must use ` @playwright/test ` . Structure:
85-
86- ``` ts
87- import { test , expect , type Page } from " @playwright/test" ;
88- import fs from " fs" ;
89- import path from " path" ;
90-
91- const DASHBOARD_URL = " <the URL from 4a>" ;
92- const OUTPUT_DIR = path .resolve (" {OUTPUT_DIR}" );
93- const SCREENSHOTS_DIR = path .join (OUTPUT_DIR , " screenshots" );
94- const RESULTS_FILE = path .join (OUTPUT_DIR , " playwright-results.json" );
95-
96- interface ComponentTestResult {
97- component: string ;
98- type: string ;
99- action: string ;
100- result: " pass" | " fail" | " not_found" ;
101- error: string ;
102- screenshot: string ;
103- }
104-
105- interface TestResults {
106- eval_name: string ;
107- dashboard_url: string ;
108- initial_load: {
109- success: boolean ;
110- screenshot: string ;
111- };
112- component_tests: ComponentTestResult [];
113- summary: {
114- total_components: number ;
115- tested: number ;
116- passed: number ;
117- failed: number ;
118- };
119- }
82+ Write a Playwright Python test to: {OUTPUT_DIR}/playwright-test.py
83+
84+ The test must use `playwright.sync_api`. Structure:
85+
86+ ``` python
87+ import json
88+ import time
89+ from pathlib import Path
90+
91+ from playwright.sync_api import sync_playwright, expect
92+
93+ DASHBOARD_URL = "<the URL from 4a>"
94+ OUTPUT_DIR = Path("{OUTPUT_DIR}").resolve()
95+ SCREENSHOTS_DIR = OUTPUT_DIR / "screenshots"
96+ RESULTS_FILE = OUTPUT_DIR / "playwright-results.json"
97+
98+
99+ def retry(fn, *, timeout=15_000, interval=1_000):
100+     """Retry fn() until it succeeds or timeout (ms) is reached."""
101+     deadline = time.monotonic() + timeout / 1000
102+     last_error = None
103+     while time.monotonic() < deadline:
104+         try:
105+             fn()
106+             return
107+         except Exception as e:
108+             last_error = e
109+             time.sleep(interval / 1000)
110+     if last_error:
111+         raise last_error
112+
113+
114+ def main ():
115+ component_tests = []
116+ SCREENSHOTS_DIR .mkdir(parents = True , exist_ok = True )
117+
118+ with sync_playwright() as p:
119+ browser = p.chromium.launch(headless = True )
120+ page = browser.new_page()
121+
122+ # Navigate and wait for dashboard
123+ page.goto(DASHBOARD_URL )
124+ # ... wait for selectors, take screenshots, test components ...
125+
126+ # Write results
127+ results = {
128+ "eval_name": "{EVAL_NAME}",
129+ " dashboard_url" : DASHBOARD_URL ,
130+ " initial_load" : {
131+ " success" : True ,
132+ " screenshot" : " screenshots/initial-load.png" ,
133+ },
134+ " component_tests" : component_tests,
135+ " summary" : {
136+ " total_components" : len (component_tests),
137+ " tested" : sum (
138+ 1 for t in component_tests if t[" result" ] != " not_found"
139+ ),
140+ " passed" : sum (
141+ 1 for t in component_tests if t[" result" ] == " pass"
142+ ),
143+ " failed" : sum (
144+ 1 for t in component_tests if t[" result" ] == " fail"
145+ ),
146+ },
147+ }
148+ RESULTS_FILE .write_text(json.dumps(results, indent = 2 ))
149+
150+ browser.close()
151+
152+
153+ if __name__ == " __main__" :
154+ main()
120155```
121156
122157** Test flow:**
@@ -134,39 +169,47 @@ interface TestResults {
134169** Do NOT capture console logs or page errors.** They are not meaningful for eval results and waste context space. Do not set up ` page.on("console", ...) ` or ` page.on("pageerror", ...) ` handlers.
135170
136171** Deephaven-specific patterns (from the Playwright skill):**
137- - Overlays render OUTSIDE the panel — use ` page.getByRole () ` not ` panel.getByRole () ` for dialogs/dropdowns
172+ - Overlays render OUTSIDE the panel — use `page.get_by_role()` not `panel.get_by_role()` for dialogs/dropdowns
138173- Use ` click() ` not ` check() ` /` uncheck() ` for Spectrum checkboxes/switches
139174- Visit every tab in every stack. Use ` .lm_tab ` selectors and check ` .lm_active ` class
140- - Table-backed pickers: use retry loop (up to 15 attempts with 1s sleep )
175+ - Table-backed pickers: use `retry()` loop (retries with 1s interval, 15s total timeout)
141176- Cold-start race: tab click handlers may not be attached on first load — retry tab clicks
142177
143- ** Test structure** — use a single ` test() ` block that tests all components sequentially and writes results at the end:
144-
145- ``` ts
146- test (" dashboard eval" , async ({ page }) => {
147- const componentTests: ComponentTestResult [] = [];
148-
149- fs .mkdirSync (SCREENSHOTS_DIR , { recursive: true });
150-
151- // Navigate and wait for dashboard
152- await page .goto (DASHBOARD_URL );
153- // ... wait for selectors, take screenshots, test components ...
154-
155- // Write results
156- const results: TestResults = {
157- eval_name: " {EVAL_NAME}" ,
158- dashboard_url: DASHBOARD_URL ,
159- initial_load: { success: true , screenshot: " screenshots/initial-load.png" },
160- component_tests: componentTests ,
161- summary: {
162- total_components: componentTests .length ,
163- tested: componentTests .filter (t => t .result !== " not_found" ).length ,
164- passed: componentTests .filter (t => t .result === " pass" ).length ,
165- failed: componentTests .filter (t => t .result === " fail" ).length ,
166- },
167- };
168- fs .writeFileSync (RESULTS_FILE , JSON .stringify (results , null , 2 ));
169- });
178+ **Test structure** — use a single `main()` function that tests all components sequentially and writes results at the end:
179+
180+ ``` python
181+ def main ():
182+ component_tests = []
183+ SCREENSHOTS_DIR .mkdir(parents = True , exist_ok = True )
184+
185+ with sync_playwright() as p:
186+ browser = p.chromium.launch(headless = True )
187+ page = browser.new_page()
188+
189+ # Navigate and wait for dashboard
190+ page.goto(DASHBOARD_URL )
191+ # ... wait for selectors, take screenshots, test components ...
192+
193+ # Write results
194+ results = {
195+ "eval_name": "{EVAL_NAME}",
196+ " dashboard_url" : DASHBOARD_URL ,
197+ " initial_load" : {" success" : True , " screenshot" : " screenshots/initial-load.png" },
198+ " component_tests" : component_tests,
199+ " summary" : {
200+ " total_components" : len (component_tests),
201+ " tested" : sum (1 for t in component_tests if t[" result" ] != " not_found" ),
202+ " passed" : sum (1 for t in component_tests if t[" result" ] == " pass" ),
203+ " failed" : sum (1 for t in component_tests if t[" result" ] == " fail" ),
204+ },
205+ }
206+ RESULTS_FILE .write_text(json.dumps(results, indent = 2 ))
207+
208+ browser.close()
209+
210+
211+ if __name__ == " __main__" :
212+ main()
170213```
171214
172215** Results format** — write to ` playwright-results.json ` :
@@ -201,36 +244,18 @@ test("dashboard eval", async ({ page }) => {
201244** Constraints:**
202245- Do NOT modify the dashboard script
203246- Do NOT use Deephaven APIs in the test — pure Playwright against the browser DOM
204- - Use ` @ playwright/test ` framework only
205- - Create ` screenshots/ ` directory before saving screenshots using ` fs.mkdirSync(SCREENSHOTS_DIR, { recursive: true } )`
206- - Wrap each component test in try/catch so one failure doesn't abort all tests
247+ - Use `playwright.sync_api` only
248+ - Create `screenshots/` directory before saving screenshots using `SCREENSHOTS_DIR.mkdir(parents=True, exist_ok=True)`
249+ - Wrap each component test in try/except so one failure doesn't abort all tests
207250- If a component can't be located, record it as ` "not_found" ` rather than failing
208251
209- ### 4c. Set up Playwright and run the test
210-
211- First, create a minimal ` playwright.config.ts ` in {OUTPUT_DIR}:
212-
213- ``` ts
214- import { defineConfig } from " @playwright/test" ;
252+ ### 4c. Run the test
215253
216- export default defineConfig ({
217- timeout: 120_000 ,
218- use: {
219- headless: true ,
220- },
221- });
222- ```
223-
224- Then install dependencies (if not already present):
225- ```
226- npm init -y --prefix {OUTPUT_DIR} 2>/dev/null
227- npm install --prefix {OUTPUT_DIR} @playwright/test
228- npx --prefix {OUTPUT_DIR} playwright install chromium
229- ```
254+ Playwright and its browsers are pre-installed in the tools environment. No per-eval installation needed.
230255
231256Run the test:
232257```
233- npx --prefix {OUTPUT_DIR} playwright test {OUTPUT_DIR} /playwright-test.ts --config {OUTPUT_DIR}/playwright.config.ts --reporter=list
258+ python {OUTPUT_DIR}/playwright-test.py
234259```
235260
236261If the test script itself has errors (not component failures), fix the test script and re-run up to 3 times.
@@ -253,8 +278,8 @@ If there ARE failures, iterate up to {MAX_FIX_ITERATIONS} times:
2532783 . Verify the fix: ` dh exec --vm --no-show-tables {OUTPUT_DIR}/{SCRIPT_NAME} --timeout 120 `
2542794 . If dh exec fails, fix and retry (up to 3 sub-attempts)
2552805 . Stop your server by PID: ` kill $(cat {OUTPUT_DIR}/dh-serve.pid) 2>/dev/null ` , then re-serve: ` dh serve {OUTPUT_DIR}/{SCRIPT_NAME} --no-browser --iframe dashboard & echo $! > {OUTPUT_DIR}/dh-serve.pid `
256- 6 . Update ` DASHBOARD_URL ` in ` {OUTPUT_DIR}/playwright-test.ts ` if the port changed
257- 7 . Re-run: ` npx --prefix {OUTPUT_DIR} playwright test {OUTPUT_DIR} /playwright-test.ts --config {OUTPUT_DIR}/playwright.config.ts --reporter=list `
281+ 6 . Update ` DASHBOARD_URL ` in ` {OUTPUT_DIR}/playwright-test.py ` if the port changed
282+ 7 . Re-run: ` python {OUTPUT_DIR}/playwright-test.py `
2582838 . If no more actionable failures, stop the loop
259284
260285After the loop ends, stop your server: ` kill $(cat {OUTPUT_DIR}/dh-serve.pid) 2>/dev/null `
0 commit comments