|
| 1 | +const { chromium } = require("playwright"); |
| 2 | +const { Ollama } = require("ollama"); |
| 3 | + |
/**
 * Loads the Guidepup Virtual Screen Reader ES module into the page and
 * exposes its `virtual` export as `window.virtual`.
 *
 * @param {object} options
 * @param {import("playwright").Page} options.page - Page to inject into.
 * @returns {Promise<void>} Resolves once `window.virtual` is defined.
 */
async function inject({ page }) {
  const moduleUrl =
    "https://unpkg.com/@guidepup/virtual-screen-reader/lib/esm/index.browser.js";

  await page.addScriptTag({
    url: moduleUrl,
    type: "module",
  });

  await page.addScriptTag({
    content: `import { virtual } from "${moduleUrl}"; window.virtual = virtual;`,
    type: "module",
  });

  // For inline content, addScriptTag resolves when the tag is inserted, not
  // when the module has finished evaluating. Wait for the global explicitly
  // so callers can use `window.virtual` straight away.
  await page.waitForFunction(() => typeof window.virtual !== "undefined");
}
| 15 | + |
/**
 * Starts the in-page virtual screen reader with `document.body` as its
 * container. Expects `window.virtual` to already exist in the page.
 *
 * @param {object} options
 * @param {import("playwright").Page} options.page - Page hosting `window.virtual`.
 * @returns {Promise<void>}
 */
async function start({ page }) {
  await page.evaluate(async () => {
    await window.virtual.start({
      container: document.body,
    });
  });
}
| 23 | + |
/**
 * Calls `window.virtual.act()` inside the page. This backs the agent's
 * "act" command.
 *
 * @param {object} options
 * @param {import("playwright").Page} options.page - Page hosting `window.virtual`.
 * @returns {Promise<void>}
 */
async function act({ page }) {
  await page.evaluate(async () => {
    await window.virtual.act();
  });
}
| 29 | + |
/**
 * Calls `window.virtual.next()` inside the page. This backs the agent's
 * "next" command.
 *
 * @param {object} options
 * @param {import("playwright").Page} options.page - Page hosting `window.virtual`.
 * @returns {Promise<void>}
 */
async function next({ page }) {
  await page.evaluate(async () => {
    await window.virtual.next();
  });
}
| 35 | + |
/**
 * Reads the screen reader's most recent output by calling
 * `window.virtual.lastSpokenPhrase()` inside the page.
 *
 * @param {object} options
 * @param {import("playwright").Page} options.page - Page hosting `window.virtual`.
 * @returns {Promise<*>} Whatever `window.virtual.lastSpokenPhrase()` resolves
 *   to, as passed back through `page.evaluate`.
 */
async function lastSpokenPhrase({ page }) {
  return await page.evaluate(async () => {
    return await window.virtual.lastSpokenPhrase();
  });
}
| 41 | + |
/**
 * Calls `window.virtual.stop()` inside the page to shut the screen reader
 * down.
 *
 * @param {object} options
 * @param {import("playwright").Page} options.page - Page hosting `window.virtual`.
 * @returns {Promise<void>}
 */
async function stop({ page }) {
  await page.evaluate(async () => {
    await window.virtual.stop();
  });
}
| 47 | + |
// Shared Ollama client used for the chat requests that pick the next command.
// Constructed with no options, so it uses the library's defaults.
const ollama = new Ollama();
| 49 | + |
// Command keywords the model may reply with. The values are the exact tokens
// matched against the first word of the model's response.
const COMMANDS_MAP = Object.freeze({
  ACT: "act",
  NEXT: "next",
  STOP: "stop",
});

// Human-readable explanation of each command, embedded in the system prompt.
const COMMANDS_EXPLANATIONS_MAP = Object.freeze({
  [COMMANDS_MAP.ACT]: "Click on the element.",
  [COMMANDS_MAP.NEXT]: "Move to the next element.",
  [COMMANDS_MAP.STOP]:
    "Stop the screen reader, indicating the goal has been achieved.",
});

// Flat list of command keywords, in declaration order.
const COMMANDS = Object.values(COMMANDS_MAP);
| 64 | + |
// Command-line arguments: <url> <goal> [maxAttempts].
const args = process.argv.slice(2);

if (args.length < 2) {
  console.error("Usage: node src/index.js <url> <goal> [maxAttempts]");

  process.exit(1);
}

const [url, goal, maxAttemptsArg] = args;
const maxAttempts = maxAttemptsArg ? Number.parseInt(maxAttemptsArg, 10) : 10;

// A non-numeric value parses to NaN, which would make the main loop run zero
// times and report "Max attempts reached". Reject it up front instead.
if (!Number.isInteger(maxAttempts) || maxAttempts < 1) {
  console.error(
    `Invalid maxAttempts "${maxAttemptsArg}": expected a positive integer.`
  );
  console.error("Usage: node src/index.js <url> <goal> [maxAttempts]");

  process.exit(1);
}
| 75 | + |
// System prompt sent with every chat request. It embeds the user's goal and
// the list of available commands with their explanations.
const SYSTEM_MESSAGE = `You are a senior accessibility testing expert controlling a screen reader.

Your goal is: ${goal}.

Based on the screen reader output provided to you, choose the next command to achieve the above goal.

Available commands:
${COMMANDS.map(
  (option) => `- ${option} - explanation: ${COMMANDS_EXPLANATIONS_MAP[option]}`
).join("\n")}

You MUST always respond in the following format: <command> <any other thoughts after a space>.
It is CRUCIAL that the command is the first thing you output.

Notes:

- If you encounter a modal you must dismiss the modal by navigating to a close button, reject button, or something similar and by using the "act" command. Dialogs and modals will result in a loop if you just navigate, so you MUST interact with a CTA to close it.
- Rely only on the screen reader output you have received and do not assume anything about the state of the page otherwise. E.g. just because you act on something doesn't mean it worked - you must use screen reader output to confirm the action worked.
- As an accessibility expert you should navigate using the full range of commands available to you and make use of accessibility features such as skip links.
- Always double check your logic for what the next command should be.
- The goal is always be achievable by exploring a page fully.
- Don't act on arbitrary links to discover the goal.
- Always double check you have actually achieved your goal before using the "stop" command.
- If you encounter a cookie modal then try to reject cookies.
- If a command doesn't work, try again with a different command. Repeating a command won't help.
- Even if you are confused by the screen reader output, you MUST only respond with one of the provided commands, and any additional content must come afterwards.`;
| 102 | + |
/**
 * Extracts the command keyword from a model reply.
 *
 * Takes the first whitespace-delimited token, lower-cases it and strips any
 * surrounding non-letter characters, so replies such as `<act>`, `"next"` or
 * `Stop.` still map to a known command.
 *
 * @param {string} reply - Raw text returned by the model.
 * @returns {string} Normalized first token, or "" when the reply is empty.
 */
function parseCommandType(reply) {
  const [firstToken = ""] = reply.trim().split(/\s+/);

  return firstToken.toLowerCase().replace(/^[^a-z]+|[^a-z]+$/g, "");
}

// Entry point: opens the target URL, starts the virtual screen reader, then
// repeatedly sends the latest spoken phrase to the model and runs the command
// it replies with. The loop ends on "stop" or after maxAttempts iterations.
(async () => {
  const browser = await chromium.launch({ headless: false });

  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto(url);

    await inject({ page });
    await start({ page });

    let attempts = 0;
    let goalAchieved = false;
    // Feedback for the model about the previous turn, cleared once it is sent.
    let error = null;

    // Rolling user/assistant history sent after the system message.
    const messages = [];

    while (attempts < maxAttempts && !goalAchieved) {
      attempts++;

      const spokenPhrase = await lastSpokenPhrase({ page });
      const userMessage = `Screen Reader Output: ${spokenPhrase}\n${
        error ? `Error: ${error}\n` : ""
      }`;
      error = null;
      console.info(userMessage);

      // Keep the context short: once more than three exchanges are stored,
      // drop the oldest user/assistant pair. This must remove from the front;
      // unshift() with no arguments removes nothing.
      if (messages.length > 6) {
        messages.splice(0, 2);
      }

      messages.push({
        role: "user",
        content: userMessage,
      });

      const response = await ollama.chat({
        model: "llama3.1",
        messages: [
          {
            role: "system",
            content: SYSTEM_MESSAGE,
          },
          ...messages,
        ],
      });

      const command = response.message.content.trim();
      console.log(`Agent: ${command}\n`);

      messages.push({
        role: "assistant",
        content: command,
      });

      switch (parseCommandType(command)) {
        case COMMANDS_MAP.ACT: {
          await act({ page });

          break;
        }
        case COMMANDS_MAP.NEXT: {
          await next({ page });

          break;
        }
        case COMMANDS_MAP.STOP: {
          goalAchieved = true;

          break;
        }
        default: {
          // Reported to the model on the next turn via the user message.
          error = "Unknown command received.";
          console.error(error);

          break;
        }
      }
    }

    if (goalAchieved) {
      console.log("Goal achieved!");
    } else {
      console.error("Max attempts reached without achieving the goal.");
    }

    await stop({ page });
  } finally {
    // Always close the browser, even when navigation, injection or the model
    // call throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and exit non-zero without leaving a floating promise.
  console.error(err);
  process.exitCode = 1;
});
0 commit comments