From 90c477396a68674dd02739226489d8adf6b413a4 Mon Sep 17 00:00:00 2001 From: alinavarkki Date: Mon, 13 Oct 2025 10:06:58 +0200 Subject: [PATCH 1/4] fix select --- src/McpContext.ts | 32 +++++++++++-- src/tools/ToolDefinition.ts | 2 + src/tools/input.ts | 91 +++++++++++++++++++++++++++++-------- tests/tools/input.test.ts | 29 ++++++++++++ 4 files changed, 131 insertions(+), 23 deletions(-) diff --git a/src/McpContext.ts b/src/McpContext.ts index d1037935..5b29ce09 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -272,6 +272,10 @@ export class McpContext implements Context { return page.getDefaultNavigationTimeout(); } + getAXNodeByUid(uid: string) { + return this.#textSnapshot?.idToNode.get(uid); + } + async getElementByUid(uid: string): Promise> { if (!this.#textSnapshot?.idToNode.size) { throw new Error( @@ -326,19 +330,37 @@ export class McpContext implements Context { // will be used for the tree serialization and mapping ids back to nodes. let idCounter = 0; const idToNode = new Map(); - const assignIds = (node: SerializedAXNode): TextSnapshotNode => { + const assignIds = async ( + node: SerializedAXNode, + ): Promise => { const nodeWithId: TextSnapshotNode = { ...node, id: `${snapshotId}_${idCounter++}`, - children: node.children - ? node.children.map(child => assignIds(child)) - : [], + children: [], }; + + // The AXNode for an option doesn't contain its `value`. + // Therefore, set text content of the option as value. + if (node.role === 'option') { + const handle = await node.elementHandle(); + if (handle) { + const textContentHandle = await handle.getProperty('textContent'); + const optionText = await textContentHandle.jsonValue(); + if (optionText) { + nodeWithId.value = optionText.toString(); + } + } + } + + nodeWithId.children = node.children + ? await Promise.all(node.children.map(child => assignIds(child))) + : []; + idToNode.set(nodeWithId.id, nodeWithId); return nodeWithId; }; - const rootNodeWithId = assignIds(rootNode); + const rootNodeWithId = await assignIds(rootNode); this.#textSnapshot = { root: rootNodeWithId, snapshotId: String(snapshotId), diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index fe2fae7b..56fdb53a 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -7,6 +7,7 @@ import type {Dialog, ElementHandle, Page} from 'puppeteer-core'; import z from 'zod'; +import type {TextSnapshotNode} from '../McpContext.js'; import type {TraceResult} from '../trace-processing/parse.js'; import type {ToolCategories} from './categories.js'; @@ -68,6 +69,7 @@ export type Context = Readonly<{ closePage(pageIdx: number): Promise; setSelectedPageIdx(idx: number): void; getElementByUid(uid: string): Promise>; + getAXNodeByUid(uid: string): TextSnapshotNode | undefined; setNetworkConditions(conditions: string | null): void; setCpuThrottlingRate(rate: number): void; saveTemporaryFile( diff --git a/src/tools/input.ts b/src/tools/input.ts index eda04e80..02bb8a0f 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -7,6 +7,8 @@ import type {ElementHandle} from 'puppeteer-core'; import z from 'zod'; +import type {McpContext, TextSnapshotNode} from '../McpContext.js'; + import {ToolCategories} from './categories.js'; import {defineTool} from './ToolDefinition.js'; @@ -78,6 +80,61 @@ export const hover = defineTool({ }, }); +// The AXNode for an option doesn't contain its `value`. We set text content of the option as value. +// If the form is a combobox, we need to find the correct option by its text value. +// To do that, loop through the children while checking which child's text matches the requested value (requested value is actually the text content). +// When the correct option is found, use the element handle to get the real value. +async function selectOption( + handle: ElementHandle, + aXNode: TextSnapshotNode, + value: string, +) { + let optionFound = false; + for (const child of aXNode.children) { + if (child.role === 'option' && child.name === value && child.value) { + optionFound = true; + const childHandle = await child.elementHandle(); + if (childHandle) { + try { + const childValueHandle = await childHandle.getProperty('value'); + try { + const childValue = await childValueHandle.jsonValue(); + if (childValue) { + await handle.asLocator().fill(childValue.toString()); + } + } finally { + void childValueHandle.dispose(); + } + break; + } finally { + void childHandle.dispose(); + } + } + } + } + if (!optionFound) { + throw new Error(`Could not find option with text "${value}"`); + } +} + +async function fillFormElement( + uid: string, + value: string, + context: McpContext, +) { + const handle = await context.getElementByUid(uid); + try { + const aXNode = context.getAXNodeByUid(uid); + if (aXNode && aXNode.role === 'combobox') { + await selectOption(handle, aXNode, value); + } else { + await handle.asLocator().fill(value); + } + } finally { + void handle.dispose(); + } +} + export const fill = defineTool({ name: 'fill', description: `Type text into a input, text area or select an option from a `, + ); + await context.createTextSnapshot(); + await fill.handler( + { + params: { + uid: '1_1', + value: 'two', + }, + }, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully filled out the element', + ); + assert.ok(response.includeSnapshot); + const selectedValue = await page.evaluate( + () => document.querySelector('select')!.value, + ); + assert.strictEqual(selectedValue, 'v2'); + }); + }); }); describe('drags', () => { From d299e5a62b81d92be8fbb62d35bea948d7218847 Mon Sep 17 00:00:00 2001 From: alinavarkki Date: Mon, 13 Oct 2025 10:33:22 +0200 Subject: [PATCH 2/4] remove handle to get node name --- src/McpContext.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/McpContext.ts b/src/McpContext.ts index 5b29ce09..e1992a1a 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -342,13 +342,9 @@ export class McpContext implements Context { // The AXNode for an option doesn't contain its `value`. // Therefore, set text content of the option as value. if (node.role === 'option') { - const handle = await node.elementHandle(); - if (handle) { - const textContentHandle = await handle.getProperty('textContent'); - const optionText = await textContentHandle.jsonValue(); - if (optionText) { - nodeWithId.value = optionText.toString(); - } + const optionText = node.name; + if (optionText) { + nodeWithId.value = optionText.toString(); } } From 1b542c9913ade07cfea770c06d0997e58f6aa003 Mon Sep 17 00:00:00 2001 From: alinavarkki Date: Mon, 13 Oct 2025 11:08:46 +0200 Subject: [PATCH 3/4] make assignids not async --- src/McpContext.ts | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/McpContext.ts b/src/McpContext.ts index e1992a1a..2ecce8ca 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -330,13 +330,13 @@ export class McpContext implements Context { // will be used for the tree serialization and mapping ids back to nodes. let idCounter = 0; const idToNode = new Map(); - const assignIds = async ( - node: SerializedAXNode, - ): Promise => { + const assignIds = (node: SerializedAXNode): TextSnapshotNode => { const nodeWithId: TextSnapshotNode = { ...node, id: `${snapshotId}_${idCounter++}`, - children: [], + children: node.children + ? node.children.map(child => assignIds(child)) + : [], }; // The AXNode for an option doesn't contain its `value`. @@ -348,10 +348,6 @@ export class McpContext implements Context { } } - nodeWithId.children = node.children - ? await Promise.all(node.children.map(child => assignIds(child))) - : []; - idToNode.set(nodeWithId.id, nodeWithId); return nodeWithId; }; From 537c0ce1782d992a81a949117a2e6baedc50b53b Mon Sep 17 00:00:00 2001 From: alinavarkki Date: Mon, 13 Oct 2025 11:14:01 +0200 Subject: [PATCH 4/4] remove async --- src/McpContext.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/McpContext.ts b/src/McpContext.ts index 2ecce8ca..cd829e16 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -352,7 +352,7 @@ export class McpContext implements Context { return nodeWithId; }; - const rootNodeWithId = await assignIds(rootNode); + const rootNodeWithId = assignIds(rootNode); this.#textSnapshot = { root: rootNodeWithId, snapshotId: String(snapshotId),