Skip to content

Commit 014a8bc

Browse files
authored
feat: validate uids (#37)
- Generate snapshot IDs.
- Change uid to be a string.
- Validate that a uid is not coming from a stale snapshot.
1 parent b8d99a6 commit 014a8bc

File tree

11 files changed

+122
-72
lines changed

11 files changed

+122
-72
lines changed

docs/tool-reference.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
**Parameters:**
4444

4545
- **dblClick** (boolean) _(optional)_: Set to true for double clicks. Default is false.
46-
- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot
46+
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
4747

4848
---
4949

@@ -53,8 +53,8 @@
5353

5454
**Parameters:**
5555

56-
- **from_uid** (number) **(required)**: The uid of the element to [`drag`](#drag)
57-
- **to_uid** (number) **(required)**: The uid of the element to drop into
56+
- **from_uid** (string) **(required)**: The uid of the element to [`drag`](#drag)
57+
- **to_uid** (string) **(required)**: The uid of the element to drop into
5858

5959
---
6060

@@ -64,7 +64,7 @@
6464

6565
**Parameters:**
6666

67-
- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot
67+
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
6868
- **value** (string) **(required)**: The value to [`fill`](#fill) in
6969

7070
---
@@ -96,7 +96,7 @@
9696

9797
**Parameters:**
9898

99-
- **uid** (number) **(required)**: The uid of an element on the page from the page content snapshot
99+
- **uid** (string) **(required)**: The uid of an element on the page from the page content snapshot
100100

101101
---
102102

@@ -107,7 +107,7 @@
107107
**Parameters:**
108108

109109
- **filePath** (string) **(required)**: The local path of the file to upload
110-
- **uid** (number) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot
110+
- **uid** (string) **(required)**: The uid of the file input element or an element that will open file chooser on the page from the page content snapshot
111111

112112
---
113113

@@ -283,7 +283,7 @@
283283

284284
- **format** (enum: "png", "jpeg") _(optional)_: Type of format to save the screenshot as. Default is "png"
285285
- **fullPage** (boolean) _(optional)_: If set to true takes a screenshot of the full page instead of the currently visible viewport. Incompatible with uid.
286-
- **uid** (number) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.
286+
- **uid** (string) _(optional)_: The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.
287287

288288
---
289289

src/McpContext.ts

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,16 @@ import path from 'node:path';
2121
import {listPages} from './tools/pages.js';
2222

2323
export interface TextSnapshotNode extends SerializedAXNode {
24-
id: number;
24+
id: string;
2525
children: TextSnapshotNode[];
2626
}
2727

28+
export interface TextSnapshot {
29+
root: TextSnapshotNode;
30+
idToNode: Map<string, TextSnapshotNode>;
31+
snapshotId: string;
32+
}
33+
2834
export class McpContext implements Context {
2935
browser: Browser;
3036
logger: Debugger;
@@ -33,8 +39,7 @@ export class McpContext implements Context {
3339
#pages: Page[] = [];
3440
#selectedPageIdx = 0;
3541
// The most recent snapshot.
36-
#textSnapshot: TextSnapshotNode | null = null;
37-
#idToNodeMap = new Map<number, TextSnapshotNode>();
42+
#textSnapshot: TextSnapshot | null = null;
3843
#networkCollector: NetworkCollector;
3944
#consoleCollector: PageCollector<ConsoleMessage | Error>;
4045

@@ -43,6 +48,8 @@ export class McpContext implements Context {
4348
#cpuThrottlingRate = 1;
4449
#dialog?: Dialog;
4550

51+
#nextSnapshotId = 1;
52+
4653
private constructor(browser: Browser, logger: Debugger) {
4754
this.browser = browser;
4855
this.logger = logger;
@@ -192,11 +199,19 @@ export class McpContext implements Context {
192199
newPage.setDefaultNavigationTimeout(10_000);
193200
}
194201

195-
async getElementByUid(uid: number): Promise<ElementHandle<Element>> {
196-
if (!this.#idToNodeMap.size) {
202+
async getElementByUid(uid: string): Promise<ElementHandle<Element>> {
203+
if (!this.#textSnapshot?.idToNode.size) {
197204
throw new Error('No snapshot found. Use browser_snapshot to capture one');
198205
}
199-
const node = this.#idToNodeMap.get(uid);
206+
const [snapshotId] = uid.split('_');
207+
208+
if (this.#textSnapshot.snapshotId !== snapshotId) {
209+
throw new Error(
210+
'This uid is coming from a stale snapshot. Call take_snapshot to get a fresh snapshot.',
211+
);
212+
}
213+
214+
const node = this.#textSnapshot?.idToNode.get(uid);
200215
if (!node) {
201216
throw new Error('No such element found in the snapshot');
202217
}
@@ -222,35 +237,39 @@ export class McpContext implements Context {
222237
/**
223238
* Creates a text snapshot of a page.
224239
*/
225-
async createTextSnapshot(): Promise<TextSnapshotNode | null> {
240+
async createTextSnapshot(): Promise<void> {
226241
const page = this.getSelectedPage();
227242
const rootNode = await page.accessibility.snapshot();
228243
if (!rootNode) {
229-
return null;
244+
return;
230245
}
231246

247+
const snapshotId = this.#nextSnapshotId++;
232248
// Iterate through the whole accessibility node tree and assign node ids that
233249
// will be used for the tree serialization and mapping ids back to nodes.
234250
let idCounter = 0;
235-
this.#idToNodeMap.clear();
251+
const idToNode = new Map<string, TextSnapshotNode>();
236252
const assignIds = (node: SerializedAXNode): TextSnapshotNode => {
237253
const nodeWithId: TextSnapshotNode = {
238254
...node,
239-
id: idCounter++,
255+
id: `${snapshotId}_${idCounter++}`,
240256
children: node.children
241257
? node.children.map(child => assignIds(child))
242258
: [],
243259
};
244-
this.#idToNodeMap.set(nodeWithId.id, nodeWithId);
260+
idToNode.set(nodeWithId.id, nodeWithId);
245261
return nodeWithId;
246262
};
247263

248264
const rootNodeWithId = assignIds(rootNode);
249-
this.#textSnapshot = rootNodeWithId;
250-
return rootNodeWithId;
265+
this.#textSnapshot = {
266+
root: rootNodeWithId,
267+
snapshotId: String(snapshotId),
268+
idToNode,
269+
};
251270
}
252271

253-
getTextSnapshot(): TextSnapshotNode | null {
272+
getTextSnapshot(): TextSnapshot | null {
254273
return this.#textSnapshot;
255274
}
256275

src/McpResponse.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,9 @@ Call browser_handle_dialog to handle it before continuing.`);
145145
}
146146

147147
if (this.#includeSnapshot) {
148-
const rootNode = context.getTextSnapshot();
149-
if (rootNode) {
150-
const formattedSnapshot = formatA11ySnapshot(rootNode);
148+
const snapshot = context.getTextSnapshot();
149+
if (snapshot) {
150+
const formattedSnapshot = formatA11ySnapshot(snapshot.root);
151151
response.push('## Page content');
152152
response.push(formattedSnapshot);
153153
}

src/tools/ToolDefinition.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ export type Context = Readonly<{
6060
getPageByIdx(idx: number): Page;
6161
newPage(): Promise<Page>;
6262
setSelectedPageIdx(idx: number): void;
63-
getElementByUid(uid: number): Promise<ElementHandle<Element>>;
63+
getElementByUid(uid: string): Promise<ElementHandle<Element>>;
6464
setNetworkConditions(conditions: string | null): void;
6565
setCpuThrottlingRate(rate: number): void;
6666
saveTemporaryFile(

src/tools/input.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ export const click = defineTool({
1919
},
2020
schema: {
2121
uid: z
22-
.number()
22+
.string()
2323
.describe(
2424
'The uid of an element on the page from the page content snapshot',
2525
),
@@ -58,7 +58,7 @@ export const hover = defineTool({
5858
},
5959
schema: {
6060
uid: z
61-
.number()
61+
.string()
6262
.describe(
6363
'The uid of an element on the page from the page content snapshot',
6464
),
@@ -87,7 +87,7 @@ export const fill = defineTool({
8787
},
8888
schema: {
8989
uid: z
90-
.number()
90+
.string()
9191
.describe(
9292
'The uid of an element on the page from the page content snapshot',
9393
),
@@ -115,8 +115,8 @@ export const drag = defineTool({
115115
readOnlyHint: false,
116116
},
117117
schema: {
118-
from_uid: z.number().describe('The uid of the element to drag'),
119-
to_uid: z.number().describe('The uid of the element to drop into'),
118+
from_uid: z.string().describe('The uid of the element to drag'),
119+
to_uid: z.string().describe('The uid of the element to drop into'),
120120
},
121121
handler: async (request, response, context) => {
122122
const fromHandle = await context.getElementByUid(request.params.from_uid);
@@ -147,7 +147,7 @@ export const fillForm = defineTool({
147147
elements: z
148148
.array(
149149
z.object({
150-
uid: z.number().describe('The uid of the element to fill out'),
150+
uid: z.string().describe('The uid of the element to fill out'),
151151
value: z.string().describe('Value for the element'),
152152
}),
153153
)
@@ -178,7 +178,7 @@ export const uploadFile = defineTool({
178178
},
179179
schema: {
180180
uid: z
181-
.number()
181+
.string()
182182
.describe(
183183
'The uid of the file input element or an element that will open file chooser on the page from the page content snapshot',
184184
),

src/tools/screenshot.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export const screenshot = defineTool({
2222
.default('png')
2323
.describe('Type of format to save the screenshot as. Default is "png"'),
2424
uid: z
25-
.number()
25+
.string()
2626
.optional()
2727
.describe(
2828
'The uid of an element on the page from the page content snapshot. If omitted takes a pages screenshot.',

tests/McpContext.test.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/**
2+
* @license
3+
* Copyright 2025 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import {describe, it} from 'node:test';
7+
import assert from 'assert';
8+
9+
import {withBrowser} from './utils.js';
10+
11+
describe('McpContext', () => {
  /**
   * Uids are prefixed with the id of the snapshot that produced them
   * (`<snapshotId>_<nodeIndex>`). Once a newer snapshot has been taken, a uid
   * from the previous snapshot must be rejected instead of silently resolving
   * to a node of the new snapshot.
   */
  it('rejects uids that come from a stale snapshot', async () => {
    await withBrowser(async (_response, context) => {
      const page = context.getSelectedPage();
      await page.setContent(`<!DOCTYPE html>
<button>Click me</button><input type="text" value="Input">`);

      // First snapshot has id 1, so the button is addressable as `1_1`.
      await context.createTextSnapshot();
      assert.ok(await context.getElementByUid('1_1'));

      // Taking a second snapshot makes every `1_*` uid stale.
      await context.createTextSnapshot();

      // `assert.rejects` fails the test when the promise resolves, and compares
      // the message for equality. The previous try/catch swallowed its own
      // `assert.fail` and only checked that the message was truthy.
      await assert.rejects(context.getElementByUid('1_1'), {
        message:
          'This uid is coming from a stale snapshot. Call take_snapshot to get a fresh snapshot.',
      });
    });
  });
});

tests/McpResponse.test.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ Testing 2`,
6161
result[0].text,
6262
`# test response
6363
## Page content
64-
uid=0 RootWebArea ""
65-
uid=1 button "Click me" focusable focused
66-
uid=2 textbox "" value="Input"
64+
uid=1_0 RootWebArea ""
65+
uid=1_1 button "Click me" focusable focused
66+
uid=1_2 textbox "" value="Input"
6767
`,
6868
);
6969
});
@@ -87,9 +87,9 @@ uid=0 RootWebArea ""
8787
result[0].text,
8888
`# test response
8989
## Page content
90-
uid=0 RootWebArea "My test page"
91-
uid=1 StaticText "username"
92-
uid=2 textbox "username" value="mcp" focusable focused
90+
uid=1_0 RootWebArea "My test page"
91+
uid=1_1 StaticText "username"
92+
uid=1_2 textbox "username" value="mcp" focusable focused
9393
`,
9494
);
9595
});

0 commit comments

Comments
 (0)