diff --git a/README.md b/README.md index 91faecde..e0429202 100644 --- a/README.md +++ b/README.md @@ -238,13 +238,14 @@ If you run into any issues, checkout our [troubleshooting guide](./docs/troubles -- **Input automation** (7 tools) +- **Input automation** (8 tools) - [`click`](docs/tool-reference.md#click) - [`drag`](docs/tool-reference.md#drag) - [`fill`](docs/tool-reference.md#fill) - [`fill_form`](docs/tool-reference.md#fill_form) - [`handle_dialog`](docs/tool-reference.md#handle_dialog) - [`hover`](docs/tool-reference.md#hover) + - [`press_key`](docs/tool-reference.md#press_key) - [`upload_file`](docs/tool-reference.md#upload_file) - **Navigation automation** (7 tools) - [`close_page`](docs/tool-reference.md#close_page) diff --git a/docs/tool-reference.md b/docs/tool-reference.md index a5ca013a..4872226c 100644 --- a/docs/tool-reference.md +++ b/docs/tool-reference.md @@ -2,13 +2,14 @@ # Chrome DevTools MCP Tool Reference -- **[Input automation](#input-automation)** (7 tools) +- **[Input automation](#input-automation)** (8 tools) - [`click`](#click) - [`drag`](#drag) - [`fill`](#fill) - [`fill_form`](#fill_form) - [`handle_dialog`](#handle_dialog) - [`hover`](#hover) + - [`press_key`](#press_key) - [`upload_file`](#upload_file) - **[Navigation automation](#navigation-automation)** (7 tools) - [`close_page`](#close_page) @@ -102,6 +103,16 @@ --- +### `press_key` + +**Description:** Press a key or key combination. Use this when other input methods like [`fill`](#fill)() cannot be used (e.g., keyboard shortcuts, navigation keys, or special key combinations). + +**Parameters:** + +- **key** (string) **(required)**: A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta + +--- + ### `upload_file` **Description:** Upload a file through a provided element. 
diff --git a/src/tools/input.ts b/src/tools/input.ts
index 9913ee56..6e4094dc 100644
--- a/src/tools/input.ts
+++ b/src/tools/input.ts
@@ -7,6 +7,7 @@
 import type {McpContext, TextSnapshotNode} from '../McpContext.js';
 import {zod} from '../third_party/index.js';
 import type {ElementHandle} from '../third_party/index.js';
+import {parseKey} from '../utils/keyboard.js';
 
 import {ToolCategory} from './categories.js';
 import {defineTool} from './ToolDefinition.js';
@@ -270,3 +271,53 @@ export const uploadFile = defineTool({
     }
   },
 });
+
+/**
+ * Presses a key or key combination on the currently selected page.
+ *
+ * `parseKey` splits the `key` parameter into the primary key followed by the
+ * modifiers. Modifiers are pressed down in the given order, the primary key is
+ * pressed, and the modifiers are released in reverse order. The response gets
+ * a confirmation line and is flagged to include a snapshot.
+ */
+export const pressKey = defineTool({
+  name: 'press_key',
+  description: `Press a key or key combination. Use this when other input methods like fill() cannot be used (e.g., keyboard shortcuts, navigation keys, or special key combinations).`,
+  annotations: {
+    category: ToolCategory.INPUT,
+    readOnlyHint: false,
+  },
+  schema: {
+    key: zod
+      .string()
+      .describe(
+        'A key or a combination (e.g., "Enter", "Control+A", "Control++", "Control+Shift+R"). Modifiers: Control, Shift, Alt, Meta',
+      ),
+  },
+  handler: async (request, response, context) => {
+    const page = context.getSelectedPage();
+    const [key, ...modifiers] = parseKey(request.params.key);
+
+    await context.waitForEventsAfterAction(async () => {
+      // Track the modifiers that were actually pressed so that a failure
+      // part-way through the sequence does not leave any of them held down.
+      const held: typeof modifiers = [];
+      try {
+        for (const modifier of modifiers) {
+          await page.keyboard.down(modifier);
+          held.push(modifier);
+        }
+        await page.keyboard.press(key);
+      } finally {
+        for (const modifier of held.toReversed()) {
+          await page.keyboard.up(modifier);
+        }
+      }
+    });
+
+    response.appendResponseLine(
+      `Successfully pressed key: ${request.params.key}`,
+    );
+    response.setIncludeSnapshot(true);
+  },
+});
diff --git a/src/utils/keyboard.ts b/src/utils/keyboard.ts
new file mode 100644
index 00000000..c32b23ae
--- /dev/null
+++ b/src/utils/keyboard.ts
@@ -0,0 +1,326 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+import type {KeyInput} from '../third_party/index.js';
+
+// See the KeyInput type for the list of supported keys.
+const validKeys = new Set([
+  '0',
+  '1',
+  '2',
+  '3',
+  '4',
+  '5',
+  '6',
+  '7',
+  '8',
+  '9',
+  'Power',
+  'Eject',
+  'Abort',
+  'Help',
+  'Backspace',
+  'Tab',
+  'Numpad5',
+  'NumpadEnter',
+  'Enter',
+  '\r',
+  '\n',
+  'ShiftLeft',
+  'ShiftRight',
+  'ControlLeft',
+  'ControlRight',
+  'AltLeft',
+  'AltRight',
+  'Pause',
+  'CapsLock',
+  'Escape',
+  'Convert',
+  'NonConvert',
+  'Space',
+  'Numpad9',
+  'PageUp',
+  'Numpad3',
+  'PageDown',
+  'End',
+  'Numpad1',
+  'Home',
+  'Numpad7',
+  'ArrowLeft',
+  'Numpad4',
+  'Numpad8',
+  'ArrowUp',
+  'ArrowRight',
+  'Numpad6',
+  'Numpad2',
+  'ArrowDown',
+  'Select',
+  'Open',
+  'PrintScreen',
+  'Insert',
+  'Numpad0',
+  'Delete',
+  'NumpadDecimal',
+  'Digit0',
+  'Digit1',
+  'Digit2',
+  'Digit3',
+  'Digit4',
+  'Digit5',
+  'Digit6',
+  'Digit7',
+  'Digit8',
+  'Digit9',
+  'KeyA',
+  'KeyB',
+  'KeyC',
+  'KeyD',
+  'KeyE',
+  'KeyF',
+  'KeyG',
+  'KeyH',
+  'KeyI',
+  'KeyJ',
+  'KeyK',
+  'KeyL',
+  'KeyM',
+  'KeyN',
+  'KeyO',
+  'KeyP',
+  'KeyQ',
+  'KeyR',
+  'KeyS',
+  'KeyT',
+  'KeyU',
+  'KeyV',
+  'KeyW',
+  'KeyX',
+  'KeyY',
+  'KeyZ',
+  'MetaLeft',
+  'MetaRight',
+  'ContextMenu',
+  'NumpadMultiply',
+  'NumpadAdd',
+  'NumpadSubtract',
+  'NumpadDivide',
+  'F1',
+  'F2',
+  'F3',
+  'F4',
+  'F5',
+  'F6',
+  'F7',
+  'F8',
+  'F9',
+  'F10',
+  'F11',
+  'F12',
+  'F13',
+  'F14',
+  'F15',
+  'F16',
+  'F17',
+  'F18',
+  'F19',
+  'F20',
+  'F21',
+  'F22',
+  'F23',
+  'F24',
+  'NumLock',
+  'ScrollLock',
+  'AudioVolumeMute',
+  'AudioVolumeDown',
+  'AudioVolumeUp',
+  'MediaTrackNext',
+  'MediaTrackPrevious',
+  'MediaStop',
+  'MediaPlayPause',
+  'Semicolon',
+  'Equal',
+  'NumpadEqual',
+  'Comma',
+  'Minus',
+  'Period',
+  'Slash',
+  'Backquote',
+  'BracketLeft',
+  'Backslash',
+  'BracketRight',
+  'Quote',
+  'AltGraph',
+  'Props',
+  'Cancel',
+  'Clear',
+  'Shift',
+  'Control',
+  'Alt',
+  'Accept',
+  'ModeChange',
+  ' ',
+  'Print',
+  'Execute',
+  '\u0000',
+  'a',
+  'b',
+  'c',
+  'd',
+  'e',
+  'f',
+  'g',
+  'h',
+  'i',
+  'j',
+  'k',
+  'l',
+  'm',
+  'n',
+  'o',
+  'p',
+  'q',
+  'r',
+  's',
+  't',
+  'u',
+  'v',
+  'w',
+  'x',
+  'y',
+  'z',
+  'Meta',
+  '*',
+  '+',
+  '-',
+  '/',
+  ';',
+  '=',
+  ',',
+  '.',
+  '`',
+  '[',
+  '\\',
+  ']',
+  "'",
+  'Attn',
+  'CrSel',
+  'ExSel',
+  'EraseEof',
+  'Play',
+  'ZoomOut',
+  ')',
+  '!',
+  '@',
+  '#',
+  '$',
+  '%',
+  '^',
+  '&',
+  '(',
+  'A',
+  'B',
+  'C',
+  'D',
+  'E',
+  'F',
+  'G',
+  'H',
+  'I',
+  'J',
+  'K',
+  'L',
+  'M',
+  'N',
+  'O',
+  'P',
+  'Q',
+  'R',
+  'S',
+  'T',
+  'U',
+  'V',
+  'W',
+  'X',
+  'Y',
+  'Z',
+  ':',
+  '<',
+  '_',
+  '>',
+  '?',
+  '~',
+  '{',
+  '|',
+  '}',
+  '"',
+  'SoftLeft',
+  'SoftRight',
+  'Camera',
+  'Call',
+  'EndCall',
+  'VolumeDown',
+  'VolumeUp',
+]);
+
+/**
+ * Narrows an arbitrary string to a `KeyInput`.
+ *
+ * @throws If `key` is not a member of `validKeys`.
+ */
+function throwIfInvalidKey(key: string): KeyInput {
+  if (validKeys.has(key)) {
+    return key as KeyInput;
+  }
+  throw new Error(
+    `${key} is invalid. Valid keys are: ${Array.from(validKeys.values()).join(',')}.`,
+  );
+}
+
+/**
+ * Parses a key or a `+`-separated key combination such as `Control+Shift+R`.
+ *
+ * A `+` acts as a separator only when it follows a non-empty token, so a
+ * literal plus key can be written as `+` or as the last part of a combination
+ * (for example `Control++`).
+ *
+ * @returns The primary key (the last token), followed by the remaining tokens
+ * (the modifiers) in their original order.
+ * @throws If the input is empty, ends with a dangling `+` separator (for
+ * example `Control+`), contains an unknown key, or repeats a key.
+ */
+export function parseKey(keyInput: string): [KeyInput, ...KeyInput[]] {
+  let key = '';
+  // True while the most recent character was consumed as a separator.
+  let afterSeparator = false;
+  const result: KeyInput[] = [];
+  for (const ch of keyInput) {
+    // Handle cases like Shift++.
+    if (ch === '+' && key) {
+      result.push(throwIfInvalidKey(key));
+      key = '';
+      afterSeparator = true;
+    } else {
+      key += ch;
+      afterSeparator = false;
+    }
+  }
+  if (afterSeparator) {
+    // Reject input such as "Control+" instead of silently dropping the "+".
+    throw new Error(`Key ${keyInput} ends with a "+" separator.`);
+  }
+  if (key) {
+    result.push(throwIfInvalidKey(key));
+  }
+
+  if (result.length === 0) {
+    throw new Error(`Key ${keyInput} could not be parsed.`);
+  }
+
+  if (new Set(result).size !== result.length) {
+    throw new Error(`Key ${keyInput} contains duplicate keys.`);
+  }
+
+  return [result.at(-1), ...result.slice(0, -1)] as [KeyInput, ...KeyInput[]];
+}
diff --git a/tests/tools/input.test.ts b/tests/tools/input.test.ts
index b788c7b5..8a621173 100644
--- a/tests/tools/input.test.ts
+++ b/tests/tools/input.test.ts
@@ -15,7 +15,9 @@ import {
   drag,
   fillForm,
   uploadFile,
+  pressKey,
 } from '../../src/tools/input.js';
+import {parseKey} from '../../src/utils/keyboard.js';
 import {serverHooks} from '../server.js';
 import {html, withBrowser} from '../utils.js';
 
@@ -431,4 +433,71 @@ describe('input', () => {
       });
     });
   });
+
+  describe('press_key', () => {
+    it('parses keys', () => {
+      assert.deepStrictEqual(parseKey('Shift+A'), ['A', 'Shift']);
+      assert.deepStrictEqual(parseKey('Shift++'), ['+', 'Shift']);
+      assert.deepStrictEqual(parseKey('Control+Shift++'), [
+        '+',
+        'Control',
+        'Shift',
+      ]);
+      assert.deepStrictEqual(parseKey('Shift'), ['Shift']);
+      assert.deepStrictEqual(parseKey('KeyA'), ['KeyA']);
+    });
+    it('throws on empty key', () => {
+      assert.throws(() => {
+        parseKey('');
+      });
+    });
+    it('throws on invalid key', () => {
+      assert.throws(() => {
+        parseKey('aaaaa');
+      });
+    });
+    it('throws on multiple keys', () => {
+      assert.throws(() => {
+        parseKey('Shift+Shift');
+      });
+    });
+    it('throws on dangling separator', () => {
+      assert.throws(() => {
+        parseKey('Control+');
+      });
+    });
+
+    it('processes press_key', async () => {
+      await withBrowser(async (response, context) => {
+        const page = context.getSelectedPage();
+        // NOTE(review): the page markup was missing from this hunk; the script
+        // below is reconstructed from the assertions further down - confirm.
+        await page.setContent(`<script>
+          window.logs = [];
+          document.addEventListener('keydown', e => logs.push('d' + e.key));
+          document.addEventListener('keyup', e => logs.push('u' + e.key));
+        </script>`);
+        await context.createTextSnapshot();
+
+        await pressKey.handler(
+          {
+            params: {
+              key: 'Control+Shift+C',
+            },
+          },
+          response,
+          context,
+        );
+
+        assert.deepStrictEqual(await page.evaluate('logs'), [
+          'dControl',
+          'dShift',
+          'dC',
+          'uC',
+          'uShift',
+          'uControl',
+        ]);
+      });
+    });
+  });
 });