Skip to content

Commit 1be0fb1

Browse files
committed
feat: add press_key tool for keyboard input
Add press_key tool that supports single keys and key combinations with modifiers.

Features:
- Single key press (e.g., "Enter", "Escape", "Tab")
- Key combinations with modifiers (e.g., "Control+A", "Control+Shift+T")
- Edge case handling (e.g., "Control++" for plus key with modifier)

Implementation:
- Added splitKeyCombo() helper function to parse key combinations
- Handles modifier keys: Control, Shift, Alt, Meta
- Presses modifiers in order, releases in reverse order
- Includes comprehensive tests for all scenarios
- Updated documentation with 27 tools (was 26)
1 parent 7d61330 commit 1be0fb1

File tree

4 files changed

+227
-2
lines changed

4 files changed

+227
-2
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,13 +204,14 @@ If you run into any issues, checkout our [troubleshooting guide](./docs/troubles
204204

205205
<!-- BEGIN AUTO GENERATED TOOLS -->
206206

207-
- **Input automation** (7 tools)
207+
- **Input automation** (8 tools)
208208
- [`click`](docs/tool-reference.md#click)
209209
- [`drag`](docs/tool-reference.md#drag)
210210
- [`fill`](docs/tool-reference.md#fill)
211211
- [`fill_form`](docs/tool-reference.md#fill_form)
212212
- [`handle_dialog`](docs/tool-reference.md#handle_dialog)
213213
- [`hover`](docs/tool-reference.md#hover)
214+
- [`press_key`](docs/tool-reference.md#press_key)
214215
- [`upload_file`](docs/tool-reference.md#upload_file)
215216
- **Navigation automation** (7 tools)
216217
- [`close_page`](docs/tool-reference.md#close_page)

docs/tool-reference.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22

33
# Chrome DevTools MCP Tool Reference
44

5-
- **[Input automation](#input-automation)** (7 tools)
5+
- **[Input automation](#input-automation)** (8 tools)
66
- [`click`](#click)
77
- [`drag`](#drag)
88
- [`fill`](#fill)
99
- [`fill_form`](#fill_form)
1010
- [`handle_dialog`](#handle_dialog)
1111
- [`hover`](#hover)
12+
- [`press_key`](#press_key)
1213
- [`upload_file`](#upload_file)
1314
- **[Navigation automation](#navigation-automation)** (7 tools)
1415
- [`close_page`](#close_page)
@@ -101,6 +102,16 @@
101102

102103
---
103104

105+
### `press_key`
106+
107+
**Description:** Press a key or key combination on the keyboard. Supports modifier keys and combinations.
108+
109+
**Parameters:**
110+
111+
- **key** (string) **(required)**: Key to press. Can be a single key (e.g., "Enter", "Escape", "a") or a combination with modifiers (e.g., "Control+A", "Control+Shift+T", "Control++"). Modifier keys: Control, Shift, Alt, Meta.
112+
113+
---
114+
104115
### `upload_file`
105116

106117
**Description:** Upload a file through a provided element.

src/tools/input.ts

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,69 @@ export const uploadFile = defineTool({
216216
}
217217
},
218218
});
219+
220+
/**
 * Split a key combination string into its individual key tokens.
 *
 * Tokens are separated by "+". A "+" found where a token is expected (at the
 * start of the string or directly after a separator) is kept as part of the
 * token, so "Control++" yields ["Control", "+"].
 * Based on Playwright's implementation.
 *
 * @param keyString - Key or key combination, e.g. "Enter" or "Control+Shift+T".
 * @returns The tokens in input order.
 * @throws Error if the input is empty or ends with a dangling separator
 *   (e.g. "Control+"), because the final token would be an empty key.
 */
function splitKeyCombo(keyString: string): string[] {
  const keys: string[] = [];
  let building = '';
  for (const char of keyString) {
    if (char === '+' && building) {
      // A "+" only acts as a separator when a token precedes it.
      keys.push(building);
      building = '';
    } else {
      building += char;
    }
  }
  // Only the final token can be empty; reject it here instead of handing an
  // empty key name to the caller.
  if (!building) {
    throw new Error(
      `Invalid key combination "${keyString}": no key to press.`,
    );
  }
  keys.push(building);
  return keys;
}
240+
241+
/**
 * Tool that presses a single key or a modifier+key combination on the
 * currently selected page.
 *
 * The `key` parameter is split with `splitKeyCombo`; the last token is the key
 * to press and every preceding token is treated as a modifier. Modifiers are
 * pressed in the order given, the key is pressed, and the modifiers are then
 * released in reverse order. The whole sequence runs inside
 * `context.waitForEventsAfterAction`. On success a confirmation line is
 * appended to the response and a snapshot is requested.
 */
export const pressKey = defineTool({
  name: 'press_key',
  description: `Press a key or key combination on the keyboard. Supports modifier keys and combinations.`,
  annotations: {
    category: ToolCategories.INPUT_AUTOMATION,
    readOnlyHint: false,
  },
  schema: {
    key: z
      .string()
      .describe(
        'Key to press. Can be a single key (e.g., "Enter", "Escape", "a") or a combination with modifiers (e.g., "Control+A", "Control+Shift+T", "Control++"). Modifier keys: Control, Shift, Alt, Meta.',
      ),
  },
  handler: async (request, response, context) => {
    const page = context.getSelectedPage();
    const tokens = splitKeyCombo(request.params.key);
    const key = tokens[tokens.length - 1];
    const modifiers = tokens.slice(0, -1);

    await context.waitForEventsAfterAction(async () => {
      // Record each modifier only after its down() succeeded, so the cleanup
      // below releases exactly the keys that are really held.
      const held: string[] = [];
      try {
        // Press down modifiers
        for (const modifier of modifiers) {
          // @ts-expect-error - Puppeteer KeyInput type is too restrictive for dynamic input
          await page.keyboard.down(modifier);
          held.push(modifier);
        }

        // Press the key
        // @ts-expect-error - Puppeteer KeyInput type is too restrictive for dynamic input
        await page.keyboard.press(key);
      } finally {
        // Release modifiers in reverse order, even when a press above threw;
        // otherwise the page would be left with modifiers stuck down.
        for (const modifier of held.reverse()) {
          // @ts-expect-error - Puppeteer KeyInput type is too restrictive for dynamic input
          await page.keyboard.up(modifier);
        }
      }
    });

    response.appendResponseLine(
      `Successfully pressed key: ${request.params.key}`,
    );
    response.setIncludeSnapshot(true);
  },
});

tests/tools/input.test.ts

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import {
1515
drag,
1616
fillForm,
1717
uploadFile,
18+
pressKey,
1819
} from '../../src/tools/input.js';
1920
import {serverHooks} from '../server.js';
2021
import {html, withBrowser} from '../utils.js';
@@ -402,4 +403,150 @@ describe('input', () => {
402403
});
403404
});
404405
});
406+
407+
describe('pressKey', () => {
  // A lone key reaches the focused input as a keydown event.
  it('presses a simple key', async () => {
    await withBrowser(async (response, context) => {
      const selectedPage = context.getSelectedPage();
      const markup = `<!DOCTYPE html>
<input id="test-input" />
<div id="result"></div>
<script>
document.getElementById('test-input').addEventListener('keydown', (e) => {
document.getElementById('result').innerText = e.key;
});
</script>`;
      await selectedPage.setContent(markup);
      await context.createTextSnapshot();
      await selectedPage.focus('#test-input');

      const request = {params: {key: 'Enter'}};
      await pressKey.handler(request, response, context);

      const firstLine = response.responseLines[0];
      assert.strictEqual(firstLine, 'Successfully pressed key: Enter');
      assert.ok(response.includeSnapshot);

      // The page copies the key name of the keydown event into #result.
      const resultText = await selectedPage.$eval(
        '#result',
        node => (node as HTMLElement).innerText,
      );
      assert.strictEqual(resultText, 'Enter');
    });
  });

  // Control+A should select the whole content of the focused textarea.
  it('presses a key combination', async () => {
    await withBrowser(async (response, context) => {
      const selectedPage = context.getSelectedPage();
      const markup = `<!DOCTYPE html>
<textarea id="test-input">Hello World</textarea>
<script>
const input = document.getElementById('test-input');
input.focus();
input.setSelectionRange(0, 0);
</script>`;
      await selectedPage.setContent(markup);
      await context.createTextSnapshot();

      const request = {params: {key: 'Control+A'}};
      await pressKey.handler(request, response, context);

      const firstLine = response.responseLines[0];
      assert.strictEqual(firstLine, 'Successfully pressed key: Control+A');
      assert.ok(response.includeSnapshot);

      // Verify text is selected by inspecting the selection range
      const isFullySelected = await selectedPage.evaluate(() => {
        const area = document.getElementById(
          'test-input',
        ) as HTMLTextAreaElement;
        const {selectionStart, selectionEnd, value} = area;
        return selectionStart === 0 && selectionEnd === value.length;
      });
      assert.ok(isFullySelected, 'Text should be selected');
    });
  });

  // "Control++" means: the plus key pressed while Control is held.
  it('presses plus key with modifier (Control++)', async () => {
    await withBrowser(async (response, context) => {
      const selectedPage = context.getSelectedPage();
      const markup = `<!DOCTYPE html>
<div id="result"></div>
<script>
document.addEventListener('keydown', (e) => {
if (e.ctrlKey && e.key === '+') {
document.getElementById('result').innerText = 'ctrl-plus';
}
});
</script>`;
      await selectedPage.setContent(markup);
      await context.createTextSnapshot();

      const request = {params: {key: 'Control++'}};
      await pressKey.handler(request, response, context);

      const firstLine = response.responseLines[0];
      assert.strictEqual(firstLine, 'Successfully pressed key: Control++');
      assert.ok(response.includeSnapshot);

      const resultText = await selectedPage.$eval(
        '#result',
        node => (node as HTMLElement).innerText,
      );
      assert.strictEqual(resultText, 'ctrl-plus');
    });
  });

  // Several modifiers must all be held while the final key goes down.
  it('presses multiple modifiers', async () => {
    await withBrowser(async (response, context) => {
      const selectedPage = context.getSelectedPage();
      const markup = `<!DOCTYPE html>
<div id="result"></div>
<script>
document.addEventListener('keydown', (e) => {
if (e.ctrlKey && e.shiftKey && e.key === 'T') {
document.getElementById('result').innerText = 'ctrl-shift-t';
}
});
</script>`;
      await selectedPage.setContent(markup);
      await context.createTextSnapshot();

      const request = {params: {key: 'Control+Shift+T'}};
      await pressKey.handler(request, response, context);

      const firstLine = response.responseLines[0];
      assert.strictEqual(
        firstLine,
        'Successfully pressed key: Control+Shift+T',
      );
      assert.ok(response.includeSnapshot);

      const resultText = await selectedPage.$eval(
        '#result',
        node => (node as HTMLElement).innerText,
      );
      assert.strictEqual(resultText, 'ctrl-shift-t');
    });
  });
});
405552
});

0 commit comments

Comments
 (0)