Skip to content

Commit 1ecc5c2

Browse files
authored
improve cua key mapping logic (#910)
# why Currently a handful of keys fail to be pressed, and the key-handling logic is duplicated in several places. This change consolidates that logic into one location and properly handles the failing keys (part of STG-587). # what changed Consolidated key handling for CUA agents into one util. # test plan Tested locally.
1 parent c508a0a commit 1ecc5c2

File tree

4 files changed

+70
-125
lines changed

4 files changed

+70
-125
lines changed

lib/agent/AnthropicCUAClient.ts

Lines changed: 3 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ import {
1313
} from "@/types/agent";
1414
import { AgentClient } from "./AgentClient";
1515
import { AgentScreenshotProviderError } from "@/types/stagehandErrors";
16-
import { compressConversationImages } from "./imageCompressionUtils";
16+
import { compressConversationImages } from "./utils/imageCompression";
17+
import { mapKeyToPlaywright } from "./utils/cuaKeyMapping";
1718

1819
export type ResponseInputItem = AnthropicMessage | AnthropicToolResult;
1920

@@ -787,55 +788,7 @@ export class AnthropicCUAClient extends AgentClient {
787788
};
788789
} else if (action === "key") {
789790
const text = input.text as string;
790-
// Convert common key names to a format our handler can understand
791-
let mappedKey = text;
792-
793-
if (
794-
text === "Return" ||
795-
text === "return" ||
796-
text === "Enter" ||
797-
text === "enter"
798-
) {
799-
mappedKey = "Enter";
800-
} else if (text === "Tab" || text === "tab") {
801-
mappedKey = "Tab";
802-
} else if (
803-
text === "Escape" ||
804-
text === "escape" ||
805-
text === "Esc" ||
806-
text === "esc"
807-
) {
808-
mappedKey = "Escape";
809-
} else if (text === "Backspace" || text === "backspace") {
810-
mappedKey = "Backspace";
811-
} else if (
812-
text === "Delete" ||
813-
text === "delete" ||
814-
text === "Del" ||
815-
text === "del"
816-
) {
817-
mappedKey = "Delete";
818-
} else if (text === "ArrowUp" || text === "Up" || text === "up") {
819-
mappedKey = "ArrowUp";
820-
} else if (
821-
text === "ArrowDown" ||
822-
text === "Down" ||
823-
text === "down"
824-
) {
825-
mappedKey = "ArrowDown";
826-
} else if (
827-
text === "ArrowLeft" ||
828-
text === "Left" ||
829-
text === "left"
830-
) {
831-
mappedKey = "ArrowLeft";
832-
} else if (
833-
text === "ArrowRight" ||
834-
text === "Right" ||
835-
text === "right"
836-
) {
837-
mappedKey = "ArrowRight";
838-
}
791+
const mappedKey = mapKeyToPlaywright(text);
839792

840793
return {
841794
type: "key",

lib/agent/utils/cuaKeyMapping.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
 * Universal key mapping utility for converting various key representations
 * to Playwright-compatible key names. Shared by the CUA client and the agent
 * handler so that key normalization lives in exactly one place.
 */

/**
 * Map of key aliases to Playwright key names.
 *
 * Keys of this table are UPPER-CASE; lookups upper-case the input first, so
 * matching is case-insensitive ("return", "Return" and "RETURN" all resolve
 * to "Enter"). This handles keys from both Anthropic and OpenAI CUA APIs.
 */
const KEY_MAP: Record<string, string> = {
  ENTER: "Enter",
  RETURN: "Enter",
  ESCAPE: "Escape",
  ESC: "Escape",
  BACKSPACE: "Backspace",
  TAB: "Tab",
  SPACE: " ",
  DELETE: "Delete",
  DEL: "Delete",
  INSERT: "Insert",
  INS: "Insert",
  ARROWUP: "ArrowUp",
  ARROWDOWN: "ArrowDown",
  ARROWLEFT: "ArrowLeft",
  ARROWRIGHT: "ArrowRight",
  ARROW_UP: "ArrowUp",
  ARROW_DOWN: "ArrowDown",
  ARROW_LEFT: "ArrowLeft",
  ARROW_RIGHT: "ArrowRight",
  UP: "ArrowUp",
  DOWN: "ArrowDown",
  LEFT: "ArrowLeft",
  RIGHT: "ArrowRight",
  SHIFT: "Shift",
  CONTROL: "Control",
  CTRL: "Control",
  ALT: "Alt",
  OPTION: "Alt", // macOS alternative name
  META: "Meta",
  COMMAND: "Meta", // macOS
  CMD: "Meta", // macOS shorthand
  SUPER: "Meta", // Linux
  WINDOWS: "Meta", // Windows
  WIN: "Meta", // Windows shorthand
  HOME: "Home",
  END: "End",
  PAGEUP: "PageUp",
  PAGEDOWN: "PageDown",
  PAGE_UP: "PageUp",
  PAGE_DOWN: "PageDown",
  PGUP: "PageUp",
  PGDN: "PageDown",
};

/** Matches the upper-cased function keys F1 through F12. */
const FUNCTION_KEY_PATTERN = /^F(?:[1-9]|1[0-2])$/;

/**
 * Splits a key string such as "ctrl+shift+t" into its individual keys.
 *
 * A "+" only acts as a separator when a key name precedes it, so the literal
 * plus key survives both on its own ("+") and at the end of a combination
 * ("ctrl++" -> ["ctrl", "+"]).
 */
function splitKeyCombination(combination: string): string[] {
  const parts: string[] = [];
  let current = "";
  for (const char of combination) {
    if (char === "+" && current) {
      parts.push(current);
      current = "";
    } else {
      current += char;
    }
  }
  parts.push(current);
  return parts;
}

/**
 * Maps one key (no "+" combination) to its Playwright name, or returns it
 * unchanged when no alias is known.
 */
function mapSingleKey(key: string): string {
  const upperKey = key.toUpperCase();
  const mapped = KEY_MAP[upperKey];
  if (mapped !== undefined) return mapped;
  // Playwright expects function keys in upper-case form ("F5", not "f5").
  if (FUNCTION_KEY_PATTERN.test(upperKey)) return upperKey;
  return key;
}

/**
 * Maps a key name from various formats to Playwright-compatible format.
 *
 * Accepts a single key ("Return", "esc", "Page_Down") or a "+"-joined
 * combination ("ctrl+a", "cmd+shift+t"). Each part of a combination is mapped
 * independently, so modifier aliases are normalized as well. Unknown keys are
 * passed through untouched.
 *
 * @param key The key name in any supported format
 * @returns The Playwright-compatible key name; falsy input is returned as-is
 */
export function mapKeyToPlaywright(key: string): string {
  if (!key) return key;
  return splitKeyCombination(key).map(mapSingleKey).join("+");
}

lib/handlers/agentHandler.ts

Lines changed: 5 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
} from "@/types/agent";
1313
import { Stagehand } from "../index";
1414
import { StagehandFunctionName } from "@/types/stagehand";
15+
import { mapKeyToPlaywright } from "../agent/utils/cuaKeyMapping";
1516

1617
export class StagehandAgentHandler {
1718
private stagehand: Stagehand;
@@ -263,32 +264,8 @@ export class StagehandAgentHandler {
263264
const { keys } = action;
264265
if (Array.isArray(keys)) {
265266
for (const key of keys) {
266-
// Handle special keys
267-
if (key.includes("ENTER")) {
268-
await this.page.keyboard.press("Enter");
269-
} else if (key.includes("SPACE")) {
270-
await this.page.keyboard.press(" ");
271-
} else if (key.includes("TAB")) {
272-
await this.page.keyboard.press("Tab");
273-
} else if (key.includes("ESCAPE") || key.includes("ESC")) {
274-
await this.page.keyboard.press("Escape");
275-
} else if (key.includes("BACKSPACE")) {
276-
await this.page.keyboard.press("Backspace");
277-
} else if (key.includes("DELETE")) {
278-
await this.page.keyboard.press("Delete");
279-
} else if (key.includes("ARROW_UP")) {
280-
await this.page.keyboard.press("ArrowUp");
281-
} else if (key.includes("ARROW_DOWN")) {
282-
await this.page.keyboard.press("ArrowDown");
283-
} else if (key.includes("ARROW_LEFT")) {
284-
await this.page.keyboard.press("ArrowLeft");
285-
} else if (key.includes("ARROW_RIGHT")) {
286-
await this.page.keyboard.press("ArrowRight");
287-
} else {
288-
// For other keys, use the existing conversion
289-
const playwrightKey = this.convertKeyName(key);
290-
await this.page.keyboard.press(playwrightKey);
291-
}
267+
const mappedKey = mapKeyToPlaywright(key);
268+
await this.page.keyboard.press(mappedKey);
292269
}
293270
}
294271
return { success: true };
@@ -381,18 +358,8 @@ export class StagehandAgentHandler {
381358
case "key": {
382359
// Handle the 'key' action type from Anthropic
383360
const { text } = action;
384-
if (text === "Return" || text === "Enter") {
385-
await this.page.keyboard.press("Enter");
386-
} else if (text === "Tab") {
387-
await this.page.keyboard.press("Tab");
388-
} else if (text === "Escape" || text === "Esc") {
389-
await this.page.keyboard.press("Escape");
390-
} else if (text === "Backspace") {
391-
await this.page.keyboard.press("Backspace");
392-
} else {
393-
// For other keys, try to press directly
394-
await this.page.keyboard.press(text as string);
395-
}
361+
const playwrightKey = mapKeyToPlaywright(text as string);
362+
await this.page.keyboard.press(playwrightKey);
396363
return { success: true };
397364
}
398365

@@ -614,43 +581,6 @@ export class StagehandAgentHandler {
614581
}
615582
}
616583

617-
private convertKeyName(key: string): string {
618-
// Map of CUA key names to Playwright key names
619-
const keyMap: Record<string, string> = {
620-
ENTER: "Enter",
621-
ESCAPE: "Escape",
622-
BACKSPACE: "Backspace",
623-
TAB: "Tab",
624-
SPACE: " ",
625-
ARROWUP: "ArrowUp",
626-
ARROWDOWN: "ArrowDown",
627-
ARROWLEFT: "ArrowLeft",
628-
ARROWRIGHT: "ArrowRight",
629-
UP: "ArrowUp",
630-
DOWN: "ArrowDown",
631-
LEFT: "ArrowLeft",
632-
RIGHT: "ArrowRight",
633-
SHIFT: "Shift",
634-
CONTROL: "Control",
635-
ALT: "Alt",
636-
META: "Meta",
637-
COMMAND: "Meta",
638-
CMD: "Meta",
639-
CTRL: "Control",
640-
DELETE: "Delete",
641-
HOME: "Home",
642-
END: "End",
643-
PAGEUP: "PageUp",
644-
PAGEDOWN: "PageDown",
645-
};
646-
647-
// Convert to uppercase for case-insensitive matching
648-
const upperKey = key.toUpperCase();
649-
650-
// Return the mapped key or the original key if not found
651-
return keyMap[upperKey] || key;
652-
}
653-
654584
private get page() {
655585
// stagehand.page is the live proxy you already implemented
656586
return this.stagehand.page;

0 commit comments

Comments
 (0)