diff --git a/lib/agent/AnthropicCUAClient.ts b/lib/agent/AnthropicCUAClient.ts index fc8af9b26..8b410692a 100644 --- a/lib/agent/AnthropicCUAClient.ts +++ b/lib/agent/AnthropicCUAClient.ts @@ -13,7 +13,8 @@ import { } from "@/types/agent"; import { AgentClient } from "./AgentClient"; import { AgentScreenshotProviderError } from "@/types/stagehandErrors"; -import { compressConversationImages } from "./imageCompressionUtils"; +import { compressConversationImages } from "./utils/imageCompression"; +import { mapKeyToPlaywright } from "./utils/cuaKeyMapping"; export type ResponseInputItem = AnthropicMessage | AnthropicToolResult; @@ -787,55 +788,7 @@ export class AnthropicCUAClient extends AgentClient { }; } else if (action === "key") { const text = input.text as string; - // Convert common key names to a format our handler can understand - let mappedKey = text; - - if ( - text === "Return" || - text === "return" || - text === "Enter" || - text === "enter" - ) { - mappedKey = "Enter"; - } else if (text === "Tab" || text === "tab") { - mappedKey = "Tab"; - } else if ( - text === "Escape" || - text === "escape" || - text === "Esc" || - text === "esc" - ) { - mappedKey = "Escape"; - } else if (text === "Backspace" || text === "backspace") { - mappedKey = "Backspace"; - } else if ( - text === "Delete" || - text === "delete" || - text === "Del" || - text === "del" - ) { - mappedKey = "Delete"; - } else if (text === "ArrowUp" || text === "Up" || text === "up") { - mappedKey = "ArrowUp"; - } else if ( - text === "ArrowDown" || - text === "Down" || - text === "down" - ) { - mappedKey = "ArrowDown"; - } else if ( - text === "ArrowLeft" || - text === "Left" || - text === "left" - ) { - mappedKey = "ArrowLeft"; - } else if ( - text === "ArrowRight" || - text === "Right" || - text === "right" - ) { - mappedKey = "ArrowRight"; - } + const mappedKey = mapKeyToPlaywright(text); return { type: "key", diff --git a/lib/agent/utils/cuaKeyMapping.ts b/lib/agent/utils/cuaKeyMapping.ts new 
/**
 * Key mapping utility that converts the key names emitted by computer-use
 * (CUA) models into the key names Playwright's `keyboard.press` accepts.
 * Shared by the CUA client and the agent handler so both apply one table.
 */

/**
 * Lookup table from UPPER-CASED key aliases to Playwright key names.
 *
 * Lookups upper-case the incoming name first, so every key in this table must
 * be upper case; that is what makes matching case-insensitive
 * ("return", "Return" and "RETURN" all resolve to "Enter").
 */
const KEY_MAP: Readonly<Record<string, string>> = {
  // Editing / whitespace keys
  ENTER: "Enter",
  RETURN: "Enter",
  KP_ENTER: "Enter", // keypad Enter; the pre-refactor handler matched any key containing "ENTER"
  ESCAPE: "Escape",
  ESC: "Escape",
  BACKSPACE: "Backspace",
  TAB: "Tab",
  SPACE: " ",
  DELETE: "Delete",
  DEL: "Delete",
  INSERT: "Insert",
  INS: "Insert",

  // Arrow keys
  ARROWUP: "ArrowUp",
  ARROWDOWN: "ArrowDown",
  ARROWLEFT: "ArrowLeft",
  ARROWRIGHT: "ArrowRight",
  ARROW_UP: "ArrowUp",
  ARROW_DOWN: "ArrowDown",
  ARROW_LEFT: "ArrowLeft",
  ARROW_RIGHT: "ArrowRight",
  UP: "ArrowUp",
  DOWN: "ArrowDown",
  LEFT: "ArrowLeft",
  RIGHT: "ArrowRight",

  // Modifiers
  SHIFT: "Shift",
  CONTROL: "Control",
  CTRL: "Control",
  ALT: "Alt",
  OPTION: "Alt", // macOS alternative name
  META: "Meta",
  COMMAND: "Meta", // macOS
  CMD: "Meta", // macOS shorthand
  SUPER: "Meta", // Linux
  WINDOWS: "Meta", // Windows
  WIN: "Meta", // Windows shorthand

  // Navigation
  HOME: "Home",
  END: "End",
  PAGEUP: "PageUp",
  PAGEDOWN: "PageDown",
  PAGE_UP: "PageUp",
  PAGE_DOWN: "PageDown",
  PGUP: "PageUp",
  PGDN: "PageDown",
};

/**
 * Splits a shortcut such as "ctrl+shift+p" into its individual key tokens.
 *
 * A "+" only acts as a separator when a token is already being built, so the
 * plus key itself survives: "+" -> ["+"] and "ctrl++" -> ["ctrl", "+"].
 */
function splitKeyCombo(combo: string): string[] {
  const tokens: string[] = [];
  let current = "";
  for (const ch of combo) {
    if (ch === "+" && current !== "") {
      tokens.push(current);
      current = "";
    } else {
      current += ch;
    }
  }
  tokens.push(current);
  return tokens;
}

/**
 * Maps one key token (no "+" separators) to its Playwright name.
 * Unknown tokens are returned trimmed but otherwise untouched.
 */
function mapSingleKey(token: string): string {
  const trimmed = token.trim();
  // Whitespace-only tokens (e.g. a literal " " for the space bar) are real
  // keys, not padding, so hand them back exactly as received.
  if (trimmed === "") return token;
  return KEY_MAP[trimmed.toUpperCase()] ?? trimmed;
}

/**
 * Maps a key name, or a "+"-separated shortcut, to Playwright-compatible form.
 *
 * Each token is matched case-insensitively against {@link KEY_MAP}; tokens
 * without an entry pass through unchanged, so plain characters ("a", "7")
 * and names that are already valid for Playwright keep working.
 *
 * @param key The key name or shortcut in any supported format,
 *   e.g. "Return", "page_down", "ctrl+a", "cmd+shift+p".
 * @returns The Playwright-compatible key or shortcut, e.g. "Enter",
 *   "PageDown", "Control+a", "Meta+Shift+p". Empty or non-string input is
 *   returned as-is.
 */
export function mapKeyToPlaywright(key: string): string {
  // Callers cast untyped model output to string, so guard at runtime too.
  if (typeof key !== "string" || key === "") return key;
  return splitKeyCombo(key).map(mapSingleKey).join("+");
}
a/lib/handlers/agentHandler.ts +++ b/lib/handlers/agentHandler.ts @@ -12,6 +12,7 @@ import { } from "@/types/agent"; import { Stagehand } from "../index"; import { StagehandFunctionName } from "@/types/stagehand"; +import { mapKeyToPlaywright } from "../agent/utils/cuaKeyMapping"; export class StagehandAgentHandler { private stagehand: Stagehand; @@ -263,32 +264,8 @@ export class StagehandAgentHandler { const { keys } = action; if (Array.isArray(keys)) { for (const key of keys) { - // Handle special keys - if (key.includes("ENTER")) { - await this.page.keyboard.press("Enter"); - } else if (key.includes("SPACE")) { - await this.page.keyboard.press(" "); - } else if (key.includes("TAB")) { - await this.page.keyboard.press("Tab"); - } else if (key.includes("ESCAPE") || key.includes("ESC")) { - await this.page.keyboard.press("Escape"); - } else if (key.includes("BACKSPACE")) { - await this.page.keyboard.press("Backspace"); - } else if (key.includes("DELETE")) { - await this.page.keyboard.press("Delete"); - } else if (key.includes("ARROW_UP")) { - await this.page.keyboard.press("ArrowUp"); - } else if (key.includes("ARROW_DOWN")) { - await this.page.keyboard.press("ArrowDown"); - } else if (key.includes("ARROW_LEFT")) { - await this.page.keyboard.press("ArrowLeft"); - } else if (key.includes("ARROW_RIGHT")) { - await this.page.keyboard.press("ArrowRight"); - } else { - // For other keys, use the existing conversion - const playwrightKey = this.convertKeyName(key); - await this.page.keyboard.press(playwrightKey); - } + const mappedKey = mapKeyToPlaywright(key); + await this.page.keyboard.press(mappedKey); } } return { success: true }; @@ -381,18 +358,8 @@ export class StagehandAgentHandler { case "key": { // Handle the 'key' action type from Anthropic const { text } = action; - if (text === "Return" || text === "Enter") { - await this.page.keyboard.press("Enter"); - } else if (text === "Tab") { - await this.page.keyboard.press("Tab"); - } else if (text === "Escape" || 
text === "Esc") { - await this.page.keyboard.press("Escape"); - } else if (text === "Backspace") { - await this.page.keyboard.press("Backspace"); - } else { - // For other keys, try to press directly - await this.page.keyboard.press(text as string); - } + const playwrightKey = mapKeyToPlaywright(text as string); + await this.page.keyboard.press(playwrightKey); return { success: true }; } @@ -614,43 +581,6 @@ export class StagehandAgentHandler { } } - private convertKeyName(key: string): string { - // Map of CUA key names to Playwright key names - const keyMap: Record = { - ENTER: "Enter", - ESCAPE: "Escape", - BACKSPACE: "Backspace", - TAB: "Tab", - SPACE: " ", - ARROWUP: "ArrowUp", - ARROWDOWN: "ArrowDown", - ARROWLEFT: "ArrowLeft", - ARROWRIGHT: "ArrowRight", - UP: "ArrowUp", - DOWN: "ArrowDown", - LEFT: "ArrowLeft", - RIGHT: "ArrowRight", - SHIFT: "Shift", - CONTROL: "Control", - ALT: "Alt", - META: "Meta", - COMMAND: "Meta", - CMD: "Meta", - CTRL: "Control", - DELETE: "Delete", - HOME: "Home", - END: "End", - PAGEUP: "PageUp", - PAGEDOWN: "PageDown", - }; - - // Convert to uppercase for case-insensitive matching - const upperKey = key.toUpperCase(); - - // Return the mapped key or the original key if not found - return keyMap[upperKey] || key; - } - private get page() { // stagehand.page is the live proxy you already implemented return this.stagehand.page;