Skip to content

improve cua key mapping logic #910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/huge-suns-dress.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Improve CUA key mapping logic
53 changes: 3 additions & 50 deletions lib/agent/AnthropicCUAClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ import {
} from "@/types/agent";
import { AgentClient } from "./AgentClient";
import { AgentScreenshotProviderError } from "@/types/stagehandErrors";
import { compressConversationImages } from "./imageCompressionUtils";
import { compressConversationImages } from "./utils/imageCompression";
import { mapKeyToPlaywright } from "./utils/cuaKeyMapping";

export type ResponseInputItem = AnthropicMessage | AnthropicToolResult;

Expand Down Expand Up @@ -787,55 +788,7 @@ export class AnthropicCUAClient extends AgentClient {
};
} else if (action === "key") {
const text = input.text as string;
// Convert common key names to a format our handler can understand
let mappedKey = text;

if (
text === "Return" ||
text === "return" ||
text === "Enter" ||
text === "enter"
) {
mappedKey = "Enter";
} else if (text === "Tab" || text === "tab") {
mappedKey = "Tab";
} else if (
text === "Escape" ||
text === "escape" ||
text === "Esc" ||
text === "esc"
) {
mappedKey = "Escape";
} else if (text === "Backspace" || text === "backspace") {
mappedKey = "Backspace";
} else if (
text === "Delete" ||
text === "delete" ||
text === "Del" ||
text === "del"
) {
mappedKey = "Delete";
} else if (text === "ArrowUp" || text === "Up" || text === "up") {
mappedKey = "ArrowUp";
} else if (
text === "ArrowDown" ||
text === "Down" ||
text === "down"
) {
mappedKey = "ArrowDown";
} else if (
text === "ArrowLeft" ||
text === "Left" ||
text === "left"
) {
mappedKey = "ArrowLeft";
} else if (
text === "ArrowRight" ||
text === "Right" ||
text === "right"
) {
mappedKey = "ArrowRight";
}
const mappedKey = mapKeyToPlaywright(text);

return {
type: "key",
Expand Down
62 changes: 62 additions & 0 deletions lib/agent/utils/cuaKeyMapping.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Universal key mapping utility for converting various key representations
* to Playwright-compatible key names. Used by all CUA clients and handlers.
*/

/**
 * Map of key-name variations to Playwright key names.
 * Lookups are performed on the upper-cased input, so every entry here is
 * upper-case. This handles keys from both Anthropic and OpenAI CUA APIs.
 */
const KEY_MAP: Readonly<Record<string, string>> = {
  ENTER: "Enter",
  RETURN: "Enter",
  ESCAPE: "Escape",
  ESC: "Escape",
  BACKSPACE: "Backspace",
  TAB: "Tab",
  SPACE: " ",
  DELETE: "Delete",
  DEL: "Delete",
  ARROWUP: "ArrowUp",
  ARROWDOWN: "ArrowDown",
  ARROWLEFT: "ArrowLeft",
  ARROWRIGHT: "ArrowRight",
  ARROW_UP: "ArrowUp",
  ARROW_DOWN: "ArrowDown",
  ARROW_LEFT: "ArrowLeft",
  ARROW_RIGHT: "ArrowRight",
  UP: "ArrowUp",
  DOWN: "ArrowDown",
  LEFT: "ArrowLeft",
  RIGHT: "ArrowRight",
  SHIFT: "Shift",
  CONTROL: "Control",
  CTRL: "Control",
  ALT: "Alt",
  OPTION: "Alt", // macOS alternative name
  META: "Meta",
  COMMAND: "Meta", // macOS
  CMD: "Meta", // macOS shorthand
  SUPER: "Meta", // Linux
  WINDOWS: "Meta", // Windows
  WIN: "Meta", // Windows shorthand
  HOME: "Home",
  END: "End",
  PAGEUP: "PageUp",
  PAGEDOWN: "PageDown",
  PAGE_UP: "PageUp",
  PAGE_DOWN: "PageDown",
  PGUP: "PageUp",
  PGDN: "PageDown",
};

/**
 * Splits a shortcut string such as "ctrl+shift+t" into its individual keys.
 * A "+" only acts as a separator when it follows a non-empty key name, so
 * "Control++" yields ["Control", "+"] and a lone "+" stays a single key.
 */
function splitKeyCombo(combo: string): string[] {
  const parts: string[] = [];
  let current = "";
  for (const ch of combo) {
    if (ch === "+" && current) {
      parts.push(current);
      current = "";
    } else {
      current += ch;
    }
  }
  parts.push(current);
  return parts;
}

/** Maps one key name case-insensitively; unknown names are returned unchanged. */
function mapSingleKey(key: string): string {
  return KEY_MAP[key.toUpperCase()] ?? key;
}

/**
 * Maps a key name from various formats to Playwright-compatible format.
 *
 * Single keys are matched case-insensitively against KEY_MAP. Key
 * combinations joined with "+" (e.g. "ctrl+a", "cmd+shift+t") are mapped
 * part by part and re-joined, so modifier aliases inside a shortcut are
 * translated as well. Anything that has no mapping is passed through as-is.
 *
 * @param key The key name (or "+"-joined combination) in any supported format
 * @returns The Playwright-compatible key name or combination
 */
export function mapKeyToPlaywright(key: string): string {
  if (!key) return key;

  // Whole-string match first: keeps the behaviour for plain keys unchanged.
  const direct = KEY_MAP[key.toUpperCase()];
  if (direct !== undefined) return direct;

  // No separator means an unmapped single key: hand it to Playwright untouched.
  if (!key.includes("+")) return key;

  return splitKeyCombo(key).map(mapSingleKey).join("+");
}
File renamed without changes.
80 changes: 5 additions & 75 deletions lib/handlers/agentHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
} from "@/types/agent";
import { Stagehand } from "../index";
import { StagehandFunctionName } from "@/types/stagehand";
import { mapKeyToPlaywright } from "../agent/utils/cuaKeyMapping";

export class StagehandAgentHandler {
private stagehand: Stagehand;
Expand Down Expand Up @@ -263,32 +264,8 @@ export class StagehandAgentHandler {
const { keys } = action;
if (Array.isArray(keys)) {
for (const key of keys) {
// Handle special keys
if (key.includes("ENTER")) {
await this.page.keyboard.press("Enter");
} else if (key.includes("SPACE")) {
await this.page.keyboard.press(" ");
} else if (key.includes("TAB")) {
await this.page.keyboard.press("Tab");
} else if (key.includes("ESCAPE") || key.includes("ESC")) {
await this.page.keyboard.press("Escape");
} else if (key.includes("BACKSPACE")) {
await this.page.keyboard.press("Backspace");
} else if (key.includes("DELETE")) {
await this.page.keyboard.press("Delete");
} else if (key.includes("ARROW_UP")) {
await this.page.keyboard.press("ArrowUp");
} else if (key.includes("ARROW_DOWN")) {
await this.page.keyboard.press("ArrowDown");
} else if (key.includes("ARROW_LEFT")) {
await this.page.keyboard.press("ArrowLeft");
} else if (key.includes("ARROW_RIGHT")) {
await this.page.keyboard.press("ArrowRight");
} else {
// For other keys, use the existing conversion
const playwrightKey = this.convertKeyName(key);
await this.page.keyboard.press(playwrightKey);
}
const mappedKey = mapKeyToPlaywright(key);
await this.page.keyboard.press(mappedKey);
}
}
return { success: true };
Expand Down Expand Up @@ -381,18 +358,8 @@ export class StagehandAgentHandler {
case "key": {
// Handle the 'key' action type from Anthropic
const { text } = action;
if (text === "Return" || text === "Enter") {
await this.page.keyboard.press("Enter");
} else if (text === "Tab") {
await this.page.keyboard.press("Tab");
} else if (text === "Escape" || text === "Esc") {
await this.page.keyboard.press("Escape");
} else if (text === "Backspace") {
await this.page.keyboard.press("Backspace");
} else {
// For other keys, try to press directly
await this.page.keyboard.press(text as string);
}
const playwrightKey = mapKeyToPlaywright(text as string);
await this.page.keyboard.press(playwrightKey);
return { success: true };
}

Expand Down Expand Up @@ -614,43 +581,6 @@ export class StagehandAgentHandler {
}
}

/**
 * Converts a CUA key name to the corresponding Playwright key name.
 *
 * The lookup is case-insensitive: the input is upper-cased and matched
 * against a fixed table. Names with no entry in the table (including
 * "+"-joined combinations, which are not split here) are returned unchanged.
 *
 * @param key The key name to convert
 * @returns The mapped Playwright key name, or `key` itself when unmapped
 */
private convertKeyName(key: string): string {
// Map of CUA key names to Playwright key names
const keyMap: Record<string, string> = {
ENTER: "Enter",
ESCAPE: "Escape",
BACKSPACE: "Backspace",
TAB: "Tab",
SPACE: " ",
ARROWUP: "ArrowUp",
ARROWDOWN: "ArrowDown",
ARROWLEFT: "ArrowLeft",
ARROWRIGHT: "ArrowRight",
UP: "ArrowUp",
DOWN: "ArrowDown",
LEFT: "ArrowLeft",
RIGHT: "ArrowRight",
SHIFT: "Shift",
CONTROL: "Control",
ALT: "Alt",
META: "Meta",
COMMAND: "Meta",
CMD: "Meta",
CTRL: "Control",
DELETE: "Delete",
HOME: "Home",
END: "End",
PAGEUP: "PageUp",
PAGEDOWN: "PageDown",
};

// Convert to uppercase for case-insensitive matching
const upperKey = key.toUpperCase();

// Return the mapped key or the original key if not found
return keyMap[upperKey] || key;
}

private get page() {
// stagehand.page is the live proxy you already implemented
return this.stagehand.page;
Expand Down
Loading