Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/huge-suns-dress.md
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we delete this changeset

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

deleted

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Improve CUA key mapping logic
53 changes: 3 additions & 50 deletions lib/agent/AnthropicCUAClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ import {
} from "@/types/agent";
import { AgentClient } from "./AgentClient";
import { AgentScreenshotProviderError } from "@/types/stagehandErrors";
import { compressConversationImages } from "./imageCompressionUtils";
import { compressConversationImages } from "./utils/imageCompression";
import { mapKeyToPlaywright } from "./utils/cuaKeyMapping";

export type ResponseInputItem = AnthropicMessage | AnthropicToolResult;

Expand Down Expand Up @@ -787,55 +788,7 @@ export class AnthropicCUAClient extends AgentClient {
};
} else if (action === "key") {
const text = input.text as string;
// Convert common key names to a format our handler can understand
let mappedKey = text;

if (
text === "Return" ||
text === "return" ||
text === "Enter" ||
text === "enter"
) {
mappedKey = "Enter";
} else if (text === "Tab" || text === "tab") {
mappedKey = "Tab";
} else if (
text === "Escape" ||
text === "escape" ||
text === "Esc" ||
text === "esc"
) {
mappedKey = "Escape";
} else if (text === "Backspace" || text === "backspace") {
mappedKey = "Backspace";
} else if (
text === "Delete" ||
text === "delete" ||
text === "Del" ||
text === "del"
) {
mappedKey = "Delete";
} else if (text === "ArrowUp" || text === "Up" || text === "up") {
mappedKey = "ArrowUp";
} else if (
text === "ArrowDown" ||
text === "Down" ||
text === "down"
) {
mappedKey = "ArrowDown";
} else if (
text === "ArrowLeft" ||
text === "Left" ||
text === "left"
) {
mappedKey = "ArrowLeft";
} else if (
text === "ArrowRight" ||
text === "Right" ||
text === "right"
) {
mappedKey = "ArrowRight";
}
const mappedKey = mapKeyToPlaywright(text);

return {
type: "key",
Expand Down
62 changes: 62 additions & 0 deletions lib/agent/utils/cuaKeyMapping.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/**
* Universal key mapping utility for converting various key representations
* to Playwright-compatible key names. Used by all CUA clients and handlers.
*/

/**
 * Map of upper-cased key aliases to Playwright key names.
 * This handles keys from both Anthropic and OpenAI CUA APIs; lookups are
 * made case-insensitive by upper-casing the incoming key first.
 */
const KEY_MAP: Record<string, string> = {
  ENTER: "Enter",
  RETURN: "Enter",
  ESCAPE: "Escape",
  ESC: "Escape",
  BACKSPACE: "Backspace",
  TAB: "Tab",
  SPACE: " ",
  DELETE: "Delete",
  DEL: "Delete",
  ARROWUP: "ArrowUp",
  ARROWDOWN: "ArrowDown",
  ARROWLEFT: "ArrowLeft",
  ARROWRIGHT: "ArrowRight",
  ARROW_UP: "ArrowUp",
  ARROW_DOWN: "ArrowDown",
  ARROW_LEFT: "ArrowLeft",
  ARROW_RIGHT: "ArrowRight",
  UP: "ArrowUp",
  DOWN: "ArrowDown",
  LEFT: "ArrowLeft",
  RIGHT: "ArrowRight",
  SHIFT: "Shift",
  CONTROL: "Control",
  CTRL: "Control",
  ALT: "Alt",
  OPTION: "Alt", // macOS alternative name
  META: "Meta",
  COMMAND: "Meta", // macOS
  CMD: "Meta", // macOS shorthand
  SUPER: "Meta", // Linux
  WINDOWS: "Meta", // Windows
  WIN: "Meta", // Windows shorthand
  HOME: "Home",
  END: "End",
  PAGEUP: "PageUp",
  PAGEDOWN: "PageDown",
  PAGE_UP: "PageUp",
  PAGE_DOWN: "PageDown",
  PGUP: "PageUp",
  PGDN: "PageDown",
};

/**
 * Splits a key chord such as "ctrl+shift+a" into its individual keys.
 *
 * A "+" only acts as a separator when it follows a non-empty key name, so a
 * literal plus key ("+" on its own, or the trailing one in "ctrl++") is kept
 * as a key rather than producing an empty segment. Joining the result with
 * "+" reproduces the input exactly.
 */
function splitKeyChord(chord: string): string[] {
  const parts: string[] = [];
  let current = "";
  for (const char of chord) {
    if (char === "+" && current) {
      parts.push(current);
      current = "";
    } else {
      current += char;
    }
  }
  parts.push(current);
  return parts;
}

/**
 * Maps a key name from various formats to Playwright-compatible format.
 *
 * Single keys ("Return", "esc", "page_down") are looked up case-insensitively
 * in KEY_MAP. Chords written as "modifier+key" ("ctrl+a", "cmd+shift+p") are
 * mapped part by part and rejoined with "+", so each modifier alias is
 * translated as well. Parts with no KEY_MAP entry are passed through
 * unchanged, as are empty inputs.
 *
 * @param key The key name or "+"-separated chord in any supported format
 * @returns The Playwright-compatible key name or chord
 */
export function mapKeyToPlaywright(key: string): string {
  if (!key) return key;
  return splitKeyChord(key)
    .map((part) => KEY_MAP[part.toUpperCase()] ?? part)
    .join("+");
}
File renamed without changes.
80 changes: 5 additions & 75 deletions lib/handlers/agentHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
} from "@/types/agent";
import { Stagehand } from "../index";
import { StagehandFunctionName } from "@/types/stagehand";
import { mapKeyToPlaywright } from "../agent/utils/cuaKeyMapping";

export class StagehandAgentHandler {
private stagehand: Stagehand;
Expand Down Expand Up @@ -263,32 +264,8 @@ export class StagehandAgentHandler {
const { keys } = action;
if (Array.isArray(keys)) {
for (const key of keys) {
// Handle special keys
if (key.includes("ENTER")) {
await this.page.keyboard.press("Enter");
} else if (key.includes("SPACE")) {
await this.page.keyboard.press(" ");
} else if (key.includes("TAB")) {
await this.page.keyboard.press("Tab");
} else if (key.includes("ESCAPE") || key.includes("ESC")) {
await this.page.keyboard.press("Escape");
} else if (key.includes("BACKSPACE")) {
await this.page.keyboard.press("Backspace");
} else if (key.includes("DELETE")) {
await this.page.keyboard.press("Delete");
} else if (key.includes("ARROW_UP")) {
await this.page.keyboard.press("ArrowUp");
} else if (key.includes("ARROW_DOWN")) {
await this.page.keyboard.press("ArrowDown");
} else if (key.includes("ARROW_LEFT")) {
await this.page.keyboard.press("ArrowLeft");
} else if (key.includes("ARROW_RIGHT")) {
await this.page.keyboard.press("ArrowRight");
} else {
// For other keys, use the existing conversion
const playwrightKey = this.convertKeyName(key);
await this.page.keyboard.press(playwrightKey);
}
const mappedKey = mapKeyToPlaywright(key);
await this.page.keyboard.press(mappedKey);
}
}
return { success: true };
Expand Down Expand Up @@ -381,18 +358,8 @@ export class StagehandAgentHandler {
case "key": {
// Handle the 'key' action type from Anthropic
const { text } = action;
if (text === "Return" || text === "Enter") {
await this.page.keyboard.press("Enter");
} else if (text === "Tab") {
await this.page.keyboard.press("Tab");
} else if (text === "Escape" || text === "Esc") {
await this.page.keyboard.press("Escape");
} else if (text === "Backspace") {
await this.page.keyboard.press("Backspace");
} else {
// For other keys, try to press directly
await this.page.keyboard.press(text as string);
}
const playwrightKey = mapKeyToPlaywright(text as string);
await this.page.keyboard.press(playwrightKey);
return { success: true };
}

Expand Down Expand Up @@ -614,43 +581,6 @@ export class StagehandAgentHandler {
}
}

private convertKeyName(key: string): string {
  // Upper-cased CUA key aliases paired with the Playwright key each denotes.
  const playwrightNames = new Map<string, string>([
    ["ENTER", "Enter"],
    ["ESCAPE", "Escape"],
    ["BACKSPACE", "Backspace"],
    ["TAB", "Tab"],
    ["SPACE", " "],
    ["ARROWUP", "ArrowUp"],
    ["ARROWDOWN", "ArrowDown"],
    ["ARROWLEFT", "ArrowLeft"],
    ["ARROWRIGHT", "ArrowRight"],
    ["UP", "ArrowUp"],
    ["DOWN", "ArrowDown"],
    ["LEFT", "ArrowLeft"],
    ["RIGHT", "ArrowRight"],
    ["SHIFT", "Shift"],
    ["CONTROL", "Control"],
    ["ALT", "Alt"],
    ["META", "Meta"],
    ["COMMAND", "Meta"],
    ["CMD", "Meta"],
    ["CTRL", "Control"],
    ["DELETE", "Delete"],
    ["HOME", "Home"],
    ["END", "End"],
    ["PAGEUP", "PageUp"],
    ["PAGEDOWN", "PageDown"],
  ]);

  // Normalise case so "enter", "Enter" and "ENTER" all hit the same entry.
  const normalized = key.toUpperCase();

  // Unknown keys are handed back exactly as they were received.
  return playwrightNames.get(normalized) ?? key;
}

private get page() {
// stagehand.page is the live proxy you already implemented
return this.stagehand.page;
Expand Down