diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index 36fae6f3d9a6..e555bf2e5aec 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -118,6 +118,7 @@ export const globalSettingsSchema = z.object({ browserToolEnabled: z.boolean().optional(), browserViewportSize: z.string().optional(), screenshotQuality: z.number().optional(), + browserActionsAutoExpand: z.boolean().optional(), remoteBrowserEnabled: z.boolean().optional(), remoteBrowserHost: z.string().optional(), cachedChromeHostUrl: z.string().optional(), @@ -308,6 +309,7 @@ export const EVALS_SETTINGS: RooCodeSettings = { browserToolEnabled: false, browserViewportSize: "900x600", screenshotQuality: 75, + browserActionsAutoExpand: false, remoteBrowserEnabled: false, ttsEnabled: false, diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index b02078fc0d8b..dae13f5c2320 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -156,6 +156,7 @@ export const clineSays = [ "shell_integration_warning", "browser_action", "browser_action_result", + "browser_session_status", "mcp_server_request_started", "mcp_server_response", "subtask_result", diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 689675999fd1..cb08af37356e 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -355,8 +355,32 @@ export async function presentAssistantMessage(cline: Task) { return text.replace(tagRegex, "") } - if (block.name !== "browser_action") { - await cline.browserSession.closeBrowser() + // Keep browser open during an active session so other tools can run. + // Session is active if we've seen any browser_action_result and the last browser_action is not "close". + try { + const messages = cline.clineMessages || [] + const hasStarted = messages.some((m: any) => m.say === "browser_action_result") + let isClosed = false + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] + if (m.say === "browser_action") { + try { + const act = JSON.parse(m.text || "{}") + isClosed = act.action === "close" + } catch {} + break + } + } + const sessionActive = hasStarted && !isClosed + // Only auto-close when no active browser session is present, and this isn't a browser_action + if (!sessionActive && block.name !== "browser_action") { + await cline.browserSession.closeBrowser() + } + } catch { + // On any unexpected error, fall back to conservative behavior + if (block.name !== "browser_action") { + await cline.browserSession.closeBrowser() + } } if (!block.partial) { diff --git a/src/core/environment/__tests__/getEnvironmentDetails.spec.ts b/src/core/environment/__tests__/getEnvironmentDetails.spec.ts index 1110aa8831b9..4b07d4e775b2 100644 --- a/src/core/environment/__tests__/getEnvironmentDetails.spec.ts +++ b/src/core/environment/__tests__/getEnvironmentDetails.spec.ts @@ -116,6 +116,9 @@ describe("getEnvironmentDetails", () => { deref: vi.fn().mockReturnValue(mockProvider), [Symbol.toStringTag]: "WeakRef", } as unknown as WeakRef, + browserSession: { + isSessionActive: vi.fn().mockReturnValue(false), + } as any, } // Mock other dependencies. @@ -390,4 +393,18 @@ describe("getEnvironmentDetails", () => { const result = await getEnvironmentDetails(cline as Task) expect(result).toContain("REMINDERS") }) + it("should include Browser Session Status when inactive", async () => { + const result = await getEnvironmentDetails(mockCline as Task) + expect(result).toContain("# Browser Session Status") + expect(result).toContain("Inactive - Browser is not launched") + }) + + it("should include Browser Session Status with current viewport when active", async () => { + ;(mockCline.browserSession as any).isSessionActive = vi.fn().mockReturnValue(true) + ;(mockCline.browserSession as any).getViewportSize = vi.fn().mockReturnValue({ width: 1280, height: 720 }) + + const result = await getEnvironmentDetails(mockCline as Task) + expect(result).toContain("Active - A browser session is currently open and ready for browser_action commands") + expect(result).toContain("Current viewport size: 1280x720 pixels.") + }) }) diff --git a/src/core/environment/getEnvironmentDetails.ts b/src/core/environment/getEnvironmentDetails.ts index 30d9cd0b0d1e..4b73394b6359 100644 --- a/src/core/environment/getEnvironmentDetails.ts +++ b/src/core/environment/getEnvironmentDetails.ts @@ -244,6 +244,38 @@ export async function getEnvironmentDetails(cline: Task, includeFileDetails: boo } } + // Add browser session status - Always show to prevent LLM from trying browser actions when no session is active + const isBrowserActive = cline.browserSession.isSessionActive() + + // Build viewport info for status (prefer actual viewport if available, else fallback to configured setting) + const configuredViewport = (state?.browserViewportSize as string | undefined) ?? "900x600" + let configuredWidth: number | undefined + let configuredHeight: number | undefined + if (configuredViewport.includes("x")) { + const parts = configuredViewport.split("x").map((v) => Number(v)) + configuredWidth = parts[0] + configuredHeight = parts[1] + } + + let actualWidth: number | undefined + let actualHeight: number | undefined + // Use optional chaining to avoid issues with tests that stub browserSession + const vp = isBrowserActive ? (cline.browserSession as any).getViewportSize?.() : undefined + if (vp) { + actualWidth = vp.width + actualHeight = vp.height + } + + const width = actualWidth ?? configuredWidth + const height = actualHeight ?? configuredHeight + const viewportInfo = isBrowserActive && width && height ? `\nCurrent viewport size: ${width}x${height} pixels.` : "" + + details += `\n# Browser Session Status\n${ + isBrowserActive + ? "Active - A browser session is currently open and ready for browser_action commands" + : "Inactive - Browser is not launched. Using any browser action except the browser_action with action='launch' to start a new session will result in an error." + }${viewportInfo}\n` + if (includeFileDetails) { details += `\n\n# Current Workspace Directory (${cline.cwd.toPosix()}) Files\n` const isDesktop = arePathsEqual(cline.cwd, path.join(os.homedir(), "Desktop")) diff --git a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap index 641ec16082ef..d8165c095ee4 100644 --- a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap +++ b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap @@ -264,10 +264,12 @@ Examples: ## browser_action Description: Request to interact with a Puppeteer-controlled browser. Every action, except `close`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. -- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. -- While the browser is active, only the `browser_action` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **1280x800** pixels. When performing any click actions, ensure the coordinates are within this resolution range. -- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. + +**Browser Session Lifecycle:** +- Browser sessions **start** with `launch` and **end** with `close` +- The session remains active across multiple messages and tool uses +- You can use other tools while the browser session is active - it will stay open in the background + Parameters: - action: (required) The action to perform. The available actions are: * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. @@ -281,6 +283,12 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the `text` parameter to provide the string to type. + * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter). + - Use with the `text` parameter to provide the key name or combination. + - For single keys: Enter, Tab, Escape, etc. + - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc. + - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option + - Example: Cmd+K or Shift+Enter * resize: Resize the viewport to a specific w,h size. - Use with the `size` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. @@ -289,17 +297,24 @@ Parameters: - Example: `close` - url: (optional) Use this for providing the URL for the `launch` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. Coordinates should be within the **1280x800** resolution. - * Example: 450,300 +- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. + * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions + * Format: x,y@widthxheight + * Measure x,y on the screenshot image you see in chat + * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport) + * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot + * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport + * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: 450,300@1094x1092 + * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: 500,300@1000x625 - size: (optional) The width and height for the `resize` action. * Example: 1280,720 - text: (optional) Use this for providing the text for the `type` action. * Example: Hello, world! Usage: -Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close) URL to launch the browser at (optional) -x,y coordinates (optional) +x,y@widthxheight coordinates (optional) Text to type (optional) @@ -309,10 +324,10 @@ Example: Requesting to launch a browser at https://example.com https://example.com -Example: Requesting to click on the element at coordinates 450,300 +Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image click -450,300 +450,300@1024x768 ## ask_followup_question @@ -541,7 +556,7 @@ RULES - At the end of each user message, you will automatically receive environment_details. This information is not written by the user themselves, but is auto-generated to provide potentially relevant context about the project structure and environment. While this information can be valuable for understanding the project context, do not treat it as a direct part of the user's request or response. Use it to inform your actions and decisions, but don't assume the user is explicitly asking about or referring to this information unless they clearly do so in their message. When using environment_details, explain your actions clearly to ensure the user understands, as they may not be aware of these details. - Before executing commands, check the "Actively Running Terminals" section in environment_details. If present, consider how these active processes might impact your task. For example, if a local development server is already running, you wouldn't need to start it again. If no active terminals are listed, proceed with command execution as normal. - MCP operations should be used one at a time, similar to other tool usage. Wait for confirmation of success before proceeding with additional operations. -- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc. Then if you want to test your work, you might use browser_action to launch the site, wait for the user's response confirming the site was launched along with a screenshot, then perhaps e.g., click a button to test functionality if needed, wait for the user's response confirming the button was clicked along with a screenshot of the new state, before finally closing the browser. +- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc. ==== diff --git a/src/core/prompts/sections/rules.ts b/src/core/prompts/sections/rules.ts index a5eaf23ce08f..0bac86b2797e 100644 --- a/src/core/prompts/sections/rules.ts +++ b/src/core/prompts/sections/rules.ts @@ -92,9 +92,5 @@ ${getEditingInstructions(diffStrategy)} - At the end of each user message, you will automatically receive environment_details. This information is not written by the user themselves, but is auto-generated to provide potentially relevant context about the project structure and environment. While this information can be valuable for understanding the project context, do not treat it as a direct part of the user's request or response. Use it to inform your actions and decisions, but don't assume the user is explicitly asking about or referring to this information unless they clearly do so in their message. When using environment_details, explain your actions clearly to ensure the user understands, as they may not be aware of these details. - Before executing commands, check the "Actively Running Terminals" section in environment_details. If present, consider how these active processes might impact your task. For example, if a local development server is already running, you wouldn't need to start it again. If no active terminals are listed, proceed with command execution as normal. - MCP operations should be used one at a time, similar to other tool usage. Wait for confirmation of success before proceeding with additional operations. -- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc.${ - supportsComputerUse - ? " Then if you want to test your work, you might use browser_action to launch the site, wait for the user's response confirming the site was launched along with a screenshot, then perhaps e.g., click a button to test functionality if needed, wait for the user's response confirming the button was clicked along with a screenshot of the new state, before finally closing the browser." - : "" - }` +- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc.` } diff --git a/src/core/prompts/tools/browser-action.ts b/src/core/prompts/tools/browser-action.ts index e1b33b9d7d1d..3f9a5c1ae290 100644 --- a/src/core/prompts/tools/browser-action.ts +++ b/src/core/prompts/tools/browser-action.ts @@ -6,10 +6,12 @@ export function getBrowserActionDescription(args: ToolArgs): string | undefined } return `## browser_action Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. -- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. -- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **${args.browserViewportSize}** pixels. When performing any click actions, ensure the coordinates are within this resolution range. -- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. + +**Browser Session Lifecycle:** +- Browser sessions **start** with \`launch\` and **end** with \`close\` +- The session remains active across multiple messages and tool uses +- You can use other tools while the browser session is active - it will stay open in the background + Parameters: - action: (required) The action to perform. The available actions are: * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. @@ -23,6 +25,12 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the \`text\` parameter to provide the string to type. + * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter). + - Use with the \`text\` parameter to provide the key name or combination. + - For single keys: Enter, Tab, Escape, etc. + - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc. + - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option + - Example: Cmd+K or Shift+Enter * resize: Resize the viewport to a specific w,h size. - Use with the \`size\` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. @@ -31,17 +39,24 @@ Parameters: - Example: \`close\` - url: (optional) Use this for providing the URL for the \`launch\` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **${args.browserViewportSize}** resolution. - * Example: 450,300 +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. + * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions + * Format: x,y@widthxheight + * Measure x,y on the screenshot image you see in chat + * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport) + * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot + * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport + * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: 450,300@1094x1092 + * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: 500,300@1000x625 - size: (optional) The width and height for the \`resize\` action. * Example: 1280,720 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! Usage: -Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close) URL to launch the browser at (optional) -x,y coordinates (optional) +x,y@widthxheight coordinates (optional) Text to type (optional) @@ -51,9 +66,9 @@ Example: Requesting to launch a browser at https://example.com https://example.com -Example: Requesting to click on the element at coordinates 450,300 +Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image click -450,300 +450,300@1024x768 ` } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 508cf051d647..f13bd970dff7 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -374,7 +374,10 @@ export class Task extends EventEmitter implements TaskLike { this.autoApprovalHandler = new AutoApprovalHandler() this.urlContentFetcher = new UrlContentFetcher(provider.context) - this.browserSession = new BrowserSession(provider.context) + this.browserSession = new BrowserSession(provider.context, (isActive: boolean) => { + // Add a message to indicate browser session status change + this.say("browser_session_status", isActive ? "Browser session opened" : "Browser session closed") + }) this.diffEnabled = enableDiff this.fuzzyMatchThreshold = fuzzyMatchThreshold this.consecutiveMistakeLimit = consecutiveMistakeLimit ?? DEFAULT_CONSECUTIVE_MISTAKE_LIMIT diff --git a/src/core/tools/__tests__/browserActionTool.coordinateScaling.spec.ts b/src/core/tools/__tests__/browserActionTool.coordinateScaling.spec.ts new file mode 100644 index 000000000000..086040267459 --- /dev/null +++ b/src/core/tools/__tests__/browserActionTool.coordinateScaling.spec.ts @@ -0,0 +1,148 @@ +// Test coordinate scaling functionality in browser actions +import { describe, it, expect, vi, beforeEach } from "vitest" + +// Mock the scaleCoordinate function by extracting it +// In a real scenario, we'd export it or test through the main function +// For now, we'll test the regex pattern and logic + +describe("Browser Action Coordinate Scaling", () => { + describe("Coordinate format validation", () => { + it("should match valid coordinate format with image dimensions", () => { + const validFormats = [ + "450,300@1024x768", + "0,0@1920x1080", + "1920,1080@1920x1080", + "100,200@800x600", + " 273 , 273 @ 1280x800 ", + "267,273@1280,800", // comma separator for dimensions + "450,300@1024,768", // comma separator for dimensions + ] + + const regex = /^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/ + + validFormats.forEach((coord) => { + expect(coord).toMatch(regex) + }) + }) + + it("should not match invalid coordinate formats", () => { + const invalidFormats = [ + "450,300", // missing image dimensions + "450,300@", // incomplete dimensions + "450,300@1024", // missing height + "450,300@1024x", // missing height value + "@1024x768", // missing coordinates + "450@1024x768", // missing y coordinate + ",300@1024x768", // missing x coordinate + "450,300@1024x768x2", // extra dimension + "a,b@1024x768", // non-numeric coordinates + "450,300@axb", // non-numeric dimensions + ] + + const regex = /^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/ + + invalidFormats.forEach((coord) => { + expect(coord).not.toMatch(regex) + }) + }) + }) + + describe("Coordinate scaling logic", () => { + it("should correctly scale coordinates from image to viewport", () => { + // Simulate the scaling logic + const scaleCoordinate = (coordinate: string, viewportWidth: number, viewportHeight: number): string => { + const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/) + if (!match) { + throw new Error(`Invalid coordinate format: "${coordinate}"`) + } + + const [, xStr, yStr, imgWidthStr, imgHeightStr] = match + const x = parseInt(xStr, 10) + const y = parseInt(yStr, 10) + const imgWidth = parseInt(imgWidthStr, 10) + const imgHeight = parseInt(imgHeightStr, 10) + + const scaledX = Math.round((x / imgWidth) * viewportWidth) + const scaledY = Math.round((y / imgHeight) * viewportHeight) + + return `${scaledX},${scaledY}` + } + + // Test case 1: Same dimensions (no scaling) + expect(scaleCoordinate("450,300@900x600", 900, 600)).toBe("450,300") + + // Test case 2: Half dimensions (2x upscale) + expect(scaleCoordinate("225,150@450x300", 900, 600)).toBe("450,300") + + // Test case 3: Double dimensions (0.5x downscale) + expect(scaleCoordinate("900,600@1800x1200", 900, 600)).toBe("450,300") + + // Test case 4: Different aspect ratio + expect(scaleCoordinate("512,384@1024x768", 1920, 1080)).toBe("960,540") + + // Test case 5: Edge cases (0,0) + expect(scaleCoordinate("0,0@1024x768", 1920, 1080)).toBe("0,0") + + // Test case 6: Edge cases (max coordinates) + expect(scaleCoordinate("1024,768@1024x768", 1920, 1080)).toBe("1920,1080") + }) + + it("should throw error for invalid coordinate format", () => { + const scaleCoordinate = (coordinate: string, viewportWidth: number, viewportHeight: number): string => { + const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/) + if (!match) { + throw new Error( + `Invalid coordinate format: "${coordinate}". ` + + `Expected format: "x,y@widthxheight" (e.g., "450,300@1024x768")`, + ) + } + + const [, xStr, yStr, imgWidthStr, imgHeightStr] = match + const x = parseInt(xStr, 10) + const y = parseInt(yStr, 10) + const imgWidth = parseInt(imgWidthStr, 10) + const imgHeight = parseInt(imgHeightStr, 10) + + const scaledX = Math.round((x / imgWidth) * viewportWidth) + const scaledY = Math.round((y / imgHeight) * viewportHeight) + + return `${scaledX},${scaledY}` + } + + // Test invalid formats + expect(() => scaleCoordinate("450,300", 900, 600)).toThrow("Invalid coordinate format") + expect(() => scaleCoordinate("450,300@1024", 900, 600)).toThrow("Invalid coordinate format") + expect(() => scaleCoordinate("invalid", 900, 600)).toThrow("Invalid coordinate format") + }) + + it("should handle rounding correctly", () => { + const scaleCoordinate = (coordinate: string, viewportWidth: number, viewportHeight: number): string => { + const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/) + if (!match) { + throw new Error(`Invalid coordinate format: "${coordinate}"`) + } + + const [, xStr, yStr, imgWidthStr, imgHeightStr] = match + const x = parseInt(xStr, 10) + const y = parseInt(yStr, 10) + const imgWidth = parseInt(imgWidthStr, 10) + const imgHeight = parseInt(imgHeightStr, 10) + + const scaledX = Math.round((x / imgWidth) * viewportWidth) + const scaledY = Math.round((y / imgHeight) * viewportHeight) + + return `${scaledX},${scaledY}` + } + + // Test rounding behavior + // 333 / 1000 * 900 = 299.7 -> rounds to 300 + expect(scaleCoordinate("333,333@1000x1000", 900, 900)).toBe("300,300") + + // 666 / 1000 * 900 = 599.4 -> rounds to 599 + expect(scaleCoordinate("666,666@1000x1000", 900, 900)).toBe("599,599") + + // 500 / 1000 * 900 = 450.0 -> rounds to 450 + expect(scaleCoordinate("500,500@1000x1000", 900, 900)).toBe("450,450") + }) + }) +}) diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts index 5074d7f4e808..a47a3552bf8f 100644 --- a/src/core/tools/attemptCompletionTool.ts +++ b/src/core/tools/attemptCompletionTool.ts @@ -129,7 +129,8 @@ export async function attemptCompletionTool( }) toolResults.push(...formatResponse.imageBlocks(images)) - cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` }) + const labelSuffix = images && images.length > 0 ? " (see image below)" : "" + cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:${labelSuffix}` }) cline.userMessageContent.push(...toolResults) return diff --git a/src/core/tools/browserActionTool.ts b/src/core/tools/browserActionTool.ts index 13cb9b0ec266..d9a31a9acb13 100644 --- a/src/core/tools/browserActionTool.ts +++ b/src/core/tools/browserActionTool.ts @@ -7,6 +7,40 @@ import { ClineSayBrowserAction, } from "../../shared/ExtensionMessage" import { formatResponse } from "../prompts/responses" +import { Anthropic } from "@anthropic-ai/sdk" + +/** + * Parses coordinate string and scales from image dimensions to viewport dimensions + * The LLM examines the screenshot it receives (which may be downscaled by the API) + * and reports coordinates in format: "x,y@widthxheight" where widthxheight is what the LLM observed + * + * Format: "x,y@widthxheight" (required) + * Returns: scaled coordinate string "x,y" in viewport coordinates + * Throws: Error if format is invalid or missing image dimensions + */ +function scaleCoordinate(coordinate: string, viewportWidth: number, viewportHeight: number): string { + // Parse coordinate with required image dimensions (accepts both 'x' and ',' as dimension separators) + const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/) + + if (!match) { + throw new Error( + `Invalid coordinate format: "${coordinate}". ` + + `Expected format: "x,y@widthxheight" (e.g., "450,300@1024x768")`, + ) + } + + const [, xStr, yStr, imgWidthStr, imgHeightStr] = match + const x = parseInt(xStr, 10) + const y = parseInt(yStr, 10) + const imgWidth = parseInt(imgWidthStr, 10) + const imgHeight = parseInt(imgHeightStr, 10) + + // Scale coordinates from image dimensions to viewport dimensions + const scaledX = Math.round((x / imgWidth) * viewportWidth) + const scaledY = Math.round((y / imgHeight) * viewportHeight) + + return `${scaledX},${scaledY}` +} export async function browserActionTool( cline: Task, @@ -29,7 +63,7 @@ export async function browserActionTool( cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "action")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors } return @@ -46,6 +80,7 @@ export async function browserActionTool( action: action as BrowserAction, coordinate: removeClosingTag("coordinate", coordinate), text: removeClosingTag("text", text), + size: removeClosingTag("size", size), } satisfies ClineSayBrowserAction), undefined, block.partial, @@ -61,7 +96,7 @@ export async function browserActionTool( cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "url")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return } @@ -75,27 +110,63 @@ export async function browserActionTool( // NOTE: It's okay that we call cline message since the partial inspect_site is finished streaming. // The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array. // For example the api_req_finished message would interfere with the partial message, so we needed to remove that. - // await cline.say("inspect_site_result", "") // No result, starts the loading spinner waiting for result - await cline.say("browser_action_result", "") // Starts loading spinner + + // Launch browser first (this triggers "Browser session opened" status message) await cline.browserSession.launchBrowser() + + // Create browser_action say message AFTER launching so status appears first + await cline.say( + "browser_action", + JSON.stringify({ + action: "launch" as BrowserAction, + text: url, + } satisfies ClineSayBrowserAction), + undefined, + false, + ) + browserActionResult = await cline.browserSession.navigateToUrl(url) } else { + // Variables to hold validated and processed parameters + let processedCoordinate = coordinate + if (action === "click" || action === "hover") { if (!coordinate) { cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "coordinate")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return // can't be within an inner switch } + + // Get viewport dimensions from the browser session + const viewportSize = cline.browserSession.getViewportSize() + const viewportWidth = viewportSize.width || 900 // default to 900 if not available + const viewportHeight = viewportSize.height || 600 // default to 600 if not available + + // Scale coordinate from image dimensions to viewport dimensions + try { + processedCoordinate = scaleCoordinate(coordinate, viewportWidth, viewportHeight) + } catch (error) { + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult( + await cline.sayAndCreateMissingParamError( + "browser_action", + "coordinate", + error instanceof Error ? error.message : String(error), + ), + ) + return + } } - if (action === "type") { + if (action === "type" || action === "press") { if (!text) { cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "text")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return } } @@ -105,7 +176,7 @@ export async function browserActionTool( cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return } } @@ -118,6 +189,7 @@ export async function browserActionTool( action: action as BrowserAction, coordinate, text, + size, } satisfies ClineSayBrowserAction), undefined, false, @@ -125,14 +197,17 @@ export async function browserActionTool( switch (action) { case "click": - browserActionResult = await cline.browserSession.click(coordinate!) + browserActionResult = await cline.browserSession.click(processedCoordinate!) break case "hover": - browserActionResult = await cline.browserSession.hover(coordinate!) + browserActionResult = await cline.browserSession.hover(processedCoordinate!) break case "type": browserActionResult = await cline.browserSession.type(text!) break + case "press": + browserActionResult = await cline.browserSession.press(text!) + break case "scroll_down": browserActionResult = await cline.browserSession.scrollDown() break @@ -153,21 +228,48 @@ export async function browserActionTool( case "click": case "hover": case "type": + case "press": case "scroll_down": case "scroll_up": - case "resize": + case "resize": { await cline.say("browser_action_result", JSON.stringify(browserActionResult)) - pushToolResult( - formatResponse.toolResult( - `The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${ - browserActionResult?.logs || "(No new logs)" - }\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close cline browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`, - browserActionResult?.screenshot ? [browserActionResult.screenshot] : [], - ), - ) + const images = browserActionResult?.screenshot ? [browserActionResult.screenshot] : [] + + let messageText = `The browser action has been executed.` + + messageText += `\n\n**CRITICAL**: When providing click/hover coordinates:` + messageText += `\n1. Screenshot dimensions != Browser viewport dimensions` + messageText += `\n2. Measure x,y on the screenshot image you see below` + messageText += `\n3. Use format: x,y@WIDTHxHEIGHT where WIDTHxHEIGHT is the EXACT pixel size of the screenshot image` + messageText += `\n4. Never use the browser viewport size for WIDTHxHEIGHT - it is only for reference and is often larger than the screenshot` + messageText += `\n5. Screenshots are often downscaled - always use the dimensions you see in the image` + messageText += `\nExample: Viewport 1280x800, screenshot 1000x625, click (500,300) -> 500,300@1000x625` + + // Include browser viewport dimensions (for reference only) + if (browserActionResult?.viewportWidth && browserActionResult?.viewportHeight) { + messageText += `\n\nBrowser viewport: ${browserActionResult.viewportWidth}x${browserActionResult.viewportHeight}` + } + + // Include cursor position if available + if (browserActionResult?.currentMousePosition) { + messageText += `\nCursor position: ${browserActionResult.currentMousePosition}` + } + + messageText += `\n\nConsole logs:\n${browserActionResult?.logs || "(No new logs)"}\n` + + if (images.length > 0) { + const blocks = [ + ...formatResponse.imageBlocks(images), + { type: "text", text: messageText } as Anthropic.TextBlockParam, + ] + pushToolResult(blocks) + } else { + pushToolResult(messageText) + } break + } case "close": pushToolResult( formatResponse.toolResult( @@ -181,7 +283,7 @@ export async function browserActionTool( return } } catch (error) { - await cline.browserSession.closeBrowser() // if any error occurs, the browser session is terminated + // Keep the browser session alive on errors; report the error without terminating the session await handleError("executing browser action", error) return } diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index ec1781d79fa7..54682be7adf1 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1766,6 +1766,7 @@ export class ClineProvider soundVolume, browserViewportSize, screenshotQuality, + browserActionsAutoExpand, remoteBrowserHost, remoteBrowserEnabled, cachedChromeHostUrl, @@ -1866,6 +1867,7 @@ export class ClineProvider alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false, alwaysAllowSubtasks: alwaysAllowSubtasks ?? false, alwaysAllowUpdateTodoList: alwaysAllowUpdateTodoList ?? false, + isBrowserSessionActive: this.getCurrentTask()?.browserSession?.isSessionActive() ?? false, allowedMaxRequests, allowedMaxCost, autoCondenseContext: autoCondenseContext ?? true, @@ -1893,6 +1895,7 @@ export class ClineProvider soundVolume: soundVolume ?? 0.5, browserViewportSize: browserViewportSize ?? "900x600", screenshotQuality: screenshotQuality ?? 75, + browserActionsAutoExpand: browserActionsAutoExpand ?? false, remoteBrowserHost, remoteBrowserEnabled: remoteBrowserEnabled ?? false, cachedChromeHostUrl: cachedChromeHostUrl, @@ -2078,6 +2081,9 @@ export class ClineProvider ) } + // Get actual browser session state + const isBrowserSessionActive = this.getCurrentTask()?.browserSession?.isSessionActive() ?? false + // Return the same structure as before. return { apiConfiguration: providerSettings, @@ -2096,6 +2102,7 @@ export class ClineProvider alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false, alwaysAllowFollowupQuestions: stateValues.alwaysAllowFollowupQuestions ?? false, alwaysAllowUpdateTodoList: stateValues.alwaysAllowUpdateTodoList ?? false, + isBrowserSessionActive, followupAutoApproveTimeoutMs: stateValues.followupAutoApproveTimeoutMs ?? 60000, diagnosticsEnabled: stateValues.diagnosticsEnabled ?? true, allowedMaxRequests: stateValues.allowedMaxRequests, @@ -2114,6 +2121,7 @@ export class ClineProvider soundVolume: stateValues.soundVolume, browserViewportSize: stateValues.browserViewportSize ?? "900x600", screenshotQuality: stateValues.screenshotQuality ?? 75, + browserActionsAutoExpand: stateValues.browserActionsAutoExpand ?? false, remoteBrowserHost: stateValues.remoteBrowserHost, remoteBrowserEnabled: stateValues.remoteBrowserEnabled ?? false, cachedChromeHostUrl: stateValues.cachedChromeHostUrl as string | undefined, diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index 3d68fac2acb0..6bba0fc43724 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -503,6 +503,7 @@ describe("ClineProvider", () => { const mockState: ExtensionState = { version: "1.0.0", + isBrowserSessionActive: false, clineMessages: [], taskHistory: [], shouldShowAnnouncement: false, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index e32b818a96e9..4d4bffab525e 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1047,6 +1047,15 @@ export const webviewMessageHandler = async ( case "cancelTask": await provider.cancelTask() break + case "killBrowserSession": + { + const task = provider.getCurrentTask() + if (task?.browserSession) { + await task.browserSession.closeBrowser() + await provider.postStateToWebview() + } + } + break case "allowedCommands": { // Validate and sanitize the commands array const commands = message.commands ?? [] @@ -1614,6 +1623,10 @@ export const webviewMessageHandler = async ( await updateGlobalState("browserToolEnabled", message.bool ?? true) await provider.postStateToWebview() break + case "browserActionsAutoExpand": + await updateGlobalState("browserActionsAutoExpand", message.bool ?? false) + await provider.postStateToWebview() + break case "language": changeLanguage(message.text ?? "en") await updateGlobalState("language", message.text as Language) diff --git a/src/services/browser/BrowserSession.ts b/src/services/browser/BrowserSession.ts index 75b432f01d2d..b42c5b0073ca 100644 --- a/src/services/browser/BrowserSession.ts +++ b/src/services/browser/BrowserSession.ts @@ -1,7 +1,7 @@ import * as vscode from "vscode" import * as fs from "fs/promises" import * as path from "path" -import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect } from "puppeteer-core" +import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect, KeyInput } from "puppeteer-core" // @ts-ignore import PCR from "puppeteer-chromium-resolver" import pWaitFor from "p-wait-for" @@ -25,9 +25,15 @@ export class BrowserSession { private currentMousePosition?: string private lastConnectionAttempt?: number private isUsingRemoteBrowser: boolean = false + private onStateChange?: (isActive: boolean) => void - constructor(context: vscode.ExtensionContext) { + // Track last known viewport to surface in environment details + private lastViewportWidth?: number + private lastViewportHeight?: number + + constructor(context: vscode.ExtensionContext, onStateChange?: (isActive: boolean) => void) { this.context = context + this.onStateChange = onStateChange } private async ensureChromiumExists(): Promise { @@ -189,13 +195,20 @@ export class BrowserSession { await this.launchLocalBrowser() } } + + // Notify that browser session is now active + if (this.browser && this.onStateChange) { + this.onStateChange(true) + } } /** * Closes the browser and resets browser state */ async closeBrowser(): Promise { - if (this.browser || this.page) { + const wasActive = !!(this.browser || this.page) + + if (wasActive) { console.log("closing browser...") if (this.isUsingRemoteBrowser && this.browser) { @@ -204,6 +217,11 @@ export class BrowserSession { await this.browser?.close().catch(() => {}) } this.resetBrowserState() + + // Notify that browser session is now inactive + if (this.onStateChange) { + this.onStateChange(false) + } } return {} } @@ -216,6 +234,8 @@ export class BrowserSession { this.page = undefined this.currentMousePosition = undefined this.isUsingRemoteBrowser = false + this.lastViewportWidth = undefined + this.lastViewportHeight = undefined } async doAction(action: (page: Page) => Promise): Promise { @@ -260,6 +280,11 @@ export class BrowserSession { interval: 100, }).catch(() => {}) + // Draw cursor indicator if we have a cursor position + if (this.currentMousePosition) { + await this.drawCursorIndicator(this.page, this.currentMousePosition) + } + let options: ScreenshotOptions = { encoding: "base64", @@ -291,15 +316,29 @@ export class BrowserSession { throw new Error("Failed to take screenshot.") } + // Remove cursor indicator after taking screenshot + if (this.currentMousePosition) { + await this.removeCursorIndicator(this.page) + } + // this.page.removeAllListeners() <- causes the page to crash! this.page.off("console", consoleListener) this.page.off("pageerror", errorListener) + // Get actual viewport dimensions + const viewport = this.page.viewport() + + // Persist last known viewport dimensions + this.lastViewportWidth = viewport?.width + this.lastViewportHeight = viewport?.height + return { screenshot, logs: logs.join("\n"), currentUrl: this.page.url(), currentMousePosition: this.currentMousePosition, + viewportWidth: viewport?.width, + viewportHeight: viewport?.height, } } @@ -453,6 +492,64 @@ export class BrowserSession { } } + /** + * Force links and window.open to navigate in the same tab. + * This makes clicks on anchors with target="_blank" stay in the current page + * and also intercepts window.open so SPA/open-in-new-tab patterns don't spawn popups. + */ + private async forceLinksToSameTab(page: Page): Promise { + try { + await page.evaluate(() => { + try { + // Ensure we only install once per document + if ((window as any).__ROO_FORCE_SAME_TAB__) return + ;(window as any).__ROO_FORCE_SAME_TAB__ = true + + // Override window.open to navigate current tab instead of creating a new one + const originalOpen = window.open + window.open = function (url: string | URL, target?: string, features?: string) { + try { + const href = typeof url === "string" ? url : String(url) + location.href = href + } catch { + // fall back to original if something unexpected occurs + try { + return originalOpen.apply(window, [url as any, "_self", features]) as any + } catch {} + } + return null as any + } as any + + // Rewrite anchors that explicitly open new tabs + document.querySelectorAll('a[target="_blank"]').forEach((a) => { + a.setAttribute("target", "_self") + }) + + // Defensive capture: if an element still tries to open in a new tab, force same-tab + document.addEventListener( + "click", + (ev) => { + const el = (ev.target as HTMLElement | null)?.closest?.( + 'a[target="_blank"]', + ) as HTMLAnchorElement | null + if (el && el.href) { + ev.preventDefault() + try { + location.href = el.href + } catch {} + } + }, + { capture: true, passive: false }, + ) + } catch { + // no-op; forcing same-tab is best-effort + } + }) + } catch { + // If evaluate fails (e.g., cross-origin/state), continue without breaking the action + } + } + /** * Handles mouse interaction with network activity monitoring */ @@ -463,6 +560,9 @@ export class BrowserSession { ): Promise { const [x, y] = coordinate.split(",").map(Number) + // Force any new-tab behavior (target="_blank", window.open) to stay in the same tab + await this.forceLinksToSameTab(page) + // Set up network request monitoring let hasNetworkActivity = false const requestListener = () => { @@ -506,6 +606,106 @@ export class BrowserSession { }) } + async press(key: string): Promise { + return this.doAction(async (page) => { + // Parse key combinations (e.g., "Cmd+K", "Shift+Enter") + const parts = key.split("+").map((k) => k.trim()) + const modifiers: string[] = [] + let mainKey = parts[parts.length - 1] + + // Identify modifiers + for (let i = 0; i < parts.length - 1; i++) { + const part = parts[i].toLowerCase() + if (part === "cmd" || part === "command" || part === "meta") { + modifiers.push("Meta") + } else if (part === "ctrl" || part === "control") { + modifiers.push("Control") + } else if (part === "shift") { + modifiers.push("Shift") + } else if (part === "alt" || part === "option") { + modifiers.push("Alt") + } + } + + // Map common key aliases to Puppeteer KeyInput values + const mapping: Record = { + esc: "Escape", + return: "Enter", + escape: "Escape", + enter: "Enter", + tab: "Tab", + space: "Space", + arrowup: "ArrowUp", + arrowdown: "ArrowDown", + arrowleft: "ArrowLeft", + arrowright: "ArrowRight", + } + mainKey = (mapping[mainKey.toLowerCase()] ?? mainKey) as string + + // Avoid new-tab behavior from Enter on links/buttons + await this.forceLinksToSameTab(page) + + // Track inflight requests so we can detect brief network bursts + let inflight = 0 + const onRequest = () => { + inflight++ + } + const onRequestDone = () => { + inflight = Math.max(0, inflight - 1) + } + page.on("request", onRequest) + page.on("requestfinished", onRequestDone) + page.on("requestfailed", onRequestDone) + + // Start a short navigation wait in parallel; if no nav, it times out harmlessly + const HARD_CAP_MS = 3000 + const navPromise = page + .waitForNavigation({ + // domcontentloaded is enough to confirm a submit navigated + waitUntil: ["domcontentloaded"], + timeout: HARD_CAP_MS, + }) + .catch(() => undefined) + + // Press key combination + if (modifiers.length > 0) { + // Hold down modifiers + for (const modifier of modifiers) { + await page.keyboard.down(modifier as KeyInput) + } + + // Press main key + await page.keyboard.press(mainKey as KeyInput) + + // Release modifiers + for (const modifier of modifiers) { + await page.keyboard.up(modifier as KeyInput) + } + } else { + // Single key press + await page.keyboard.press(mainKey as KeyInput) + } + + // Give time for any requests to kick off + await delay(120) + + // Hard-cap the wait to avoid UI hangs + await Promise.race([ + navPromise, + pWaitFor(() => inflight === 0, { timeout: HARD_CAP_MS, interval: 100 }).catch(() => {}), + delay(HARD_CAP_MS), + ]) + + // Stabilize DOM briefly before capturing screenshot (shorter cap) + await this.waitTillHTMLStable(page, 2_000) + + // Cleanup + page.off("request", onRequest) + page.off("requestfinished", onRequestDone) + page.off("requestfailed", onRequestDone) + }) + } + /** * Scrolls the page by the specified amount */ @@ -557,4 +757,84 @@ export class BrowserSession { }) }) } + + /** + * Draws a cursor indicator on the page at the specified position + */ + private async drawCursorIndicator(page: Page, coordinate: string): Promise { + const [x, y] = coordinate.split(",").map(Number) + + try { + await page.evaluate( + (cursorX: number, cursorY: number) => { + // Create a cursor indicator element + const cursor = document.createElement("div") + cursor.id = "__roo_cursor_indicator__" + cursor.style.cssText = ` + position: fixed; + left: ${cursorX}px; + top: ${cursorY}px; + width: 35px; + height: 35px; + pointer-events: none; + z-index: 2147483647; + ` + + // Create SVG cursor pointer + const svg = ` + + + + + ` + cursor.innerHTML = svg + + document.body.appendChild(cursor) + }, + x, + y, + ) + } catch (error) { + console.log("Failed to draw cursor indicator:", error) + } + } + + /** + * Removes the cursor indicator from the page + */ + private async removeCursorIndicator(page: Page): Promise { + try { + await page.evaluate(() => { + const cursor = document.getElementById("__roo_cursor_indicator__") + if (cursor) { + cursor.remove() + } + }) + } catch (error) { + console.log("Failed to remove cursor indicator:", error) + } + } + + /** + * Returns whether a browser session is currently active + */ + isSessionActive(): boolean { + return !!(this.browser && this.page) + } + + /** + * Returns the last known viewport size (if any) + */ + getViewportSize(): { width?: number; height?: number } { + return { + width: this.lastViewportWidth, + height: this.lastViewportHeight, + } + } } diff --git a/src/services/browser/UrlContentFetcher.ts b/src/services/browser/UrlContentFetcher.ts index b271bc2ef413..2d8e4a3de84a 100644 --- a/src/services/browser/UrlContentFetcher.ts +++ b/src/services/browser/UrlContentFetcher.ts @@ -90,9 +90,9 @@ export class UrlContentFetcher { throw new Error("Browser not initialized") } /* - - networkidle2 is equivalent to playwright's networkidle where it waits until there are no more than 2 network connections for at least 500 ms. - - domcontentloaded is when the basic DOM is loaded - this should be sufficient for most doc sites + - In Puppeteer, "networkidle2" waits until there are no more than 2 network connections for at least 500 ms (roughly equivalent to Playwright's "networkidle"). + - "domcontentloaded" is when the basic DOM is loaded. + This should be sufficient for most doc sites. */ try { await this.page.goto(url, { diff --git a/src/services/browser/__tests__/BrowserSession.spec.ts b/src/services/browser/__tests__/BrowserSession.spec.ts index b69fb2d14064..d3784c3afff2 100644 --- a/src/services/browser/__tests__/BrowserSession.spec.ts +++ b/src/services/browser/__tests__/BrowserSession.spec.ts @@ -229,4 +229,169 @@ describe("BrowserSession", () => { expect(mockBrowser.close).not.toHaveBeenCalled() }) }) + + it("forces same-tab behavior before click", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + waitForNavigation: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + click: vi.fn().mockResolvedValue(undefined), + move: vi.fn().mockResolvedValue(undefined), + }, + } + + ;(browserSession as any).page = page + + // Spy on the forceLinksToSameTab helper to ensure it's invoked + const forceSpy = vi.fn().mockResolvedValue(undefined) + ;(browserSession as any).forceLinksToSameTab = forceSpy + + await browserSession.click("10,20") + + expect(forceSpy).toHaveBeenCalledTimes(1) + expect(forceSpy).toHaveBeenCalledWith(page) + expect(page.mouse.click).toHaveBeenCalledWith(10, 20) + }) +}) + +describe("keyboard press", () => { + it("presses a keyboard key", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + waitForNavigation: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(undefined), + keyboard: { + press: vi.fn().mockResolvedValue(undefined), + type: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + await session.press("Enter") + + expect(page.keyboard.press).toHaveBeenCalledTimes(1) + expect(page.keyboard.press).toHaveBeenCalledWith("Enter") + }) +}) + +describe("cursor visualization", () => { + it("should draw cursor indicator when cursor position exists", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + click: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform a click action which sets cursor position + const result = await session.click("100,200") + + // Verify cursor indicator was drawn and removed + // evaluate is called 3 times: 1 for forceLinksToSameTab, 1 for draw cursor, 1 for remove cursor + expect(page.evaluate).toHaveBeenCalled() + + // Verify the result includes cursor position + expect(result.currentMousePosition).toBe("100,200") + }) + + it("should include cursor position in action result", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + move: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform a hover action which sets cursor position + const result = await session.hover("150,250") + + // Verify the result includes cursor position + expect(result.currentMousePosition).toBe("150,250") + expect(result.viewportWidth).toBe(900) + expect(result.viewportHeight).toBe(600) + }) + + it("should not draw cursor indicator when no cursor position exists", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform scroll action which doesn't set cursor position + const result = await session.scrollDown() + + // Verify evaluate was called only for scroll operation (not for cursor drawing/removal) + // scrollDown calls evaluate once for scrolling + expect(page.evaluate).toHaveBeenCalledTimes(1) + + // Verify no cursor position in result + expect(result.currentMousePosition).toBeUndefined() + }) }) diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 5929e7a950eb..c7d8d991307d 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -345,6 +345,9 @@ export type ExtensionState = Pick< organizationAllowList: OrganizationAllowList organizationSettingsVersion?: number + isBrowserSessionActive: boolean // Actual browser session state + browserActionsAutoExpand?: boolean // Auto-expand browser actions in chat + autoCondenseContext: boolean autoCondenseContextPercent: number marketplaceItems?: MarketplaceItem[] @@ -434,6 +437,7 @@ export const browserActions = [ "click", "hover", "type", + "press", "scroll_down", "scroll_up", "resize", @@ -454,6 +458,8 @@ export type BrowserActionResult = { logs?: string currentUrl?: string currentMousePosition?: string + viewportWidth?: number + viewportHeight?: number } export interface ClineAskUseMcpServer { diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 9c475186288f..945dccc64c65 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -103,6 +103,7 @@ export interface WebviewMessage { | "checkpointTimeout" | "browserViewportSize" | "screenshotQuality" + | "browserActionsAutoExpand" | "remoteBrowserHost" | "openKeyboardShortcuts" | "openMcpSettings" @@ -233,6 +234,7 @@ export interface WebviewMessage { | "editQueuedMessage" | "dismissUpsell" | "getDismissedUpsells" + | "killBrowserSession" text?: string editedMessageContent?: string tab?: "settings" | "history" | "mcp" | "modes" | "chat" | "marketplace" | "cloud" diff --git a/webview-ui/src/components/chat/BrowserActionRow.tsx b/webview-ui/src/components/chat/BrowserActionRow.tsx new file mode 100644 index 000000000000..f2d17a853a46 --- /dev/null +++ b/webview-ui/src/components/chat/BrowserActionRow.tsx @@ -0,0 +1,316 @@ +import { memo, useMemo, useState, useEffect } from "react" +import { ClineMessage } from "@roo-code/types" +import { ClineSayBrowserAction, BrowserActionResult } from "@roo/ExtensionMessage" +import { vscode } from "@src/utils/vscode" +import { + MousePointer as MousePointerIcon, + Keyboard, + ArrowDown, + ArrowUp, + Pointer, + Play, + Check, + SquareTerminal, + Globe, + Maximize2, +} from "lucide-react" +import CodeBlock from "../common/CodeBlock" +import { useTranslation } from "react-i18next" +import { useExtensionState } from "@src/context/ExtensionStateContext" + +const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = ({ style }) => { + const { t } = useTranslation() + // Base64 encoded cursor image (same as BrowserSessionRow) + const cursorBase64 = + "" + + return ( + {t("chat:browser.cursor")} + ) +} + +const prettyKey = (k?: string): string => { + if (!k) return "" + return k + .split("+") + .map((part) => { + const p = part.trim() + const lower = p.toLowerCase() + const map: Record = { + enter: "Enter", + tab: "Tab", + escape: "Esc", + esc: "Esc", + backspace: "Backspace", + space: "Space", + shift: "Shift", + control: "Ctrl", + ctrl: "Ctrl", + alt: "Alt", + meta: "Meta", + command: "Cmd", + cmd: "Cmd", + arrowup: "Arrow Up", + arrowdown: "Arrow Down", + arrowleft: "Arrow Left", + arrowright: "Arrow Right", + pageup: "Page Up", + pagedown: "Page Down", + home: "Home", + end: "End", + } + if (map[lower]) return map[lower] + const keyMatch = /^Key([A-Z])$/.exec(p) + if (keyMatch) return keyMatch[1].toUpperCase() + const digitMatch = /^Digit([0-9])$/.exec(p) + if (digitMatch) return digitMatch[1] + const spaced = p.replace(/([a-z])([A-Z])/g, "$1 $2") + return spaced.charAt(0).toUpperCase() + spaced.slice(1) + }) + .join(" + ") +} + +interface BrowserActionRowProps { + message: ClineMessage + nextMessage?: ClineMessage + actionIndex?: number + totalActions?: number +} + +// Get icon for each action type +const getActionIcon = (action: string) => { + switch (action) { + case "click": + return + case "type": + case "press": + return + case "scroll_down": + return + case "scroll_up": + return + case "launch": + return + case "close": + return + case "resize": + return + case "hover": + default: + return + } +} + +const BrowserActionRow = memo(({ message, nextMessage, actionIndex, totalActions }: BrowserActionRowProps) => { + const { browserViewportSize = "900x600", browserActionsAutoExpand } = useExtensionState() + const [isExpanded, setIsExpanded] = useState(browserActionsAutoExpand ?? false) + const [isLogsExpanded, setIsLogsExpanded] = useState(false) + + // Update expanded state when setting changes + useEffect(() => { + setIsExpanded(browserActionsAutoExpand ?? false) + }, [browserActionsAutoExpand]) + + // Use default viewport size from settings + const [defaultViewportWidth, defaultViewportHeight] = browserViewportSize.split("x").map(Number) + + // Parse this specific browser action + const browserAction = useMemo(() => { + try { + return JSON.parse(message.text || "{}") as ClineSayBrowserAction + } catch { + return null + } + }, [message.text]) + + // Parse the result from the next message + const actionResult = useMemo(() => { + if (!nextMessage || nextMessage.say !== "browser_action_result") return null + try { + return JSON.parse(nextMessage.text || "{}") as BrowserActionResult + } catch { + return null + } + }, [nextMessage]) + + // Use actual viewport dimensions from result if available, otherwise fall back to settings + const viewportWidth = actionResult?.viewportWidth ?? defaultViewportWidth + const viewportHeight = actionResult?.viewportHeight ?? defaultViewportHeight + + // Format action display text + const actionText = useMemo(() => { + if (!browserAction) return "Browser action" + + switch (browserAction.action) { + case "launch": + return `Launched browser` + case "click": + return `Clicked at: ${browserAction.coordinate}` + case "type": + return `Typed: ${browserAction.text}` + case "press": + return `Pressed key: ${prettyKey(browserAction.text)}` + case "hover": + return `Hovered at: ${browserAction.coordinate}` + case "scroll_down": + return "Scrolled down" + case "scroll_up": + return "Scrolled up" + case "resize": + return `Resized to: ${browserAction.size?.split(/[x,]/).join(" x ")}` + case "close": + return "Closed browser" + default: + return browserAction.action + } + }, [browserAction]) + + const handleImageClick = () => { + if (actionResult?.screenshot) { + vscode.postMessage({ + type: "openImage", + text: actionResult.screenshot, + }) + } + } + + const headerStyle: React.CSSProperties = { + display: "flex", + alignItems: "center", + gap: "10px", + marginBottom: "10px", + wordBreak: "break-word", + } + + const hasScreenshot = !!actionResult?.screenshot + + return ( +
+ {/* Header with action description */} +
setIsExpanded(!isExpanded) : undefined}> + + Browser Action + {actionIndex !== undefined && totalActions !== undefined && ( + + {" "} + - {actionIndex}/{totalActions} -{" "} + + )} + {browserAction && ( + <> + {getActionIcon(browserAction.action)} + {actionText} + + )} + {hasScreenshot && ( + + )} +
+ + {/* Expanded content - only show if we have a screenshot */} + {isExpanded && actionResult?.screenshot && ( +
+
+ {/* URL display with globe icon - centered */} + {actionResult.currentUrl && ( +
+ + + {actionResult.currentUrl} + +
+ )} + + {/* Screenshot with cursor position */} +
+ Browser screenshot + {actionResult.currentMousePosition && ( + + )} +
+ + {/* Console logs - matching BrowserSessionRow styling exactly */} +
+
{ + e.stopPropagation() + setIsLogsExpanded(!isLogsExpanded) + }} + className="text-vscode-editor-foreground/70 hover:text-vscode-editor-foreground transition-colors" + style={{ + display: "flex", + alignItems: "center", + gap: "8px", + marginBottom: isLogsExpanded ? "6px" : 0, + cursor: "pointer", + }}> + + + Console Logs + + +
+ {isLogsExpanded && ( +
+ +
+ )} +
+
+
+ )} +
+ ) +}) + +BrowserActionRow.displayName = "BrowserActionRow" + +export default BrowserActionRow diff --git a/webview-ui/src/components/chat/BrowserSessionRow.tsx b/webview-ui/src/components/chat/BrowserSessionRow.tsx index c23b79f568a3..b567735828e8 100644 --- a/webview-ui/src/components/chat/BrowserSessionRow.tsx +++ b/webview-ui/src/components/chat/BrowserSessionRow.tsx @@ -1,9 +1,6 @@ import React, { memo, useEffect, useMemo, useRef, useState } from "react" -import { useSize } from "react-use" import deepEqual from "fast-deep-equal" import { useTranslation } from "react-i18next" -import { VSCodeButton } from "@vscode/webview-ui-toolkit/react" - import type { ClineMessage } from "@roo-code/types" import { BrowserAction, BrowserActionResult, ClineSayBrowserAction } from "@roo/ExtensionMessage" @@ -11,10 +8,111 @@ import { BrowserAction, BrowserActionResult, ClineSayBrowserAction } from "@roo/ import { vscode } from "@src/utils/vscode" import { useExtensionState } from "@src/context/ExtensionStateContext" -import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock" -import { ChatRowContent } from "./ChatRow" +import CodeBlock from "../common/CodeBlock" import { ProgressIndicator } from "./ProgressIndicator" -import { Globe, Pointer, SquareTerminal } from "lucide-react" +import { Button, StandardTooltip } from "@src/components/ui" +import { + Globe, + Pointer, + SquareTerminal, + MousePointer as MousePointerIcon, + Keyboard, + ArrowDown, + ArrowUp, + Play, + Check, + Maximize2, + OctagonX, +} from "lucide-react" + +const prettyKey = (k?: string): string => { + if (!k) return "" + return k + .split("+") + .map((part) => { + const p = part.trim() + const lower = p.toLowerCase() + const map: Record = { + enter: "Enter", + tab: "Tab", + escape: "Esc", + esc: "Esc", + backspace: "Backspace", + space: "Space", + shift: "Shift", + control: "Ctrl", + ctrl: "Ctrl", + alt: "Alt", + meta: "Meta", + command: "Cmd", + cmd: "Cmd", + arrowup: "Arrow Up", + arrowdown: "Arrow Down", + arrowleft: "Arrow Left", + arrowright: "Arrow Right", + pageup: "Page Up", + pagedown: "Page Down", + home: "Home", + end: "End", + } + if (map[lower]) return map[lower] + const keyMatch = /^Key([A-Z])$/.exec(p) + if (keyMatch) return keyMatch[1].toUpperCase() + const digitMatch = /^Digit([0-9])$/.exec(p) + if (digitMatch) return digitMatch[1] + const spaced = p.replace(/([a-z])([A-Z])/g, "$1 $2") + return spaced.charAt(0).toUpperCase() + spaced.slice(1) + }) + .join(" + ") +} + +const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string, size?: string) => { + switch (action) { + case "launch": + return `Launched browser` + case "click": + return `Clicked at: ${coordinate}` + case "type": + return `Typed: ${text}` + case "press": + return `Pressed key: ${prettyKey(text)}` + case "scroll_down": + return "Scrolled down" + case "scroll_up": + return "Scrolled up" + case "hover": + return `Hovered at: ${coordinate}` + case "resize": + return `Resized to: ${size?.split(/[x,]/).join(" x ")}` + case "close": + return "Closed browser" + default: + return action + } +} + +const getActionIcon = (action: BrowserAction) => { + switch (action) { + case "click": + return + case "type": + case "press": + return + case "scroll_down": + return + case "scroll_up": + return + case "launch": + return + case "close": + return + case "resize": + return + case "hover": + default: + return + } +} interface BrowserSessionRowProps { messages: ClineMessage[] @@ -30,12 +128,11 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { const { messages, isLast, onHeightChange, lastModifiedMessage } = props const { t } = useTranslation() const prevHeightRef = useRef(0) - const [maxActionHeight, setMaxActionHeight] = useState(0) const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false) + const [nextActionsExpanded, setNextActionsExpanded] = useState(false) - const { browserViewportSize = "900x600" } = useExtensionState() + const { browserViewportSize = "900x600", isBrowserSessionActive = false } = useExtensionState() const [viewportWidth, viewportHeight] = browserViewportSize.split("x").map(Number) - const aspectRatio = ((viewportHeight / viewportWidth) * 100).toFixed(2) const defaultMousePosition = `${Math.round(viewportWidth / 2)},${Math.round(viewportHeight / 2)}` const isLastApiReqInterrupted = useMemo(() => { @@ -58,83 +155,54 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted // after user approves, browser_action_result with "" is sent to indicate that the session has started }, [isLast, messages, isLastApiReqInterrupted]) - // Organize messages into pages with current state and next action + // Organize messages into pages based on ALL browser actions (including those without screenshots) const pages = useMemo(() => { const result: { - currentState: { - url?: string - screenshot?: string - mousePosition?: string - consoleLogs?: string - messages: ClineMessage[] // messages up to and including the result - } - nextAction?: { - messages: ClineMessage[] // messages leading to next result - } + url?: string + screenshot?: string + mousePosition?: string + consoleLogs?: string + action?: ClineSayBrowserAction + size?: string + viewportWidth?: number + viewportHeight?: number }[] = [] - let currentStateMessages: ClineMessage[] = [] - let nextActionMessages: ClineMessage[] = [] - + // Build pages from browser_action messages and pair with results messages.forEach((message) => { - if (message.ask === "browser_action_launch") { - // Start first page - currentStateMessages = [message] - } else if (message.say === "browser_action_result") { - if (message.text === "") { - // first browser_action_result is an empty string that signals that session has started - return + if (message.say === "browser_action") { + try { + const action = JSON.parse(message.text || "{}") as ClineSayBrowserAction + // Find the corresponding result message + const resultMessage = messages.find( + (m) => m.say === "browser_action_result" && m.ts > message.ts && m.text !== "", + ) + + if (resultMessage) { + const resultData = JSON.parse(resultMessage.text || "{}") as BrowserActionResult + result.push({ + url: resultData.currentUrl, + screenshot: resultData.screenshot, + mousePosition: resultData.currentMousePosition, + consoleLogs: resultData.logs, + action, + size: action.size, + viewportWidth: resultData.viewportWidth, + viewportHeight: resultData.viewportHeight, + }) + } else { + // For actions without results (like close), add a page without screenshot + result.push({ action, size: action.size }) + } + } catch { + // ignore parse errors } - // Complete current state - currentStateMessages.push(message) - const resultData = JSON.parse(message.text || "{}") as BrowserActionResult - - // Add page with current state and previous next actions - result.push({ - currentState: { - url: resultData.currentUrl, - screenshot: resultData.screenshot, - mousePosition: resultData.currentMousePosition, - consoleLogs: resultData.logs, - messages: [...currentStateMessages], - }, - nextAction: - nextActionMessages.length > 0 - ? { - messages: [...nextActionMessages], - } - : undefined, - }) - - // Reset for next page - currentStateMessages = [] - nextActionMessages = [] - } else if ( - message.say === "api_req_started" || - message.say === "text" || - message.say === "browser_action" - ) { - // These messages lead to the next result, so they should always go in nextActionMessages - nextActionMessages.push(message) - } else { - // Any other message types - currentStateMessages.push(message) } }) - // Add incomplete page if exists - if (currentStateMessages.length > 0 || nextActionMessages.length > 0) { - result.push({ - currentState: { - messages: [...currentStateMessages], - }, - nextAction: - nextActionMessages.length > 0 - ? { - messages: [...nextActionMessages], - } - : undefined, - }) + // Add placeholder page if no actions yet + if (result.length === 0) { + result.push({}) } return result @@ -152,240 +220,464 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return launchMessage?.text || "" }, [messages]) - // Find the latest available URL and screenshot - const latestState = useMemo(() => { + const currentPage = pages[currentPageIndex] + + // Use actual viewport dimensions from result if available, otherwise fall back to settings + + // Find the last available screenshot and its associated data to use as placeholders + const lastPageWithScreenshot = useMemo(() => { for (let i = pages.length - 1; i >= 0; i--) { - const page = pages[i] - if (page.currentState.url || page.currentState.screenshot) { - return { - url: page.currentState.url, - mousePosition: page.currentState.mousePosition, - consoleLogs: page.currentState.consoleLogs, - screenshot: page.currentState.screenshot, - } + if (pages[i].screenshot) { + return pages[i] } } - return { url: undefined, mousePosition: undefined, consoleLogs: undefined, screenshot: undefined } + return undefined }, [pages]) - const currentPage = pages[currentPageIndex] - const isLastPage = currentPageIndex === pages.length - 1 - - // Use latest state if we're on the last page and don't have a state yet - const displayState = isLastPage - ? { - url: currentPage?.currentState.url || latestState.url || initialUrl, - mousePosition: - currentPage?.currentState.mousePosition || latestState.mousePosition || defaultMousePosition, - consoleLogs: currentPage?.currentState.consoleLogs, - screenshot: currentPage?.currentState.screenshot || latestState.screenshot, - } - : { - url: currentPage?.currentState.url || initialUrl, - mousePosition: currentPage?.currentState.mousePosition || defaultMousePosition, - consoleLogs: currentPage?.currentState.consoleLogs, - screenshot: currentPage?.currentState.screenshot, + const lastPageWithMousePosition = useMemo(() => { + for (let i = pages.length - 1; i >= 0; i--) { + if (pages[i].mousePosition) { + return pages[i] } + } + return undefined + }, [pages]) - const [actionContent, { height: actionHeight }] = useSize( -
- {currentPage?.nextAction?.messages.map((message) => ( - - ))} - {!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && ( - - )} -
, - ) + // Display state from current page, with smart fallbacks + const displayState = { + url: currentPage?.url || initialUrl, + mousePosition: currentPage?.mousePosition || lastPageWithMousePosition?.mousePosition || defaultMousePosition, + consoleLogs: currentPage?.consoleLogs, + screenshot: currentPage?.screenshot || lastPageWithScreenshot?.screenshot, + } - useEffect(() => { - if (actionHeight === 0 || actionHeight === Infinity) { - return - } - if (actionHeight > maxActionHeight) { - setMaxActionHeight(actionHeight) + // Use a fixed standard aspect ratio and dimensions for the drawer to prevent flickering + // Even if viewport changes, the drawer maintains consistent size + const fixedDrawerWidth = 900 + const fixedDrawerHeight = 600 + const drawerAspectRatio = (fixedDrawerHeight / fixedDrawerWidth) * 100 + + const mousePosition = displayState.mousePosition || defaultMousePosition + + // For cursor positioning, use the viewport dimensions from the same page as the data we're displaying + // This ensures cursor position matches the screenshot/mouse position being shown + let cursorViewportWidth: number + let cursorViewportHeight: number + + if (currentPage?.screenshot) { + // Current page has screenshot - use its dimensions + cursorViewportWidth = currentPage.viewportWidth ?? viewportWidth + cursorViewportHeight = currentPage.viewportHeight ?? viewportHeight + } else if (lastPageWithScreenshot) { + // Using placeholder screenshot - use dimensions from that page + cursorViewportWidth = lastPageWithScreenshot.viewportWidth ?? viewportWidth + cursorViewportHeight = lastPageWithScreenshot.viewportHeight ?? viewportHeight + } else { + // No screenshot available - use default settings + cursorViewportWidth = viewportWidth + cursorViewportHeight = viewportHeight + } + + // Get browser action for current page (now stored in pages array) + const currentPageAction = useMemo(() => { + return pages[currentPageIndex]?.action + }, [pages, currentPageIndex]) + + // Latest non-close browser_action for header summary (fallback) + + // Determine if the overall browser session is still active (spins until 'close') + const lastBrowserActionOverall = useMemo(() => { + const all = messages.filter((m) => m.say === "browser_action") + return all.at(-1) + }, [messages]) + + // Use actual Playwright session state from extension (not message parsing) + const isBrowserSessionOpen = isBrowserSessionActive + + // Check if currently performing a browser action (for spinner) + const isSessionActive = useMemo(() => { + // Only show active spinner if a session has started + const started = messages.some((m) => m.say === "browser_action_result") + if (!started) return false + // If the last API request got interrupted/cancelled, treat session as inactive + if (isLastApiReqInterrupted) return false + if (!lastBrowserActionOverall) return true + try { + const act = JSON.parse(lastBrowserActionOverall.text || "{}") as ClineSayBrowserAction + return act.action !== "close" + } catch { + return true } - }, [actionHeight, maxActionHeight]) + }, [messages, lastBrowserActionOverall, isLastApiReqInterrupted]) - // Track latest click coordinate - const latestClickPosition = useMemo(() => { - if (!isBrowsing) return undefined + // Browser session drawer never auto-expands - user must manually toggle it - // Look through current page's next actions for the latest browser_action - const actions = currentPage?.nextAction?.messages || [] - for (let i = actions.length - 1; i >= 0; i--) { - const message = actions[i] - if (message.say === "browser_action") { - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - if (browserAction.action === "click" && browserAction.coordinate) { - return browserAction.coordinate + // Calculate total API cost for the browser session + const totalApiCost = useMemo(() => { + let total = 0 + messages.forEach((message) => { + if (message.say === "api_req_started" && message.text) { + try { + const data = JSON.parse(message.text) + if (data.cost && typeof data.cost === "number") { + total += data.cost + } + } catch { + // Ignore parsing errors } } + }) + return total + }, [messages]) + + // Local size tracking without react-use to avoid timers after unmount in tests + const containerRef = useRef(null) + const [rowHeight, setRowHeight] = useState(0) + useEffect(() => { + const el = containerRef.current + if (!el) return + let mounted = true + const setH = (h: number) => { + if (mounted) setRowHeight(h) } - return undefined - }, [isBrowsing, currentPage?.nextAction?.messages]) - - // Use latest click position while browsing, otherwise use display state - const mousePosition = isBrowsing - ? latestClickPosition || displayState.mousePosition - : displayState.mousePosition || defaultMousePosition - - const [browserSessionRow, { height: rowHeight }] = useSize( -
-
- {isBrowsing ? : } - - <>{t("chat:browser.rooWantsToUse")} - -
+ const ro = + typeof window !== "undefined" && "ResizeObserver" in window + ? new ResizeObserver((entries) => { + const entry = entries[0] + setH(entry?.contentRect?.height ?? el.getBoundingClientRect().height) + }) + : null + // initial + setH(el.getBoundingClientRect().height) + if (ro) ro.observe(el) + return () => { + mounted = false + if (ro) ro.disconnect() + } + }, []) + + const browserSessionRow = ( +
+ {/* Main header - clickable to expand/collapse, mimics TodoList style */}
- {/* URL Bar */} -
setNextActionsExpanded((v) => !v)} + /> + + {/* Simple text: "Browser Session - 28/28" */} + setNextActionsExpanded((v) => !v)} style={{ - margin: "0px auto", - width: "calc(100%)", - boxSizing: "border-box", // includes padding in width calculation - borderRadius: "4px 4px 0 0", - padding: "5px", - display: "flex", - alignItems: "center", - justifyContent: "center", - color: "var(--vscode-descriptionForeground)", - fontSize: "12px", + flex: 1, + fontSize: 13, + fontWeight: 500, + lineHeight: "22px", + color: "var(--vscode-editor-foreground)", + cursor: "pointer", }}> + {t("chat:browser.session")} + {pages.length > 1 && ` - ${currentPageIndex + 1}/${pages.length}`} + + + {/* Right side: cost badge and chevron */} + {totalApiCost > 0 && (
- - {displayState.url || "http"} + ${totalApiCost.toFixed(4)}
-
+ )} + + {/* Chevron toggle - outside cost badge, matching "Browser Session" text style */} + setNextActionsExpanded((v) => !v)} + className={`codicon codicon-chevron-${nextActionsExpanded ? "up" : "down"}`} + style={{ + fontSize: 13, + fontWeight: 500, + lineHeight: "22px", + color: "var(--vscode-editor-foreground)", + cursor: "pointer", + }} + /> - {/* Screenshot Area */} + {/* Kill browser button - only visible when session is active, styled like terminal kill button */} + {isBrowserSessionOpen && ( + + + + )} +
+ + {/* Expanded drawer content - overlays on top of chat */} + {nextActionsExpanded && (
- {displayState.screenshot ? ( - {t("chat:browser.screenshot")} - vscode.postMessage({ - type: "openImage", - text: displayState.screenshot, - }) - } - /> - ) : ( + {/* URL Bar with Navigation */} +
+ {pages.length > 1 ? ( + + ) : ( +
+ )}
+ + style={{ + textOverflow: "ellipsis", + overflow: "hidden", + whiteSpace: "nowrap", + }}> + {displayState.url || "http"} +
- )} - {displayState.mousePosition && ( - - )} -
- - {/* Console Logs Accordion */} -
{ - setConsoleLogsExpanded(!consoleLogsExpanded) - }} - className="flex items-center justify-between gap-2 text-vscode-editor-foreground/50 hover:text-vscode-editor-foreground transition-colors" - style={{ - width: "100%", - cursor: "pointer", - padding: `9px 10px ${consoleLogsExpanded ? 0 : 8}px 10px`, - }}> - - {t("chat:browser.consoleLogs")} - -
- {consoleLogsExpanded && ( - - )} -
+ {pages.length > 1 ? ( + + ) : ( +
+ )} +
- {/* Action content with min height */} -
{actionContent}
+ {/* Screenshot Area */} +
+ {displayState.screenshot ? ( + {t("chat:browser.screenshot")} + vscode.postMessage({ + type: "openImage", + text: displayState.screenshot, + }) + } + /> + ) : ( +
+ +
+ )} + {displayState.mousePosition && ( + + )} +
- {/* Pagination moved to bottom */} - {pages.length > 1 && ( -
-
- {t("chat:browser.navigation.step", { current: currentPageIndex + 1, total: pages.length })} + {/* Browser Action Row - moved above Console Logs */} +
+ {isSessionActive ? ( + + ) : currentPageAction ? ( + getActionIcon(currentPageAction.action) + ) : ( + + )} + + {(() => { + // Show action for current page being viewed + const action = currentPageAction + const pageSize = pages[currentPageIndex]?.size + if (action) { + return getBrowserActionText(action.action, action.coordinate, action.text, pageSize) + } else if (initialUrl) { + return getBrowserActionText("launch", undefined, initialUrl, undefined) + } + return t("chat:browser.rooWantsToUse") + })()} +
-
- setCurrentPageIndex((i) => i - 1)}> - {t("chat:browser.navigation.previous")} - - setCurrentPageIndex((i) => i + 1)}> - {t("chat:browser.navigation.next")} - + + {/* Console Logs Section (collapsible, default collapsed) */} +
+
{ + e.stopPropagation() + setConsoleLogsExpanded((v) => !v) + }} + className="text-vscode-editor-foreground/70 hover:text-vscode-editor-foreground transition-colors" + style={{ + display: "flex", + alignItems: "center", + gap: "8px", + marginBottom: consoleLogsExpanded ? "6px" : 0, + cursor: "pointer", + }}> + + + {t("chat:browser.consoleLogs")} + + +
+ {consoleLogsExpanded && ( +
+ +
+ )}
)} -
, +
) // Height change effect @@ -402,150 +694,6 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return browserSessionRow }, deepEqual) -interface BrowserSessionRowContentProps extends Omit { - message: ClineMessage - setMaxActionHeight: (height: number) => void - isStreaming: boolean -} - -const BrowserSessionRowContent = ({ - message, - isExpanded, - onToggleExpand, - lastModifiedMessage, - isLast, - setMaxActionHeight, - isStreaming, -}: BrowserSessionRowContentProps) => { - const { t } = useTranslation() - const headerStyle: React.CSSProperties = { - display: "flex", - alignItems: "center", - gap: "10px", - marginBottom: "10px", - wordBreak: "break-word", - } - - switch (message.type) { - case "say": - switch (message.say) { - case "api_req_started": - case "text": - return ( -
- { - if (message.say === "api_req_started") { - setMaxActionHeight(0) - } - onToggleExpand(message.ts) - }} - lastModifiedMessage={lastModifiedMessage} - isLast={isLast} - isStreaming={isStreaming} - /> -
- ) - - case "browser_action": - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - return ( - - ) - - default: - return null - } - - case "ask": - switch (message.ask) { - case "browser_action_launch": - return ( - <> -
- {t("chat:browser.sessionStarted")} -
-
- -
- - ) - - default: - return null - } - } -} - -const BrowserActionBox = ({ - action, - coordinate, - text, -}: { - action: BrowserAction - coordinate?: string - text?: string -}) => { - const { t } = useTranslation() - const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string) => { - switch (action) { - case "launch": - return t("chat:browser.actions.launch", { url: text }) - case "click": - return t("chat:browser.actions.click", { coordinate: coordinate?.replace(",", ", ") }) - case "type": - return t("chat:browser.actions.type", { text }) - case "scroll_down": - return t("chat:browser.actions.scrollDown") - case "scroll_up": - return t("chat:browser.actions.scrollUp") - case "close": - return t("chat:browser.actions.close") - default: - return action - } - } - return ( -
-
-
- - {t("chat:browser.actions.title")} - {getBrowserActionText(action, coordinate, text)} - -
-
-
- ) -} - const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = ({ style }) => { const { t } = useTranslation() // (can't use svgs in vsc extensions) diff --git a/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx b/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx new file mode 100644 index 000000000000..862dc80a62fe --- /dev/null +++ b/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx @@ -0,0 +1,34 @@ +import { memo } from "react" +import { Globe } from "lucide-react" +import { ClineMessage } from "@roo-code/types" + +interface BrowserSessionStatusRowProps { + message: ClineMessage +} + +const BrowserSessionStatusRow = memo(({ message }: BrowserSessionStatusRowProps) => { + const isOpened = message.text?.includes("opened") + + return ( +
+ + + {message.text} + +
+ ) +}) + +BrowserSessionStatusRow.displayName = "BrowserSessionStatusRow" + +export default BrowserSessionStatusRow diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index ed5257528fe1..2a24d9c2fdf9 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -503,7 +503,7 @@ export const ChatRowContent = ({ vscode.postMessage({ type: "updateTodoList", payload: { todos: updatedTodos } }) } }} - editable={editable && isLast} + editable={!!(editable && isLast)} /> ) } @@ -1345,6 +1345,10 @@ export const ChatRowContent = ({
) + case "browser_action": + case "browser_action_result": + // Handled by BrowserSessionRow; prevent raw JSON (action/result) from rendering here + return null default: return ( <> diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index b454c97bef96..e23d8648ec52 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -13,7 +13,7 @@ import { appendImages } from "@src/utils/imageUtils" import type { ClineAsk, ClineMessage, McpServerUse } from "@roo-code/types" -import { ClineSayBrowserAction, ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage" +import { ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage" import { McpServer, McpTool } from "@roo/mcp" import { findLast } from "@roo/array" import { FollowUpData, SuggestionItem } from "@roo-code/types" @@ -48,6 +48,8 @@ import { useTaskSearch } from "../history/useTaskSearch" import HistoryPreview from "../history/HistoryPreview" import Announcement from "./Announcement" import BrowserSessionRow from "./BrowserSessionRow" +import BrowserActionRow from "./BrowserActionRow" +import BrowserSessionStatusRow from "./BrowserSessionStatusRow" import ChatRow from "./ChatRow" import { ChatTextArea } from "./ChatTextArea" import TaskHeader from "./TaskHeader" @@ -1251,97 +1253,52 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - // Which of visible messages are browser session messages, see above. - if (message.type === "ask") { - return ["browser_action_launch"].includes(message.ask!) + // Compute current browser session messages for the top banner (not grouped into chat stream) + // Find the FIRST browser session from the beginning to show ALL sessions + const browserSessionStartIndex = useMemo(() => { + for (let i = 0; i < messages.length; i++) { + if (messages[i].ask === "browser_action_launch") { + return i + } + // Also check for browser_session_status as a fallback indicator + if (messages[i].say === "browser_session_status" && messages[i].text?.includes("opened")) { + return i + } } + return -1 + }, [messages]) - if (message.type === "say") { - return ["api_req_started", "text", "browser_action", "browser_action_result"].includes(message.say!) - } + const browserSessionMessages = useMemo(() => { + if (browserSessionStartIndex === -1) return [] + return messages.slice(browserSessionStartIndex) + }, [browserSessionStartIndex, messages]) + const isBrowserSessionMessage = useCallback((message: ClineMessage): boolean => { + // Only the launch ask should be hidden from chat (it's shown in the drawer header) + if (message.type === "ask" && message.ask === "browser_action_launch") { + return true + } + // browser_action_result messages are paired with browser_action and should not appear independently + if (message.type === "say" && message.say === "browser_action_result") { + return true + } return false - } + }, []) const groupedMessages = useMemo(() => { - const result: (ClineMessage | ClineMessage[])[] = [] - let currentGroup: ClineMessage[] = [] - let isInBrowserSession = false - - const endBrowserSession = () => { - if (currentGroup.length > 0) { - result.push([...currentGroup]) - currentGroup = [] - isInBrowserSession = false - } - } - - visibleMessages.forEach((message: ClineMessage) => { - if (message.ask === "browser_action_launch") { - // Complete existing browser session if any. - endBrowserSession() - // Start new. - isInBrowserSession = true - currentGroup.push(message) - } else if (isInBrowserSession) { - // End session if `api_req_started` is cancelled. - - if (message.say === "api_req_started") { - // Get last `api_req_started` in currentGroup to check if - // it's cancelled. If it is then this api req is not part - // of the current browser session. - const lastApiReqStarted = [...currentGroup].reverse().find((m) => m.say === "api_req_started") - - if (lastApiReqStarted?.text !== null && lastApiReqStarted?.text !== undefined) { - const info = JSON.parse(lastApiReqStarted.text) - const isCancelled = info.cancelReason !== null && info.cancelReason !== undefined - - if (isCancelled) { - endBrowserSession() - result.push(message) - return - } - } - } - - if (isBrowserSessionMessage(message)) { - currentGroup.push(message) - - // Check if this is a close action - if (message.say === "browser_action") { - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - if (browserAction.action === "close") { - endBrowserSession() - } - } - } else { - // complete existing browser session if any - endBrowserSession() - result.push(message) - } - } else { - result.push(message) - } - }) - - // Handle case where browser session is the last group - if (currentGroup.length > 0) { - result.push([...currentGroup]) - } + // Only filter out the launch ask and result messages - browser actions appear in chat + const result: ClineMessage[] = visibleMessages.filter((msg) => !isBrowserSessionMessage(msg)) if (isCondensing) { - // Show indicator after clicking condense button result.push({ type: "say", say: "condense_context", ts: Date.now(), partial: true, - }) + } as any) } - return result - }, [isCondensing, visibleMessages]) + }, [isCondensing, visibleMessages, isBrowserSessionMessage]) // scrolling @@ -1498,7 +1455,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - // browser session group + // browser session group - this should never be called now since we don't group messages if (Array.isArray(messageOrGroup)) { return ( ) } + const hasCheckpoint = modifiedMessages.some((message) => message.say === "checkpoint_saved") + // Check if this is a browser action message + if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_action") { + // Find the corresponding result message by looking for the next browser_action_result after this action's timestamp + const nextMessage = modifiedMessages.find( + (m) => m.ts > messageOrGroup.ts && m.say === "browser_action_result", + ) + + // Calculate action index and total count + const browserActions = modifiedMessages.filter((m) => m.say === "browser_action") + const actionIndex = browserActions.findIndex((m) => m.ts === messageOrGroup.ts) + 1 + const totalActions = browserActions.length + + return ( + + ) + } + + // Check if this is a browser session status message + if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_session_status") { + return + } + // regular message return ( + {/* Top-of-chat browser session banner */} + {browserSessionStartIndex !== -1 && ( +
+ expandedRows[messageTs] ?? false} + onToggleExpand={(messageTs: number) => { + setExpandedRows((prev: Record) => ({ + ...prev, + [messageTs]: !prev[messageTs], + })) + }} + /> +
+ )}
{ + const renderRow = (messages: any[]) => { + const mockExtState: any = { + // Ensure known viewport so expected aspect ratio is deterministic (600/900 = 66.67%) + browserViewportSize: "900x600", + isBrowserSessionActive: false, + browserActionsAutoExpand: false, + } + + return render( + + true} + onToggleExpand={() => {}} + lastModifiedMessage={undefined as any} + isLast={true} + onHeightChange={() => {}} + isStreaming={false} + /> + , + ) + } + + it("reserves height while screenshot is loading (no layout collapse)", () => { + // Only a launch action, no corresponding browser_action_result yet (no screenshot) + const messages = [ + { + ts: 1, + say: "browser_action", + text: JSON.stringify({ action: "launch", url: "http://localhost:3000" }), + }, + ] + + renderRow(messages) + + // Open the browser session drawer + const globe = screen.getByLabelText("Browser interaction") + fireEvent.click(globe) + + const container = screen.getByTestId("screenshot-container") as HTMLDivElement + // padding-bottom should reflect aspect ratio (600/900 * 100) even without an image + const pb = parseFloat(container.style.paddingBottom || "0") + expect(pb).toBeGreaterThan(0) + // Be tolerant of rounding + expect(Math.round(pb)).toBe(67) + }) +}) diff --git a/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx new file mode 100644 index 000000000000..13f201080a7e --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx @@ -0,0 +1,43 @@ +import React from "react" +import { render, screen } from "@testing-library/react" +import BrowserSessionRow from "../BrowserSessionRow" +import { ExtensionStateContext } from "@src/context/ExtensionStateContext" +import { TooltipProvider } from "@radix-ui/react-tooltip" + +describe("BrowserSessionRow - Disconnect session button", () => { + const renderRow = (isActive: boolean) => { + const mockExtState: any = { + browserViewportSize: "900x600", + isBrowserSessionActive: isActive, + browserActionsAutoExpand: false, + } + + return render( + + + false} + onToggleExpand={() => {}} + lastModifiedMessage={undefined as any} + isLast={true} + onHeightChange={() => {}} + isStreaming={false} + /> + + , + ) + } + + it("shows the Disconnect session button when a session is active", () => { + renderRow(true) + const btn = screen.getByLabelText("Disconnect session") + expect(btn).toBeInTheDocument() + }) + + it("does not render the button when no session is active", () => { + renderRow(false) + const btn = screen.queryByLabelText("Disconnect session") + expect(btn).toBeNull() + }) +}) diff --git a/webview-ui/src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx b/webview-ui/src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx new file mode 100644 index 000000000000..e870e8df3c29 --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx @@ -0,0 +1,119 @@ +// npx vitest run src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx + +import { render, waitFor } from "@/utils/test-utils" +import { QueryClient, QueryClientProvider } from "@tanstack/react-query" +import { ExtensionStateContextProvider } from "@src/context/ExtensionStateContext" +import ChatView, { ChatViewProps } from "../ChatView" + +vi.mock("@src/utils/vscode", () => ({ + vscode: { postMessage: vi.fn() }, +})) + +vi.mock("rehype-highlight", () => ({ default: () => () => {} })) +vi.mock("hast-util-to-text", () => ({ default: () => "" })) + +vi.mock("../BrowserSessionRow", () => ({ + default: function MockBrowserSessionRow({ messages }: { messages: any[] }) { + return
{JSON.stringify(messages)}
+ }, +})) + +vi.mock("../ChatRow", () => ({ + default: function MockChatRow({ message }: { message: any }) { + return
{JSON.stringify(message)}
+ }, +})) + +vi.mock("../TaskHeader", () => ({ + default: function MockTaskHeader() { + return
+ }, +})) + +vi.mock("@src/components/common/CodeBlock", () => ({ + default: () => null, + CODE_BLOCK_BG_COLOR: "rgb(30, 30, 30)", +})) + +const queryClient = new QueryClient() + +const defaultProps: ChatViewProps = { + isHidden: false, + showAnnouncement: false, + hideAnnouncement: () => {}, +} + +const renderChatView = (props: Partial = {}) => { + return render( + + + + + , + ) +} + +const mockPostMessage = (state: any) => { + window.postMessage( + { + type: "state", + state: { + version: "1.0.0", + clineMessages: [], + taskHistory: [], + shouldShowAnnouncement: false, + allowedCommands: [], + autoApprovalEnabled: true, + ...state, + }, + }, + "*", + ) +} + +describe("ChatView followup inside browser session", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it.skip("renders followup ask as a regular ChatRow while session banner is visible", async () => { + renderChatView() + + const ts = Date.now() + + // Send initial message with browser session and followup + mockPostMessage({ + alwaysAllowBrowser: true, + clineMessages: [ + { type: "say", say: "task", ts: ts - 4000, text: "Initial task" }, + { + type: "ask", + ask: "browser_action_launch", + ts: ts - 3000, + text: "http://example.com", + partial: false, + }, + { type: "say", say: "browser_action_result", ts: ts - 2000, text: "" }, + { + type: "ask", + ask: "followup", + ts: ts, + text: JSON.stringify({ question: "Continue?", suggest: [{ answer: "Yes" }, { answer: "No" }] }), + partial: false, + }, + ], + }) + + // Banner should be present (only contains browser_action_launch and browser_action_result) + await waitFor(() => { + const banner = document.querySelector('[data-testid="browser-session"]') + expect(banner).not.toBeNull() + }) + + // At least one ChatRow should render (the followup question) + await waitFor(() => { + const chatRows = document.querySelectorAll('[data-testid="chat-row"]') + expect(chatRows.length).toBeGreaterThan(0) + }) + }) +}) diff --git a/webview-ui/src/components/settings/BrowserSettings.tsx b/webview-ui/src/components/settings/BrowserSettings.tsx index 76b4a823be96..b306c6325cdc 100644 --- a/webview-ui/src/components/settings/BrowserSettings.tsx +++ b/webview-ui/src/components/settings/BrowserSettings.tsx @@ -18,12 +18,14 @@ type BrowserSettingsProps = HTMLAttributes & { screenshotQuality?: number remoteBrowserHost?: string remoteBrowserEnabled?: boolean + browserActionsAutoExpand?: boolean setCachedStateField: SetCachedStateField< | "browserToolEnabled" | "browserViewportSize" | "screenshotQuality" | "remoteBrowserHost" | "remoteBrowserEnabled" + | "browserActionsAutoExpand" > } @@ -33,6 +35,7 @@ export const BrowserSettings = ({ screenshotQuality, remoteBrowserHost, remoteBrowserEnabled, + browserActionsAutoExpand, setCachedStateField, ...props }: BrowserSettingsProps) => { @@ -168,6 +171,19 @@ export const BrowserSettings = ({
+
+ + setCachedStateField("browserActionsAutoExpand", e.target.checked) + }> + {t("settings:browser.autoExpand.label")} + +
+ {t("settings:browser.autoExpand.description")} +
+
+
(({ onDone, t reasoningBlockCollapsed, includeCurrentTime, includeCurrentCost, + browserActionsAutoExpand, } = cachedState const apiConfiguration = useMemo(() => cachedState.apiConfiguration ?? {}, [cachedState.apiConfiguration]) @@ -353,6 +354,7 @@ const SettingsView = forwardRef(({ onDone, t vscode.postMessage({ type: "fuzzyMatchThreshold", value: fuzzyMatchThreshold ?? 1.0 }) vscode.postMessage({ type: "writeDelayMs", value: writeDelayMs }) vscode.postMessage({ type: "screenshotQuality", value: screenshotQuality ?? 75 }) + vscode.postMessage({ type: "browserActionsAutoExpand", bool: browserActionsAutoExpand ?? false }) vscode.postMessage({ type: "terminalOutputLineLimit", value: terminalOutputLineLimit ?? 500 }) vscode.postMessage({ type: "terminalOutputCharacterLimit", value: terminalOutputCharacterLimit ?? 50000 }) vscode.postMessage({ type: "terminalShellIntegrationTimeout", value: terminalShellIntegrationTimeout }) @@ -710,6 +712,7 @@ const SettingsView = forwardRef(({ onDone, t screenshotQuality={screenshotQuality} remoteBrowserHost={remoteBrowserHost} remoteBrowserEnabled={remoteBrowserEnabled} + browserActionsAutoExpand={browserActionsAutoExpand} setCachedStateField={setCachedStateField} /> )} diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 7c68795040b5..da237da0bdaa 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -148,6 +148,8 @@ export interface ExtensionStateContextType extends ExtensionState { setTerminalCompressProgressBar: (value: boolean) => void setHistoryPreviewCollapsed: (value: boolean) => void setReasoningBlockCollapsed: (value: boolean) => void + browserActionsAutoExpand?: boolean + setBrowserActionsAutoExpand: (value: boolean) => void autoCondenseContext: boolean setAutoCondenseContext: (value: boolean) => void autoCondenseContextPercent: number @@ -200,6 +202,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode deniedCommands: [], soundEnabled: false, soundVolume: 0.5, + isBrowserSessionActive: false, ttsEnabled: false, ttsSpeed: 1.0, diffEnabled: false, @@ -250,6 +253,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode terminalCompressProgressBar: true, // Default to compress progress bar output historyPreviewCollapsed: false, // Initialize the new state (default to expanded) reasoningBlockCollapsed: true, // Default to collapsed + browserActionsAutoExpand: false, // Default to collapsed browser actions cloudUserInfo: null, cloudIsAuthenticated: false, cloudOrganizations: [], @@ -569,6 +573,8 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode setState((prevState) => ({ ...prevState, historyPreviewCollapsed: value })), setReasoningBlockCollapsed: (value) => setState((prevState) => ({ ...prevState, reasoningBlockCollapsed: value })), + setBrowserActionsAutoExpand: (value) => + setState((prevState) => ({ ...prevState, browserActionsAutoExpand: value })), setHasOpenedModeSelector: (value) => setState((prevState) => ({ ...prevState, hasOpenedModeSelector: value })), setAutoCondenseContext: (value) => setState((prevState) => ({ ...prevState, autoCondenseContext: value })), setAutoCondenseContextPercent: (value) => diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx index 92652733ddf1..61ebcb1fa385 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx @@ -214,6 +214,7 @@ describe("mergeExtensionState", () => { remoteControlEnabled: false, taskSyncEnabled: false, featureRoomoteControlEnabled: false, + isBrowserSessionActive: false, checkpointTimeout: DEFAULT_CHECKPOINT_TIMEOUT_SECONDS, // Add the checkpoint timeout property } diff --git a/webview-ui/src/i18n/locales/ca/chat.json b/webview-ui/src/i18n/locales/ca/chat.json index e37b51db563c..4ef8b7477011 100644 --- a/webview-ui/src/i18n/locales/ca/chat.json +++ b/webview-ui/src/i18n/locales/ca/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Uneix-te a nosaltres a X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sessió del navegador", "rooWantsToUse": "Roo vol utilitzar el navegador", "consoleLogs": "Registres de consola", "noNewLogs": "(Cap registre nou)", @@ -328,8 +329,10 @@ "launch": "Iniciar navegador a {{url}}", "click": "Clic ({{coordinate}})", "type": "Escriure \"{{text}}\"", + "press": "Prem {{key}}", "scrollDown": "Desplaçar avall", "scrollUp": "Desplaçar amunt", + "hover": "Plana sobre ({{coordinate}})", "close": "Tancar navegador" } }, diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index c97345f5e46c..de4c8a405c48 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -501,6 +501,10 @@ "label": "Qualitat de captures de pantalla", "description": "Ajusteu la qualitat WebP de les captures de pantalla del navegador. Valors més alts proporcionen captures més clares però augmenten l'ús de token." }, + "autoExpand": { + "label": "Expansió automàtica de les accions del navegador", + "description": "Expandeix automàticament les captures de pantalla de les accions del navegador a la vista de xat" + }, "remote": { "label": "Utilitzar connexió remota del navegador", "description": "Connectar a un navegador Chrome que s'executa amb depuració remota habilitada (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/de/chat.json b/webview-ui/src/i18n/locales/de/chat.json index 8ed111d5aa54..6c9d8543453f 100644 --- a/webview-ui/src/i18n/locales/de/chat.json +++ b/webview-ui/src/i18n/locales/de/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Folge uns auf X, Discord oder r/RooCode 🚀" }, "browser": { + "session": "Browser-Sitzung", "rooWantsToUse": "Roo möchte den Browser verwenden", "consoleLogs": "Konsolenprotokolle", "noNewLogs": "(Keine neuen Protokolle)", @@ -328,8 +329,10 @@ "launch": "Browser starten auf {{url}}", "click": "Klicken ({{coordinate}})", "type": "Eingeben \"{{text}}\"", + "press": "{{key}} drücken", "scrollDown": "Nach unten scrollen", "scrollUp": "Nach oben scrollen", + "hover": "Hover ({{coordinate}})", "close": "Browser schließen" } }, diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 7588982b368c..c5201f580e13 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -501,6 +501,10 @@ "label": "Screenshot-Qualität", "description": "Passen Sie die WebP-Qualität von Browser-Screenshots an. Höhere Werte bieten klarere Screenshots, erhöhen aber den Token-Verbrauch." }, + "autoExpand": { + "label": "Browser-Aktionen automatisch erweitern", + "description": "Screenshots von Browser-Aktionen in der Chat-Ansicht automatisch erweitern" + }, "remote": { "label": "Remote-Browser-Verbindung verwenden", "description": "Verbindung zu einem Chrome-Browser herstellen, der mit aktiviertem Remote-Debugging läuft (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index 6e2cf84f895d..cacfdfc3bf4f 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -329,6 +329,7 @@ "countdownDisplay": "{{count}}s" }, "browser": { + "session": "Browser Session", "rooWantsToUse": "Roo wants to use the browser", "consoleLogs": "Console Logs", "noNewLogs": "(No new logs)", @@ -341,12 +342,13 @@ }, "sessionStarted": "Browser Session Started", "actions": { - "title": "Browse Action: ", "launch": "Launch browser at {{url}}", "click": "Click ({{coordinate}})", "type": "Type \"{{text}}\"", + "press": "Press {{key}}", "scrollDown": "Scroll down", "scrollUp": "Scroll up", + "hover": "Hover ({{coordinate}})", "close": "Close browser" } }, diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 789452fadcfe..13df77713e4b 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -506,6 +506,10 @@ "label": "Screenshot quality", "description": "Adjust the WebP quality of browser screenshots. Higher values provide clearer screenshots but increase token usage." }, + "autoExpand": { + "label": "Auto-expand browser actions", + "description": "Automatically expand browser action screenshots in chat view" + }, "remote": { "label": "Use remote browser connection", "description": "Connect to a Chrome browser running with remote debugging enabled (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/es/chat.json b/webview-ui/src/i18n/locales/es/chat.json index 33e9956baa02..cdbd488aaf77 100644 --- a/webview-ui/src/i18n/locales/es/chat.json +++ b/webview-ui/src/i18n/locales/es/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Únete a nosotros en X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sesión del navegador", "rooWantsToUse": "Roo quiere usar el navegador", "consoleLogs": "Registros de la consola", "noNewLogs": "(No hay nuevos registros)", @@ -328,8 +329,10 @@ "launch": "Iniciar navegador en {{url}}", "click": "Clic ({{coordinate}})", "type": "Escribir \"{{text}}\"", + "press": "Pulsar {{key}}", "scrollDown": "Desplazar hacia abajo", "scrollUp": "Desplazar hacia arriba", + "hover": "Flotar ({{coordinate}})", "close": "Cerrar navegador" } }, diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index fa5ce550e85c..e01740b6b47f 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -501,6 +501,10 @@ "label": "Calidad de capturas de pantalla", "description": "Ajuste la calidad WebP de las capturas de pantalla del navegador. Valores más altos proporcionan capturas más claras pero aumentan el uso de token." }, + "autoExpand": { + "label": "Expandir automáticamente las acciones del navegador", + "description": "Expandir automáticamente las capturas de pantalla de las acciones del navegador en la vista de chat" + }, "remote": { "label": "Usar conexión remota del navegador", "description": "Conectarse a un navegador Chrome que se ejecuta con depuración remota habilitada (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/fr/chat.json b/webview-ui/src/i18n/locales/fr/chat.json index 77231e2b0ae1..c7a2705bb883 100644 --- a/webview-ui/src/i18n/locales/fr/chat.json +++ b/webview-ui/src/i18n/locales/fr/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Rejoins-nous sur X, Discord, ou r/RooCode 🚀" }, "browser": { + "session": "Session du navigateur", "rooWantsToUse": "Roo veut utiliser le navigateur", "consoleLogs": "Journaux de console", "noNewLogs": "(Pas de nouveaux journaux)", @@ -328,8 +329,10 @@ "launch": "Lancer le navigateur sur {{url}}", "click": "Cliquer ({{coordinate}})", "type": "Saisir \"{{text}}\"", + "press": "Appuyer sur {{key}}", "scrollDown": "Défiler vers le bas", "scrollUp": "Défiler vers le haut", + "hover": "Survoler ({{coordinate}})", "close": "Fermer le navigateur" } }, diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index d1c2ccd40f80..16f4f21f191d 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -501,6 +501,10 @@ "label": "Qualité des captures d'écran", "description": "Ajustez la qualité WebP des captures d'écran du navigateur. Des valeurs plus élevées fournissent des captures plus claires mais augmentent l'utilisation de token." }, + "autoExpand": { + "label": "Développer automatiquement les actions du navigateur", + "description": "Développer automatiquement les captures d'écran des actions du navigateur dans la vue de discussion" + }, "remote": { "label": "Utiliser une connexion de navigateur distant", "description": "Se connecter à un navigateur Chrome exécuté avec le débogage à distance activé (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/hi/chat.json b/webview-ui/src/i18n/locales/hi/chat.json index 9a367ea4f217..aa5b94df6f81 100644 --- a/webview-ui/src/i18n/locales/hi/chat.json +++ b/webview-ui/src/i18n/locales/hi/chat.json @@ -312,6 +312,7 @@ "socialLinks": "X, Discord, या r/RooCode पर हमसे जुड़ें 🚀" }, "browser": { + "session": "ब्राउज़र सत्र", "rooWantsToUse": "Roo ब्राउज़र का उपयोग करना चाहता है", "consoleLogs": "कंसोल लॉग", "noNewLogs": "(कोई नया लॉग नहीं)", @@ -328,8 +329,10 @@ "launch": "{{url}} पर ब्राउज़र लॉन्च करें", "click": "क्लिक करें ({{coordinate}})", "type": "टाइप करें \"{{text}}\"", + "press": "{{key}} दबाएँ", "scrollDown": "नीचे स्क्रॉल करें", "scrollUp": "ऊपर स्क्रॉल करें", + "hover": "होवर करें ({{coordinate}})", "close": "ब्राउज़र बंद करें" } }, diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index bcc45d5db75b..7e24192d8aec 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -501,6 +501,10 @@ "label": "स्क्रीनशॉट गुणवत्ता", "description": "ब्राउज़र स्क्रीनशॉट की WebP गुणवत्ता समायोजित करें। उच्च मान स्पष्ट स्क्रीनशॉट प्रदान करते हैं लेकिन token उपयोग बढ़ाते हैं।" }, + "autoExpand": { + "label": "ब्राउज़र क्रियाओं को स्वतः विस्तृत करें", + "description": "चैट दृश्य में ब्राउज़र क्रिया स्क्रीनशॉट को स्वचालित रूप से विस्तृत करें" + }, "remote": { "label": "दूरस्थ ब्राउज़र कनेक्शन का उपयोग करें", "description": "रिमोट डीबगिंग सक्षम के साथ चल रहे Chrome ब्राउज़र से कनेक्ट करें (--remote-debugging-port=9222)।", diff --git a/webview-ui/src/i18n/locales/id/chat.json b/webview-ui/src/i18n/locales/id/chat.json index ebc29eaac231..8e90b91c0bad 100644 --- a/webview-ui/src/i18n/locales/id/chat.json +++ b/webview-ui/src/i18n/locales/id/chat.json @@ -333,6 +333,7 @@ "countdownDisplay": "{{count}}dtk" }, "browser": { + "session": "Sesi Browser", "rooWantsToUse": "Roo ingin menggunakan browser", "consoleLogs": "Log Konsol", "noNewLogs": "(Tidak ada log baru)", @@ -349,8 +350,10 @@ "launch": "Luncurkan browser di {{url}}", "click": "Klik ({{coordinate}})", "type": "Ketik \"{{text}}\"", + "press": "Tekan {{key}}", "scrollDown": "Gulir ke bawah", "scrollUp": "Gulir ke atas", + "hover": "Arahkan ({{coordinate}})", "close": "Tutup browser" } }, diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 01298d93ddf9..7c71dd829d13 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -505,6 +505,10 @@ "label": "Kualitas screenshot", "description": "Sesuaikan kualitas WebP screenshot browser. Nilai yang lebih tinggi memberikan screenshot yang lebih jelas tetapi meningkatkan penggunaan token." }, + "autoExpand": { + "label": "Perluas otomatis tindakan browser", + "description": "Perluas otomatis tangkapan layar tindakan browser di tampilan obrolan" + }, "remote": { "label": "Gunakan koneksi browser remote", "description": "Hubungkan ke browser Chrome yang berjalan dengan remote debugging diaktifkan (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/it/chat.json b/webview-ui/src/i18n/locales/it/chat.json index 0fb2ad4ce1df..2ff9591998b2 100644 --- a/webview-ui/src/i18n/locales/it/chat.json +++ b/webview-ui/src/i18n/locales/it/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Unisciti a noi su X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sessione del browser", "rooWantsToUse": "Roo vuole utilizzare il browser", "consoleLogs": "Log della console", "noNewLogs": "(Nessun nuovo log)", @@ -328,8 +329,10 @@ "launch": "Avvia browser su {{url}}", "click": "Clic ({{coordinate}})", "type": "Digita \"{{text}}\"", + "press": "Premi {{key}}", "scrollDown": "Scorri verso il basso", "scrollUp": "Scorri verso l'alto", + "hover": "Passa il mouse ({{coordinate}})", "close": "Chiudi browser" } }, diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index 7b3eb560b9c8..311f8b158451 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -501,6 +501,10 @@ "label": "Qualità screenshot", "description": "Regola la qualità WebP degli screenshot del browser. Valori più alti forniscono screenshot più nitidi ma aumentano l'utilizzo di token." }, + "autoExpand": { + "label": "Espandi automaticamente le azioni del browser", + "description": "Espandi automaticamente gli screenshot delle azioni del browser nella visualizzazione della chat" + }, "remote": { "label": "Usa connessione browser remoto", "description": "Connettiti a un browser Chrome in esecuzione con debug remoto abilitato (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/ja/chat.json b/webview-ui/src/i18n/locales/ja/chat.json index 4dec78205ce5..681fead6dd68 100644 --- a/webview-ui/src/i18n/locales/ja/chat.json +++ b/webview-ui/src/i18n/locales/ja/chat.json @@ -312,6 +312,7 @@ "socialLinks": "XDiscord、またはr/RooCodeでフォローしてください 🚀" }, "browser": { + "session": "ブラウザセッション", "rooWantsToUse": "Rooはブラウザを使用したい", "consoleLogs": "コンソールログ", "noNewLogs": "(新しいログはありません)", @@ -328,8 +329,10 @@ "launch": "{{url}} でブラウザを起動", "click": "クリック ({{coordinate}})", "type": "入力 \"{{text}}\"", + "press": "{{key}}を押す", "scrollDown": "下にスクロール", "scrollUp": "上にスクロール", + "hover": "ホバー ({{coordinate}})", "close": "ブラウザを閉じる" } }, diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index 9ad7035b5464..8e60715d3b80 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -501,6 +501,10 @@ "label": "スクリーンショット品質", "description": "ブラウザスクリーンショットのWebP品質を調整します。高い値はより鮮明なスクリーンショットを提供しますが、token使用量が増加します。" }, + "autoExpand": { + "label": "ブラウザアクションを自動展開", + "description": "チャットビューでブラウザアクションのスクリーンショットを自動的に展開します" + }, "remote": { "label": "リモートブラウザ接続を使用", "description": "リモートデバッグを有効にして実行しているChromeブラウザに接続します(--remote-debugging-port=9222)。", diff --git a/webview-ui/src/i18n/locales/ko/chat.json b/webview-ui/src/i18n/locales/ko/chat.json index 1d4e742d59fa..635fe660ac02 100644 --- a/webview-ui/src/i18n/locales/ko/chat.json +++ b/webview-ui/src/i18n/locales/ko/chat.json @@ -312,6 +312,7 @@ "socialLinks": "X, Discord, 또는 r/RooCode에서 만나요 🚀" }, "browser": { + "session": "브라우저 세션", "rooWantsToUse": "Roo가 브라우저를 사용하고 싶어합니다", "consoleLogs": "콘솔 로그", "noNewLogs": "(새 로그 없음)", @@ -328,8 +329,10 @@ "launch": "{{url}}에서 브라우저 실행", "click": "클릭 ({{coordinate}})", "type": "입력 \"{{text}}\"", + "press": "{{key}} 누르기", "scrollDown": "아래로 스크롤", "scrollUp": "위로 스크롤", + "hover": "가리키기 ({{coordinate}})", "close": "브라우저 닫기" } }, diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index c0b8bce736c9..554efb72c8ed 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -501,6 +501,10 @@ "label": "스크린샷 품질", "description": "브라우저 스크린샷의 WebP 품질을 조정합니다. 높은 값은 더 선명한 스크린샷을 제공하지만 token 사용량이 증가합니다." }, + "autoExpand": { + "label": "브라우저 작업 자동 확장", + "description": "채팅 보기에서 브라우저 작업 스크린샷 자동 확장" + }, "remote": { "label": "원격 브라우저 연결 사용", "description": "원격 디버깅이 활성화된 Chrome 브라우저에 연결합니다(--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/nl/chat.json b/webview-ui/src/i18n/locales/nl/chat.json index 10733f23a724..eda1fe1454b2 100644 --- a/webview-ui/src/i18n/locales/nl/chat.json +++ b/webview-ui/src/i18n/locales/nl/chat.json @@ -312,6 +312,7 @@ "countdownDisplay": "{{count}}s" }, "browser": { + "session": "Browsersessie", "rooWantsToUse": "Roo wil de browser gebruiken", "consoleLogs": "Console-logboeken", "noNewLogs": "(Geen nieuwe logboeken)", @@ -328,8 +329,10 @@ "launch": "Browser starten op {{url}}", "click": "Klik ({{coordinate}})", "type": "Typ \"{{text}}\"", + "press": "Druk op {{key}}", "scrollDown": "Scroll naar beneden", "scrollUp": "Scroll naar boven", + "hover": "Zweven ({{coordinate}})", "close": "Browser sluiten" } }, diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index 539e214e81b8..149295564168 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -501,6 +501,10 @@ "label": "Screenshotkwaliteit", "description": "Pas de WebP-kwaliteit van browserscreenshots aan. Hogere waarden geven duidelijkere screenshots maar verhogen het tokengebruik." }, + "autoExpand": { + "label": "Browseracties automatisch uitvouwen", + "description": "Screenshots van browseracties automatisch uitvouwen in chatweergave" + }, "remote": { "label": "Gebruik externe browserverbinding", "description": "Verbind met een Chrome-browser die draait met remote debugging ingeschakeld (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/pl/chat.json b/webview-ui/src/i18n/locales/pl/chat.json index 6aeced346e55..6ec865e13498 100644 --- a/webview-ui/src/i18n/locales/pl/chat.json +++ b/webview-ui/src/i18n/locales/pl/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Dołącz do nas na X, Discord, lub r/RooCode 🚀" }, "browser": { + "session": "Sesja przeglądarki", "rooWantsToUse": "Roo chce użyć przeglądarki", "consoleLogs": "Logi konsoli", "noNewLogs": "(Brak nowych logów)", @@ -328,8 +329,10 @@ "launch": "Uruchom przeglądarkę na {{url}}", "click": "Kliknij ({{coordinate}})", "type": "Wpisz \"{{text}}\"", + "press": "Naciśnij {{key}}", "scrollDown": "Przewiń w dół", "scrollUp": "Przewiń w górę", + "hover": "Najedź ({{coordinate}})", "close": "Zamknij przeglądarkę" } }, diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index ebc192e23024..d8a34abeba8f 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -501,6 +501,10 @@ "label": "Jakość zrzutów ekranu", "description": "Dostosuj jakość WebP zrzutów ekranu przeglądarki. Wyższe wartości zapewniają wyraźniejsze zrzuty ekranu, ale zwiększają zużycie token." }, + "autoExpand": { + "label": "Automatycznie rozwijaj akcje przeglądarki", + "description": "Automatycznie rozwijaj zrzuty ekranu akcji przeglądarki w widoku czatu" + }, "remote": { "label": "Użyj zdalnego połączenia przeglądarki", "description": "Połącz się z przeglądarką Chrome uruchomioną z włączonym zdalnym debugowaniem (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/pt-BR/chat.json b/webview-ui/src/i18n/locales/pt-BR/chat.json index 684d9464748c..fcc93a59ce58 100644 --- a/webview-ui/src/i18n/locales/pt-BR/chat.json +++ b/webview-ui/src/i18n/locales/pt-BR/chat.json @@ -312,6 +312,7 @@ "socialLinks": "Junte-se a nós no X, Discord, ou r/RooCode 🚀" }, "browser": { + "session": "Sessão do Navegador", "rooWantsToUse": "Roo quer usar o navegador", "consoleLogs": "Logs do console", "noNewLogs": "(Sem novos logs)", @@ -328,8 +329,10 @@ "launch": "Iniciar navegador em {{url}}", "click": "Clique ({{coordinate}})", "type": "Digitar \"{{text}}\"", + "press": "Pressione {{key}}", "scrollDown": "Rolar para baixo", "scrollUp": "Rolar para cima", + "hover": "Pairar ({{coordinate}})", "close": "Fechar navegador" } }, diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index fb68793e4180..60e5c7399868 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -501,6 +501,10 @@ "label": "Qualidade das capturas de tela", "description": "Ajuste a qualidade WebP das capturas de tela do navegador. Valores mais altos fornecem capturas mais nítidas, mas aumentam o uso de token." }, + "autoExpand": { + "label": "Expandir ações do navegador automaticamente", + "description": "Expandir automaticamente as capturas de tela de ação do navegador na visualização de bate-papo" + }, "remote": { "label": "Usar conexão remota de navegador", "description": "Conectar a um navegador Chrome executando com depuração remota ativada (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/ru/chat.json b/webview-ui/src/i18n/locales/ru/chat.json index ac35db92146f..4de9ab7e8cfb 100644 --- a/webview-ui/src/i18n/locales/ru/chat.json +++ b/webview-ui/src/i18n/locales/ru/chat.json @@ -313,6 +313,7 @@ "countdownDisplay": "{{count}}с" }, "browser": { + "session": "Сеанс браузера", "rooWantsToUse": "Roo хочет использовать браузер", "consoleLogs": "Логи консоли", "noNewLogs": "(Новых логов нет)", @@ -329,8 +330,10 @@ "launch": "Открыть браузер по адресу {{url}}", "click": "Клик ({{coordinate}})", "type": "Ввести \"{{text}}\"", + "press": "Нажать {{key}}", "scrollDown": "Прокрутить вниз", "scrollUp": "Прокрутить вверх", + "hover": "Навести ({{coordinate}})", "close": "Закрыть браузер" } }, diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 09fedab8f46f..2d456119bcba 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -501,6 +501,10 @@ "label": "Качество скриншота", "description": "Настройте качество WebP для скриншотов браузера. Более высокие значения дают более чёткие изображения, но увеличивают расход токенов." }, + "autoExpand": { + "label": "Автоматически расширять действия браузера", + "description": "Автоматически расширять скриншоты действий браузера в чате" + }, "remote": { "label": "Использовать удалённое подключение к браузеру", "description": "Подключиться к Chrome с включённым удалённым дебагом (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/tr/chat.json b/webview-ui/src/i18n/locales/tr/chat.json index bffda4944012..1f94b4b36d0e 100644 --- a/webview-ui/src/i18n/locales/tr/chat.json +++ b/webview-ui/src/i18n/locales/tr/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Bize X, Discord, veya r/RooCode'da katılın 🚀" }, "browser": { + "session": "Tarayıcı Oturumu", "rooWantsToUse": "Roo tarayıcıyı kullanmak istiyor", "consoleLogs": "Konsol Kayıtları", "noNewLogs": "(Yeni kayıt yok)", @@ -329,8 +330,10 @@ "launch": "{{url}} adresinde tarayıcı başlat", "click": "Tıkla ({{coordinate}})", "type": "Yaz \"{{text}}\"", + "press": "{{key}} tuşuna bas", "scrollDown": "Aşağı kaydır", "scrollUp": "Yukarı kaydır", + "hover": "Üzerine gel ({{coordinate}})", "close": "Tarayıcıyı kapat" } }, diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 2ce4732ff3a3..4599282197a9 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -501,6 +501,10 @@ "label": "Ekran görüntüsü kalitesi", "description": "Tarayıcı ekran görüntülerinin WebP kalitesini ayarlayın. Daha yüksek değerler daha net ekran görüntüleri sağlar ancak token kullanımını artırır." }, + "autoExpand": { + "label": "Tarayıcı eylemlerini otomatik genişlet", + "description": "Sohbet görünümünde tarayıcı eylem ekran görüntülerini otomatik olarak genişletin" + }, "remote": { "label": "Uzak tarayıcı bağlantısı kullan", "description": "Uzaktan hata ayıklama etkinleştirilmiş olarak çalışan bir Chrome tarayıcısına bağlanın (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/vi/chat.json b/webview-ui/src/i18n/locales/vi/chat.json index cfda62992b41..8f3bf2554be3 100644 --- a/webview-ui/src/i18n/locales/vi/chat.json +++ b/webview-ui/src/i18n/locales/vi/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Tham gia với chúng tôi trên X, Discord, hoặc r/RooCode 🚀" }, "browser": { + "session": "Phiên trình duyệt", "rooWantsToUse": "Roo muốn sử dụng trình duyệt", "consoleLogs": "Nhật ký bảng điều khiển", "noNewLogs": "(Không có nhật ký mới)", @@ -329,8 +330,10 @@ "launch": "Khởi chạy trình duyệt tại {{url}}", "click": "Nhấp ({{coordinate}})", "type": "Gõ \"{{text}}\"", + "press": "Nhấn {{key}}", "scrollDown": "Cuộn xuống", "scrollUp": "Cuộn lên", + "hover": "Di chuột ({{coordinate}})", "close": "Đóng trình duyệt" } }, diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index e9b15db7d7a4..fd67d613d8d7 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -501,6 +501,10 @@ "label": "Chất lượng ảnh chụp màn hình", "description": "Điều chỉnh chất lượng WebP của ảnh chụp màn hình trình duyệt. Giá trị cao hơn cung cấp ảnh chụp màn hình rõ ràng hơn nhưng tăng sử dụng token." }, + "autoExpand": { + "label": "Tự động mở rộng các hành động của trình duyệt", + "description": "Tự động mở rộng ảnh chụp màn hình hành động của trình duyệt trong chế độ xem trò chuyện" + }, "remote": { "label": "Sử dụng kết nối trình duyệt từ xa", "description": "Kết nối với trình duyệt Chrome đang chạy với tính năng gỡ lỗi từ xa được bật (--remote-debugging-port=9222).", diff --git a/webview-ui/src/i18n/locales/zh-CN/chat.json b/webview-ui/src/i18n/locales/zh-CN/chat.json index cc0373caf642..b774d3e1c677 100644 --- a/webview-ui/src/i18n/locales/zh-CN/chat.json +++ b/webview-ui/src/i18n/locales/zh-CN/chat.json @@ -313,6 +313,7 @@ "socialLinks": "在 XDiscordr/RooCode 上关注我们 🚀" }, "browser": { + "session": "浏览器会话", "rooWantsToUse": "Roo想使用浏览器", "consoleLogs": "控制台日志", "noNewLogs": "(没有新日志)", @@ -329,8 +330,10 @@ "launch": "访问 {{url}}", "click": "点击 ({{coordinate}})", "type": "输入 \"{{text}}\"", + "press": "按 {{key}}", "scrollDown": "向下滚动", "scrollUp": "向上滚动", + "hover": "悬停 ({{coordinate}})", "close": "关闭浏览器" } }, diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index 25a889939f20..d16ecc1471c6 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -501,6 +501,10 @@ "label": "截图质量", "description": "调整浏览器的截图质量。更高的值提供更清晰的截图,但会增加 token 消耗。" }, + "autoExpand": { + "label": "自动展开浏览器操作", + "description": "在聊天视图中自动展开浏览器操作截图" + }, "remote": { "label": "使用远程浏览器连接", "description": "连接到启用远程调试的 Chrome 浏览器 (--remote-debugging-port=9222)。", diff --git a/webview-ui/src/i18n/locales/zh-TW/chat.json b/webview-ui/src/i18n/locales/zh-TW/chat.json index 8e3b6f981617..6a4987b092ab 100644 --- a/webview-ui/src/i18n/locales/zh-TW/chat.json +++ b/webview-ui/src/i18n/locales/zh-TW/chat.json @@ -331,6 +331,7 @@ "countdownDisplay": "{{count}} 秒" }, "browser": { + "session": "瀏覽器會話", "rooWantsToUse": "Roo 想要使用瀏覽器", "consoleLogs": "主控台記錄", "noNewLogs": "(沒有新記錄)", @@ -347,8 +348,10 @@ "launch": "在 {{url}} 啟動瀏覽器", "click": "點選 ({{coordinate}})", "type": "輸入「{{text}}」", + "press": "按下 {{key}}", "scrollDown": "向下捲動", "scrollUp": "向上捲動", + "hover": "懸停 ({{coordinate}})", "close": "關閉瀏覽器" } }, diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 26225e2278fe..3b2b0db7e66a 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -501,6 +501,10 @@ "label": "截圖品質", "description": "調整瀏覽器截圖的 WebP 品質。數值越高截圖越清晰,但會增加 token 用量。" }, + "autoExpand": { + "label": "自動展開瀏覽器操作", + "description": "在聊天視圖中自動展開瀏覽器操作截圖" + }, "remote": { "label": "使用遠端瀏覽器連線", "description": "連線到啟用遠端除錯的 Chrome 瀏覽器(--remote-debugging-port=9222)。",