diff --git a/knip.json b/knip.json index a847111981b1..e15c62bda1b0 100644 --- a/knip.json +++ b/knip.json @@ -16,7 +16,7 @@ "project": ["**/*.ts"] }, "webview-ui": { - "entry": ["src/index.tsx"], + "entry": ["src/index.tsx", "src/browser-panel.tsx"], "project": ["src/**/*.{ts,tsx}", "../src/shared/*.ts"] }, "packages/{build,cloud,evals,ipc,telemetry,types}": { diff --git a/packages/types/src/message.ts b/packages/types/src/message.ts index b02078fc0d8b..dae13f5c2320 100644 --- a/packages/types/src/message.ts +++ b/packages/types/src/message.ts @@ -156,6 +156,7 @@ export const clineSays = [ "shell_integration_warning", "browser_action", "browser_action_result", + "browser_session_status", "mcp_server_request_started", "mcp_server_response", "subtask_result", diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 2249c008d67e..c2133c96987e 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -352,8 +352,32 @@ export async function presentAssistantMessage(cline: Task) { return text.replace(tagRegex, "") } - if (block.name !== "browser_action") { - await cline.browserSession.closeBrowser() + // Keep browser open during an active session so other tools can run. + // Session is active if we've seen any browser_action_result and the last browser_action is not "close". 
+ try { + const messages = cline.clineMessages || [] + const hasStarted = messages.some((m: any) => m.say === "browser_action_result") + let isClosed = false + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] + if (m.say === "browser_action") { + try { + const act = JSON.parse(m.text || "{}") + isClosed = act.action === "close" + } catch {} + break + } + } + const sessionActive = hasStarted && !isClosed + // Only auto-close when no active browser session is present, and this isn't a browser_action + if (!sessionActive && block.name !== "browser_action") { + await cline.browserSession.closeBrowser() + } + } catch { + // On any unexpected error, fall back to conservative behavior + if (block.name !== "browser_action") { + await cline.browserSession.closeBrowser() + } } if (!block.partial) { diff --git a/src/core/environment/__tests__/getEnvironmentDetails.spec.ts b/src/core/environment/__tests__/getEnvironmentDetails.spec.ts index 1110aa8831b9..4b07d4e775b2 100644 --- a/src/core/environment/__tests__/getEnvironmentDetails.spec.ts +++ b/src/core/environment/__tests__/getEnvironmentDetails.spec.ts @@ -116,6 +116,9 @@ describe("getEnvironmentDetails", () => { deref: vi.fn().mockReturnValue(mockProvider), [Symbol.toStringTag]: "WeakRef", } as unknown as WeakRef, + browserSession: { + isSessionActive: vi.fn().mockReturnValue(false), + } as any, } // Mock other dependencies. 
@@ -390,4 +393,18 @@ describe("getEnvironmentDetails", () => { const result = await getEnvironmentDetails(cline as Task) expect(result).toContain("REMINDERS") }) + it("should include Browser Session Status when inactive", async () => { + const result = await getEnvironmentDetails(mockCline as Task) + expect(result).toContain("# Browser Session Status") + expect(result).toContain("Inactive - Browser is not launched") + }) + + it("should include Browser Session Status with current viewport when active", async () => { + ;(mockCline.browserSession as any).isSessionActive = vi.fn().mockReturnValue(true) + ;(mockCline.browserSession as any).getViewportSize = vi.fn().mockReturnValue({ width: 1280, height: 720 }) + + const result = await getEnvironmentDetails(mockCline as Task) + expect(result).toContain("Active - A browser session is currently open and ready for browser_action commands") + expect(result).toContain("Current viewport size: 1280x720 pixels.") + }) }) diff --git a/src/core/environment/getEnvironmentDetails.ts b/src/core/environment/getEnvironmentDetails.ts index 30d9cd0b0d1e..4b73394b6359 100644 --- a/src/core/environment/getEnvironmentDetails.ts +++ b/src/core/environment/getEnvironmentDetails.ts @@ -244,6 +244,38 @@ export async function getEnvironmentDetails(cline: Task, includeFileDetails: boo } } + // Add browser session status - Always show to prevent LLM from trying browser actions when no session is active + const isBrowserActive = cline.browserSession.isSessionActive() + + // Build viewport info for status (prefer actual viewport if available, else fallback to configured setting) + const configuredViewport = (state?.browserViewportSize as string | undefined) ?? 
"900x600" + let configuredWidth: number | undefined + let configuredHeight: number | undefined + if (configuredViewport.includes("x")) { + const parts = configuredViewport.split("x").map((v) => Number(v)) + configuredWidth = parts[0] + configuredHeight = parts[1] + } + + let actualWidth: number | undefined + let actualHeight: number | undefined + // Use optional chaining to avoid issues with tests that stub browserSession + const vp = isBrowserActive ? (cline.browserSession as any).getViewportSize?.() : undefined + if (vp) { + actualWidth = vp.width + actualHeight = vp.height + } + + const width = actualWidth ?? configuredWidth + const height = actualHeight ?? configuredHeight + const viewportInfo = isBrowserActive && width && height ? `\nCurrent viewport size: ${width}x${height} pixels.` : "" + + details += `\n# Browser Session Status\n${ + isBrowserActive + ? "Active - A browser session is currently open and ready for browser_action commands" + : "Inactive - Browser is not launched. Using any browser action except the browser_action with action='launch' to start a new session will result in an error." + }${viewportInfo}\n` + if (includeFileDetails) { details += `\n\n# Current Workspace Directory (${cline.cwd.toPosix()}) Files\n` const isDesktop = arePathsEqual(cline.cwd, path.join(os.homedir(), "Desktop")) diff --git a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap index 3420d1d1333c..3549596eecbc 100644 --- a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap +++ b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-computer-use-support.snap @@ -228,10 +228,12 @@ Example for appending to the end of file: ## browser_action Description: Request to interact with a Puppeteer-controlled browser. 
Every action, except `close`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. -- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. -- While the browser is active, only the `browser_action` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **1280x800** pixels. When performing any click actions, ensure the coordinates are within this resolution range. -- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. + +**Browser Session Lifecycle:** +- Browser sessions **start** with `launch` and **end** with `close` +- The session remains active across multiple messages and tool uses +- You can use other tools while the browser session is active - it will stay open in the background + Parameters: - action: (required) The action to perform. The available actions are: * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. @@ -245,6 +247,12 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. 
* type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the `text` parameter to provide the string to type. + * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter). + - Use with the `text` parameter to provide the key name or combination. + - For single keys: Enter, Tab, Escape, etc. + - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc. + - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option + - Example: Cmd+K or Shift+Enter * resize: Resize the viewport to a specific w,h size. - Use with the `size` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. @@ -253,17 +261,24 @@ Parameters: - Example: `close` - url: (optional) Use this for providing the URL for the `launch` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. Coordinates should be within the **1280x800** resolution. - * Example: 450,300 +- coordinate: (optional) The X and Y coordinates for the `click` and `hover` actions. 
+ * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions + * Format: x,y@widthxheight + * Measure x,y on the screenshot image you see in chat + * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport) + * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot + * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport + * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: 450,300@1094x1092 + * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: 500,300@1000x625 - size: (optional) The width and height for the `resize` action. * Example: 1280,720 - text: (optional) Use this for providing the text for the `type` action. * Example: Hello, world! Usage: -Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close) URL to launch the browser at (optional) -x,y coordinates (optional) +x,y@widthxheight coordinates (optional) Text to type (optional) @@ -273,10 +288,10 @@ Example: Requesting to launch a browser at https://example.com https://example.com -Example: Requesting to click on the element at coordinates 450,300 +Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image click -450,300 +450,300@1024x768 ## ask_followup_question @@ -504,7 +519,7 @@ RULES - At the end of each user message, you will automatically receive environment_details. This information is not written by the user themselves, but is auto-generated to provide potentially relevant context about the project structure and environment. 
While this information can be valuable for understanding the project context, do not treat it as a direct part of the user's request or response. Use it to inform your actions and decisions, but don't assume the user is explicitly asking about or referring to this information unless they clearly do so in their message. When using environment_details, explain your actions clearly to ensure the user understands, as they may not be aware of these details. - Before executing commands, check the "Actively Running Terminals" section in environment_details. If present, consider how these active processes might impact your task. For example, if a local development server is already running, you wouldn't need to start it again. If no active terminals are listed, proceed with command execution as normal. - MCP operations should be used one at a time, similar to other tool usage. Wait for confirmation of success before proceeding with additional operations. -- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc. Then if you want to test your work, you might use browser_action to launch the site, wait for the user's response confirming the site was launched along with a screenshot, then perhaps e.g., click a button to test functionality if needed, wait for the user's response confirming the button was clicked along with a screenshot of the new state, before finally closing the browser. +- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. 
For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc. ==== diff --git a/src/core/prompts/sections/rules.ts b/src/core/prompts/sections/rules.ts index e8c7534b18a4..6a670d99e071 100644 --- a/src/core/prompts/sections/rules.ts +++ b/src/core/prompts/sections/rules.ts @@ -87,9 +87,5 @@ ${getEditingInstructions(diffStrategy)} - At the end of each user message, you will automatically receive environment_details. This information is not written by the user themselves, but is auto-generated to provide potentially relevant context about the project structure and environment. While this information can be valuable for understanding the project context, do not treat it as a direct part of the user's request or response. Use it to inform your actions and decisions, but don't assume the user is explicitly asking about or referring to this information unless they clearly do so in their message. When using environment_details, explain your actions clearly to ensure the user understands, as they may not be aware of these details. - Before executing commands, check the "Actively Running Terminals" section in environment_details. If present, consider how these active processes might impact your task. For example, if a local development server is already running, you wouldn't need to start it again. If no active terminals are listed, proceed with command execution as normal. - MCP operations should be used one at a time, similar to other tool usage. Wait for confirmation of success before proceeding with additional operations. -- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. 
For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc.${ - supportsComputerUse - ? " Then if you want to test your work, you might use browser_action to launch the site, wait for the user's response confirming the site was launched along with a screenshot, then perhaps e.g., click a button to test functionality if needed, wait for the user's response confirming the button was clicked along with a screenshot of the new state, before finally closing the browser." - : "" - }` +- It is critical you wait for the user's response after each tool use, in order to confirm the success of the tool use. For example, if asked to make a todo app, you would create a file, wait for the user's response it was created successfully, then create another file if needed, wait for the user's response it was created successfully, etc.` } diff --git a/src/core/prompts/tools/browser-action.ts b/src/core/prompts/tools/browser-action.ts index e1b33b9d7d1d..3f9a5c1ae290 100644 --- a/src/core/prompts/tools/browser-action.ts +++ b/src/core/prompts/tools/browser-action.ts @@ -6,10 +6,12 @@ export function getBrowserActionDescription(args: ToolArgs): string | undefined } return `## browser_action Description: Request to interact with a Puppeteer-controlled browser. Every action, except \`close\`, will be responded to with a screenshot of the browser's current state, along with any new console logs. You may only perform one browser action per message, and wait for the user's response including a screenshot and logs to determine the next action. -- The sequence of actions **must always start with** launching the browser at a URL, and **must always end with** closing the browser. 
If you need to visit a new URL that is not possible to navigate to from the current webpage, you must first close the browser, then launch again at the new URL. -- While the browser is active, only the \`browser_action\` tool can be used. No other tools should be called during this time. You may proceed to use other tools only after closing the browser. For example if you run into an error and need to fix a file, you must close the browser, then use other tools to make the necessary changes, then re-launch the browser to verify the result. -- The browser window has a resolution of **${args.browserViewportSize}** pixels. When performing any click actions, ensure the coordinates are within this resolution range. -- Before clicking on any elements such as icons, links, or buttons, you must consult the provided screenshot of the page to determine the coordinates of the element. The click should be targeted at the **center of the element**, not on its edges. + +**Browser Session Lifecycle:** +- Browser sessions **start** with \`launch\` and **end** with \`close\` +- The session remains active across multiple messages and tool uses +- You can use other tools while the browser session is active - it will stay open in the background + Parameters: - action: (required) The action to perform. The available actions are: * launch: Launch a new Puppeteer-controlled browser instance at the specified URL. This **must always be the first action**. @@ -23,6 +25,12 @@ Parameters: - Always click in the center of an element (icon, button, link, etc.) based on coordinates derived from a screenshot. * type: Type a string of text on the keyboard. You might use this after clicking on a text field to input text. - Use with the \`text\` parameter to provide the string to type. + * press: Press a single keyboard key or key combination (e.g., Enter, Tab, Escape, Cmd+K, Shift+Enter). + - Use with the \`text\` parameter to provide the key name or combination. 
+ - For single keys: Enter, Tab, Escape, etc. + - For key combinations: Cmd+K, Ctrl+C, Shift+Enter, Alt+F4, etc. + - Supported modifiers: Cmd/Command/Meta, Ctrl/Control, Shift, Alt/Option + - Example: Cmd+K or Shift+Enter * resize: Resize the viewport to a specific w,h size. - Use with the \`size\` parameter to specify the new size. * scroll_down: Scroll down the page by one page height. @@ -31,17 +39,24 @@ Parameters: - Example: \`close\` - url: (optional) Use this for providing the URL for the \`launch\` action. * Example: https://example.com -- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. Coordinates should be within the **${args.browserViewportSize}** resolution. - * Example: 450,300 +- coordinate: (optional) The X and Y coordinates for the \`click\` and \`hover\` actions. + * **CRITICAL**: Screenshot dimensions are NOT the same as the browser viewport dimensions + * Format: x,y@widthxheight + * Measure x,y on the screenshot image you see in chat + * The widthxheight MUST be the EXACT pixel size of that screenshot image (never the browser viewport) + * Never use the browser viewport size for widthxheight - the viewport is only a reference and is often larger than the screenshot + * Images are often downscaled before you see them, so the screenshot's dimensions will likely be smaller than the viewport + * Example A: If the screenshot you see is 1094x1092 and you want to click (450,300) on that image, use: 450,300@1094x1092 + * Example B: If the browser viewport is 1280x800 but the screenshot is 1000x625 and you want to click (500,300) on the screenshot, use: 500,300@1000x625 - size: (optional) The width and height for the \`resize\` action. * Example: 1280,720 - text: (optional) Use this for providing the text for the \`type\` action. * Example: Hello, world! 
Usage: -Action to perform (e.g., launch, click, type, scroll_down, scroll_up, close) +Action to perform (e.g., launch, click, type, press, scroll_down, scroll_up, close) URL to launch the browser at (optional) -x,y coordinates (optional) +x,y@widthxheight coordinates (optional) Text to type (optional) @@ -51,9 +66,9 @@ Example: Requesting to launch a browser at https://example.com https://example.com -Example: Requesting to click on the element at coordinates 450,300 +Example: Requesting to click on the element at coordinates 450,300 on a 1024x768 image click -450,300 +450,300@1024x768 ` } diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 2fcc92426a43..e4b291da0f6e 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -374,7 +374,40 @@ export class Task extends EventEmitter implements TaskLike { this.autoApprovalHandler = new AutoApprovalHandler() this.urlContentFetcher = new UrlContentFetcher(provider.context) - this.browserSession = new BrowserSession(provider.context) + this.browserSession = new BrowserSession(provider.context, (isActive: boolean) => { + // Add a message to indicate browser session status change + this.say("browser_session_status", isActive ? "Browser session opened" : "Browser session closed") + // Broadcast to browser panel + this.broadcastBrowserSessionUpdate() + + // When a browser session becomes active, automatically open/reveal the Browser Session tab. + // Defer to the microtask queue and re-check active state to avoid race conditions + // where the session could be closed before the panel opens. + if (isActive) { + try { + const providerRef = this.providerRef.deref() + if (!providerRef) return + + Promise.resolve().then(async () => { + try { + // Ensure the session is still active before opening the panel + if (!(this.browserSession?.isSessionActive() ?? 
false)) return
+
+							// Lazy-load to avoid circular imports at module load time
+							const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager")
+							const mgr = BrowserSessionPanelManager.getInstance(providerRef)
+							// New session: allow auto-open again
+							mgr.resetManualCloseFlag()
+							await mgr.show()
+						} catch {
+							// swallow
+						}
+					})
+				} catch (err) {
+					console.error("[Task] Failed to auto-open Browser Session panel:", err)
+				}
+			}
+		})
 		this.diffEnabled = enableDiff
 		this.fuzzyMatchThreshold = fuzzyMatchThreshold
 		this.consecutiveMistakeLimit = consecutiveMistakeLimit ?? DEFAULT_CONSECUTIVE_MISTAKE_LIMIT
@@ -1199,6 +1232,11 @@ export class Task extends EventEmitter implements TaskLike {
 				contextCondense,
 			})
 		}
+
+		// Broadcast browser session updates to panel when browser-related messages are added
+		if (type === "browser_action" || type === "browser_action_result" || type === "browser_session_status") {
+			this.broadcastBrowserSessionUpdate()
+		}
 	}
 
 	async sayAndCreateMissingParamError(toolName: ToolName, paramName: string, relPath?: string) {
@@ -1608,6 +1646,16 @@ export class Task extends EventEmitter implements TaskLike {
 		} catch (error) {
 			console.error("Error closing browser session:", error)
 		}
+		// Also close the Browser Session panel when the task is disposed
+		try {
+			const provider = this.providerRef.deref()
+			if (provider) {
+				const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager")
+				BrowserSessionPanelManager.getInstance(provider).dispose()
+			}
+		} catch (error) {
+			console.error("Error closing browser session panel:", error)
+		}
 
 		try {
 			if (this.rooIgnoreController) {
@@ -3073,6 +3121,46 @@ export class Task extends EventEmitter implements TaskLike {
 		return this.workspacePath
 	}
 
+	/**
+	 * Pushes the current browser-session transcript and active flag to the Browser Session panel.
+	 *
+	 * The transcript is every message from the first `browser_action_launch` ask (or the first
+	 * `browser_session_status` say whose text contains "opened") through the end of
+	 * `clineMessages`; it is empty when no such message exists. Returns without doing anything
+	 * when `providerRef.deref()` yields no provider. Best-effort: failures are logged, never thrown.
+	 */
+	private broadcastBrowserSessionUpdate(): void {
+		const provider = this.providerRef.deref()
+		if (!provider) {
+			return
+		}
+
+		try {
+			const { BrowserSessionPanelManager } = require("../webview/BrowserSessionPanelManager")
+			const panelManager = BrowserSessionPanelManager.getInstance(provider)
+
+			// Get browser session messages
+			const browserSessionStartIndex = this.clineMessages.findIndex(
+				(m) =>
+					m.ask === "browser_action_launch" ||
+					(m.say === "browser_session_status" && m.text?.includes("opened")),
+			)
+
+			const browserSessionMessages =
+				browserSessionStartIndex !== -1 ? this.clineMessages.slice(browserSessionStartIndex) : []
+
+			const isBrowserSessionActive = this.browserSession?.isSessionActive() ?? false
+
+			// Fire-and-forget: a rejection is asynchronous, so it is handled here rather than by the outer try/catch
+			panelManager.updateBrowserSession(browserSessionMessages, isBrowserSessionActive).catch((error: unknown) => {
+				console.error("Failed to broadcast browser session update:", error)
+			})
+		} catch (error) {
+			// Best-effort: if the panel manager cannot be loaded or used, log at debug level and continue
+			console.debug("Browser panel not available for update:", error)
+		}
+	}
+
 	/**
 	 * Process any queued messages by dequeuing and submitting them.
* This ensures that queued user messages are sent when appropriate,
diff --git a/src/core/tools/__tests__/browserActionTool.coordinateScaling.spec.ts b/src/core/tools/__tests__/browserActionTool.coordinateScaling.spec.ts
new file mode 100644
index 000000000000..086040267459
--- /dev/null
+++ b/src/core/tools/__tests__/browserActionTool.coordinateScaling.spec.ts
@@ -0,0 +1,114 @@
+// Test coordinate scaling functionality in browser actions
+import { describe, it, expect } from "vitest"
+
+// scaleCoordinate is not exported from browserActionTool.ts, so these tests exercise a
+// local mirror of its parsing regex and scaling arithmetic rather than the real function.
+
+/** Accepts "x,y@widthxheight"; the image dimensions may be separated by 'x' or ','. */
+const COORDINATE_PATTERN = /^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/
+
+/**
+ * Local mirror of the scaling logic: converts "x,y@widthxheight" (a point measured on an
+ * image of the given size) into an "x,y" string in viewport coordinates.
+ * Throws an Error starting with "Invalid coordinate format" when the input does not match.
+ */
+function scaleCoordinate(coordinate: string, viewportWidth: number, viewportHeight: number): string {
+	const match = coordinate.match(COORDINATE_PATTERN)
+	if (!match) {
+		throw new Error(
+			`Invalid coordinate format: "${coordinate}". ` +
+				`Expected format: "x,y@widthxheight" (e.g., "450,300@1024x768")`,
+		)
+	}
+
+	const [, xStr, yStr, imgWidthStr, imgHeightStr] = match
+	const x = parseInt(xStr, 10)
+	const y = parseInt(yStr, 10)
+	const imgWidth = parseInt(imgWidthStr, 10)
+	const imgHeight = parseInt(imgHeightStr, 10)
+
+	const scaledX = Math.round((x / imgWidth) * viewportWidth)
+	const scaledY = Math.round((y / imgHeight) * viewportHeight)
+
+	return `${scaledX},${scaledY}`
+}
+
+describe("Browser Action Coordinate Scaling", () => {
+	describe("Coordinate format validation", () => {
+		it("should match valid coordinate format with image dimensions", () => {
+			const validFormats = [
+				"450,300@1024x768",
+				"0,0@1920x1080",
+				"1920,1080@1920x1080",
+				"100,200@800x600",
+				" 273 , 273 @ 1280x800 ",
+				"267,273@1280,800", // comma separator for dimensions
+				"450,300@1024,768", // comma separator for dimensions
+			]
+
+			validFormats.forEach((coord) => {
+				expect(coord).toMatch(COORDINATE_PATTERN)
+			})
+		})
+
+		it("should not match invalid coordinate formats", () => {
+			const invalidFormats = [
+				"450,300", // missing image dimensions
+				"450,300@", // incomplete dimensions
+				"450,300@1024", // missing height
+				"450,300@1024x", // missing height value
+				"@1024x768", // missing coordinates
+				"450@1024x768", // missing y coordinate
+				",300@1024x768", // missing x coordinate
+				"450,300@1024x768x2", // extra dimension
+				"a,b@1024x768", // non-numeric coordinates
+				"450,300@axb", // non-numeric dimensions
+			]
+
+			invalidFormats.forEach((coord) => {
+				expect(coord).not.toMatch(COORDINATE_PATTERN)
+			})
+		})
+	})
+
+	describe("Coordinate scaling logic", () => {
+		it("should correctly scale coordinates from image to viewport", () => {
+			// Test case 1: Same dimensions (no scaling)
+			expect(scaleCoordinate("450,300@900x600", 900, 600)).toBe("450,300")
+
+			// Test case 2: Half dimensions (2x upscale)
+			expect(scaleCoordinate("225,150@450x300", 900, 600)).toBe("450,300")
+
+			// Test case 3: Double dimensions (0.5x downscale)
+			expect(scaleCoordinate("900,600@1800x1200", 900, 600)).toBe("450,300")
+
+			// Test case 4: Different aspect ratio
+			expect(scaleCoordinate("512,384@1024x768", 1920, 1080)).toBe("960,540")
+
+			// Test case 5: Edge cases (0,0)
+			expect(scaleCoordinate("0,0@1024x768", 1920, 1080)).toBe("0,0")
+
+			// Test case 6: Edge cases (max coordinates)
+			expect(scaleCoordinate("1024,768@1024x768", 1920, 1080)).toBe("1920,1080")
+		})
+
+		it("should throw error for invalid coordinate format", () => {
+			// Test invalid formats
+			expect(() => scaleCoordinate("450,300", 900, 600)).toThrow("Invalid coordinate format")
+			expect(() => scaleCoordinate("450,300@1024", 900, 600)).toThrow("Invalid coordinate format")
+			expect(() => scaleCoordinate("invalid", 900, 600)).toThrow("Invalid coordinate format")
+		})
+
+		it("should handle rounding correctly", () => {
+			// Test rounding behavior
+			// 333 / 1000 * 900 = 299.7 -> rounds to 300
+			expect(scaleCoordinate("333,333@1000x1000", 900, 900)).toBe("300,300")
+
+			// 666 / 1000 * 900 = 599.4 -> rounds to 599
+			expect(scaleCoordinate("666,666@1000x1000", 900, 900)).toBe("599,599")
+
+			// 500 / 1000 * 900 = 450.0 -> rounds to 450
+			expect(scaleCoordinate("500,500@1000x1000", 900, 900)).toBe("450,450")
+		})
+	})
+})
diff --git a/src/core/tools/attemptCompletionTool.ts b/src/core/tools/attemptCompletionTool.ts
index
5074d7f4e808..a47a3552bf8f 100644
--- a/src/core/tools/attemptCompletionTool.ts
+++ b/src/core/tools/attemptCompletionTool.ts
@@ -129,7 +129,8 @@ export async function attemptCompletionTool(
 			})
 
 			toolResults.push(...formatResponse.imageBlocks(images))
-			cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:` })
+			const labelSuffix = images && images.length > 0 ? " (see image below)" : ""
+			cline.userMessageContent.push({ type: "text", text: `${toolDescription()} Result:${labelSuffix}` })
 			cline.userMessageContent.push(...toolResults)
 
 			return
diff --git a/src/core/tools/browserActionTool.ts b/src/core/tools/browserActionTool.ts
index 13cb9b0ec266..5b04cf6ad301 100644
--- a/src/core/tools/browserActionTool.ts
+++ b/src/core/tools/browserActionTool.ts
@@ -7,6 +7,40 @@ import {
 	ClineSayBrowserAction,
 } from "../../shared/ExtensionMessage"
 import { formatResponse } from "../prompts/responses"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+/**
+ * Converts a click/hover coordinate measured on a screenshot into viewport coordinates.
+ * The LLM reports "x,y@widthxheight", where widthxheight is the size of the image it saw
+ * (which may be downscaled by the API); each axis is rescaled by viewport size / image size.
+ * @param coordinate "x,y@widthxheight" (',' is also accepted between width and height)
+ * @param viewportWidth width of the target viewport
+ * @param viewportHeight height of the target viewport
+ * @returns the scaled coordinate as "x,y", each value rounded to the nearest integer
+ * @throws Error if the format is invalid or an image dimension is zero
+ */
+function scaleCoordinate(coordinate: string, viewportWidth: number, viewportHeight: number): string {
+	// Parse coordinate with required image dimensions (accepts both 'x' and ',' as dimension separators)
+	const match = coordinate.match(/^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/)
+	if (!match) {
+		throw new Error(
+			`Invalid coordinate format: "${coordinate}". ` +
+				`Expected format: "x,y@widthxheight" (e.g., "450,300@1024x768")`,
+		)
+	}
+
+	const [x, y, imgWidth, imgHeight] = match.slice(1).map((value) => parseInt(value, 10))
+	// A zero-sized image would divide by zero below and yield "NaN"/"Infinity" coordinates
+	if (imgWidth === 0 || imgHeight === 0) {
+		throw new Error(`Invalid coordinate format: "${coordinate}". Image dimensions must be greater than zero`)
+	}
+
+	// Scale coordinates from image dimensions to viewport dimensions
+	const scaledX = Math.round((x / imgWidth) * viewportWidth)
+	const scaledY = Math.round((y / imgHeight) * viewportHeight)
+
+	return `${scaledX},${scaledY}`
+}
 
 export async function browserActionTool(
 	cline: Task,
@@ -29,7 +63,7 @@ export async function browserActionTool(
 			cline.consecutiveMistakeCount++
 			cline.recordToolError("browser_action")
 			pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "action"))
-			await cline.browserSession.closeBrowser()
+			// Do not close the browser on parameter validation errors
 		}
 
 		return
@@ -46,6 +80,7 @@ export async function browserActionTool(
 						action: action as BrowserAction,
 						coordinate: removeClosingTag("coordinate", coordinate),
 						text: removeClosingTag("text", text),
+						size: removeClosingTag("size", size),
 					} satisfies ClineSayBrowserAction),
 					undefined,
 					block.partial,
@@ -61,7 +96,7 @@ export async function browserActionTool(
 					cline.consecutiveMistakeCount++
 					cline.recordToolError("browser_action")
 					pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "url"))
-					await cline.browserSession.closeBrowser()
+					// Do not close the browser on parameter validation errors
 					return
 				}
 
@@ -75,27 +110,67 @@ export async function browserActionTool(
 				// NOTE: It's okay that we call cline message since the partial inspect_site is finished streaming.
 				// The only scenario we have to avoid is sending messages WHILE a partial message exists at the end of the messages array.
 				// For example the api_req_finished message would interfere with the partial message, so we needed to remove that.
- // await cline.say("inspect_site_result", "") // No result, starts the loading spinner waiting for result - await cline.say("browser_action_result", "") // Starts loading spinner + + // Launch browser first (this triggers "Browser session opened" status message) await cline.browserSession.launchBrowser() + + // Create browser_action say message AFTER launching so status appears first + // Include size to keep message shape consistent with other actions + const vs = cline.browserSession.getViewportSize() + const launchSize = `${vs.width ?? 900}x${vs.height ?? 600}` + await cline.say( + "browser_action", + JSON.stringify({ + action: "launch" as BrowserAction, + text: url, + size: launchSize, + } satisfies ClineSayBrowserAction), + undefined, + false, + ) + browserActionResult = await cline.browserSession.navigateToUrl(url) } else { + // Variables to hold validated and processed parameters + let processedCoordinate = coordinate + if (action === "click" || action === "hover") { if (!coordinate) { cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "coordinate")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return // can't be within an inner switch } + + // Get viewport dimensions from the browser session + const viewportSize = cline.browserSession.getViewportSize() + const viewportWidth = viewportSize.width || 900 // default to 900 if not available + const viewportHeight = viewportSize.height || 600 // default to 600 if not available + + // Scale coordinate from image dimensions to viewport dimensions + try { + processedCoordinate = scaleCoordinate(coordinate, viewportWidth, viewportHeight) + } catch (error) { + cline.consecutiveMistakeCount++ + cline.recordToolError("browser_action") + pushToolResult( + await cline.sayAndCreateMissingParamError( + "browser_action", + "coordinate", + error instanceof Error ? 
error.message : String(error), + ), + ) + return + } } - if (action === "type") { + if (action === "type" || action === "press") { if (!text) { cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "text")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return } } @@ -105,7 +180,7 @@ export async function browserActionTool( cline.consecutiveMistakeCount++ cline.recordToolError("browser_action") pushToolResult(await cline.sayAndCreateMissingParamError("browser_action", "size")) - await cline.browserSession.closeBrowser() + // Do not close the browser on parameter validation errors return } } @@ -116,8 +191,9 @@ export async function browserActionTool( "browser_action", JSON.stringify({ action: action as BrowserAction, - coordinate, + coordinate: processedCoordinate, text, + size, } satisfies ClineSayBrowserAction), undefined, false, @@ -125,14 +201,17 @@ export async function browserActionTool( switch (action) { case "click": - browserActionResult = await cline.browserSession.click(coordinate!) + browserActionResult = await cline.browserSession.click(processedCoordinate!) break case "hover": - browserActionResult = await cline.browserSession.hover(coordinate!) + browserActionResult = await cline.browserSession.hover(processedCoordinate!) break case "type": browserActionResult = await cline.browserSession.type(text!) break + case "press": + browserActionResult = await cline.browserSession.press(text!) 
+ break case "scroll_down": browserActionResult = await cline.browserSession.scrollDown() break @@ -153,21 +232,48 @@ export async function browserActionTool( case "click": case "hover": case "type": + case "press": case "scroll_down": case "scroll_up": - case "resize": + case "resize": { await cline.say("browser_action_result", JSON.stringify(browserActionResult)) - pushToolResult( - formatResponse.toolResult( - `The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${ - browserActionResult?.logs || "(No new logs)" - }\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close cline browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`, - browserActionResult?.screenshot ? [browserActionResult.screenshot] : [], - ), - ) + const images = browserActionResult?.screenshot ? [browserActionResult.screenshot] : [] + + let messageText = `The browser action has been executed.` + + messageText += `\n\n**CRITICAL**: When providing click/hover coordinates:` + messageText += `\n1. Screenshot dimensions != Browser viewport dimensions` + messageText += `\n2. Measure x,y on the screenshot image you see below` + messageText += `\n3. Use format: x,y@WIDTHxHEIGHT where WIDTHxHEIGHT is the EXACT pixel size of the screenshot image` + messageText += `\n4. Never use the browser viewport size for WIDTHxHEIGHT - it is only for reference and is often larger than the screenshot` + messageText += `\n5. 
Screenshots are often downscaled - always use the dimensions you see in the image` + messageText += `\nExample: Viewport 1280x800, screenshot 1000x625, click (500,300) -> 500,300@1000x625` + + // Include browser viewport dimensions (for reference only) + if (browserActionResult?.viewportWidth && browserActionResult?.viewportHeight) { + messageText += `\n\nBrowser viewport: ${browserActionResult.viewportWidth}x${browserActionResult.viewportHeight}` + } + + // Include cursor position if available + if (browserActionResult?.currentMousePosition) { + messageText += `\nCursor position: ${browserActionResult.currentMousePosition}` + } + + messageText += `\n\nConsole logs:\n${browserActionResult?.logs || "(No new logs)"}\n` + + if (images.length > 0) { + const blocks = [ + ...formatResponse.imageBlocks(images), + { type: "text", text: messageText } as Anthropic.TextBlockParam, + ] + pushToolResult(blocks) + } else { + pushToolResult(messageText) + } break + } case "close": pushToolResult( formatResponse.toolResult( @@ -181,7 +287,7 @@ export async function browserActionTool( return } } catch (error) { - await cline.browserSession.closeBrowser() // if any error occurs, the browser session is terminated + // Keep the browser session alive on errors; report the error without terminating the session await handleError("executing browser action", error) return } diff --git a/src/core/webview/BrowserSessionPanelManager.ts b/src/core/webview/BrowserSessionPanelManager.ts new file mode 100644 index 000000000000..514c1315f7f5 --- /dev/null +++ b/src/core/webview/BrowserSessionPanelManager.ts @@ -0,0 +1,310 @@ +import * as vscode from "vscode" +import type { ClineMessage } from "@roo-code/types" +import { getUri } from "./getUri" +import { getNonce } from "./getNonce" +import type { ClineProvider } from "./ClineProvider" +import { webviewMessageHandler } from "./webviewMessageHandler" + +export class BrowserSessionPanelManager { + private static instances: WeakMap = new WeakMap() + 
private panel: vscode.WebviewPanel | undefined + private disposables: vscode.Disposable[] = [] + private isReady: boolean = false + private pendingUpdate?: { messages: ClineMessage[]; isActive: boolean } + private pendingNavigateIndex?: number + private userManuallyClosedPanel: boolean = false + + private constructor(private readonly provider: ClineProvider) {} + + /** + * Get or create a BrowserSessionPanelManager instance for the given provider + */ + public static getInstance(provider: ClineProvider): BrowserSessionPanelManager { + let instance = BrowserSessionPanelManager.instances.get(provider) + if (!instance) { + instance = new BrowserSessionPanelManager(provider) + BrowserSessionPanelManager.instances.set(provider, instance) + } + return instance + } + + /** + * Show the browser session panel, creating it if necessary + */ + public async show(): Promise { + await this.createOrShowPanel() + + // Send initial browser session data + const task = this.provider.getCurrentTask() + if (task) { + const messages = task.clineMessages || [] + const browserSessionStartIndex = messages.findIndex( + (m) => + m.ask === "browser_action_launch" || + (m.say === "browser_session_status" && m.text?.includes("opened")), + ) + const browserSessionMessages = + browserSessionStartIndex !== -1 ? messages.slice(browserSessionStartIndex) : [] + const isBrowserSessionActive = task.browserSession?.isSessionActive() ?? 
false + + await this.updateBrowserSession(browserSessionMessages, isBrowserSessionActive) + } + } + + private async createOrShowPanel(): Promise { + // If panel already exists, show it + if (this.panel) { + this.panel.reveal(vscode.ViewColumn.One) + return + } + + const extensionUri = this.provider.context.extensionUri + const extensionMode = this.provider.context.extensionMode + + // Create new panel + this.panel = vscode.window.createWebviewPanel("roo.browserSession", "Browser Session", vscode.ViewColumn.One, { + enableScripts: true, + retainContextWhenHidden: true, + localResourceRoots: [extensionUri], + }) + + // Set up the webview's HTML content + this.panel.webview.html = + extensionMode === vscode.ExtensionMode.Development + ? await this.getHMRHtmlContent(this.panel.webview, extensionUri) + : this.getHtmlContent(this.panel.webview, extensionUri) + + // Wire message channel for this panel (state handshake + actions) + this.panel.webview.onDidReceiveMessage( + async (message: any) => { + try { + // Let the shared handler process commands that work for any webview + if (message?.type) { + await webviewMessageHandler(this.provider as any, message) + } + // Panel-specific readiness and initial state + if (message?.type === "webviewDidLaunch") { + this.isReady = true + // Send full extension state to this panel (the sidebar postState targets the main webview) + const state = await (this.provider as any).getStateToPostToWebview?.() + if (state) { + await this.panel?.webview.postMessage({ type: "state", state }) + } + // Flush any pending browser session update queued before readiness + if (this.pendingUpdate) { + await this.updateBrowserSession(this.pendingUpdate.messages, this.pendingUpdate.isActive) + this.pendingUpdate = undefined + } + // Flush any pending navigation request queued before readiness + if (this.pendingNavigateIndex !== undefined) { + await this.navigateToStep(this.pendingNavigateIndex) + this.pendingNavigateIndex = undefined + } + } + } catch 
(err) { + console.error("[BrowserSessionPanel] onDidReceiveMessage error:", err) + } + }, + undefined, + this.disposables, + ) + + // Handle panel disposal - track that user closed it manually + this.panel.onDidDispose( + () => { + // Mark that user manually closed the panel (unless we're programmatically disposing) + if (this.panel) { + this.userManuallyClosedPanel = true + } + this.panel = undefined + this.dispose() + }, + null, + this.disposables, + ) + } + + public async updateBrowserSession(messages: ClineMessage[], isBrowserSessionActive: boolean): Promise { + if (!this.panel) { + return + } + // If the panel isn't ready yet, queue the latest snapshot to post after handshake + if (!this.isReady) { + this.pendingUpdate = { messages, isActive: isBrowserSessionActive } + return + } + + await this.panel.webview.postMessage({ + type: "browserSessionUpdate", + browserSessionMessages: messages, + isBrowserSessionActive, + }) + } + + /** + * Navigate the Browser Session panel to a specific step index. + * If the panel isn't ready yet, queue the navigation to run after handshake. + */ + public async navigateToStep(stepIndex: number): Promise { + if (!this.panel) { + return + } + if (!this.isReady) { + this.pendingNavigateIndex = stepIndex + return + } + + await this.panel.webview.postMessage({ + type: "browserSessionNavigate", + stepIndex, + }) + } + + /** + * Reset the manual close flag (call this when a new browser session launches) + */ + public resetManualCloseFlag(): void { + this.userManuallyClosedPanel = false + } + + /** + * Check if auto-opening should be allowed (not manually closed by user) + */ + public shouldAllowAutoOpen(): boolean { + return !this.userManuallyClosedPanel + } + + /** + * Whether the Browser Session panel is currently open. + */ + public isOpen(): boolean { + return !!this.panel + } + + /** + * Toggle the Browser Session panel visibility. 
+ * - If open: closes it + * - If closed: opens it and sends initial session snapshot + */ + public async toggle(): Promise { + if (this.panel) { + this.dispose() + } else { + await this.show() + } + } + + public dispose(): void { + // Clear the panel reference before disposing to prevent marking as manual close + const panelToDispose = this.panel + this.panel = undefined + + while (this.disposables.length) { + const disposable = this.disposables.pop() + if (disposable) { + disposable.dispose() + } + } + try { + panelToDispose?.dispose() + } catch {} + this.isReady = false + this.pendingUpdate = undefined + } + + private async getHMRHtmlContent(webview: vscode.Webview, extensionUri: vscode.Uri): Promise { + const fs = require("fs") + const path = require("path") + let localPort = "5173" + + try { + const portFilePath = path.resolve(__dirname, "../../.vite-port") + if (fs.existsSync(portFilePath)) { + localPort = fs.readFileSync(portFilePath, "utf8").trim() + } + } catch (err) { + console.error("[BrowserSessionPanel:Vite] Failed to read port file:", err) + } + + const localServerUrl = `localhost:${localPort}` + const nonce = getNonce() + + const stylesUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "index.css"]) + const codiconsUri = getUri(webview, extensionUri, ["assets", "codicons", "codicon.css"]) + + const scriptUri = `http://${localServerUrl}/src/browser-panel.tsx` + + const reactRefresh = ` + + ` + + const csp = [ + "default-src 'none'", + `font-src ${webview.cspSource} data:`, + `style-src ${webview.cspSource} 'unsafe-inline' https://* http://${localServerUrl}`, + `img-src ${webview.cspSource} data:`, + `script-src 'unsafe-eval' ${webview.cspSource} http://${localServerUrl} 'nonce-${nonce}'`, + `connect-src ${webview.cspSource} ws://${localServerUrl} http://${localServerUrl}`, + ] + + return ` + + + + + + + + + Browser Session + + +
+ ${reactRefresh} + + + + ` + } + + private getHtmlContent(webview: vscode.Webview, extensionUri: vscode.Uri): string { + const stylesUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "index.css"]) + const scriptUri = getUri(webview, extensionUri, ["webview-ui", "build", "assets", "browser-panel.js"]) + const codiconsUri = getUri(webview, extensionUri, ["assets", "codicons", "codicon.css"]) + + const nonce = getNonce() + + const csp = [ + "default-src 'none'", + `font-src ${webview.cspSource} data:`, + `style-src ${webview.cspSource} 'unsafe-inline'`, + `img-src ${webview.cspSource} data:`, + `script-src ${webview.cspSource} 'wasm-unsafe-eval' 'nonce-${nonce}'`, + `connect-src ${webview.cspSource}`, + ] + + return ` + + + + + + + + + Browser Session + + +
+ + + + ` + } +} diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index f73a36d445ee..08e9582d9c71 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1919,6 +1919,7 @@ export class ClineProvider alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false, alwaysAllowSubtasks: alwaysAllowSubtasks ?? false, alwaysAllowUpdateTodoList: alwaysAllowUpdateTodoList ?? false, + isBrowserSessionActive: this.getCurrentTask()?.browserSession?.isSessionActive() ?? false, allowedMaxRequests, allowedMaxCost, autoCondenseContext: autoCondenseContext ?? true, @@ -2131,6 +2132,9 @@ export class ClineProvider ) } + // Get actual browser session state + const isBrowserSessionActive = this.getCurrentTask()?.browserSession?.isSessionActive() ?? false + // Return the same structure as before. return { apiConfiguration: providerSettings, @@ -2149,6 +2153,7 @@ export class ClineProvider alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false, alwaysAllowFollowupQuestions: stateValues.alwaysAllowFollowupQuestions ?? false, alwaysAllowUpdateTodoList: stateValues.alwaysAllowUpdateTodoList ?? false, + isBrowserSessionActive, followupAutoApproveTimeoutMs: stateValues.followupAutoApproveTimeoutMs ?? 60000, diagnosticsEnabled: stateValues.diagnosticsEnabled ?? 
true, allowedMaxRequests: stateValues.allowedMaxRequests, diff --git a/src/core/webview/__tests__/ClineProvider.spec.ts b/src/core/webview/__tests__/ClineProvider.spec.ts index a8ab39108d95..53aada895117 100644 --- a/src/core/webview/__tests__/ClineProvider.spec.ts +++ b/src/core/webview/__tests__/ClineProvider.spec.ts @@ -503,6 +503,7 @@ describe("ClineProvider", () => { const mockState: ExtensionState = { version: "1.0.0", + isBrowserSessionActive: false, clineMessages: [], taskHistory: [], shouldShowAnnouncement: false, diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index c06729674503..96d98018434e 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -21,6 +21,7 @@ import { type ApiMessage } from "../task-persistence/apiMessages" import { saveTaskMessages } from "../task-persistence" import { ClineProvider } from "./ClineProvider" +import { BrowserSessionPanelManager } from "./BrowserSessionPanelManager" import { handleCheckpointRestoreOperation } from "./checkpointRestoreHandler" import { changeLanguage, t } from "../../i18n" import { Package } from "../../shared/package" @@ -1053,6 +1054,69 @@ export const webviewMessageHandler = async ( case "cancelTask": await provider.cancelTask() break + case "killBrowserSession": + { + const task = provider.getCurrentTask() + if (task?.browserSession) { + await task.browserSession.closeBrowser() + await provider.postStateToWebview() + } + } + break + case "openBrowserSessionPanel": + { + // Toggle the Browser Session panel (open if closed, close if open) + const panelManager = BrowserSessionPanelManager.getInstance(provider) + await panelManager.toggle() + } + break + case "showBrowserSessionPanelAtStep": + { + const panelManager = BrowserSessionPanelManager.getInstance(provider) + + // If this is a launch action, reset the manual close flag + if (message.isLaunchAction) { + panelManager.resetManualCloseFlag() + } + + 
// Show panel if: + // 1. Manual click (forceShow) - always show + // 2. Launch action - always show and reset flag + // 3. Auto-open for non-launch action - only if user hasn't manually closed + if (message.forceShow || message.isLaunchAction || panelManager.shouldAllowAutoOpen()) { + // Ensure panel is shown and populated + await panelManager.show() + + // Navigate to a specific step if provided + // For launch actions: navigate to step 0 + // For manual clicks: navigate to the clicked step + // For auto-opens of regular actions: don't navigate, let BrowserSessionRow's + // internal auto-advance logic handle it (only advances if user is on most recent step) + if (typeof message.stepIndex === "number" && message.stepIndex >= 0) { + await panelManager.navigateToStep(message.stepIndex) + } + } + } + break + case "refreshBrowserSessionPanel": + { + // Re-send the latest browser session snapshot to the panel + const panelManager = BrowserSessionPanelManager.getInstance(provider) + const task = provider.getCurrentTask() + if (task) { + const messages = task.clineMessages || [] + const browserSessionStartIndex = messages.findIndex( + (m) => + m.ask === "browser_action_launch" || + (m.say === "browser_session_status" && m.text?.includes("opened")), + ) + const browserSessionMessages = + browserSessionStartIndex !== -1 ? messages.slice(browserSessionStartIndex) : [] + const isBrowserSessionActive = task.browserSession?.isSessionActive() ?? false + await panelManager.updateBrowserSession(browserSessionMessages, isBrowserSessionActive) + } + } + break case "allowedCommands": { // Validate and sanitize the commands array const commands = message.commands ?? 
[] diff --git a/src/services/browser/BrowserSession.ts b/src/services/browser/BrowserSession.ts index 75b432f01d2d..fdd897c5ac65 100644 --- a/src/services/browser/BrowserSession.ts +++ b/src/services/browser/BrowserSession.ts @@ -1,7 +1,7 @@ import * as vscode from "vscode" import * as fs from "fs/promises" import * as path from "path" -import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect } from "puppeteer-core" +import { Browser, Page, ScreenshotOptions, TimeoutError, launch, connect, KeyInput } from "puppeteer-core" // @ts-ignore import PCR from "puppeteer-chromium-resolver" import pWaitFor from "p-wait-for" @@ -25,9 +25,15 @@ export class BrowserSession { private currentMousePosition?: string private lastConnectionAttempt?: number private isUsingRemoteBrowser: boolean = false + private onStateChange?: (isActive: boolean) => void - constructor(context: vscode.ExtensionContext) { + // Track last known viewport to surface in environment details + private lastViewportWidth?: number + private lastViewportHeight?: number + + constructor(context: vscode.ExtensionContext, onStateChange?: (isActive: boolean) => void) { this.context = context + this.onStateChange = onStateChange } private async ensureChromiumExists(): Promise { @@ -189,13 +195,20 @@ export class BrowserSession { await this.launchLocalBrowser() } } + + // Notify that browser session is now active + if (this.browser && this.onStateChange) { + this.onStateChange(true) + } } /** * Closes the browser and resets browser state */ async closeBrowser(): Promise { - if (this.browser || this.page) { + const wasActive = !!(this.browser || this.page) + + if (wasActive) { console.log("closing browser...") if (this.isUsingRemoteBrowser && this.browser) { @@ -204,6 +217,11 @@ export class BrowserSession { await this.browser?.close().catch(() => {}) } this.resetBrowserState() + + // Notify that browser session is now inactive + if (this.onStateChange) { + this.onStateChange(false) + } } return {} } @@ 
-216,12 +234,14 @@ export class BrowserSession { this.page = undefined this.currentMousePosition = undefined this.isUsingRemoteBrowser = false + this.lastViewportWidth = undefined + this.lastViewportHeight = undefined } async doAction(action: (page: Page) => Promise): Promise { if (!this.page) { throw new Error( - "Browser is not launched. This may occur if the browser was automatically closed by a non-`browser_action` tool.", + "Cannot perform browser action: no active browser session. The browser must be launched first using the 'launch' action before other browser actions can be performed.", ) } @@ -260,6 +280,11 @@ export class BrowserSession { interval: 100, }).catch(() => {}) + // Draw cursor indicator if we have a cursor position + if (this.currentMousePosition) { + await this.drawCursorIndicator(this.page, this.currentMousePosition) + } + let options: ScreenshotOptions = { encoding: "base64", @@ -291,15 +316,29 @@ export class BrowserSession { throw new Error("Failed to take screenshot.") } + // Remove cursor indicator after taking screenshot + if (this.currentMousePosition) { + await this.removeCursorIndicator(this.page) + } + // this.page.removeAllListeners() <- causes the page to crash! this.page.off("console", consoleListener) this.page.off("pageerror", errorListener) + // Get actual viewport dimensions + const viewport = this.page.viewport() + + // Persist last known viewport dimensions + this.lastViewportWidth = viewport?.width + this.lastViewportHeight = viewport?.height + return { screenshot, logs: logs.join("\n"), currentUrl: this.page.url(), currentMousePosition: this.currentMousePosition, + viewportWidth: viewport?.width, + viewportHeight: viewport?.height, } } @@ -453,6 +492,64 @@ export class BrowserSession { } } + /** + * Force links and window.open to navigate in the same tab. + * This makes clicks on anchors with target="_blank" stay in the current page + * and also intercepts window.open so SPA/open-in-new-tab patterns don't spawn popups. 
+ */ + private async forceLinksToSameTab(page: Page): Promise { + try { + await page.evaluate(() => { + try { + // Ensure we only install once per document + if ((window as any).__ROO_FORCE_SAME_TAB__) return + ;(window as any).__ROO_FORCE_SAME_TAB__ = true + + // Override window.open to navigate current tab instead of creating a new one + const originalOpen = window.open + window.open = function (url: string | URL, target?: string, features?: string) { + try { + const href = typeof url === "string" ? url : String(url) + location.href = href + } catch { + // fall back to original if something unexpected occurs + try { + return originalOpen.apply(window, [url as any, "_self", features]) as any + } catch {} + } + return null as any + } as any + + // Rewrite anchors that explicitly open new tabs + document.querySelectorAll('a[target="_blank"]').forEach((a) => { + a.setAttribute("target", "_self") + }) + + // Defensive capture: if an element still tries to open in a new tab, force same-tab + document.addEventListener( + "click", + (ev) => { + const el = (ev.target as HTMLElement | null)?.closest?.( + 'a[target="_blank"]', + ) as HTMLAnchorElement | null + if (el && el.href) { + ev.preventDefault() + try { + location.href = el.href + } catch {} + } + }, + { capture: true, passive: false }, + ) + } catch { + // no-op; forcing same-tab is best-effort + } + }) + } catch { + // If evaluate fails (e.g., cross-origin/state), continue without breaking the action + } + } + /** * Handles mouse interaction with network activity monitoring */ @@ -463,6 +560,9 @@ export class BrowserSession { ): Promise { const [x, y] = coordinate.split(",").map(Number) + // Force any new-tab behavior (target="_blank", window.open) to stay in the same tab + await this.forceLinksToSameTab(page) + // Set up network request monitoring let hasNetworkActivity = false const requestListener = () => { @@ -506,6 +606,106 @@ export class BrowserSession { }) } + async press(key: string): Promise { + return 
this.doAction(async (page) => { + // Parse key combinations (e.g., "Cmd+K", "Shift+Enter") + const parts = key.split("+").map((k) => k.trim()) + const modifiers: string[] = [] + let mainKey = parts[parts.length - 1] + + // Identify modifiers + for (let i = 0; i < parts.length - 1; i++) { + const part = parts[i].toLowerCase() + if (part === "cmd" || part === "command" || part === "meta") { + modifiers.push("Meta") + } else if (part === "ctrl" || part === "control") { + modifiers.push("Control") + } else if (part === "shift") { + modifiers.push("Shift") + } else if (part === "alt" || part === "option") { + modifiers.push("Alt") + } + } + + // Map common key aliases to Puppeteer KeyInput values + const mapping: Record = { + esc: "Escape", + return: "Enter", + escape: "Escape", + enter: "Enter", + tab: "Tab", + space: "Space", + arrowup: "ArrowUp", + arrowdown: "ArrowDown", + arrowleft: "ArrowLeft", + arrowright: "ArrowRight", + } + mainKey = (mapping[mainKey.toLowerCase()] ?? mainKey) as string + + // Avoid new-tab behavior from Enter on links/buttons + await this.forceLinksToSameTab(page) + + // Track inflight requests so we can detect brief network bursts + let inflight = 0 + const onRequest = () => { + inflight++ + } + const onRequestDone = () => { + inflight = Math.max(0, inflight - 1) + } + page.on("request", onRequest) + page.on("requestfinished", onRequestDone) + page.on("requestfailed", onRequestDone) + + // Start a short navigation wait in parallel; if no nav, it times out harmlessly + const HARD_CAP_MS = 3000 + const navPromise = page + .waitForNavigation({ + // domcontentloaded is enough to confirm a submit navigated + waitUntil: ["domcontentloaded"], + timeout: HARD_CAP_MS, + }) + .catch(() => undefined) + + // Press key combination + if (modifiers.length > 0) { + // Hold down modifiers + for (const modifier of modifiers) { + await page.keyboard.down(modifier as KeyInput) + } + + // Press main key + await page.keyboard.press(mainKey as KeyInput) + + // 
Release modifiers + for (const modifier of modifiers) { + await page.keyboard.up(modifier as KeyInput) + } + } else { + // Single key press + await page.keyboard.press(mainKey as KeyInput) + } + + // Give time for any requests to kick off + await delay(120) + + // Hard-cap the wait to avoid UI hangs + await Promise.race([ + navPromise, + pWaitFor(() => inflight === 0, { timeout: HARD_CAP_MS, interval: 100 }).catch(() => {}), + delay(HARD_CAP_MS), + ]) + + // Stabilize DOM briefly before capturing screenshot (shorter cap) + await this.waitTillHTMLStable(page, 2_000) + + // Cleanup + page.off("request", onRequest) + page.off("requestfinished", onRequestDone) + page.off("requestfailed", onRequestDone) + }) + } + /** * Scrolls the page by the specified amount */ @@ -557,4 +757,84 @@ export class BrowserSession { }) }) } + + /** + * Draws a cursor indicator on the page at the specified position + */ + private async drawCursorIndicator(page: Page, coordinate: string): Promise { + const [x, y] = coordinate.split(",").map(Number) + + try { + await page.evaluate( + (cursorX: number, cursorY: number) => { + // Create a cursor indicator element + const cursor = document.createElement("div") + cursor.id = "__roo_cursor_indicator__" + cursor.style.cssText = ` + position: fixed; + left: ${cursorX}px; + top: ${cursorY}px; + width: 35px; + height: 35px; + pointer-events: none; + z-index: 2147483647; + ` + + // Create SVG cursor pointer + const svg = ` + + + + + ` + cursor.innerHTML = svg + + document.body.appendChild(cursor) + }, + x, + y, + ) + } catch (error) { + console.log("Failed to draw cursor indicator:", error) + } + } + + /** + * Removes the cursor indicator from the page + */ + private async removeCursorIndicator(page: Page): Promise { + try { + await page.evaluate(() => { + const cursor = document.getElementById("__roo_cursor_indicator__") + if (cursor) { + cursor.remove() + } + }) + } catch (error) { + console.log("Failed to remove cursor indicator:", error) + } + } + 
+ /** + * Returns whether a browser session is currently active + */ + isSessionActive(): boolean { + return !!(this.browser && this.page) + } + + /** + * Returns the last known viewport size (if any) + */ + getViewportSize(): { width?: number; height?: number } { + return { + width: this.lastViewportWidth, + height: this.lastViewportHeight, + } + } } diff --git a/src/services/browser/UrlContentFetcher.ts b/src/services/browser/UrlContentFetcher.ts index b271bc2ef413..2d8e4a3de84a 100644 --- a/src/services/browser/UrlContentFetcher.ts +++ b/src/services/browser/UrlContentFetcher.ts @@ -90,9 +90,9 @@ export class UrlContentFetcher { throw new Error("Browser not initialized") } /* - - networkidle2 is equivalent to playwright's networkidle where it waits until there are no more than 2 network connections for at least 500 ms. - - domcontentloaded is when the basic DOM is loaded - this should be sufficient for most doc sites + - In Puppeteer, "networkidle2" waits until there are no more than 2 network connections for at least 500 ms (roughly equivalent to Playwright's "networkidle"). + - "domcontentloaded" is when the basic DOM is loaded. + This should be sufficient for most doc sites. 
*/ try { await this.page.goto(url, { diff --git a/src/services/browser/__tests__/BrowserSession.spec.ts b/src/services/browser/__tests__/BrowserSession.spec.ts index b69fb2d14064..d3784c3afff2 100644 --- a/src/services/browser/__tests__/BrowserSession.spec.ts +++ b/src/services/browser/__tests__/BrowserSession.spec.ts @@ -229,4 +229,169 @@ describe("BrowserSession", () => { expect(mockBrowser.close).not.toHaveBeenCalled() }) }) + + it("forces same-tab behavior before click", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + waitForNavigation: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + click: vi.fn().mockResolvedValue(undefined), + move: vi.fn().mockResolvedValue(undefined), + }, + } + + ;(browserSession as any).page = page + + // Spy on the forceLinksToSameTab helper to ensure it's invoked + const forceSpy = vi.fn().mockResolvedValue(undefined) + ;(browserSession as any).forceLinksToSameTab = forceSpy + + await browserSession.click("10,20") + + expect(forceSpy).toHaveBeenCalledTimes(1) + expect(forceSpy).toHaveBeenCalledWith(page) + expect(page.mouse.click).toHaveBeenCalledWith(10, 20) + }) +}) + +describe("keyboard press", () => { + it("presses a keyboard key", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + waitForNavigation: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(undefined), + keyboard: { + press: vi.fn().mockResolvedValue(undefined), + type: 
vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + await session.press("Enter") + + expect(page.keyboard.press).toHaveBeenCalledTimes(1) + expect(page.keyboard.press).toHaveBeenCalledWith("Enter") + }) +}) + +describe("cursor visualization", () => { + it("should draw cursor indicator when cursor position exists", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + click: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform a click action which sets cursor position + const result = await session.click("100,200") + + // Verify cursor indicator was drawn and removed + // evaluate is called 3 times: 1 for forceLinksToSameTab, 1 for draw cursor, 1 for remove cursor + expect(page.evaluate).toHaveBeenCalled() + + // Verify the result includes cursor position + expect(result.currentMousePosition).toBe("100,200") + }) + + it("should include cursor position in action result", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: 
vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + mouse: { + move: vi.fn().mockResolvedValue(undefined), + }, + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform a hover action which sets cursor position + const result = await session.hover("150,250") + + // Verify the result includes cursor position + expect(result.currentMousePosition).toBe("150,250") + expect(result.viewportWidth).toBe(900) + expect(result.viewportHeight).toBe(600) + }) + + it("should not draw cursor indicator when no cursor position exists", async () => { + // Prepare a minimal mock page with required APIs + const page: any = { + on: vi.fn(), + off: vi.fn(), + screenshot: vi.fn().mockResolvedValue("mockScreenshotBase64"), + url: vi.fn().mockReturnValue("https://example.com"), + viewport: vi.fn().mockReturnValue({ width: 900, height: 600 }), + evaluate: vi.fn().mockResolvedValue(undefined), + } + + // Create a fresh BrowserSession with a mock context + const mockCtx: any = { + globalState: { get: vi.fn(), update: vi.fn() }, + globalStorageUri: { fsPath: "/mock/global/storage/path" }, + extensionUri: { fsPath: "/mock/extension/path" }, + } + const session = new BrowserSession(mockCtx) + + ;(session as any).page = page + + // Perform scroll action which doesn't set cursor position + const result = await session.scrollDown() + + // Verify evaluate was called only for scroll operation (not for cursor drawing/removal) + // scrollDown calls evaluate once for scrolling + expect(page.evaluate).toHaveBeenCalledTimes(1) + + // Verify no cursor 
position in result + expect(result.currentMousePosition).toBeUndefined() + }) }) diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 7d2759c91905..1271ae1b7ced 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -127,6 +127,8 @@ export interface ExtensionMessage { | "insertTextIntoTextarea" | "dismissedUpsells" | "organizationSwitchResult" + | "browserSessionUpdate" + | "browserSessionNavigate" text?: string payload?: any // Add a generic payload for now, can refine later // Checkpoint warning message @@ -211,6 +213,9 @@ export interface ExtensionMessage { queuedMessages?: QueuedMessage[] list?: string[] // For dismissedUpsells organizationId?: string | null // For organizationSwitchResult + browserSessionMessages?: ClineMessage[] // For browser session panel updates + isBrowserSessionActive?: boolean // For browser session panel updates + stepIndex?: number // For browserSessionNavigate: the target step index to display } export type ExtensionState = Pick< @@ -345,6 +350,8 @@ export type ExtensionState = Pick< organizationAllowList: OrganizationAllowList organizationSettingsVersion?: number + isBrowserSessionActive: boolean // Actual browser session state + autoCondenseContext: boolean autoCondenseContextPercent: number marketplaceItems?: MarketplaceItem[] @@ -427,6 +434,7 @@ export const browserActions = [ "click", "hover", "type", + "press", "scroll_down", "scroll_up", "resize", @@ -447,6 +455,8 @@ export type BrowserActionResult = { logs?: string currentUrl?: string currentMousePosition?: string + viewportWidth?: number + viewportHeight?: number } export interface ClineAskUseMcpServer { diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index f10808cd428d..9b9646f8adbf 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -233,6 +233,11 @@ export interface WebviewMessage { | "editQueuedMessage" | "dismissUpsell" | "getDismissedUpsells" + | 
"killBrowserSession" + | "openBrowserSessionPanel" + | "showBrowserSessionPanelAtStep" + | "refreshBrowserSessionPanel" + | "browserPanelDidLaunch" text?: string editedMessageContent?: string tab?: "settings" | "history" | "mcp" | "modes" | "chat" | "marketplace" | "cloud" @@ -244,6 +249,9 @@ export interface WebviewMessage { images?: string[] bool?: boolean value?: number + stepIndex?: number + isLaunchAction?: boolean + forceShow?: boolean commands?: string[] audioType?: AudioType serverName?: string diff --git a/webview-ui/browser-panel.html b/webview-ui/browser-panel.html new file mode 100644 index 000000000000..92943abfe341 --- /dev/null +++ b/webview-ui/browser-panel.html @@ -0,0 +1,12 @@ + + + + + + Browser Session + + +
+ + + \ No newline at end of file diff --git a/webview-ui/src/browser-panel.tsx b/webview-ui/src/browser-panel.tsx new file mode 100644 index 000000000000..a7f5af891e63 --- /dev/null +++ b/webview-ui/src/browser-panel.tsx @@ -0,0 +1,12 @@ +import { StrictMode } from "react" +import { createRoot } from "react-dom/client" + +import "./index.css" +import BrowserSessionPanel from "./components/browser-session/BrowserSessionPanel" +import "../node_modules/@vscode/codicons/dist/codicon.css" + +createRoot(document.getElementById("root")!).render( + + + , +) diff --git a/webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx b/webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx new file mode 100644 index 000000000000..50b078c7402d --- /dev/null +++ b/webview-ui/src/components/browser-session/BrowserPanelStateProvider.tsx @@ -0,0 +1,60 @@ +import React, { createContext, useContext, useState, useEffect, useCallback } from "react" +import { ExtensionMessage } from "@roo/ExtensionMessage" + +interface BrowserPanelState { + browserViewportSize: string + isBrowserSessionActive: boolean + language: string +} + +const BrowserPanelStateContext = createContext(undefined) + +export const BrowserPanelStateProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => { + const [state, setState] = useState({ + browserViewportSize: "900x600", + isBrowserSessionActive: false, + language: "en", + }) + + const handleMessage = useCallback((event: MessageEvent) => { + const message: ExtensionMessage = event.data + + switch (message.type) { + case "state": + if (message.state) { + setState((prev) => ({ + ...prev, + browserViewportSize: message.state?.browserViewportSize || "900x600", + isBrowserSessionActive: message.state?.isBrowserSessionActive || false, + language: message.state?.language || "en", + })) + } + break + case "browserSessionUpdate": + if (message.isBrowserSessionActive !== undefined) { + setState((prev) => ({ + ...prev, + 
isBrowserSessionActive: message.isBrowserSessionActive || false, + })) + } + break + } + }, []) + + useEffect(() => { + window.addEventListener("message", handleMessage) + return () => { + window.removeEventListener("message", handleMessage) + } + }, [handleMessage]) + + return {children} +} + +export const useBrowserPanelState = () => { + const context = useContext(BrowserPanelStateContext) + if (context === undefined) { + throw new Error("useBrowserPanelState must be used within a BrowserPanelStateProvider") + } + return context +} diff --git a/webview-ui/src/components/browser-session/BrowserSessionPanel.tsx b/webview-ui/src/components/browser-session/BrowserSessionPanel.tsx new file mode 100644 index 000000000000..00f3e176b6c4 --- /dev/null +++ b/webview-ui/src/components/browser-session/BrowserSessionPanel.tsx @@ -0,0 +1,109 @@ +import React, { useEffect, useState, useCallback } from "react" +import { type ClineMessage } from "@roo-code/types" +import BrowserSessionRow from "../chat/BrowserSessionRow" +import { TooltipProvider } from "@src/components/ui/tooltip" +import ErrorBoundary from "../ErrorBoundary" +import TranslationProvider from "@src/i18n/TranslationContext" +import { ExtensionMessage } from "@roo/ExtensionMessage" +import { BrowserPanelStateProvider, useBrowserPanelState } from "./BrowserPanelStateProvider" +import { vscode } from "@src/utils/vscode" +import { ExtensionStateContextProvider } from "@/context/ExtensionStateContext" + +interface BrowserSessionPanelState { + messages: ClineMessage[] +} + +const BrowserSessionPanelContent: React.FC = () => { + const { browserViewportSize, isBrowserSessionActive } = useBrowserPanelState() + const [state, setState] = useState({ + messages: [], + }) + // Target page index to navigate BrowserSessionRow to + const [navigateToStepIndex, setNavigateToStepIndex] = useState(undefined) + + const [expandedRows, setExpandedRows] = useState>({}) + + useEffect(() => { + const handleMessage = (event: MessageEvent) => 
{ + const message: ExtensionMessage = event.data + + switch (message.type) { + case "browserSessionUpdate": + if (message.browserSessionMessages) { + setState((prev) => ({ + ...prev, + messages: message.browserSessionMessages || [], + })) + } + break + case "browserSessionNavigate": + if (typeof message.stepIndex === "number" && message.stepIndex >= 0) { + setNavigateToStepIndex(message.stepIndex) + } + break + } + } + + window.addEventListener("message", handleMessage) + + return () => { + window.removeEventListener("message", handleMessage) + } + }, []) + + const handleHeightChange = useCallback(() => { + // No-op for panel - no scrolling needed + }, []) + + return ( +
+
+ expandedRows[messageTs] ?? false} + onToggleExpand={(messageTs: number) => { + setExpandedRows((prev: Record) => ({ + ...prev, + [messageTs]: !prev[messageTs], + })) + }} + fullScreen={true} + browserViewportSizeProp={browserViewportSize} + isBrowserSessionActiveProp={isBrowserSessionActive} + navigateToPageIndex={navigateToStepIndex} + /> +
+
+ ) +} + +const BrowserSessionPanel: React.FC = () => { + // Ensure the panel receives initial state and becomes "ready" without needing a second click + useEffect(() => { + try { + vscode.postMessage({ type: "webviewDidLaunch" }) + } catch { + // Ignore errors during initial launch + } + }, []) + + return ( + + + + + + + + + + + + ) +} + +export default BrowserSessionPanel diff --git a/webview-ui/src/components/chat/BrowserActionRow.tsx b/webview-ui/src/components/chat/BrowserActionRow.tsx new file mode 100644 index 000000000000..3082f3eabc8f --- /dev/null +++ b/webview-ui/src/components/chat/BrowserActionRow.tsx @@ -0,0 +1,248 @@ +import { memo, useMemo, useEffect, useRef } from "react" +import { ClineMessage } from "@roo-code/types" +import { ClineSayBrowserAction } from "@roo/ExtensionMessage" +import { vscode } from "@src/utils/vscode" +import { + MousePointer as MousePointerIcon, + Keyboard, + ArrowDown, + ArrowUp, + Pointer, + Play, + Check, + Maximize2, +} from "lucide-react" +import { useExtensionState } from "@src/context/ExtensionStateContext" + +const prettyKey = (k?: string): string => { + if (!k) return "" + return k + .split("+") + .map((part) => { + const p = part.trim() + const lower = p.toLowerCase() + const map: Record = { + enter: "Enter", + tab: "Tab", + escape: "Esc", + esc: "Esc", + backspace: "Backspace", + space: "Space", + shift: "Shift", + control: "Ctrl", + ctrl: "Ctrl", + alt: "Alt", + meta: "Meta", + command: "Cmd", + cmd: "Cmd", + arrowup: "Arrow Up", + arrowdown: "Arrow Down", + arrowleft: "Arrow Left", + arrowright: "Arrow Right", + pageup: "Page Up", + pagedown: "Page Down", + home: "Home", + end: "End", + } + if (map[lower]) return map[lower] + const keyMatch = /^Key([A-Z])$/.exec(p) + if (keyMatch) return keyMatch[1].toUpperCase() + const digitMatch = /^Digit([0-9])$/.exec(p) + if (digitMatch) return digitMatch[1] + const spaced = p.replace(/([a-z])([A-Z])/g, "$1 $2") + return spaced.charAt(0).toUpperCase() + spaced.slice(1) + }) 
+ .join(" + ") +} + +interface BrowserActionRowProps { + message: ClineMessage + nextMessage?: ClineMessage + actionIndex?: number + totalActions?: number +} + +// Get icon for each action type +const getActionIcon = (action: string) => { + switch (action) { + case "click": + return + case "type": + case "press": + return + case "scroll_down": + return + case "scroll_up": + return + case "launch": + return + case "close": + return + case "resize": + return + case "hover": + default: + return + } +} + +const BrowserActionRow = memo(({ message, nextMessage, actionIndex, totalActions }: BrowserActionRowProps) => { + const { isBrowserSessionActive } = useExtensionState() + const hasHandledAutoOpenRef = useRef(false) + + // Parse this specific browser action + const browserAction = useMemo(() => { + try { + return JSON.parse(message.text || "{}") as ClineSayBrowserAction + } catch { + return null + } + }, [message.text]) + + // Get viewport dimensions from the result message if available + const viewportDimensions = useMemo(() => { + if (!nextMessage || nextMessage.say !== "browser_action_result") return null + try { + const result = JSON.parse(nextMessage.text || "{}") + return { + width: result.viewportWidth, + height: result.viewportHeight, + } + } catch { + return null + } + }, [nextMessage]) + + // Format action display text + const actionText = useMemo(() => { + if (!browserAction) return "Browser action" + + // Helper to scale coordinates from screenshot dimensions to viewport dimensions + // Matches the backend's scaleCoordinate function logic + const getViewportCoordinate = (coord?: string): string => { + if (!coord) return "" + + // Parse "x,y@widthxheight" format + const match = /^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/.exec(coord) + if (!match) { + // If no @dimensions, return as-is (might be plain x,y format) + const simpleMatch = /^\s*(\d+)\s*,\s*(\d+)/.exec(coord) + return simpleMatch ? 
`${simpleMatch[1]},${simpleMatch[2]}` : coord + } + + const x = parseInt(match[1], 10) + const y = parseInt(match[2], 10) + const imgWidth = parseInt(match[3], 10) + const imgHeight = parseInt(match[4], 10) + + // If we don't have viewport dimensions, just return the screenshot coordinates + if (!viewportDimensions?.width || !viewportDimensions?.height) { + return `${x},${y}` + } + + // Scale coordinates from image dimensions to viewport dimensions (same as backend) + const scaledX = Math.round((x / imgWidth) * viewportDimensions.width) + const scaledY = Math.round((y / imgHeight) * viewportDimensions.height) + + return `${scaledX},${scaledY}` + } + + switch (browserAction.action) { + case "launch": + return `Launched browser` + case "click": + return `Clicked at: ${getViewportCoordinate(browserAction.coordinate)}` + case "type": + return `Typed: ${browserAction.text}` + case "press": + return `Pressed key: ${prettyKey(browserAction.text)}` + case "hover": + return `Hovered at: ${getViewportCoordinate(browserAction.coordinate)}` + case "scroll_down": + return "Scrolled down" + case "scroll_up": + return "Scrolled up" + case "resize": + return `Resized to: ${browserAction.size?.split(/[x,]/).join(" x ")}` + case "close": + return "Closed browser" + default: + return browserAction.action + } + }, [browserAction, viewportDimensions]) + + // Auto-open Browser Session panel when: + // 1. This is a "launch" action (new browser session) - always opens and navigates to launch + // 2. Regular actions - only open panel if user hasn't manually closed it, let internal auto-advance logic handle step + // Only run this once per action to avoid re-sending messages when scrolling + useEffect(() => { + if (!isBrowserSessionActive || hasHandledAutoOpenRef.current) { + return + } + + const isLaunchAction = browserAction?.action === "launch" + + if (isLaunchAction) { + // Launch action: show panel but do NOT force navigation. 
+ // This preserves the user's manually selected step; BrowserSessionRow will only auto-advance + // when the user is already on the most recent step. + vscode.postMessage({ + type: "showBrowserSessionPanelAtStep", + isLaunchAction: true, + }) + hasHandledAutoOpenRef.current = true + } else { + // Regular actions: just show panel, don't navigate + // BrowserSessionRow's internal auto-advance logic will handle jumping to new steps + // only if user is currently on the most recent step + vscode.postMessage({ + type: "showBrowserSessionPanelAtStep", + isLaunchAction: false, + }) + hasHandledAutoOpenRef.current = true + } + }, [isBrowserSessionActive, browserAction]) + + const headerStyle: React.CSSProperties = { + display: "flex", + alignItems: "center", + gap: "10px", + marginBottom: "10px", + wordBreak: "break-word", + } + + return ( +
+ {/* Header with action description - clicking opens Browser Session panel at this step */} +
{ + const idx = typeof actionIndex === "number" ? actionIndex : 0 + vscode.postMessage({ type: "showBrowserSessionPanelAtStep", stepIndex: idx, forceShow: true }) + }}> + + Browser Action + {actionIndex !== undefined && totalActions !== undefined && ( + + {" "} + - {actionIndex}/{totalActions} -{" "} + + )} + {browserAction && ( + <> + {getActionIcon(browserAction.action)} + {actionText} + + )} +
+
+ ) +}) + +BrowserActionRow.displayName = "BrowserActionRow" + +export default BrowserActionRow diff --git a/webview-ui/src/components/chat/BrowserSessionRow.tsx b/webview-ui/src/components/chat/BrowserSessionRow.tsx index c23b79f568a3..1e87f1cf1436 100644 --- a/webview-ui/src/components/chat/BrowserSessionRow.tsx +++ b/webview-ui/src/components/chat/BrowserSessionRow.tsx @@ -1,9 +1,6 @@ import React, { memo, useEffect, useMemo, useRef, useState } from "react" -import { useSize } from "react-use" import deepEqual from "fast-deep-equal" import { useTranslation } from "react-i18next" -import { VSCodeButton } from "@vscode/webview-ui-toolkit/react" - import type { ClineMessage } from "@roo-code/types" import { BrowserAction, BrowserActionResult, ClineSayBrowserAction } from "@roo/ExtensionMessage" @@ -11,10 +8,153 @@ import { BrowserAction, BrowserActionResult, ClineSayBrowserAction } from "@roo/ import { vscode } from "@src/utils/vscode" import { useExtensionState } from "@src/context/ExtensionStateContext" -import CodeBlock, { CODE_BLOCK_BG_COLOR } from "../common/CodeBlock" -import { ChatRowContent } from "./ChatRow" -import { ProgressIndicator } from "./ProgressIndicator" -import { Globe, Pointer, SquareTerminal } from "lucide-react" +import CodeBlock from "../common/CodeBlock" +import { Button, StandardTooltip } from "@src/components/ui" +import { + Globe, + Pointer, + SquareTerminal, + MousePointer as MousePointerIcon, + Keyboard, + ArrowDown, + ArrowUp, + Play, + Check, + Maximize2, + OctagonX, + ArrowLeft, + ArrowRight, + ChevronsLeft, + ChevronsRight, + ExternalLink, + Copy, +} from "lucide-react" + +const prettyKey = (k?: string): string => { + if (!k) return "" + return k + .split("+") + .map((part) => { + const p = part.trim() + const lower = p.toLowerCase() + const map: Record = { + enter: "Enter", + tab: "Tab", + escape: "Esc", + esc: "Esc", + backspace: "Backspace", + space: "Space", + shift: "Shift", + control: "Ctrl", + ctrl: "Ctrl", + alt: "Alt", + 
meta: "Meta", + command: "Cmd", + cmd: "Cmd", + arrowup: "Arrow Up", + arrowdown: "Arrow Down", + arrowleft: "Arrow Left", + arrowright: "Arrow Right", + pageup: "Page Up", + pagedown: "Page Down", + home: "Home", + end: "End", + } + if (map[lower]) return map[lower] + const keyMatch = /^Key([A-Z])$/.exec(p) + if (keyMatch) return keyMatch[1].toUpperCase() + const digitMatch = /^Digit([0-9])$/.exec(p) + if (digitMatch) return digitMatch[1] + const spaced = p.replace(/([a-z])([A-Z])/g, "$1 $2") + return spaced.charAt(0).toUpperCase() + spaced.slice(1) + }) + .join(" + ") +} + +const getBrowserActionText = ( + action: BrowserAction, + coordinate?: string, + text?: string, + size?: string, + viewportWidth?: number, + viewportHeight?: number, +) => { + // Helper to scale coordinates from screenshot dimensions to viewport dimensions + // Matches the backend's scaleCoordinate function logic + const getViewportCoordinate = (coord?: string): string => { + if (!coord) return "" + + // Parse "x,y@widthxheight" format + const match = /^\s*(\d+)\s*,\s*(\d+)\s*@\s*(\d+)\s*[x,]\s*(\d+)\s*$/.exec(coord) + if (!match) { + // If no @dimensions, return as-is (might be plain x,y format) + const simpleMatch = /^\s*(\d+)\s*,\s*(\d+)/.exec(coord) + return simpleMatch ? 
`${simpleMatch[1]},${simpleMatch[2]}` : coord + } + + const x = parseInt(match[1], 10) + const y = parseInt(match[2], 10) + const imgWidth = parseInt(match[3], 10) + const imgHeight = parseInt(match[4], 10) + + // If we don't have viewport dimensions, just return the screenshot coordinates + if (!viewportWidth || !viewportHeight) { + return `${x},${y}` + } + + // Scale coordinates from image dimensions to viewport dimensions (same as backend) + const scaledX = Math.round((x / imgWidth) * viewportWidth) + const scaledY = Math.round((y / imgHeight) * viewportHeight) + + return `${scaledX},${scaledY}` + } + + switch (action) { + case "launch": + return `Launched browser` + case "click": + return `Clicked at: ${getViewportCoordinate(coordinate)}` + case "type": + return `Typed: ${text}` + case "press": + return `Pressed key: ${prettyKey(text)}` + case "scroll_down": + return "Scrolled down" + case "scroll_up": + return "Scrolled up" + case "hover": + return `Hovered at: ${getViewportCoordinate(coordinate)}` + case "resize": + return `Resized to: ${size?.split(/[x,]/).join(" x ")}` + case "close": + return "Closed browser" + default: + return action + } +} + +const getActionIcon = (action: BrowserAction) => { + switch (action) { + case "click": + return + case "type": + case "press": + return + case "scroll_down": + return + case "scroll_up": + return + case "launch": + return + case "close": + return + case "resize": + return + case "hover": + default: + return + } +} interface BrowserSessionRowProps { messages: ClineMessage[] @@ -24,18 +164,68 @@ interface BrowserSessionRowProps { isLast: boolean onHeightChange: (isTaller: boolean) => void isStreaming: boolean + onExpandChange?: (expanded: boolean) => void + fullScreen?: boolean + // Optional props for standalone panel (when not using ExtensionStateContext) + browserViewportSizeProp?: string + isBrowserSessionActiveProp?: boolean + // Optional: navigate to a specific page index (used by Browser Session panel) + 
navigateToPageIndex?: number } const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { - const { messages, isLast, onHeightChange, lastModifiedMessage } = props + const { messages, isLast, onHeightChange, lastModifiedMessage, onExpandChange, fullScreen } = props const { t } = useTranslation() const prevHeightRef = useRef(0) - const [maxActionHeight, setMaxActionHeight] = useState(0) const [consoleLogsExpanded, setConsoleLogsExpanded] = useState(false) + const [nextActionsExpanded, setNextActionsExpanded] = useState(false) + const [logFilter, setLogFilter] = useState<"all" | "debug" | "info" | "warn" | "error" | "log">("all") + // Track screenshot container size for precise cursor positioning with object-fit: contain + const screenshotRef = useRef(null) + const [sW, setSW] = useState(0) + const [sH, setSH] = useState(0) + + // Auto-expand drawer when in fullScreen takeover mode so content is visible immediately + useEffect(() => { + if (fullScreen) { + setNextActionsExpanded(true) + } + }, [fullScreen]) + + // Observe screenshot container size to align cursor correctly with letterboxing + useEffect(() => { + const el = screenshotRef.current + if (!el) return + let mounted = true + const update = () => { + if (!mounted) return + const r = el.getBoundingClientRect() + setSW(r.width) + setSH(r.height) + } + update() + const ro = + typeof window !== "undefined" && "ResizeObserver" in window ? 
new ResizeObserver(() => update()) : null + if (ro) ro.observe(el) + return () => { + mounted = false + if (ro) ro.disconnect() + } + }, []) + + // Try to use ExtensionStateContext if available, otherwise use props + let browserViewportSize = props.browserViewportSizeProp || "900x600" + let isBrowserSessionActive = props.isBrowserSessionActiveProp || false + + try { + const extensionState = useExtensionState() + browserViewportSize = extensionState.browserViewportSize || "900x600" + isBrowserSessionActive = extensionState.isBrowserSessionActive || false + } catch (_e) { + // Not in ExtensionStateContext, use props + } - const { browserViewportSize = "900x600" } = useExtensionState() const [viewportWidth, viewportHeight] = browserViewportSize.split("x").map(Number) - const aspectRatio = ((viewportHeight / viewportWidth) * 100).toFixed(2) const defaultMousePosition = `${Math.round(viewportWidth / 2)},${Math.round(viewportHeight / 2)}` const isLastApiReqInterrupted = useMemo(() => { @@ -58,93 +248,106 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return isLast && messages.some((m) => m.say === "browser_action_result") && !isLastApiReqInterrupted // after user approves, browser_action_result with "" is sent to indicate that the session has started }, [isLast, messages, isLastApiReqInterrupted]) - // Organize messages into pages with current state and next action + // Organize messages into pages based on ALL browser actions (including those without screenshots) const pages = useMemo(() => { const result: { - currentState: { - url?: string - screenshot?: string - mousePosition?: string - consoleLogs?: string - messages: ClineMessage[] // messages up to and including the result - } - nextAction?: { - messages: ClineMessage[] // messages leading to next result - } + url?: string + screenshot?: string + mousePosition?: string + consoleLogs?: string + action?: ClineSayBrowserAction + size?: string + viewportWidth?: number + viewportHeight?: number 
}[] = [] - let currentStateMessages: ClineMessage[] = [] - let nextActionMessages: ClineMessage[] = [] - + // Build pages from browser_action messages and pair with results messages.forEach((message) => { - if (message.ask === "browser_action_launch") { - // Start first page - currentStateMessages = [message] - } else if (message.say === "browser_action_result") { - if (message.text === "") { - // first browser_action_result is an empty string that signals that session has started - return + if (message.say === "browser_action") { + try { + const action = JSON.parse(message.text || "{}") as ClineSayBrowserAction + // Find the corresponding result message + const resultMessage = messages.find( + (m) => m.say === "browser_action_result" && m.ts > message.ts && m.text !== "", + ) + + if (resultMessage) { + const resultData = JSON.parse(resultMessage.text || "{}") as BrowserActionResult + result.push({ + url: resultData.currentUrl, + screenshot: resultData.screenshot, + mousePosition: resultData.currentMousePosition, + consoleLogs: resultData.logs, + action, + size: action.size, + viewportWidth: resultData.viewportWidth, + viewportHeight: resultData.viewportHeight, + }) + } else { + // For actions without results (like close), add a page without screenshot + result.push({ action, size: action.size }) + } + } catch { + // ignore parse errors } - // Complete current state - currentStateMessages.push(message) - const resultData = JSON.parse(message.text || "{}") as BrowserActionResult - - // Add page with current state and previous next actions - result.push({ - currentState: { - url: resultData.currentUrl, - screenshot: resultData.screenshot, - mousePosition: resultData.currentMousePosition, - consoleLogs: resultData.logs, - messages: [...currentStateMessages], - }, - nextAction: - nextActionMessages.length > 0 - ? 
{ - messages: [...nextActionMessages], - } - : undefined, - }) - - // Reset for next page - currentStateMessages = [] - nextActionMessages = [] - } else if ( - message.say === "api_req_started" || - message.say === "text" || - message.say === "browser_action" - ) { - // These messages lead to the next result, so they should always go in nextActionMessages - nextActionMessages.push(message) - } else { - // Any other message types - currentStateMessages.push(message) } }) - // Add incomplete page if exists - if (currentStateMessages.length > 0 || nextActionMessages.length > 0) { - result.push({ - currentState: { - messages: [...currentStateMessages], - }, - nextAction: - nextActionMessages.length > 0 - ? { - messages: [...nextActionMessages], - } - : undefined, - }) + // Add placeholder page if no actions yet + if (result.length === 0) { + result.push({}) } return result }, [messages]) - // Auto-advance to latest page + // Page index + user navigation guard (don't auto-jump while exploring history) const [currentPageIndex, setCurrentPageIndex] = useState(0) + const hasUserNavigatedRef = useRef(false) + const didInitIndexRef = useRef(false) + const prevPagesLengthRef = useRef(0) + + useEffect(() => { + // Initialize to last page on mount + if (!didInitIndexRef.current && pages.length > 0) { + didInitIndexRef.current = true + setCurrentPageIndex(pages.length - 1) + prevPagesLengthRef.current = pages.length + return + } + + // Auto-advance if user is on the most recent step and a new step arrives + if (pages.length > prevPagesLengthRef.current) { + const wasOnLastPage = currentPageIndex === prevPagesLengthRef.current - 1 + if (wasOnLastPage && !hasUserNavigatedRef.current) { + // User was on the most recent step, auto-advance to the new step + setCurrentPageIndex(pages.length - 1) + } + prevPagesLengthRef.current = pages.length + } + }, [pages.length, currentPageIndex]) + + // External navigation request (from panel host) + // Only navigate when navigateToPageIndex 
actually changes, not when pages.length changes + const prevNavigateToPageIndexRef = useRef() useEffect(() => { - setCurrentPageIndex(pages.length - 1) - }, [pages.length]) + if ( + typeof props.navigateToPageIndex === "number" && + props.navigateToPageIndex !== prevNavigateToPageIndexRef.current && + pages.length > 0 + ) { + const idx = Math.max(0, Math.min(pages.length - 1, props.navigateToPageIndex)) + setCurrentPageIndex(idx) + // Only reset manual navigation guard if navigating to the last page + // This allows auto-advance to work when clicking to the most recent step + // but prevents unwanted auto-advance when viewing historical steps + if (idx === pages.length - 1) { + hasUserNavigatedRef.current = false + } + prevNavigateToPageIndexRef.current = props.navigateToPageIndex + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [props.navigateToPageIndex]) // Get initial URL from launch message const initialUrl = useMemo(() => { @@ -152,240 +355,791 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return launchMessage?.text || "" }, [messages]) - // Find the latest available URL and screenshot - const latestState = useMemo(() => { + const currentPage = pages[currentPageIndex] + + // Use actual viewport dimensions from result if available, otherwise fall back to settings + + // Find the last available screenshot and its associated data to use as placeholders + const lastPageWithScreenshot = useMemo(() => { for (let i = pages.length - 1; i >= 0; i--) { - const page = pages[i] - if (page.currentState.url || page.currentState.screenshot) { - return { - url: page.currentState.url, - mousePosition: page.currentState.mousePosition, - consoleLogs: page.currentState.consoleLogs, - screenshot: page.currentState.screenshot, - } + if (pages[i].screenshot) { + return pages[i] } } - return { url: undefined, mousePosition: undefined, consoleLogs: undefined, screenshot: undefined } + return undefined }, [pages]) - const currentPage = 
pages[currentPageIndex] - const isLastPage = currentPageIndex === pages.length - 1 - - // Use latest state if we're on the last page and don't have a state yet - const displayState = isLastPage - ? { - url: currentPage?.currentState.url || latestState.url || initialUrl, - mousePosition: - currentPage?.currentState.mousePosition || latestState.mousePosition || defaultMousePosition, - consoleLogs: currentPage?.currentState.consoleLogs, - screenshot: currentPage?.currentState.screenshot || latestState.screenshot, - } - : { - url: currentPage?.currentState.url || initialUrl, - mousePosition: currentPage?.currentState.mousePosition || defaultMousePosition, - consoleLogs: currentPage?.currentState.consoleLogs, - screenshot: currentPage?.currentState.screenshot, + // Find last mouse position up to current page (not from future pages) + const lastPageWithMousePositionUpToCurrent = useMemo(() => { + for (let i = currentPageIndex; i >= 0; i--) { + if (pages[i].mousePosition) { + return pages[i] } + } + return undefined + }, [pages, currentPageIndex]) - const [actionContent, { height: actionHeight }] = useSize( -
- {currentPage?.nextAction?.messages.map((message) => ( - - ))} - {!isBrowsing && messages.some((m) => m.say === "browser_action_result") && currentPageIndex === 0 && ( - - )} -
, - ) + // Display state from current page, with smart fallbacks + const displayState = { + url: currentPage?.url || initialUrl, + mousePosition: + currentPage?.mousePosition || lastPageWithMousePositionUpToCurrent?.mousePosition || defaultMousePosition, + consoleLogs: currentPage?.consoleLogs, + screenshot: currentPage?.screenshot || lastPageWithScreenshot?.screenshot, + } - useEffect(() => { - if (actionHeight === 0 || actionHeight === Infinity) { - return + // Parse logs for counts and filtering + const parsedLogs = useMemo(() => { + const counts = { debug: 0, info: 0, warn: 0, error: 0, log: 0 } + const byType: Record<"debug" | "info" | "warn" | "error" | "log", string[]> = { + debug: [], + info: [], + warn: [], + error: [], + log: [], } - if (actionHeight > maxActionHeight) { - setMaxActionHeight(actionHeight) + const raw = displayState.consoleLogs || "" + raw.split(/\r?\n/).forEach((line) => { + const trimmed = line.trim() + if (!trimmed) return + const m = /^\[([^\]]+)\]\s*/i.exec(trimmed) + let type = (m?.[1] || "").toLowerCase() + if (type === "warning") type = "warn" + if (!["debug", "info", "warn", "error", "log"].includes(type)) type = "log" + counts[type as keyof typeof counts]++ + byType[type as keyof typeof byType].push(line) + }) + return { counts, byType } + }, [displayState.consoleLogs]) + + const logsToShow = useMemo(() => { + if (!displayState.consoleLogs) return t("chat:browser.noNewLogs") as string + if (logFilter === "all") return displayState.consoleLogs + const arr = parsedLogs.byType[logFilter] + return arr.length ? 
arr.join("\n") : (t("chat:browser.noNewLogs") as string) + }, [displayState.consoleLogs, logFilter, parsedLogs, t]) + + // Meta for log badges (include "All" first) + const logTypeMeta = [ + { key: "all", label: "All" }, + { key: "debug", label: "Debug" }, + { key: "info", label: "Info" }, + { key: "warn", label: "Warn" }, + { key: "error", label: "Error" }, + { key: "log", label: "Log" }, + ] as const + + // Use a fixed standard aspect ratio and dimensions for the drawer to prevent flickering + // Even if viewport changes, the drawer maintains consistent size + const fixedDrawerWidth = 900 + const fixedDrawerHeight = 600 + const drawerAspectRatio = (fixedDrawerHeight / fixedDrawerWidth) * 100 + + // For cursor positioning, use the viewport dimensions from the same page as the data we're displaying + // This ensures cursor position matches the screenshot/mouse position being shown + let cursorViewportWidth: number + let cursorViewportHeight: number + + if (currentPage?.screenshot) { + // Current page has screenshot - use its dimensions + cursorViewportWidth = currentPage.viewportWidth ?? viewportWidth + cursorViewportHeight = currentPage.viewportHeight ?? viewportHeight + } else if (lastPageWithScreenshot) { + // Using placeholder screenshot - use dimensions from that page + cursorViewportWidth = lastPageWithScreenshot.viewportWidth ?? viewportWidth + cursorViewportHeight = lastPageWithScreenshot.viewportHeight ?? 
viewportHeight + } else { + // No screenshot available - use default settings + cursorViewportWidth = viewportWidth + cursorViewportHeight = viewportHeight + } + + // Get browser action for current page (now stored in pages array) + const currentPageAction = useMemo(() => { + return pages[currentPageIndex]?.action + }, [pages, currentPageIndex]) + + // Latest non-close browser_action for header summary (fallback) + + // Determine if the overall browser session is still active (spins until 'close') + const lastBrowserActionOverall = useMemo(() => { + const all = messages.filter((m) => m.say === "browser_action") + return all.at(-1) + }, [messages]) + + // Use actual Playwright session state from extension (not message parsing) + const isBrowserSessionOpen = isBrowserSessionActive + + // Check if currently performing a browser action (for spinner) + const _isSessionActive = useMemo(() => { + // Only show active spinner if a session has started + const started = messages.some((m) => m.say === "browser_action_result") + if (!started) return false + // If the last API request got interrupted/cancelled, treat session as inactive + if (isLastApiReqInterrupted) return false + if (!lastBrowserActionOverall) return true + try { + const act = JSON.parse(lastBrowserActionOverall.text || "{}") as ClineSayBrowserAction + return act.action !== "close" + } catch { + return true } - }, [actionHeight, maxActionHeight]) + }, [messages, lastBrowserActionOverall, isLastApiReqInterrupted]) - // Track latest click coordinate - const latestClickPosition = useMemo(() => { - if (!isBrowsing) return undefined + // Browser session drawer never auto-expands - user must manually toggle it - // Look through current page's next actions for the latest browser_action - const actions = currentPage?.nextAction?.messages || [] - for (let i = actions.length - 1; i >= 0; i--) { - const message = actions[i] - if (message.say === "browser_action") { - const browserAction = JSON.parse(message.text || "{}") 
as ClineSayBrowserAction - if (browserAction.action === "click" && browserAction.coordinate) { - return browserAction.coordinate + // Calculate total API cost for the browser session + const totalApiCost = useMemo(() => { + let total = 0 + messages.forEach((message) => { + if (message.say === "api_req_started" && message.text) { + try { + const data = JSON.parse(message.text) + if (data.cost && typeof data.cost === "number") { + total += data.cost + } + } catch { + // Ignore parsing errors } } + }) + return total + }, [messages]) + + // Local size tracking without react-use to avoid timers after unmount in tests + const containerRef = useRef(null) + const [rowHeight, setRowHeight] = useState(0) + useEffect(() => { + const el = containerRef.current + if (!el) return + let mounted = true + const setH = (h: number) => { + if (mounted) setRowHeight(h) } - return undefined - }, [isBrowsing, currentPage?.nextAction?.messages]) - - // Use latest click position while browsing, otherwise use display state - const mousePosition = isBrowsing - ? latestClickPosition || displayState.mousePosition - : displayState.mousePosition || defaultMousePosition - - const [browserSessionRow, { height: rowHeight }] = useSize( -
-
- {isBrowsing ? : } - - <>{t("chat:browser.rooWantsToUse")} - -
+ const ro = + typeof window !== "undefined" && "ResizeObserver" in window + ? new ResizeObserver((entries) => { + const entry = entries[0] + setH(entry?.contentRect?.height ?? el.getBoundingClientRect().height) + }) + : null + // initial + setH(el.getBoundingClientRect().height) + if (ro) ro.observe(el) + return () => { + mounted = false + if (ro) ro.disconnect() + } + }, []) + + const browserSessionRow = ( +
+ {/* Main header - clickable to expand/collapse, mimics TodoList style */}
- {/* URL Bar */} -
+ setNextActionsExpanded((v) => { + const nv = !v + onExpandChange?.(nv) + return nv + }), + })} + /> + + {/* Simple text: "Browser Session" with step counter */} + + setNextActionsExpanded((v) => { + const nv = !v + onExpandChange?.(nv) + return nv + }), + })} style={{ - margin: "0px auto", - width: "calc(100%)", - boxSizing: "border-box", // includes padding in width calculation - borderRadius: "4px 4px 0 0", - padding: "5px", + flex: 1, + fontSize: 13, + fontWeight: 500, + lineHeight: "22px", + color: "var(--vscode-editor-foreground)", + cursor: fullScreen ? "default" : "pointer", display: "flex", alignItems: "center", - justifyContent: "center", - color: "var(--vscode-descriptionForeground)", - fontSize: "12px", + gap: 8, }}> + {t("chat:browser.session")} + {pages.length > 0 && ( + + {currentPageIndex + 1}/{pages.length} + + )} + {/* Inline action summary to the right, similar to ChatView */} + + {(() => { + const action = currentPageAction + const pageSize = pages[currentPageIndex]?.size + const pageViewportWidth = pages[currentPageIndex]?.viewportWidth + const pageViewportHeight = pages[currentPageIndex]?.viewportHeight + if (action) { + return ( + <> + {getActionIcon(action.action)} + + {getBrowserActionText( + action.action, + action.coordinate, + action.text, + pageSize, + pageViewportWidth, + pageViewportHeight, + )} + + + ) + } else if (initialUrl) { + return ( + <> + {getActionIcon("launch" as any)} + {getBrowserActionText("launch", undefined, initialUrl, undefined)} + + ) + } + return null + })()} + + + + {/* Right side: cost badge and chevron */} + {totalApiCost > 0 && (
- - {displayState.url || "http"} + ${totalApiCost.toFixed(4)}
-
+ )} + + {/* Chevron toggle hidden in fullScreen */} + {!fullScreen && ( + + setNextActionsExpanded((v) => { + const nv = !v + onExpandChange?.(nv) + return nv + }) + } + className={`codicon ${nextActionsExpanded ? "codicon-chevron-up" : "codicon-chevron-down"}`} + style={{ + fontSize: 13, + fontWeight: 500, + lineHeight: "22px", + color: "var(--vscode-editor-foreground)", + cursor: "pointer", + display: "inline-block", + transition: "transform 150ms ease", + }} + /> + )} + + {/* Kill browser button hidden from header in fullScreen; kept in toolbar */} + {isBrowserSessionOpen && !fullScreen && ( + + + + )} +
- {/* Screenshot Area */} + {/* Expanded drawer content - inline/fullscreen */} + {nextActionsExpanded && (
- {displayState.screenshot ? ( - {t("chat:browser.screenshot")} - vscode.postMessage({ - type: "openImage", - text: displayState.screenshot, - }) - } - /> - ) : ( + {/* Browser-like Toolbar */} +
+ {/* Go to beginning */} + + + + + {/* Back */} + + + + + {/* Forward */} + + + + + {/* Go to end */} + + + + + {/* Address Bar */}
+ + style={{ + fontSize: 12, + lineHeight: "18px", + textOverflow: "ellipsis", + overflow: "hidden", + whiteSpace: "nowrap", + color: "var(--vscode-foreground)", + }}> + {displayState.url || "about:blank"} + + {/* Step counter removed */}
- )} - {displayState.mousePosition && ( - - )} -
- {/* Console Logs Accordion */} -
{ - setConsoleLogsExpanded(!consoleLogsExpanded) - }} - className="flex items-center justify-between gap-2 text-vscode-editor-foreground/50 hover:text-vscode-editor-foreground transition-colors" - style={{ - width: "100%", - cursor: "pointer", - padding: `9px 10px ${consoleLogsExpanded ? 0 : 8}px 10px`, - }}> - - {t("chat:browser.consoleLogs")} - -
- {consoleLogsExpanded && ( - - )} -
+ {/* Kill (Disconnect) replaces Reload */} + + + + + {/* Open External */} + + + - {/* Action content with min height */} -
{actionContent}
+ {/* Copy URL */} + + + +
- {/* Pagination moved to bottom */} - {pages.length > 1 && ( -
-
- {t("chat:browser.navigation.step", { current: currentPageIndex + 1, total: pages.length })} + {/* Screenshot Area */} +
+ {displayState.screenshot ? ( + {t("chat:browser.screenshot")} + vscode.postMessage({ + type: "openImage", + text: displayState.screenshot, + }) + } + /> + ) : ( +
+ +
+ )} + {displayState.mousePosition && + (() => { + // Use measured size if available; otherwise fall back to current client size so cursor remains visible + const containerW = sW || (screenshotRef.current?.clientWidth ?? 0) + const containerH = sH || (screenshotRef.current?.clientHeight ?? 0) + if (containerW <= 0 || containerH <= 0) { + // Minimal fallback to keep cursor visible before first measurement + return ( + + ) + } + + // Compute displayed image box within the container for object-fit: contain; objectPosition: top center + const imgAspect = cursorViewportWidth / cursorViewportHeight + const containerAspect = containerW / containerH + let displayW = containerW + let displayH = containerH + let offsetX = 0 + let offsetY = 0 + if (containerAspect > imgAspect) { + // Full height, letterboxed left/right; top aligned + displayH = containerH + displayW = containerH * imgAspect + offsetX = (containerW - displayW) / 2 + offsetY = 0 + } else { + // Full width, potential space below; top aligned + displayW = containerW + displayH = containerW / imgAspect + offsetX = 0 + offsetY = 0 + } + + // Parse "x,y" or "x,y@widthxheight" for original basis + const m = /^\s*(\d+)\s*,\s*(\d+)(?:\s*@\s*(\d+)\s*[x,]\s*(\d+))?\s*$/.exec( + displayState.mousePosition || "", + ) + const mx = parseInt(m?.[1] || "0", 10) + const my = parseInt(m?.[2] || "0", 10) + const baseW = m?.[3] ? parseInt(m[3], 10) : cursorViewportWidth + const baseH = m?.[4] ? parseInt(m[4], 10) : cursorViewportHeight + + const leftPx = offsetX + (baseW > 0 ? (mx / baseW) * displayW : 0) + const topPx = offsetY + (baseH > 0 ? (my / baseH) * displayH : 0) + + return ( + + ) + })()}
-
- setCurrentPageIndex((i) => i - 1)}> - {t("chat:browser.navigation.previous")} - - setCurrentPageIndex((i) => i + 1)}> - {t("chat:browser.navigation.next")} - + + {/* Browser Action summary moved inline to header; row removed */} + + {/* Console Logs Section (collapsible, default collapsed) */} +
+
{ + e.stopPropagation() + setConsoleLogsExpanded((v) => !v) + }} + className="text-vscode-editor-foreground/70 hover:text-vscode-editor-foreground transition-colors" + style={{ + display: "flex", + alignItems: "center", + gap: "8px", + marginBottom: consoleLogsExpanded ? "6px" : 0, + cursor: "pointer", + }}> + + + {t("chat:browser.consoleLogs")} + + + {/* Log type indicators */} +
e.stopPropagation()} + style={{ display: "flex", alignItems: "center", gap: 6, marginLeft: "auto" }}> + {logTypeMeta.map(({ key, label }) => { + const isAll = key === "all" + const count = isAll + ? (Object.values(parsedLogs.counts) as number[]).reduce((a, b) => a + b, 0) + : parsedLogs.counts[key as "debug" | "info" | "warn" | "error" | "log"] + const isActive = logFilter === (key as any) + const disabled = count === 0 + return ( + + ) + })} + setConsoleLogsExpanded((v) => !v)} + className={`codicon codicon-chevron-${consoleLogsExpanded ? "down" : "right"}`} + style={{ marginLeft: 6 }} + /> +
+
+ {consoleLogsExpanded && ( +
+ +
+ )}
)} -
, +
) // Height change effect @@ -402,150 +1156,6 @@ const BrowserSessionRow = memo((props: BrowserSessionRowProps) => { return browserSessionRow }, deepEqual) -interface BrowserSessionRowContentProps extends Omit { - message: ClineMessage - setMaxActionHeight: (height: number) => void - isStreaming: boolean -} - -const BrowserSessionRowContent = ({ - message, - isExpanded, - onToggleExpand, - lastModifiedMessage, - isLast, - setMaxActionHeight, - isStreaming, -}: BrowserSessionRowContentProps) => { - const { t } = useTranslation() - const headerStyle: React.CSSProperties = { - display: "flex", - alignItems: "center", - gap: "10px", - marginBottom: "10px", - wordBreak: "break-word", - } - - switch (message.type) { - case "say": - switch (message.say) { - case "api_req_started": - case "text": - return ( -
- { - if (message.say === "api_req_started") { - setMaxActionHeight(0) - } - onToggleExpand(message.ts) - }} - lastModifiedMessage={lastModifiedMessage} - isLast={isLast} - isStreaming={isStreaming} - /> -
- ) - - case "browser_action": - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - return ( - - ) - - default: - return null - } - - case "ask": - switch (message.ask) { - case "browser_action_launch": - return ( - <> -
- {t("chat:browser.sessionStarted")} -
-
- -
- - ) - - default: - return null - } - } -} - -const BrowserActionBox = ({ - action, - coordinate, - text, -}: { - action: BrowserAction - coordinate?: string - text?: string -}) => { - const { t } = useTranslation() - const getBrowserActionText = (action: BrowserAction, coordinate?: string, text?: string) => { - switch (action) { - case "launch": - return t("chat:browser.actions.launch", { url: text }) - case "click": - return t("chat:browser.actions.click", { coordinate: coordinate?.replace(",", ", ") }) - case "type": - return t("chat:browser.actions.type", { text }) - case "scroll_down": - return t("chat:browser.actions.scrollDown") - case "scroll_up": - return t("chat:browser.actions.scrollUp") - case "close": - return t("chat:browser.actions.close") - default: - return action - } - } - return ( -
-
-
- - {t("chat:browser.actions.title")} - {getBrowserActionText(action, coordinate, text)} - -
-
-
- ) -} - const BrowserCursor: React.FC<{ style?: React.CSSProperties }> = ({ style }) => { const { t } = useTranslation() // (can't use svgs in vsc extensions) diff --git a/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx b/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx new file mode 100644 index 000000000000..862dc80a62fe --- /dev/null +++ b/webview-ui/src/components/chat/BrowserSessionStatusRow.tsx @@ -0,0 +1,34 @@ +import { memo } from "react" +import { Globe } from "lucide-react" +import { ClineMessage } from "@roo-code/types" + +interface BrowserSessionStatusRowProps { + message: ClineMessage +} + +const BrowserSessionStatusRow = memo(({ message }: BrowserSessionStatusRowProps) => { + const isOpened = message.text?.includes("opened") + + return ( +
+ + + {message.text} + +
+ ) +}) + +BrowserSessionStatusRow.displayName = "BrowserSessionStatusRow" + +export default BrowserSessionStatusRow diff --git a/webview-ui/src/components/chat/ChatRow.tsx b/webview-ui/src/components/chat/ChatRow.tsx index 4299240a549a..2a77e037e86c 100644 --- a/webview-ui/src/components/chat/ChatRow.tsx +++ b/webview-ui/src/components/chat/ChatRow.tsx @@ -471,7 +471,7 @@ export const ChatRowContent = ({ vscode.postMessage({ type: "updateTodoList", payload: { todos: updatedTodos } }) } }} - editable={editable && isLast} + editable={!!(editable && isLast)} /> ) } @@ -1314,6 +1314,10 @@ export const ChatRowContent = ({
) + case "browser_action": + case "browser_action_result": + // Handled by BrowserSessionRow; prevent raw JSON (action/result) from rendering here + return null default: return ( <> diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx index c7813372fa79..2879db109de9 100644 --- a/webview-ui/src/components/chat/ChatTextArea.tsx +++ b/webview-ui/src/components/chat/ChatTextArea.tsx @@ -51,6 +51,9 @@ interface ChatTextAreaProps { // Edit mode props isEditMode?: boolean onCancel?: () => void + // Browser session status + isBrowserSessionActive?: boolean + showBrowserDockToggle?: boolean } export const ChatTextArea = forwardRef( @@ -71,6 +74,8 @@ export const ChatTextArea = forwardRef( modeShortcutText, isEditMode = false, onCancel, + isBrowserSessionActive = false, + showBrowserDockToggle = false, }, ref, ) => { @@ -1236,7 +1241,7 @@ export const ChatTextArea = forwardRef(
{isTtsPlaying && ( @@ -1261,6 +1266,12 @@ export const ChatTextArea = forwardRef( )} {!isEditMode ? : null} {!isEditMode && cloudUserInfo && } + {/* keep props referenced after moving browser button */} +
diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 929fa9427aa0..58fb844b970d 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -1,5 +1,5 @@ import React, { forwardRef, useCallback, useEffect, useImperativeHandle, useMemo, useRef, useState } from "react" -import { useDeepCompareEffect, useEvent, useMount } from "react-use" +import { useDeepCompareEffect, useEvent } from "react-use" import debounce from "debounce" import { Virtuoso, type VirtuosoHandle } from "react-virtuoso" import removeMd from "remove-markdown" @@ -13,7 +13,7 @@ import { appendImages } from "@src/utils/imageUtils" import type { ClineAsk, ClineMessage, McpServerUse } from "@roo-code/types" -import { ClineSayBrowserAction, ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage" +import { ClineSayTool, ExtensionMessage } from "@roo/ExtensionMessage" import { McpServer, McpTool } from "@roo/mcp" import { findLast } from "@roo/array" import { FollowUpData, SuggestionItem } from "@roo-code/types" @@ -48,6 +48,8 @@ import { useTaskSearch } from "../history/useTaskSearch" import HistoryPreview from "../history/HistoryPreview" import Announcement from "./Announcement" import BrowserSessionRow from "./BrowserSessionRow" +import BrowserActionRow from "./BrowserActionRow" +import BrowserSessionStatusRow from "./BrowserSessionStatusRow" import ChatRow from "./ChatRow" import { ChatTextArea } from "./ChatTextArea" import TaskHeader from "./TaskHeader" @@ -123,6 +125,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction textAreaRef.current?.focus()) - const visibleMessages = useMemo(() => { // Pre-compute checkpoint hashes that have associated user messages for O(1) lookup const userMessageCheckpointHashes = new Set() @@ -1250,97 +1250,58 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - // Which of visible messages are browser session messages, see above. 
- if (message.type === "ask") { - return ["browser_action_launch"].includes(message.ask!) - } - - if (message.type === "say") { - return ["api_req_started", "text", "browser_action", "browser_action_result"].includes(message.say!) - } - - return false - } - - const groupedMessages = useMemo(() => { - const result: (ClineMessage | ClineMessage[])[] = [] - let currentGroup: ClineMessage[] = [] - let isInBrowserSession = false - - const endBrowserSession = () => { - if (currentGroup.length > 0) { - result.push([...currentGroup]) - currentGroup = [] - isInBrowserSession = false + // Compute current browser session messages for the top banner (not grouped into chat stream) + // Find the FIRST browser session from the beginning to show ALL sessions + const browserSessionStartIndex = useMemo(() => { + for (let i = 0; i < messages.length; i++) { + if (messages[i].ask === "browser_action_launch") { + return i + } + // Also check for browser_session_status as a fallback indicator + if (messages[i].say === "browser_session_status" && messages[i].text?.includes("opened")) { + return i } } + return -1 + }, [messages]) - visibleMessages.forEach((message: ClineMessage) => { - if (message.ask === "browser_action_launch") { - // Complete existing browser session if any. - endBrowserSession() - // Start new. - isInBrowserSession = true - currentGroup.push(message) - } else if (isInBrowserSession) { - // End session if `api_req_started` is cancelled. - - if (message.say === "api_req_started") { - // Get last `api_req_started` in currentGroup to check if - // it's cancelled. If it is then this api req is not part - // of the current browser session. 
- const lastApiReqStarted = [...currentGroup].reverse().find((m) => m.say === "api_req_started") - - if (lastApiReqStarted?.text !== null && lastApiReqStarted?.text !== undefined) { - const info = JSON.parse(lastApiReqStarted.text) - const isCancelled = info.cancelReason !== null && info.cancelReason !== undefined - - if (isCancelled) { - endBrowserSession() - result.push(message) - return - } - } - } - - if (isBrowserSessionMessage(message)) { - currentGroup.push(message) + const _browserSessionMessages = useMemo(() => { + if (browserSessionStartIndex === -1) return [] + return messages.slice(browserSessionStartIndex) + }, [browserSessionStartIndex, messages]) - // Check if this is a close action - if (message.say === "browser_action") { - const browserAction = JSON.parse(message.text || "{}") as ClineSayBrowserAction - if (browserAction.action === "close") { - endBrowserSession() - } - } - } else { - // complete existing browser session if any - endBrowserSession() - result.push(message) - } - } else { - result.push(message) - } - }) + // Show globe toggle only when in a task that has a browser session (active or inactive) + const showBrowserDockToggle = useMemo( + () => Boolean(task && (browserSessionStartIndex !== -1 || isBrowserSessionActive)), + [task, browserSessionStartIndex, isBrowserSessionActive], + ) - // Handle case where browser session is the last group - if (currentGroup.length > 0) { - result.push([...currentGroup]) + const isBrowserSessionMessage = useCallback((message: ClineMessage): boolean => { + // Only the launch ask should be hidden from chat (it's shown in the drawer header) + if (message.type === "ask" && message.ask === "browser_action_launch") { + return true + } + // browser_action_result messages are paired with browser_action and should not appear independently + if (message.type === "say" && message.say === "browser_action_result") { + return true } + return false + }, []) + + const groupedMessages = useMemo(() => { + // Only filter 
out the launch ask and result messages - browser actions appear in chat + const result: ClineMessage[] = visibleMessages.filter((msg) => !isBrowserSessionMessage(msg)) if (isCondensing) { - // Show indicator after clicking condense button result.push({ type: "say", say: "condense_context", ts: Date.now(), partial: true, - }) + } as any) } - return result - }, [isCondensing, visibleMessages]) + }, [isCondensing, visibleMessages, isBrowserSessionMessage]) // scrolling @@ -1497,7 +1458,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - // browser session group + // browser session group - this should never be called now since we don't group messages if (Array.isArray(messageOrGroup)) { return ( ) } + const hasCheckpoint = modifiedMessages.some((message) => message.say === "checkpoint_saved") + // Check if this is a browser action message + if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_action") { + // Find the corresponding result message by looking for the next browser_action_result after this action's timestamp + const nextMessage = modifiedMessages.find( + (m) => m.ts > messageOrGroup.ts && m.say === "browser_action_result", + ) + + // Calculate action index and total count + const browserActions = modifiedMessages.filter((m) => m.say === "browser_action") + const actionIndex = browserActions.findIndex((m) => m.ts === messageOrGroup.ts) + 1 + const totalActions = browserActions.length + + return ( + + ) + } + + // Check if this is a browser session status message + if (messageOrGroup.type === "say" && messageOrGroup.say === "browser_session_status") { + return + } + // regular message return ( -
- { - setIsAtBottom(isAtBottom) - if (isAtBottom) { - disableAutoScrollRef.current = false - } - setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom) - }} - atBottomThreshold={10} - initialTopMostItemIndex={groupedMessages.length - 1} - /> +
+
+ { + setIsAtBottom(isAtBottom) + if (isAtBottom) { + disableAutoScrollRef.current = false + } + setShowScrollToBottom(disableAutoScrollRef.current && !isAtBottom) + }} + atBottomThreshold={10} + initialTopMostItemIndex={groupedMessages.length - 1} + /> +
{areButtonsVisible && (
{isProfileDisabled && ( diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx index aef0bc5eee93..2fd05406686a 100644 --- a/webview-ui/src/components/chat/TaskHeader.tsx +++ b/webview-ui/src/components/chat/TaskHeader.tsx @@ -1,9 +1,9 @@ -import { memo, useEffect, useRef, useState } from "react" +import { memo, useEffect, useRef, useState, useMemo } from "react" import { useTranslation } from "react-i18next" import { useCloudUpsell } from "@src/hooks/useCloudUpsell" import { CloudUpsellDialog } from "@src/components/cloud/CloudUpsellDialog" import DismissibleUpsell from "@src/components/common/DismissibleUpsell" -import { FoldVertical, ChevronUp, ChevronDown } from "lucide-react" +import { FoldVertical, ChevronUp, ChevronDown, Globe } from "lucide-react" import prettyBytes from "pretty-bytes" import type { ClineMessage } from "@roo-code/types" @@ -13,9 +13,10 @@ import { findLastIndex } from "@roo/array" import { formatLargeNumber } from "@src/utils/format" import { cn } from "@src/lib/utils" -import { StandardTooltip } from "@src/components/ui" +import { StandardTooltip, Button } from "@src/components/ui" import { useExtensionState } from "@src/context/ExtensionStateContext" import { useSelectedModel } from "@/components/ui/hooks/useSelectedModel" +import { vscode } from "@src/utils/vscode" import Thumbnails from "../common/Thumbnails" @@ -50,7 +51,7 @@ const TaskHeader = ({ todos, }: TaskHeaderProps) => { const { t } = useTranslation() - const { apiConfiguration, currentTaskItem, clineMessages } = useExtensionState() + const { apiConfiguration, currentTaskItem, clineMessages, isBrowserSessionActive } = useExtensionState() const { id: modelId, info: model } = useSelectedModel(apiConfiguration) const [isTaskExpanded, setIsTaskExpanded] = useState(false) const [showLongRunningTaskMessage, setShowLongRunningTaskMessage] = useState(false) @@ -86,6 +87,21 @@ const TaskHeader = ({ const textRef = useRef(null) const 
contextWindow = model?.contextWindow || 1 + // Detect if this task had any browser session activity so we can show a grey globe when inactive + const browserSessionStartIndex = useMemo(() => { + const msgs = clineMessages || [] + for (let i = 0; i < msgs.length; i++) { + const m = msgs[i] as any + if (m?.ask === "browser_action_launch") return i + if (m?.say === "browser_session_status" && typeof m.text === "string" && m.text.includes("opened")) { + return i + } + } + return -1 + }, [clineMessages]) + + const showBrowserGlobe = browserSessionStartIndex !== -1 || !!isBrowserSessionActive + const condenseButton = ( + + {isBrowserSessionActive && ( + + Active + + )} +
+ )}
diff --git a/webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx new file mode 100644 index 000000000000..87465862032d --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.aspect-ratio.spec.tsx @@ -0,0 +1,55 @@ +import { render, screen, fireEvent } from "@testing-library/react" +import React from "react" +import BrowserSessionRow from "../BrowserSessionRow" +import { ExtensionStateContext } from "@src/context/ExtensionStateContext" +import { TooltipProvider } from "@src/components/ui/tooltip" + +describe("BrowserSessionRow - screenshot area", () => { + const renderRow = (messages: any[]) => { + const mockExtState: any = { + // Ensure known viewport so expected aspect ratio is deterministic (600/900 = 66.67%) + browserViewportSize: "900x600", + isBrowserSessionActive: false, + } + + return render( + + + true} + onToggleExpand={() => {}} + lastModifiedMessage={undefined as any} + isLast={true} + onHeightChange={() => {}} + isStreaming={false} + /> + + , + ) + } + + it("reserves height while screenshot is loading (no layout collapse)", () => { + // Only a launch action, no corresponding browser_action_result yet (no screenshot) + const messages = [ + { + ts: 1, + say: "browser_action", + text: JSON.stringify({ action: "launch", url: "http://localhost:3000" }), + }, + ] + + renderRow(messages) + + // Open the browser session drawer + const globe = screen.getByLabelText("Browser interaction") + fireEvent.click(globe) + + const container = screen.getByTestId("screenshot-container") as HTMLDivElement + // padding-bottom should reflect aspect ratio (600/900 * 100) even without an image + const pb = parseFloat(container.style.paddingBottom || "0") + expect(pb).toBeGreaterThan(0) + // Be tolerant of rounding + expect(Math.round(pb)).toBe(67) + }) +}) diff --git 
a/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx new file mode 100644 index 000000000000..0c2b4762c4ea --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/BrowserSessionRow.disconnect-button.spec.tsx @@ -0,0 +1,42 @@ +import React from "react" +import { render, screen } from "@testing-library/react" +import BrowserSessionRow from "../BrowserSessionRow" +import { ExtensionStateContext } from "@src/context/ExtensionStateContext" +import { TooltipProvider } from "@radix-ui/react-tooltip" + +describe("BrowserSessionRow - Disconnect session button", () => { + const renderRow = (isActive: boolean) => { + const mockExtState: any = { + browserViewportSize: "900x600", + isBrowserSessionActive: isActive, + } + + return render( + + + false} + onToggleExpand={() => {}} + lastModifiedMessage={undefined as any} + isLast={true} + onHeightChange={() => {}} + isStreaming={false} + /> + + , + ) + } + + it("shows the Disconnect session button when a session is active", () => { + renderRow(true) + const btn = screen.getByLabelText("Disconnect session") + expect(btn).toBeInTheDocument() + }) + + it("does not render the button when no session is active", () => { + renderRow(false) + const btn = screen.queryByLabelText("Disconnect session") + expect(btn).toBeNull() + }) +}) diff --git a/webview-ui/src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx b/webview-ui/src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx new file mode 100644 index 000000000000..e870e8df3c29 --- /dev/null +++ b/webview-ui/src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx @@ -0,0 +1,119 @@ +// npx vitest run src/components/chat/__tests__/ChatView.followup-in-session.spec.tsx + +import { render, waitFor } from "@/utils/test-utils" +import { QueryClient, QueryClientProvider } from "@tanstack/react-query" +import { 
ExtensionStateContextProvider } from "@src/context/ExtensionStateContext" +import ChatView, { ChatViewProps } from "../ChatView" + +vi.mock("@src/utils/vscode", () => ({ + vscode: { postMessage: vi.fn() }, +})) + +vi.mock("rehype-highlight", () => ({ default: () => () => {} })) +vi.mock("hast-util-to-text", () => ({ default: () => "" })) + +vi.mock("../BrowserSessionRow", () => ({ + default: function MockBrowserSessionRow({ messages }: { messages: any[] }) { + return
{JSON.stringify(messages)}
+ }, +})) + +vi.mock("../ChatRow", () => ({ + default: function MockChatRow({ message }: { message: any }) { + return
{JSON.stringify(message)}
+ }, +})) + +vi.mock("../TaskHeader", () => ({ + default: function MockTaskHeader() { + return
+ }, +})) + +vi.mock("@src/components/common/CodeBlock", () => ({ + default: () => null, + CODE_BLOCK_BG_COLOR: "rgb(30, 30, 30)", +})) + +const queryClient = new QueryClient() + +const defaultProps: ChatViewProps = { + isHidden: false, + showAnnouncement: false, + hideAnnouncement: () => {}, +} + +const renderChatView = (props: Partial = {}) => { + return render( + + + + + , + ) +} + +const mockPostMessage = (state: any) => { + window.postMessage( + { + type: "state", + state: { + version: "1.0.0", + clineMessages: [], + taskHistory: [], + shouldShowAnnouncement: false, + allowedCommands: [], + autoApprovalEnabled: true, + ...state, + }, + }, + "*", + ) +} + +describe("ChatView followup inside browser session", () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + it.skip("renders followup ask as a regular ChatRow while session banner is visible", async () => { + renderChatView() + + const ts = Date.now() + + // Send initial message with browser session and followup + mockPostMessage({ + alwaysAllowBrowser: true, + clineMessages: [ + { type: "say", say: "task", ts: ts - 4000, text: "Initial task" }, + { + type: "ask", + ask: "browser_action_launch", + ts: ts - 3000, + text: "http://example.com", + partial: false, + }, + { type: "say", say: "browser_action_result", ts: ts - 2000, text: "" }, + { + type: "ask", + ask: "followup", + ts: ts, + text: JSON.stringify({ question: "Continue?", suggest: [{ answer: "Yes" }, { answer: "No" }] }), + partial: false, + }, + ], + }) + + // Banner should be present (only contains browser_action_launch and browser_action_result) + await waitFor(() => { + const banner = document.querySelector('[data-testid="browser-session"]') + expect(banner).not.toBeNull() + }) + + // At least one ChatRow should render (the followup question) + await waitFor(() => { + const chatRows = document.querySelectorAll('[data-testid="chat-row"]') + expect(chatRows.length).toBeGreaterThan(0) + }) + }) +}) diff --git 
a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 6443ccad93d5..4bc03e259c70 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -200,6 +200,7 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode deniedCommands: [], soundEnabled: false, soundVolume: 0.5, + isBrowserSessionActive: false, ttsEnabled: false, ttsSpeed: 1.0, diffEnabled: false, diff --git a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx index 92652733ddf1..61ebcb1fa385 100644 --- a/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx +++ b/webview-ui/src/context/__tests__/ExtensionStateContext.spec.tsx @@ -214,6 +214,7 @@ describe("mergeExtensionState", () => { remoteControlEnabled: false, taskSyncEnabled: false, featureRoomoteControlEnabled: false, + isBrowserSessionActive: false, checkpointTimeout: DEFAULT_CHECKPOINT_TIMEOUT_SECONDS, // Add the checkpoint timeout property } diff --git a/webview-ui/src/i18n/locales/ca/chat.json b/webview-ui/src/i18n/locales/ca/chat.json index 16ea62c44390..d4cdc75b549b 100644 --- a/webview-ui/src/i18n/locales/ca/chat.json +++ b/webview-ui/src/i18n/locales/ca/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Uneix-te a nosaltres a X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sessió del navegador", "rooWantsToUse": "Roo vol utilitzar el navegador", "consoleLogs": "Registres de consola", "noNewLogs": "(Cap registre nou)", @@ -329,8 +330,10 @@ "launch": "Iniciar navegador a {{url}}", "click": "Clic ({{coordinate}})", "type": "Escriure \"{{text}}\"", + "press": "Prem {{key}}", "scrollDown": "Desplaçar avall", "scrollUp": "Desplaçar amunt", + "hover": "Plana sobre ({{coordinate}})", "close": "Tancar navegador" } }, diff --git a/webview-ui/src/i18n/locales/de/chat.json b/webview-ui/src/i18n/locales/de/chat.json index 
a3f14d12549b..b08cc6f608b2 100644 --- a/webview-ui/src/i18n/locales/de/chat.json +++ b/webview-ui/src/i18n/locales/de/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Folge uns auf X, Discord oder r/RooCode 🚀" }, "browser": { + "session": "Browser-Sitzung", "rooWantsToUse": "Roo möchte den Browser verwenden", "consoleLogs": "Konsolenprotokolle", "noNewLogs": "(Keine neuen Protokolle)", @@ -329,8 +330,10 @@ "launch": "Browser starten auf {{url}}", "click": "Klicken ({{coordinate}})", "type": "Eingeben \"{{text}}\"", + "press": "{{key}} drücken", "scrollDown": "Nach unten scrollen", "scrollUp": "Nach oben scrollen", + "hover": "Hover ({{coordinate}})", "close": "Browser schließen" } }, diff --git a/webview-ui/src/i18n/locales/en/chat.json b/webview-ui/src/i18n/locales/en/chat.json index 6f47f040c67b..3c90d886bcf4 100644 --- a/webview-ui/src/i18n/locales/en/chat.json +++ b/webview-ui/src/i18n/locales/en/chat.json @@ -328,6 +328,7 @@ "countdownDisplay": "{{count}}s" }, "browser": { + "session": "Browser Session", "rooWantsToUse": "Roo wants to use the browser", "consoleLogs": "Console Logs", "noNewLogs": "(No new logs)", @@ -340,12 +341,13 @@ }, "sessionStarted": "Browser Session Started", "actions": { - "title": "Browse Action: ", "launch": "Launch browser at {{url}}", "click": "Click ({{coordinate}})", "type": "Type \"{{text}}\"", + "press": "Press {{key}}", "scrollDown": "Scroll down", "scrollUp": "Scroll up", + "hover": "Hover ({{coordinate}})", "close": "Close browser" } }, diff --git a/webview-ui/src/i18n/locales/es/chat.json b/webview-ui/src/i18n/locales/es/chat.json index e6084b371399..0ebbb5eb3dff 100644 --- a/webview-ui/src/i18n/locales/es/chat.json +++ b/webview-ui/src/i18n/locales/es/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Únete a nosotros en X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sesión del navegador", "rooWantsToUse": "Roo quiere usar el navegador", "consoleLogs": "Registros de la consola", "noNewLogs": "(No hay nuevos registros)", @@ 
-329,8 +330,10 @@ "launch": "Iniciar navegador en {{url}}", "click": "Clic ({{coordinate}})", "type": "Escribir \"{{text}}\"", + "press": "Pulsar {{key}}", "scrollDown": "Desplazar hacia abajo", "scrollUp": "Desplazar hacia arriba", + "hover": "Flotar ({{coordinate}})", "close": "Cerrar navegador" } }, diff --git a/webview-ui/src/i18n/locales/fr/chat.json b/webview-ui/src/i18n/locales/fr/chat.json index 38d63358ca98..35bffcbcaacc 100644 --- a/webview-ui/src/i18n/locales/fr/chat.json +++ b/webview-ui/src/i18n/locales/fr/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Rejoins-nous sur X, Discord, ou r/RooCode 🚀" }, "browser": { + "session": "Session du navigateur", "rooWantsToUse": "Roo veut utiliser le navigateur", "consoleLogs": "Journaux de console", "noNewLogs": "(Pas de nouveaux journaux)", @@ -329,8 +330,10 @@ "launch": "Lancer le navigateur sur {{url}}", "click": "Cliquer ({{coordinate}})", "type": "Saisir \"{{text}}\"", + "press": "Appuyer sur {{key}}", "scrollDown": "Défiler vers le bas", "scrollUp": "Défiler vers le haut", + "hover": "Survoler ({{coordinate}})", "close": "Fermer le navigateur" } }, diff --git a/webview-ui/src/i18n/locales/hi/chat.json b/webview-ui/src/i18n/locales/hi/chat.json index 9bc4b4619766..3686609c974a 100644 --- a/webview-ui/src/i18n/locales/hi/chat.json +++ b/webview-ui/src/i18n/locales/hi/chat.json @@ -313,6 +313,7 @@ "socialLinks": "X, Discord, या r/RooCode पर हमसे जुड़ें 🚀" }, "browser": { + "session": "ब्राउज़र सत्र", "rooWantsToUse": "Roo ब्राउज़र का उपयोग करना चाहता है", "consoleLogs": "कंसोल लॉग", "noNewLogs": "(कोई नया लॉग नहीं)", @@ -329,8 +330,10 @@ "launch": "{{url}} पर ब्राउज़र लॉन्च करें", "click": "क्लिक करें ({{coordinate}})", "type": "टाइप करें \"{{text}}\"", + "press": "{{key}} दबाएँ", "scrollDown": "नीचे स्क्रॉल करें", "scrollUp": "ऊपर स्क्रॉल करें", + "hover": "होवर करें ({{coordinate}})", "close": "ब्राउज़र बंद करें" } }, diff --git a/webview-ui/src/i18n/locales/id/chat.json 
b/webview-ui/src/i18n/locales/id/chat.json index b61b47b3d884..9d22b6d9b30f 100644 --- a/webview-ui/src/i18n/locales/id/chat.json +++ b/webview-ui/src/i18n/locales/id/chat.json @@ -334,6 +334,7 @@ "countdownDisplay": "{{count}}dtk" }, "browser": { + "session": "Sesi Browser", "rooWantsToUse": "Roo ingin menggunakan browser", "consoleLogs": "Log Konsol", "noNewLogs": "(Tidak ada log baru)", @@ -350,8 +351,10 @@ "launch": "Luncurkan browser di {{url}}", "click": "Klik ({{coordinate}})", "type": "Ketik \"{{text}}\"", + "press": "Tekan {{key}}", "scrollDown": "Gulir ke bawah", "scrollUp": "Gulir ke atas", + "hover": "Arahkan ({{coordinate}})", "close": "Tutup browser" } }, diff --git a/webview-ui/src/i18n/locales/it/chat.json b/webview-ui/src/i18n/locales/it/chat.json index 082c489e9313..5916f877af18 100644 --- a/webview-ui/src/i18n/locales/it/chat.json +++ b/webview-ui/src/i18n/locales/it/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Unisciti a noi su X, Discord, o r/RooCode 🚀" }, "browser": { + "session": "Sessione del browser", "rooWantsToUse": "Roo vuole utilizzare il browser", "consoleLogs": "Log della console", "noNewLogs": "(Nessun nuovo log)", @@ -329,8 +330,10 @@ "launch": "Avvia browser su {{url}}", "click": "Clic ({{coordinate}})", "type": "Digita \"{{text}}\"", + "press": "Premi {{key}}", "scrollDown": "Scorri verso il basso", "scrollUp": "Scorri verso l'alto", + "hover": "Passa il mouse ({{coordinate}})", "close": "Chiudi browser" } }, diff --git a/webview-ui/src/i18n/locales/ja/chat.json b/webview-ui/src/i18n/locales/ja/chat.json index 531afc067e4d..dd9df13b7a48 100644 --- a/webview-ui/src/i18n/locales/ja/chat.json +++ b/webview-ui/src/i18n/locales/ja/chat.json @@ -313,6 +313,7 @@ "socialLinks": "XDiscord、またはr/RooCodeでフォローしてください 🚀" }, "browser": { + "session": "ブラウザセッション", "rooWantsToUse": "Rooはブラウザを使用したい", "consoleLogs": "コンソールログ", "noNewLogs": "(新しいログはありません)", @@ -329,8 +330,10 @@ "launch": "{{url}} でブラウザを起動", "click": "クリック ({{coordinate}})", "type": 
"入力 \"{{text}}\"", + "press": "{{key}}を押す", "scrollDown": "下にスクロール", "scrollUp": "上にスクロール", + "hover": "ホバー ({{coordinate}})", "close": "ブラウザを閉じる" } }, diff --git a/webview-ui/src/i18n/locales/ko/chat.json b/webview-ui/src/i18n/locales/ko/chat.json index 9f7818bc4757..c5c3c7c53c4d 100644 --- a/webview-ui/src/i18n/locales/ko/chat.json +++ b/webview-ui/src/i18n/locales/ko/chat.json @@ -313,6 +313,7 @@ "socialLinks": "X, Discord, 또는 r/RooCode에서 만나요 🚀" }, "browser": { + "session": "브라우저 세션", "rooWantsToUse": "Roo가 브라우저를 사용하고 싶어합니다", "consoleLogs": "콘솔 로그", "noNewLogs": "(새 로그 없음)", @@ -329,8 +330,10 @@ "launch": "{{url}}에서 브라우저 실행", "click": "클릭 ({{coordinate}})", "type": "입력 \"{{text}}\"", + "press": "{{key}} 누르기", "scrollDown": "아래로 스크롤", "scrollUp": "위로 스크롤", + "hover": "가리키기 ({{coordinate}})", "close": "브라우저 닫기" } }, diff --git a/webview-ui/src/i18n/locales/nl/chat.json b/webview-ui/src/i18n/locales/nl/chat.json index 97380d370af0..98b05ca280ed 100644 --- a/webview-ui/src/i18n/locales/nl/chat.json +++ b/webview-ui/src/i18n/locales/nl/chat.json @@ -313,6 +313,7 @@ "countdownDisplay": "{{count}}s" }, "browser": { + "session": "Browsersessie", "rooWantsToUse": "Roo wil de browser gebruiken", "consoleLogs": "Console-logboeken", "noNewLogs": "(Geen nieuwe logboeken)", @@ -329,8 +330,10 @@ "launch": "Browser starten op {{url}}", "click": "Klik ({{coordinate}})", "type": "Typ \"{{text}}\"", + "press": "Druk op {{key}}", "scrollDown": "Scroll naar beneden", "scrollUp": "Scroll naar boven", + "hover": "Zweven ({{coordinate}})", "close": "Browser sluiten" } }, diff --git a/webview-ui/src/i18n/locales/pl/chat.json b/webview-ui/src/i18n/locales/pl/chat.json index 414262eaa7cf..72dd45cb39ca 100644 --- a/webview-ui/src/i18n/locales/pl/chat.json +++ b/webview-ui/src/i18n/locales/pl/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Dołącz do nas na X, Discord, lub r/RooCode 🚀" }, "browser": { + "session": "Sesja przeglądarki", "rooWantsToUse": "Roo chce użyć przeglądarki", 
"consoleLogs": "Logi konsoli", "noNewLogs": "(Brak nowych logów)", @@ -329,8 +330,10 @@ "launch": "Uruchom przeglądarkę na {{url}}", "click": "Kliknij ({{coordinate}})", "type": "Wpisz \"{{text}}\"", + "press": "Naciśnij {{key}}", "scrollDown": "Przewiń w dół", "scrollUp": "Przewiń w górę", + "hover": "Najedź ({{coordinate}})", "close": "Zamknij przeglądarkę" } }, diff --git a/webview-ui/src/i18n/locales/pt-BR/chat.json b/webview-ui/src/i18n/locales/pt-BR/chat.json index 0baa415bc7d3..0a222741c61c 100644 --- a/webview-ui/src/i18n/locales/pt-BR/chat.json +++ b/webview-ui/src/i18n/locales/pt-BR/chat.json @@ -313,6 +313,7 @@ "socialLinks": "Junte-se a nós no X, Discord, ou r/RooCode 🚀" }, "browser": { + "session": "Sessão do Navegador", "rooWantsToUse": "Roo quer usar o navegador", "consoleLogs": "Logs do console", "noNewLogs": "(Sem novos logs)", @@ -329,8 +330,10 @@ "launch": "Iniciar navegador em {{url}}", "click": "Clique ({{coordinate}})", "type": "Digitar \"{{text}}\"", + "press": "Pressione {{key}}", "scrollDown": "Rolar para baixo", "scrollUp": "Rolar para cima", + "hover": "Pairar ({{coordinate}})", "close": "Fechar navegador" } }, diff --git a/webview-ui/src/i18n/locales/ru/chat.json b/webview-ui/src/i18n/locales/ru/chat.json index 10cca124d355..1996d4506f54 100644 --- a/webview-ui/src/i18n/locales/ru/chat.json +++ b/webview-ui/src/i18n/locales/ru/chat.json @@ -314,6 +314,7 @@ "countdownDisplay": "{{count}}с" }, "browser": { + "session": "Сеанс браузера", "rooWantsToUse": "Roo хочет использовать браузер", "consoleLogs": "Логи консоли", "noNewLogs": "(Новых логов нет)", @@ -330,8 +331,10 @@ "launch": "Открыть браузер по адресу {{url}}", "click": "Клик ({{coordinate}})", "type": "Ввести \"{{text}}\"", + "press": "Нажать {{key}}", "scrollDown": "Прокрутить вниз", "scrollUp": "Прокрутить вверх", + "hover": "Навести ({{coordinate}})", "close": "Закрыть браузер" } }, diff --git a/webview-ui/src/i18n/locales/tr/chat.json b/webview-ui/src/i18n/locales/tr/chat.json 
index 9b92b937a799..000e5aacfc6d 100644 --- a/webview-ui/src/i18n/locales/tr/chat.json +++ b/webview-ui/src/i18n/locales/tr/chat.json @@ -314,6 +314,7 @@ "socialLinks": "Bize X, Discord, veya r/RooCode'da katılın 🚀" }, "browser": { + "session": "Tarayıcı Oturumu", "rooWantsToUse": "Roo tarayıcıyı kullanmak istiyor", "consoleLogs": "Konsol Kayıtları", "noNewLogs": "(Yeni kayıt yok)", @@ -330,8 +331,10 @@ "launch": "{{url}} adresinde tarayıcı başlat", "click": "Tıkla ({{coordinate}})", "type": "Yaz \"{{text}}\"", + "press": "{{key}} tuşuna bas", "scrollDown": "Aşağı kaydır", "scrollUp": "Yukarı kaydır", + "hover": "Üzerine gel ({{coordinate}})", "close": "Tarayıcıyı kapat" } }, diff --git a/webview-ui/src/i18n/locales/vi/chat.json b/webview-ui/src/i18n/locales/vi/chat.json index 39cb1cf01663..4ca73c4edab3 100644 --- a/webview-ui/src/i18n/locales/vi/chat.json +++ b/webview-ui/src/i18n/locales/vi/chat.json @@ -314,6 +314,7 @@ "socialLinks": "Tham gia với chúng tôi trên X, Discord, hoặc r/RooCode 🚀" }, "browser": { + "session": "Phiên trình duyệt", "rooWantsToUse": "Roo muốn sử dụng trình duyệt", "consoleLogs": "Nhật ký bảng điều khiển", "noNewLogs": "(Không có nhật ký mới)", @@ -330,8 +331,10 @@ "launch": "Khởi chạy trình duyệt tại {{url}}", "click": "Nhấp ({{coordinate}})", "type": "Gõ \"{{text}}\"", + "press": "Nhấn {{key}}", "scrollDown": "Cuộn xuống", "scrollUp": "Cuộn lên", + "hover": "Di chuột ({{coordinate}})", "close": "Đóng trình duyệt" } }, diff --git a/webview-ui/src/i18n/locales/zh-CN/chat.json b/webview-ui/src/i18n/locales/zh-CN/chat.json index ef87b819ea99..d22df32cb964 100644 --- a/webview-ui/src/i18n/locales/zh-CN/chat.json +++ b/webview-ui/src/i18n/locales/zh-CN/chat.json @@ -314,6 +314,7 @@ "socialLinks": "在 XDiscordr/RooCode 上关注我们 🚀" }, "browser": { + "session": "浏览器会话", "rooWantsToUse": "Roo想使用浏览器", "consoleLogs": "控制台日志", "noNewLogs": "(没有新日志)", @@ -330,8 +331,10 @@ "launch": "访问 {{url}}", "click": "点击 ({{coordinate}})", "type": "输入 \"{{text}}\"", 
+ "press": "按 {{key}}", "scrollDown": "向下滚动", "scrollUp": "向上滚动", + "hover": "悬停 ({{coordinate}})", "close": "关闭浏览器" } }, diff --git a/webview-ui/src/i18n/locales/zh-TW/chat.json b/webview-ui/src/i18n/locales/zh-TW/chat.json index 985859bec418..640a93199765 100644 --- a/webview-ui/src/i18n/locales/zh-TW/chat.json +++ b/webview-ui/src/i18n/locales/zh-TW/chat.json @@ -332,6 +332,7 @@ "countdownDisplay": "{{count}} 秒" }, "browser": { + "session": "瀏覽器會話", "rooWantsToUse": "Roo 想要使用瀏覽器", "consoleLogs": "主控台記錄", "noNewLogs": "(沒有新記錄)", @@ -348,8 +349,10 @@ "launch": "在 {{url}} 啟動瀏覽器", "click": "點選 ({{coordinate}})", "type": "輸入「{{text}}」", + "press": "按下 {{key}}", "scrollDown": "向下捲動", "scrollUp": "向上捲動", + "hover": "懸停 ({{coordinate}})", "close": "關閉瀏覽器" } }, diff --git a/webview-ui/vite.config.ts b/webview-ui/vite.config.ts index b38452a99024..6bf6412bfb01 100644 --- a/webview-ui/vite.config.ts +++ b/webview-ui/vite.config.ts @@ -101,6 +101,10 @@ export default defineConfig(({ mode }) => { // Ensure source maps are properly included in the build minify: mode === "production" ? "esbuild" : false, rollupOptions: { + input: { + index: resolve(__dirname, "index.html"), + "browser-panel": resolve(__dirname, "browser-panel.html"), + }, output: { entryFileNames: `assets/[name].js`, chunkFileNames: (chunkInfo) => {