diff --git a/.gitignore b/.gitignore index 02fdf8f88e5..cc6551885f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.pnpm-store dist out out-* diff --git a/scripts/generate-types.mts b/scripts/generate-types.mts index c9bed9a0033..2ad167b0d8f 100644 --- a/scripts/generate-types.mts +++ b/scripts/generate-types.mts @@ -3,7 +3,8 @@ import fs from "fs/promises" import { zodToTs, createTypeAlias, printNode } from "zod-to-ts" import { $ } from "execa" -import { typeDefinitions } from "../src/schemas" +import schemas from "../src/schemas" +const { typeDefinitions } = schemas async function main() { const types: string[] = [ diff --git a/src/core/Cline.ts b/src/core/Cline.ts index 7618a640ebf..947cef016d7 100644 --- a/src/core/Cline.ts +++ b/src/core/Cline.ts @@ -2466,7 +2466,8 @@ export class Cline extends EventEmitter { } break } else { - let browserActionResult: BrowserActionResult + // Initialize with empty object to avoid "used before assigned" errors + let browserActionResult: BrowserActionResult = {} if (action === "launch") { if (!url) { this.consecutiveMistakeCount++ @@ -2552,9 +2553,9 @@ export class Cline extends EventEmitter { pushToolResult( formatResponse.toolResult( `The browser action has been executed. The console logs and screenshot have been captured for your analysis.\n\nConsole logs:\n${ - browserActionResult.logs || "(No new logs)" + browserActionResult?.logs || "(No new logs)" }\n\n(REMEMBER: if you need to proceed to using non-\`browser_action\` tools or launch a new browser, you MUST first close this browser. For example, if after analyzing the logs and screenshot you need to edit a file, you must first close the browser before you can use the write_to_file tool.)`, - browserActionResult.screenshot ? [browserActionResult.screenshot] : [], + browserActionResult?.screenshot ? 
[browserActionResult.screenshot] : [], ), ) break diff --git a/src/core/mentions/index.ts b/src/core/mentions/index.ts index e716359b7cc..24696fe0700 100644 --- a/src/core/mentions/index.ts +++ b/src/core/mentions/index.ts @@ -22,10 +22,7 @@ export async function openMention(mention?: string, osInfo?: string): Promise implements await this.postStateToWebview() break case "testBrowserConnection": - try { - const browserSession = new BrowserSession(this.context) - // If no text is provided, try auto-discovery - if (!message.text) { - try { - const discoveredHost = await discoverChromeInstances() - if (discoveredHost) { - // Test the connection to the discovered host - const result = await browserSession.testConnection(discoveredHost) - // Send the result back to the webview - await this.postMessageToWebview({ - type: "browserConnectionResult", - success: result.success, - text: `Auto-discovered and tested connection to Chrome at ${discoveredHost}: ${result.message}`, - values: { endpoint: result.endpoint }, - }) - } else { - await this.postMessageToWebview({ - type: "browserConnectionResult", - success: false, - text: "No Chrome instances found on the network. Make sure Chrome is running with remote debugging enabled (--remote-debugging-port=9222).", - }) - } - } catch (error) { - await this.postMessageToWebview({ - type: "browserConnectionResult", - success: false, - text: `Error during auto-discovery: ${error instanceof Error ? error.message : String(error)}`, - }) - } - } else { - // Test the provided URL - const result = await browserSession.testConnection(message.text) - - // Send the result back to the webview - await this.postMessageToWebview({ - type: "browserConnectionResult", - success: result.success, - text: result.message, - values: { endpoint: result.endpoint }, - }) - } - } catch (error) { - await this.postMessageToWebview({ - type: "browserConnectionResult", - success: false, - text: `Error testing connection: ${error instanceof Error ? 
error.message : String(error)}`, - }) - } - break - case "discoverBrowser": - try { - const discoveredHost = await discoverChromeInstances() - - if (discoveredHost) { - // Don't update the remoteBrowserHost state when auto-discovering - // This way we don't override the user's preference - - // Test the connection to get the endpoint - const browserSession = new BrowserSession(this.context) - const result = await browserSession.testConnection(discoveredHost) - + // If no text is provided, try auto-discovery + if (!message.text) { + // Use testBrowserConnection for auto-discovery + const chromeHostUrl = await discoverChromeHostUrl() + if (chromeHostUrl) { // Send the result back to the webview await this.postMessageToWebview({ type: "browserConnectionResult", - success: true, - text: `Successfully discovered and connected to Chrome at ${discoveredHost}`, - values: { endpoint: result.endpoint }, + success: !!chromeHostUrl, + text: `Auto-discovered and tested connection to Chrome: ${chromeHostUrl}`, + values: { endpoint: chromeHostUrl }, }) } else { await this.postMessageToWebview({ @@ -1496,11 +1439,17 @@ export class ClineProvider extends EventEmitter implements text: "No Chrome instances found on the network. Make sure Chrome is running with remote debugging enabled (--remote-debugging-port=9222).", }) } - } catch (error) { + } else { + // Test the provided URL + const customHostUrl = message.text + const hostIsValid = await tryChromeHostUrl(message.text) + // Send the result back to the webview await this.postMessageToWebview({ type: "browserConnectionResult", - success: false, - text: `Error discovering browser: ${error instanceof Error ? error.message : String(error)}`, + success: hostIsValid, + text: hostIsValid + ? 
`Successfully connected to Chrome: ${customHostUrl}` + : "Failed to connect to Chrome", }) } break @@ -2602,6 +2551,7 @@ export class ClineProvider extends EventEmitter implements screenshotQuality, remoteBrowserHost, remoteBrowserEnabled, + cachedChromeHostUrl, writeDelayMs, terminalOutputLineLimit, terminalShellIntegrationTimeout, @@ -2670,6 +2620,7 @@ export class ClineProvider extends EventEmitter implements screenshotQuality: screenshotQuality ?? 75, remoteBrowserHost, remoteBrowserEnabled: remoteBrowserEnabled ?? false, + cachedChromeHostUrl: cachedChromeHostUrl, writeDelayMs: writeDelayMs ?? 1000, terminalOutputLineLimit: terminalOutputLineLimit ?? 500, terminalShellIntegrationTimeout: terminalShellIntegrationTimeout ?? TERMINAL_SHELL_INTEGRATION_TIMEOUT, @@ -2755,6 +2706,7 @@ export class ClineProvider extends EventEmitter implements screenshotQuality: stateValues.screenshotQuality ?? 75, remoteBrowserHost: stateValues.remoteBrowserHost, remoteBrowserEnabled: stateValues.remoteBrowserEnabled ?? false, + cachedChromeHostUrl: stateValues.cachedChromeHostUrl as string | undefined, fuzzyMatchThreshold: stateValues.fuzzyMatchThreshold ?? 1.0, writeDelayMs: stateValues.writeDelayMs ?? 1000, terminalOutputLineLimit: stateValues.terminalOutputLineLimit ?? 
500, diff --git a/src/core/webview/__tests__/ClineProvider.test.ts b/src/core/webview/__tests__/ClineProvider.test.ts index 5c859568ebe..ea0677b0102 100644 --- a/src/core/webview/__tests__/ClineProvider.test.ts +++ b/src/core/webview/__tests__/ClineProvider.test.ts @@ -40,9 +40,12 @@ jest.mock("../../../services/browser/BrowserSession", () => ({ // Mock browserDiscovery jest.mock("../../../services/browser/browserDiscovery", () => ({ - discoverChromeInstances: jest.fn().mockImplementation(async () => { + discoverChromeHostUrl: jest.fn().mockImplementation(async () => { return "http://localhost:9222" }), + tryChromeHostUrl: jest.fn().mockImplementation(async (url) => { + return url === "http://localhost:9222" + }), })) jest.mock( @@ -1916,9 +1919,9 @@ describe("ClineProvider", () => { type: "testBrowserConnection", }) - // Verify discoverChromeInstances was called - const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery") - expect(discoverChromeInstances).toHaveBeenCalled() + // Verify discoverChromeHostUrl was called + const { discoverChromeHostUrl } = require("../../../services/browser/browserDiscovery") + expect(discoverChromeHostUrl).toHaveBeenCalled() // Verify postMessage was called with success result expect(mockPostMessage).toHaveBeenCalledWith( @@ -1929,77 +1932,6 @@ describe("ClineProvider", () => { }), ) }) - - test("handles discoverBrowser message", async () => { - // Get the message handler - const messageHandler = (mockWebviewView.webview.onDidReceiveMessage as jest.Mock).mock.calls[0][0] - - // Test browser discovery - await messageHandler({ - type: "discoverBrowser", - }) - - // Verify discoverChromeInstances was called - const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery") - expect(discoverChromeInstances).toHaveBeenCalled() - - // Verify postMessage was called with success result - expect(mockPostMessage).toHaveBeenCalledWith( - expect.objectContaining({ - type: 
"browserConnectionResult", - success: true, - text: expect.stringContaining("Successfully discovered and connected to Chrome"), - }), - ) - }) - - test("handles errors during browser discovery", async () => { - // Mock discoverChromeInstances to throw an error - const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery") - discoverChromeInstances.mockImplementationOnce(() => { - throw new Error("Discovery error") - }) - - // Get the message handler - const messageHandler = (mockWebviewView.webview.onDidReceiveMessage as jest.Mock).mock.calls[0][0] - - // Test browser discovery with error - await messageHandler({ - type: "discoverBrowser", - }) - - // Verify postMessage was called with error result - expect(mockPostMessage).toHaveBeenCalledWith( - expect.objectContaining({ - type: "browserConnectionResult", - success: false, - text: expect.stringContaining("Error discovering browser"), - }), - ) - }) - - test("handles case when no browsers are discovered", async () => { - // Mock discoverChromeInstances to return null (no browsers found) - const { discoverChromeInstances } = require("../../../services/browser/browserDiscovery") - discoverChromeInstances.mockImplementationOnce(() => null) - - // Get the message handler - const messageHandler = (mockWebviewView.webview.onDidReceiveMessage as jest.Mock).mock.calls[0][0] - - // Test browser discovery with no browsers found - await messageHandler({ - type: "discoverBrowser", - }) - - // Verify postMessage was called with failure result - expect(mockPostMessage).toHaveBeenCalledWith( - expect.objectContaining({ - type: "browserConnectionResult", - success: false, - text: expect.stringContaining("No Chrome instances found"), - }), - ) - }) }) }) diff --git a/src/exports/roo-code.d.ts b/src/exports/roo-code.d.ts index 87ac4306e82..2f71c6662ea 100644 --- a/src/exports/roo-code.d.ts +++ b/src/exports/roo-code.d.ts @@ -234,6 +234,7 @@ type GlobalSettings = { screenshotQuality?: number | undefined 
remoteBrowserEnabled?: boolean | undefined remoteBrowserHost?: string | undefined + cachedChromeHostUrl?: string | undefined enableCheckpoints?: boolean | undefined checkpointStorage?: ("task" | "workspace") | undefined ttsEnabled?: boolean | undefined diff --git a/src/exports/types.ts b/src/exports/types.ts index 812a12b2438..fb3260d4f05 100644 --- a/src/exports/types.ts +++ b/src/exports/types.ts @@ -237,6 +237,7 @@ type GlobalSettings = { screenshotQuality?: number | undefined remoteBrowserEnabled?: boolean | undefined remoteBrowserHost?: string | undefined + cachedChromeHostUrl?: string | undefined enableCheckpoints?: boolean | undefined checkpointStorage?: ("task" | "workspace") | undefined ttsEnabled?: boolean | undefined diff --git a/src/schemas/index.ts b/src/schemas/index.ts index 72c02c61fc4..eef9ed3cd78 100644 --- a/src/schemas/index.ts +++ b/src/schemas/index.ts @@ -513,6 +513,7 @@ export const globalSettingsSchema = z.object({ screenshotQuality: z.number().optional(), remoteBrowserEnabled: z.boolean().optional(), remoteBrowserHost: z.string().optional(), + cachedChromeHostUrl: z.string().optional(), enableCheckpoints: z.boolean().optional(), checkpointStorage: checkpointStoragesSchema.optional(), @@ -618,6 +619,7 @@ const globalSettingsRecord: GlobalSettingsRecord = { customModePrompts: undefined, customSupportPrompts: undefined, enhancementApiConfigId: undefined, + cachedChromeHostUrl: undefined, } export const GLOBAL_SETTINGS_KEYS = Object.keys(globalSettingsRecord) as Keys[] @@ -791,7 +793,7 @@ export type TokenUsage = z.infer * TypeDefinition */ -type TypeDefinition = { +export type TypeDefinition = { schema: z.ZodTypeAny identifier: string } @@ -802,3 +804,6 @@ export const typeDefinitions: TypeDefinition[] = [ { schema: clineMessageSchema, identifier: "ClineMessage" }, { schema: tokenUsageSchema, identifier: "TokenUsage" }, ] + +// Also export as default for ESM compatibility +export default { typeDefinitions } diff --git 
a/src/services/browser/BrowserSession.ts b/src/services/browser/BrowserSession.ts index 5c5f59ffebf..7f8963fe1df 100644 --- a/src/services/browser/BrowserSession.ts +++ b/src/services/browser/BrowserSession.ts @@ -9,7 +9,7 @@ import delay from "delay" import axios from "axios" import { fileExistsAtPath } from "../../utils/fs" import { BrowserActionResult } from "../../shared/ExtensionMessage" -import { discoverChromeInstances, testBrowserConnection } from "./browserDiscovery" +import { discoverChromeHostUrl, tryChromeHostUrl } from "./browserDiscovery" interface PCRStats { puppeteer: { launch: typeof launch } @@ -21,20 +21,12 @@ export class BrowserSession { private browser?: Browser private page?: Page private currentMousePosition?: string - private cachedWebSocketEndpoint?: string - private lastConnectionAttempt: number = 0 + private lastConnectionAttempt?: number constructor(context: vscode.ExtensionContext) { this.context = context } - /** - * Test connection to a remote browser - */ - async testConnection(host: string): Promise<{ success: boolean; message: string; endpoint?: string }> { - return testBrowserConnection(host) - } - private async ensureChromiumExists(): Promise { const globalStoragePath = this.context?.globalStorageUri?.fsPath if (!globalStoragePath) { @@ -56,162 +48,173 @@ export class BrowserSession { return stats } - async launchBrowser(): Promise { - console.log("launch browser called") - if (this.browser) { - // throw new Error("Browser already launched") - await this.closeBrowser() // this may happen when the model launches a browser again after having used it already before - } - - // Function to get viewport size - const getViewport = () => { - const size = (this.context.globalState.get("browserViewportSize") as string | undefined) || "900x600" - const [width, height] = size.split("x").map(Number) - return { width, height } - } + /** + * Gets the viewport size from global state or returns default + */ + private getViewport() { + const size 
= (this.context.globalState.get("browserViewportSize") as string | undefined) || "900x600" + const [width, height] = size.split("x").map(Number) + return { width, height } + } - // Check if remote browser connection is enabled - const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as boolean | undefined + /** + * Launches a local browser instance + */ + private async launchLocalBrowser(): Promise { + console.log("Launching local browser") + const stats = await this.ensureChromiumExists() + this.browser = await stats.puppeteer.launch({ + args: [ + "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36", + ], + executablePath: stats.executablePath, + defaultViewport: this.getViewport(), + // headless: false, + }) + } - // If remote browser connection is not enabled, use local browser - if (!remoteBrowserEnabled) { - console.log("Remote browser connection is disabled, using local browser") - const stats = await this.ensureChromiumExists() - this.browser = await stats.puppeteer.launch({ - args: [ - "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36", - ], - executablePath: stats.executablePath, - defaultViewport: getViewport(), - // headless: false, + /** + * Connects to a browser using a Chrome host URL (passed as `browserURL`) + */ + private async connectWithChromeHostUrl(chromeHostUrl: string): Promise { + try { + this.browser = await connect({ + browserURL: chromeHostUrl, + defaultViewport: this.getViewport(), }) - this.page = await this.browser?.newPage() - return + + // Cache the successful endpoint + console.log(`Connected to remote browser at ${chromeHostUrl}`) + this.context.globalState.update("cachedChromeHostUrl", chromeHostUrl) + this.lastConnectionAttempt = Date.now() + + return true + } catch (error) { + console.log(`Failed to connect using WebSocket endpoint: ${error}`) + return false } - // 
Remote browser connection is enabled + } + + /** + * Attempts to connect to a remote browser using various methods + * Returns true if connection was successful, false otherwise + */ + private async connectToRemoteBrowser(): Promise { let remoteBrowserHost = this.context.globalState.get("remoteBrowserHost") as string | undefined - let browserWSEndpoint: string | undefined = this.cachedWebSocketEndpoint let reconnectionAttempted = false // Try to connect with cached endpoint first if it exists and is recent (less than 1 hour old) - if (browserWSEndpoint && Date.now() - this.lastConnectionAttempt < 3600000) { - try { - console.log(`Attempting to connect using cached WebSocket endpoint: ${browserWSEndpoint}`) - this.browser = await connect({ - browserWSEndpoint, - defaultViewport: getViewport(), - }) - this.page = await this.browser?.newPage() - return - } catch (error) { - console.log(`Failed to connect using cached endpoint: ${error}`) - // Clear the cached endpoint since it's no longer valid - this.cachedWebSocketEndpoint = undefined - // User wants to give up after one reconnection attempt - if (remoteBrowserHost) { - reconnectionAttempted = true - } + const cachedChromeHostUrl = this.context.globalState.get("cachedChromeHostUrl") as string | undefined + if (cachedChromeHostUrl && this.lastConnectionAttempt && Date.now() - this.lastConnectionAttempt < 3_600_000) { + console.log(`Attempting to connect using cached Chrome Host Url: ${cachedChromeHostUrl}`) + if (await this.connectWithChromeHostUrl(cachedChromeHostUrl)) { + return true + } + + console.log(`Failed to connect using cached Chrome Host Url: ${cachedChromeHostUrl}`) + // Clear the cached endpoint since it's no longer valid + this.context.globalState.update("cachedChromeHostUrl", undefined) + + // User wants to give up after one reconnection attempt + if (remoteBrowserHost) { + reconnectionAttempted = true } } // If user provided a remote browser host, try to connect to it - if (remoteBrowserHost && 
!reconnectionAttempted) { + else if (remoteBrowserHost && !reconnectionAttempted) { console.log(`Attempting to connect to remote browser at ${remoteBrowserHost}`) try { - // Fetch the WebSocket endpoint from the Chrome DevTools Protocol - const versionUrl = `${remoteBrowserHost.replace(/\/$/, "")}/json/version` - console.log(`Fetching WebSocket endpoint from ${versionUrl}`) + const hostIsValid = await tryChromeHostUrl(remoteBrowserHost) - const response = await axios.get(versionUrl) - browserWSEndpoint = response.data.webSocketDebuggerUrl - - if (!browserWSEndpoint) { - throw new Error("Could not find webSocketDebuggerUrl in the response") + if (!hostIsValid) { + throw new Error("Could not find chromeHostUrl in the response") } - console.log(`Found WebSocket endpoint: ${browserWSEndpoint}`) - - // Cache the successful endpoint - this.cachedWebSocketEndpoint = browserWSEndpoint - this.lastConnectionAttempt = Date.now() + console.log(`Found WebSocket endpoint: ${remoteBrowserHost}`) - this.browser = await connect({ - browserWSEndpoint, - defaultViewport: getViewport(), - }) - this.page = await this.browser?.newPage() - return + if (await this.connectWithChromeHostUrl(remoteBrowserHost)) { + return true + } } catch (error) { console.error(`Failed to connect to remote browser: ${error}`) // Fall back to auto-discovery if remote connection fails } } - // Always try auto-discovery if no custom URL is specified or if connection failed try { - console.log("Attempting auto-discovery...") - const discoveredHost = await discoverChromeInstances() - - if (discoveredHost) { - console.log(`Auto-discovered Chrome at ${discoveredHost}`) + console.log("Attempting browser auto-discovery...") + const chromeHostUrl = await discoverChromeHostUrl() - // Don't save the discovered host to global state to avoid overriding user preference - // We'll just use it for this session - - // Try to connect to the discovered host - const testResult = await testBrowserConnection(discoveredHost) - - 
if (testResult.success && testResult.endpoint) { - // Cache the successful endpoint - this.cachedWebSocketEndpoint = testResult.endpoint - this.lastConnectionAttempt = Date.now() - - this.browser = await connect({ - browserWSEndpoint: testResult.endpoint, - defaultViewport: getViewport(), - }) - this.page = await this.browser?.newPage() - return - } + if (chromeHostUrl && (await this.connectWithChromeHostUrl(chromeHostUrl))) { + return true } } catch (error) { console.error(`Auto-discovery failed: ${error}`) // Fall back to local browser if auto-discovery fails } - // If all remote connection attempts fail, fall back to local browser - console.log("Falling back to local browser") - const stats = await this.ensureChromiumExists() - this.browser = await stats.puppeteer.launch({ - args: [ - "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36", - ], - executablePath: stats.executablePath, - defaultViewport: getViewport(), - // headless: false, - }) - // (latest version of puppeteer does not add headless to user agent) - this.page = await this.browser?.newPage() + return false + } + + async launchBrowser(): Promise { + console.log("launch browser called") + + // Check if remote browser connection is enabled + const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as boolean | undefined + + if (!remoteBrowserEnabled) { + console.log("Launching local browser") + if (this.browser) { + // throw new Error("Browser already launched") + await this.closeBrowser() // this may happen when the model launches a browser again after having used it already before + } else { + // If browser wasn't open, just reset the state + this.resetBrowserState() + } + await this.launchLocalBrowser() + } else { + console.log("Connecting to remote browser") + // Remote browser connection is enabled + const remoteConnected = await this.connectToRemoteBrowser() + + // If all remote connection 
attempts fail, fall back to local browser + if (!remoteConnected) { + console.log("Falling back to local browser") + await this.launchLocalBrowser() + } + } } + /** + * Closes the browser and resets browser state + */ async closeBrowser(): Promise { if (this.browser || this.page) { console.log("closing browser...") - const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as string | undefined + const remoteBrowserEnabled = this.context.globalState.get("remoteBrowserEnabled") as boolean | undefined if (remoteBrowserEnabled && this.browser) { await this.browser.disconnect().catch(() => {}) } else { await this.browser?.close().catch(() => {}) + this.resetBrowserState() } - this.browser = undefined - this.page = undefined - this.currentMousePosition = undefined + // this.resetBrowserState() } return {} } + /** + * Resets all browser state variables + */ + private resetBrowserState(): void { + this.browser = undefined + this.page = undefined + this.currentMousePosition = undefined + } + async doAction(action: (page: Page) => Promise): Promise { if (!this.page) { throw new Error( @@ -297,13 +300,118 @@ export class BrowserSession { } } - async navigateToUrl(url: string): Promise { - return this.doAction(async (page) => { - // networkidle2 isn't good enough since page may take some time to load. we can assume locally running dev sites will reach networkidle0 in a reasonable amount of time - await page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] }) - // await page.goto(url, { timeout: 10_000, waitUntil: "load" }) - await this.waitTillHTMLStable(page) // in case the page is loading more resources + /** + * Extract the root domain from a URL + * e.g., http://localhost:3000/path -> localhost:3000 + * e.g., https://example.com/path -> example.com + */ + private getRootDomain(url: string): string { + try { + const urlObj = new URL(url) + // Remove www. 
prefix if present + return urlObj.host.replace(/^www\./, "") + } catch (error) { + // If URL parsing fails, return the original URL + return url + } + } + + /** + * Navigate to a URL with standard loading options + */ + private async navigatePageToUrl(page: Page, url: string): Promise { + await page.goto(url, { timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] }) + await this.waitTillHTMLStable(page) + } + + /** + * Creates a new tab and navigates to the specified URL + */ + private async createNewTab(url: string): Promise { + if (!this.browser) { + throw new Error("Browser is not launched") + } + + // Create a new page + const newPage = await this.browser.newPage() + + // Set the new page as the active page + this.page = newPage + + // Navigate to the URL + const result = await this.doAction(async (page) => { + await this.navigatePageToUrl(page, url) }) + + return result + } + + async navigateToUrl(url: string): Promise { + if (!this.browser) { + throw new Error("Browser is not launched") + } + // Remove trailing slash for comparison + const normalizedNewUrl = url.replace(/\/$/, "") + + // Extract the root domain from the URL + const rootDomain = this.getRootDomain(normalizedNewUrl) + + // Get all current pages + const pages = await this.browser.pages() + + // Try to find a page with the same root domain + let existingPage: Page | undefined + + for (const page of pages) { + try { + const pageUrl = page.url() + if (pageUrl && this.getRootDomain(pageUrl) === rootDomain) { + existingPage = page + break + } + } catch (error) { + // Skip pages that might have been closed or have errors + console.log(`Error checking page URL: ${error}`) + continue + } + } + + if (existingPage) { + // Tab with the same root domain exists, switch to it + console.log(`Tab with domain ${rootDomain} already exists, switching to it`) + + // Update the active page + this.page = existingPage + existingPage.bringToFront() + + // Navigate to the new URL if it's different + const 
currentUrl = existingPage.url().replace(/\/$/, "") // Remove trailing / if present + if (this.getRootDomain(currentUrl) === rootDomain && currentUrl !== normalizedNewUrl) { + console.log(`Navigating to new URL: ${normalizedNewUrl}`) + console.log(`Current URL: ${currentUrl}`) + console.log(`Root domain: ${this.getRootDomain(currentUrl)}`) + console.log(`New URL: ${normalizedNewUrl}`) + // Navigate to the new URL + return this.doAction(async (page) => { + await this.navigatePageToUrl(page, normalizedNewUrl) + }) + } else { + console.log(`Tab with domain ${rootDomain} already exists, and URL is the same: ${normalizedNewUrl}`) + // URL is the same, just reload the page to ensure it's up to date + console.log(`Reloading page: ${normalizedNewUrl}`) + console.log(`Current URL: ${currentUrl}`) + console.log(`Root domain: ${this.getRootDomain(currentUrl)}`) + console.log(`New URL: ${normalizedNewUrl}`) + return this.doAction(async (page) => { + await page.reload({ timeout: 7_000, waitUntil: ["domcontentloaded", "networkidle2"] }) + await this.waitTillHTMLStable(page) + }) + } + } else { + // No tab with this root domain exists, create a new one + console.log(`No tab with domain ${rootDomain} exists, creating a new one`) + return this.createNewTab(normalizedNewUrl) + } } // page.goto { waitUntil: "networkidle0" } may not ever resolve, and not waiting could return page content too early before js has loaded @@ -339,36 +447,50 @@ export class BrowserSession { } } - async click(coordinate: string): Promise { + /** + * Handles mouse interaction with network activity monitoring + */ + private async handleMouseInteraction( + page: Page, + coordinate: string, + action: (x: number, y: number) => Promise, + ): Promise { const [x, y] = coordinate.split(",").map(Number) - return this.doAction(async (page) => { - // Set up network request monitoring - let hasNetworkActivity = false - const requestListener = () => { - hasNetworkActivity = true - } - page.on("request", requestListener) - 
- // Perform the click - await page.mouse.click(x, y) - this.currentMousePosition = coordinate - - // Small delay to check if click triggered any network activity - await delay(100) - - if (hasNetworkActivity) { - // If we detected network activity, wait for navigation/loading - await page - .waitForNavigation({ - waitUntil: ["domcontentloaded", "networkidle2"], - timeout: 7000, - }) - .catch(() => {}) - await this.waitTillHTMLStable(page) - } - // Clean up listener - page.off("request", requestListener) + // Set up network request monitoring + let hasNetworkActivity = false + const requestListener = () => { + hasNetworkActivity = true + } + page.on("request", requestListener) + + // Perform the mouse action + await action(x, y) + this.currentMousePosition = coordinate + + // Small delay to check if action triggered any network activity + await delay(100) + + if (hasNetworkActivity) { + // If we detected network activity, wait for navigation/loading + await page + .waitForNavigation({ + waitUntil: ["domcontentloaded", "networkidle2"], + timeout: 7000, + }) + .catch(() => {}) + await this.waitTillHTMLStable(page) + } + + // Clean up listener + page.off("request", requestListener) + } + + async click(coordinate: string): Promise { + return this.doAction(async (page) => { + await this.handleMouseInteraction(page, coordinate, async (x, y) => { + await page.mouse.click(x, y) + }) }) } @@ -378,31 +500,42 @@ export class BrowserSession { }) } + /** + * Scrolls the page by the specified amount + */ + private async scrollPage(page: Page, direction: "up" | "down"): Promise { + const { height } = this.getViewport() + const scrollAmount = direction === "down" ? 
height : -height + + await page.evaluate((scrollHeight) => { + window.scrollBy({ + top: scrollHeight, + behavior: "auto", + }) + }, scrollAmount) + + await delay(300) + } + async scrollDown(): Promise { - const size = ((await this.context.globalState.get("browserViewportSize")) as string | undefined) || "900x600" - const height = parseInt(size.split("x")[1]) return this.doAction(async (page) => { - await page.evaluate((scrollHeight) => { - window.scrollBy({ - top: scrollHeight, - behavior: "auto", - }) - }, height) - await delay(300) + await this.scrollPage(page, "down") }) } async scrollUp(): Promise { - const size = ((await this.context.globalState.get("browserViewportSize")) as string | undefined) || "900x600" - const height = parseInt(size.split("x")[1]) return this.doAction(async (page) => { - await page.evaluate((scrollHeight) => { - window.scrollBy({ - top: -scrollHeight, - behavior: "auto", - }) - }, height) - await delay(300) + await this.scrollPage(page, "up") + }) + } + + async hover(coordinate: string): Promise { + return this.doAction(async (page) => { + await this.handleMouseInteraction(page, coordinate, async (x, y) => { + await page.mouse.move(x, y) + // Small delay to allow any hover effects to appear + await delay(300) + }) }) } } diff --git a/src/services/browser/browserDiscovery.ts b/src/services/browser/browserDiscovery.ts index 187f90e2994..b17e166a9b5 100644 --- a/src/services/browser/browserDiscovery.ts +++ b/src/services/browser/browserDiscovery.ts @@ -1,7 +1,6 @@ -import * as vscode from "vscode" -import * as os from "os" import * as net from "net" import axios from "axios" +import * as dns from "dns" /** * Check if a port is open on a given host @@ -43,46 +42,14 @@ export async function isPortOpen(host: string, port: number, timeout = 1000): Pr /** * Try to connect to Chrome at a specific IP address */ -export async function tryConnect(ipAddress: string): Promise<{ endpoint: string; ip: string } | null> { +export async function 
tryChromeHostUrl(chromeHostUrl: string): Promise { try { - console.log(`Trying to connect to Chrome at: http://${ipAddress}:9222/json/version`) - const response = await axios.get(`http://${ipAddress}:9222/json/version`, { timeout: 1000 }) + console.log(`Trying to connect to Chrome at: ${chromeHostUrl}/json/version`) + const response = await axios.get(`${chromeHostUrl}/json/version`, { timeout: 1000 }) const data = response.data - return { endpoint: data.webSocketDebuggerUrl, ip: ipAddress } + return true } catch (error) { - return null - } -} - -/** - * Execute a shell command and return stdout and stderr - */ -export async function executeShellCommand(command: string): Promise<{ stdout: string; stderr: string }> { - return new Promise<{ stdout: string; stderr: string }>((resolve) => { - const cp = require("child_process") - cp.exec(command, (err: any, stdout: string, stderr: string) => { - resolve({ stdout, stderr }) - }) - }) -} - -/** - * Get Docker gateway IP without UI feedback - */ -export async function getDockerGatewayIP(): Promise { - try { - if (process.platform === "linux") { - try { - const { stdout } = await executeShellCommand("ip route | grep default | awk '{print $3}'") - return stdout.trim() - } catch (error) { - console.log("Could not determine Docker gateway IP:", error) - } - } - return null - } catch (error) { - console.log("Could not determine Docker gateway IP:", error) - return null + return false } } @@ -93,7 +60,6 @@ export async function getDockerHostIP(): Promise { try { // Try to resolve host.docker.internal (works on Docker Desktop) return new Promise((resolve) => { - const dns = require("dns") dns.lookup("host.docker.internal", (err: any, address: string) => { if (err) { resolve(null) @@ -111,7 +77,7 @@ export async function getDockerHostIP(): Promise { /** * Scan a network range for Chrome debugging port */ -export async function scanNetworkForChrome(baseIP: string): Promise { +export async function scanNetworkForChrome(baseIP: 
string, port: number): Promise { if (!baseIP || !baseIP.match(/^\d+\.\d+\.\d+\./)) { return null } @@ -130,7 +96,7 @@ export async function scanNetworkForChrome(baseIP: string): Promise { +// Function to discover Chrome instances on the network +const discoverChromeHosts = async (port: number): Promise => { // Get all network interfaces - const networkInterfaces = os.networkInterfaces() const ipAddresses = [] - // Always try localhost first - ipAddresses.push("localhost") - ipAddresses.push("127.0.0.1") - - // Try to get Docker gateway IP (headless mode) - const gatewayIP = await getDockerGatewayIP() - if (gatewayIP) { - console.log("Found Docker gateway IP:", gatewayIP) - ipAddresses.push(gatewayIP) - } - // Try to get Docker host IP const hostIP = await getDockerHostIP() if (hostIP) { @@ -166,44 +118,21 @@ export async function discoverChromeInstances(): Promise { ipAddresses.push(hostIP) } - // Add all local IP addresses from network interfaces - const localIPs: string[] = [] - Object.values(networkInterfaces).forEach((interfaces) => { - if (!interfaces) return - interfaces.forEach((iface) => { - // Only consider IPv4 addresses - if (iface.family === "IPv4" || iface.family === (4 as any)) { - localIPs.push(iface.address) - } - }) - }) - - // Add local IPs to the list - ipAddresses.push(...localIPs) - - // Scan network for Chrome debugging port - for (const ip of localIPs) { - const chromeIP = await scanNetworkForChrome(ip) - if (chromeIP && !ipAddresses.includes(chromeIP)) { - console.log("Found potential Chrome host via network scan:", chromeIP) - ipAddresses.push(chromeIP) - } - } - // Remove duplicates const uniqueIPs = [...new Set(ipAddresses)] console.log("IP Addresses to try:", uniqueIPs) // Try connecting to each IP address for (const ip of uniqueIPs) { - const connection = await tryConnect(ip) - if (connection) { - console.log(`Successfully connected to Chrome at: ${connection.ip}`) + const hostEndpoint = `http://${ip}:${port}` + + const hostIsValid = 
await tryChromeHostUrl(hostEndpoint) + if (hostIsValid) { // Store the successful IP for future use - console.log(`✅ Found Chrome at ${connection.ip} - You can hardcode this IP if needed`) + console.log(`✅ Found Chrome at ${hostEndpoint}`) // Return the host URL and endpoint - return `http://${connection.ip}:9222` + return hostEndpoint } } @@ -211,36 +140,43 @@ export async function discoverChromeInstances(): Promise { } /** - * Test connection to a remote browser + * Test connection to a remote browser debugging websocket. + * First tries specific hosts, then attempts auto-discovery if needed. + * @param browserHostUrl Optional specific host URL to check first + * @param port Browser debugging port (default: 9222) + * @returns WebSocket debugger URL if connection is successful, null otherwise */ -export async function testBrowserConnection( - host: string, -): Promise<{ success: boolean; message: string; endpoint?: string }> { - try { - // Fetch the WebSocket endpoint from the Chrome DevTools Protocol - const versionUrl = `${host.replace(/\/$/, "")}/json/version` - console.log(`Testing connection to ${versionUrl}`) - - const response = await axios.get(versionUrl, { timeout: 3000 }) - const browserWSEndpoint = response.data.webSocketDebuggerUrl - - if (!browserWSEndpoint) { - return { - success: false, - message: "Could not find webSocketDebuggerUrl in the response", - } +export async function discoverChromeHostUrl(port: number = 9222): Promise { + // First try specific hosts + const hostsToTry = [`http://localhost:${port}`, `http://127.0.0.1:${port}`] + + // Try each host directly first + for (const hostUrl of hostsToTry) { + console.log(`Trying to connect to: ${hostUrl}`) + try { + const hostIsValid = await tryChromeHostUrl(hostUrl) + if (hostIsValid) return hostUrl + } catch (error) { + console.log(`Failed to connect to ${hostUrl}: ${error instanceof Error ? 
error.message : error}`) } + } - return { - success: true, - message: "Successfully connected to Chrome browser", - endpoint: browserWSEndpoint, - } - } catch (error) { - console.error(`Failed to connect to remote browser: ${error}`) - return { - success: false, - message: `Failed to connect: ${error instanceof Error ? error.message : String(error)}`, + // If direct connections failed, attempt auto-discovery + console.log("Direct connections failed. Attempting auto-discovery...") + + const discoveredHostUrl = await discoverChromeHosts(port) + if (discoveredHostUrl) { + console.log(`Trying to connect to discovered host: ${discoveredHostUrl}`) + try { + const hostIsValid = await tryChromeHostUrl(discoveredHostUrl) + if (hostIsValid) return discoveredHostUrl + console.log(`Failed to connect to discovered host ${discoveredHostUrl}`) + } catch (error) { + console.log(`Error connecting to discovered host: ${error instanceof Error ? error.message : error}`) } + } else { + console.log("No browser instances discovered on network") } + + return null } diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index eb5e89a2b4b..60c20b6503f 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -233,7 +233,7 @@ export interface ClineSayTool { } // Must keep in sync with system prompt. 
-export const browserActions = ["launch", "click", "type", "scroll_down", "scroll_up", "close"] as const +export const browserActions = ["launch", "click", "hover", "type", "scroll_down", "scroll_up", "close"] as const export type BrowserAction = (typeof browserActions)[number] diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 6b23d63b29d..2cb16589884 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -115,7 +115,6 @@ export interface WebviewMessage { | "telemetrySetting" | "showRooIgnoredFiles" | "testBrowserConnection" - | "discoverBrowser" | "browserConnectionResult" | "remoteBrowserEnabled" | "language" diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx index e7b0ca6d89e..8bc681034e3 100644 --- a/webview-ui/src/components/chat/ChatTextArea.tsx +++ b/webview-ui/src/components/chat/ChatTextArea.tsx @@ -70,12 +70,11 @@ const ChatTextArea = forwardRef( currentApiConfigName, listApiConfigMeta, customModes, - cwd, + cwd, osInfo, pinnedApiConfigs, togglePinnedApiConfig, - } = - useExtensionState() + } = useExtensionState() // Find the ID and display text for the currently selected API configuration const { currentConfigId, displayName } = useMemo(() => { diff --git a/webview-ui/src/components/settings/BrowserSettings.tsx b/webview-ui/src/components/settings/BrowserSettings.tsx index ee998ad2b49..d77ee16a8b8 100644 --- a/webview-ui/src/components/settings/BrowserSettings.tsx +++ b/webview-ui/src/components/settings/BrowserSettings.tsx @@ -1,14 +1,14 @@ -import { HTMLAttributes, useState, useEffect, useMemo } from "react" import { VSCodeButton, VSCodeCheckbox, VSCodeTextField } from "@vscode/webview-ui-toolkit/react" import { SquareMousePointer } from "lucide-react" +import { HTMLAttributes, useEffect, useMemo, useState } from "react" -import { vscode } from "@/utils/vscode" -import { useAppTranslation } from "@/i18n/TranslationContext" import { Select, 
SelectContent, SelectGroup, SelectItem, SelectTrigger, SelectValue, Slider } from "@/components/ui" +import { useAppTranslation } from "@/i18n/TranslationContext" +import { vscode } from "@/utils/vscode" -import { SetCachedStateField } from "./types" -import { SectionHeader } from "./SectionHeader" import { Section } from "./Section" +import { SectionHeader } from "./SectionHeader" +import { SetCachedStateField } from "./types" type BrowserSettingsProps = HTMLAttributes & { browserToolEnabled?: boolean @@ -37,7 +37,7 @@ export const BrowserSettings = ({ const { t } = useAppTranslation() const [testingConnection, setTestingConnection] = useState(false) - const [testResult, setTestResult] = useState<{ success: boolean; message: string } | null>(null) + const [testResult, setTestResult] = useState<{ success: boolean; text: string } | null>(null) const [discovering, setDiscovering] = useState(false) // We don't need a local state for useRemoteBrowser since we're using the @@ -50,7 +50,7 @@ export const BrowserSettings = ({ const message = event.data if (message.type === "browserConnectionResult") { - setTestResult({ success: message.success, message: message.text }) + setTestResult({ success: message.success, text: message.text }) setTestingConnection(false) setDiscovering(false) } @@ -73,28 +73,12 @@ export const BrowserSettings = ({ } catch (error) { setTestResult({ success: false, - message: `Error: ${error instanceof Error ? error.message : String(error)}`, + text: `Error: ${error instanceof Error ? error.message : String(error)}`, }) setTestingConnection(false) } } - const discoverBrowser = async () => { - setDiscovering(true) - setTestResult(null) - - try { - // Send a message to the extension to discover Chrome instances. - vscode.postMessage({ type: "discoverBrowser" }) - } catch (error) { - setTestResult({ - success: false, - message: `Error: ${error instanceof Error ? 
error.message : String(error)}`, - }) - setDiscovering(false) - } - } - const options = useMemo( () => [ { @@ -206,9 +190,7 @@ export const BrowserSettings = ({ placeholder={t("settings:browser.remote.urlPlaceholder")} style={{ flexGrow: 1 }} /> - + {testingConnection || discovering ? t("settings:browser.remote.testingButton") : t("settings:browser.remote.testButton")} @@ -221,7 +203,7 @@ export const BrowserSettings = ({ ? "bg-green-800/20 text-green-400" : "bg-red-800/20 text-red-400" }`}> - {testResult.message} + {testResult.text} )}