diff --git a/examples/external_clients/google_vertex.ts b/examples/external_clients/google_vertex.ts new file mode 100644 index 000000000..e802d96c3 --- /dev/null +++ b/examples/external_clients/google_vertex.ts @@ -0,0 +1,39 @@ +import { Stagehand } from "@browserbasehq/stagehand"; +import { z } from "zod"; + +/** + * Example of using Google Vertex AI directly (not through AI SDK). + * When you provide `vertexai: true` in the client options, + * the system will route to GoogleVertexClient instead of using AI SDK. + */ +async function main() { + const stagehand = new Stagehand({ + env: "LOCAL", + enableCaching: false, + modelName: "google/gemini-1.5-pro", // Google model with slash notation + modelClientOptions: { + // Vertex AI specific configuration - bypasses AI SDK + vertexai: true, + project: "your-gcp-project-id", + location: "us-central1", + // Optional: API key if not using default auth + // apiKey: process.env.GOOGLE_API_KEY, + }, + }); + + await stagehand.init(); + await stagehand.page.goto("https://docs.stagehand.dev"); + + // Extract some text using Vertex AI (not AI SDK) + const result = await stagehand.page.extract({ + instruction: "extract the main heading of this page", + schema: z.object({ + heading: z.string(), + }), + }); + + console.log("Extracted:", result); + await stagehand.close(); +} + +main().catch(console.error); diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 80387d134..d6d19705d 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -652,9 +652,17 @@ ${scriptContent} \ } const requestId = Math.random().toString(36).substring(2); - const llmClient: LLMClient = modelName - ? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) - : this.llmClient; + + // Use provided modelName if available AND if modelClientOptions has an API key, otherwise use the configured llmClient + const llmClient = + modelName && modelClientOptions?.apiKey + ? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) + : this.stagehand.llmClient; + + // Add null check for llmClient before accessing modelName + if (!llmClient) { + throw new MissingLLMConfigurationError(); + } this.stagehand.log({ category: "act", @@ -746,9 +754,17 @@ ${scriptContent} \ } const requestId = Math.random().toString(36).substring(2); - const llmClient = modelName - ? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) - : this.llmClient; + + // Use provided modelName if available AND if modelClientOptions has an API key, otherwise use the configured llmClient + const llmClient = + modelName && modelClientOptions?.apiKey + ? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) + : this.stagehand.llmClient; + + // Add null check for llmClient before accessing modelName + if (!llmClient) { + throw new MissingLLMConfigurationError(); + } this.stagehand.log({ category: "extract", @@ -850,9 +866,17 @@ ${scriptContent} \ } const requestId = Math.random().toString(36).substring(2); - const llmClient = modelName - ? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) - : this.llmClient; + + // Use provided modelName if available AND if modelClientOptions has an API key, otherwise use the configured llmClient + const llmClient = + modelName && modelClientOptions?.apiKey + ? this.stagehand.llmProvider.getClient(modelName, modelClientOptions) + : this.stagehand.llmClient; + + // Add null check for llmClient before accessing modelName + if (!llmClient) { + throw new MissingLLMConfigurationError(); + } this.stagehand.log({ category: "observe", diff --git a/lib/handlers/handlerUtils/actHandlerUtils.ts b/lib/handlers/handlerUtils/actHandlerUtils.ts index 1c32cf7f9..faacc5c5f 100644 --- a/lib/handlers/handlerUtils/actHandlerUtils.ts +++ b/lib/handlers/handlerUtils/actHandlerUtils.ts @@ -8,32 +8,51 @@ import { StagehandClickError } from "@/types/stagehandErrors"; const IFRAME_STEP_RE = /^iframe(\[[^\]]+])?$/i; -export function deepLocator(root: Page | FrameLocator, xpath: string): Locator { - // 1 ─ prepend with slash if not already included - if (!xpath.startsWith("/")) xpath = "/" + xpath; +export function deepLocator( + root: Page | FrameLocator, + rawXPath: string, +): Locator { + // 1 ─ strip optional 'xpath=' prefix and whitespace + const xpath = rawXPath.replace(/^xpath=/i, "").trim(); + + // Split the path by sequences of slashes, but keep the slashes as + // separate elements in the array. This preserves separators like '//'. + // e.g., '//a/b' becomes ['//', 'a', '/', 'b'] + const parts = xpath.split(/(\/+)/).filter(Boolean); - // 2 ─ split into steps, accumulate until we hit an iframe step - const steps = xpath.split("/").filter(Boolean); // tokens let ctx: Page | FrameLocator = root; let buffer: string[] = []; const flushIntoFrame = () => { if (buffer.length === 0) return; - const selector = "xpath=/" + buffer.join("/"); + + // Join the buffered parts to form the selector for the iframe. + // .join('') is used because the separators are already in the buffer. + const selector = "xpath=" + buffer.join(""); ctx = (ctx as Page | FrameLocator).frameLocator(selector); - buffer = []; + buffer = []; // Reset buffer for the next path segment. }; - for (const step of steps) { - buffer.push(step); - if (IFRAME_STEP_RE.test(step)) { - // we've included the