Commit c5ae04e (1 parent 9b17bb9)

[Browser Rendering] Add a how-to on using Browser Rendering together with LLMs

1 file changed: src/content/docs/browser-rendering/how-to (+254, −0)
---
title: Use browser rendering with AI
sidebar:
  order: 2
---

import { Aside } from "~/components";

The ability to browse websites can be crucial when building workflows with AI. Here, we provide an example where we use Browser Rendering to visit `https://news.ycombinator.com/` and then, using a machine learning model available in [Workers AI](/workers-ai/), extract the first post as JSON matching a specified schema.
## Prerequisites

1. Use the `create-cloudflare` CLI to generate a new Hello World Cloudflare Worker script:

   ```sh
   npm create cloudflare@latest -- browser-worker
   ```

2. Install `@cloudflare/puppeteer`, which allows you to control the Browser Rendering instance:

   ```sh
   npm i @cloudflare/puppeteer
   ```

3. Install `zod`, so we can define our output format, and `zod-to-json-schema`, so we can convert it into a JSON schema:

   ```sh
   npm i zod
   npm i zod-to-json-schema
   ```

4. Add your Browser Rendering binding to your new `wrangler.toml` configuration:

   ```toml
   browser = { binding = "BROWSER" }
   ```

5. To use [Workers AI](/workers-ai/), you need your [Account ID and API token](/get-started/rest-api/#1-get-api-token-and-account-id). Once you have those, create a [`.dev.vars`](/workers/configuration/environment-variables/#add-environment-variables-via-wrangler) file and set them there:

   ```
   ACCOUNT_ID=
   API_TOKEN=
   ```

   We use `.dev.vars` here since it's only for local development; in production you'd use [Secrets](/workers/configuration/secrets/).
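As a point of reference, the JSON schema that `zod-to-json-schema` produces for the object used later in this guide looks roughly like the hand-written equivalent below. This is a sketch, not the library's exact output — the real result also carries metadata such as a `$schema` field:

```typescript
// Hand-written approximation of what zodToJsonSchema(z.object({ ... }))
// returns for the schema used later in this guide.
const outputSchema = {
  type: "object",
  properties: {
    title: { type: "string" },
    url: { type: "string" },
    totalComments: { type: "number" },
  },
  required: ["title", "url", "totalComments"],
};

console.log(JSON.stringify(outputSchema));
```

Sending this schema along with the prompt is what lets the model know which fields and types to emit.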
## Load the page using Browser Rendering

In the code below, we launch a browser using `await puppeteer.launch(env.BROWSER)`, extract the rendered text, and close the browser. Then, with the user prompt, the desired output schema, and the rendered text, we prepare a prompt to send to the LLM.

Replace the contents of `src/index.ts` with the following skeleton script:
```ts
// src/index.ts
import { z } from "zod";
import puppeteer from "@cloudflare/puppeteer";
import zodToJsonSchema from "zod-to-json-schema";

export default {
  async fetch(request, env) {
    const url = new URL(request.url);
    if (url.pathname !== "/") {
      return new Response("Not found", { status: 404 });
    }

    // Your prompt and site to scrape
    const userPrompt = "Extract the first post";
    const targetUrl = "https://news.ycombinator.com/";

    // Launch browser
    const browser = await puppeteer.launch(env.BROWSER);
    const page = await browser.newPage();
    await page.goto(targetUrl);

    // Get website text
    const renderedText = await page.evaluate(() => {
      const body = document.querySelector("body");
      return body ? body.innerText : "";
    });
    // Close browser since we no longer need it
    await browser.close();

    // Define your desired JSON schema
    const outputSchema = zodToJsonSchema(
      z.object({ title: z.string(), url: z.string(), totalComments: z.number() }),
    );

    // Example prompt
    const prompt = `
You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
Your task is to extract the requested information from the text and output it in the specified JSON schema format:
${JSON.stringify(outputSchema)}
User Data Extraction Goal: ${userPrompt}
Text extracted from the webpage: ${renderedText}`;

    // TODO: call the LLM
    // const result = await this.getLLMResult(env, prompt, outputSchema);
    // return Response.json(result);
    return new Response("TODO: call the LLM");
  },
};
```
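The `env` parameter above is left untyped. As a minimal sketch, the bindings this guide relies on could be given an explicit shape — the type used for `BROWSER` here is a placeholder assumption; in practice you would use the binding type exported by `@cloudflare/puppeteer`:

```typescript
// Sketch of the environment bindings used in this guide.
// BROWSER comes from wrangler.toml; ACCOUNT_ID and API_TOKEN from .dev.vars.
interface Env {
  BROWSER: unknown; // Browser Rendering binding (use @cloudflare/puppeteer's own type in practice)
  ACCOUNT_ID: string;
  API_TOKEN: string;
}

const example: Env = { BROWSER: null, ACCOUNT_ID: "abc123", API_TOKEN: "secret" };
console.log(Object.keys(example).join(","));
```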
## Call an LLM

With the webpage text, the user's goal, and the output schema in hand, we can now use an LLM to transform the text into JSON according to the user's request. The example below uses `@hf/thebloke/deepseek-coder-6.7b-instruct-awq`, but other [models](/workers-ai/models/), or services like OpenAI, could be used with minimal changes:
```ts
async getLLMResult(env, prompt: string, schema?: any) {
  const model = "@hf/thebloke/deepseek-coder-6.7b-instruct-awq";
  const requestBody = {
    messages: [{ role: "user", content: prompt }],
  };
  const aiUrl = `https://api.cloudflare.com/client/v4/accounts/${env.ACCOUNT_ID}/ai/run/${model}`;

  const response = await fetch(aiUrl, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${env.API_TOKEN}`,
    },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    console.log(await response.text());
    throw new Error(`LLM call failed ${aiUrl} ${response.status}`);
  }

  // Process response
  const data = await response.json();
  const text = data.result.response || "";
  // The model may wrap its answer in a fenced code block; extract the inner JSON
  const value = (text.match(/```(?:json)?\s*([\s\S]*?)\s*```/) || [null, text])[1];
  try {
    return JSON.parse(value);
  } catch (e) {
    console.error(`${e} . Response: ${value}`);
  }
}
```
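The trickiest line in `getLLMResult` is the response cleanup. Isolated as a standalone sketch, the same regex behaves like this:

```typescript
// Standalone version of the response-cleanup step in getLLMResult:
// if the model wrapped its JSON in a ```json fence, keep only the inner
// text; otherwise fall back to the raw response.
function extractJson(text: string): string {
  const match = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
  return match ? match[1] : text;
}

const fenced = 'Sure!\n```json\n{"title":"Example post"}\n```';
console.log(extractJson(fenced)); // {"title":"Example post"}
```

Models frequently add chatty preamble and Markdown fences around JSON output, so this fallback keeps `JSON.parse` from failing on well-formed answers.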
If you want to use Browser Rendering with OpenAI instead, you only need to change the `aiUrl` endpoint and the `requestBody` (or check out the [llm-scraper-worker](https://www.npmjs.com/package/llm-scraper-worker) package).
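For instance, a hypothetical OpenAI variant could swap in something like the following. The model name and JSON-mode flag are illustrative assumptions, and you would also need to read OpenAI's response shape (`choices[0].message.content`) instead of `data.result.response`:

```typescript
// Hypothetical OpenAI replacement for aiUrl and requestBody.
// The model name is just an example, not a recommendation.
const aiUrl = "https://api.openai.com/v1/chat/completions";
const requestBody = {
  model: "gpt-4o-mini", // example model
  response_format: { type: "json_object" }, // ask the API for JSON output
  messages: [{ role: "user", content: "your prompt here" }],
};

console.log(JSON.stringify(requestBody.response_format));
```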
## Conclusion

The full Worker script now looks as follows:
```ts
import { z } from "zod";
import puppeteer from "@cloudflare/puppeteer";
import zodToJsonSchema from "zod-to-json-schema";

export default {
  async fetch(request, env) {
    const url = new URL(request.url);
    if (url.pathname !== "/") {
      return new Response("Not found", { status: 404 });
    }

    // Your prompt and site to scrape
    const userPrompt = "Extract the first post";
    const targetUrl = "https://news.ycombinator.com/";

    // Launch browser
    const browser = await puppeteer.launch(env.BROWSER);
    const page = await browser.newPage();
    await page.goto(targetUrl);

    // Get website text
    const renderedText = await page.evaluate(() => {
      const body = document.querySelector("body");
      return body ? body.innerText : "";
    });
    // Close browser since we no longer need it
    await browser.close();

    // Define your desired JSON schema
    const outputSchema = zodToJsonSchema(
      z.object({ title: z.string(), url: z.string(), totalComments: z.number() }),
    );

    // Example prompt
    const prompt = `
You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
Your task is to extract the requested information from the text and output it in the specified JSON schema format:
${JSON.stringify(outputSchema)}
User Data Extraction Goal: ${userPrompt}
Text extracted from the webpage: ${renderedText}`;

    // Call the LLM
    const result = await this.getLLMResult(env, prompt, outputSchema);
    return Response.json(result);
  },

  async getLLMResult(env, prompt: string, schema?: any) {
    const model = "@hf/thebloke/deepseek-coder-6.7b-instruct-awq";
    const requestBody = {
      messages: [{ role: "user", content: prompt }],
    };
    const aiUrl = `https://api.cloudflare.com/client/v4/accounts/${env.ACCOUNT_ID}/ai/run/${model}`;

    const response = await fetch(aiUrl, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${env.API_TOKEN}`,
      },
      body: JSON.stringify(requestBody),
    });
    if (!response.ok) {
      console.log(await response.text());
      throw new Error(`LLM call failed ${aiUrl} ${response.status}`);
    }

    // Process response
    const data = await response.json();
    const text = data.result.response || "";
    // The model may wrap its answer in a fenced code block; extract the inner JSON
    const value = (text.match(/```(?:json)?\s*([\s\S]*?)\s*```/) || [null, text])[1];
    try {
      return JSON.parse(value);
    } catch (e) {
      console.error(`${e} . Response: ${value}`);
    }
  },
};
```
You can run this script to test it using Wrangler's `--remote` flag:

```sh
npx wrangler dev --remote
```
With your script now running, you can go to `http://localhost:8787/` and should see something like the following:

```json
{
  "title": "Debugging: Indispensable rules for finding even the most elusive problems",
  "url": "dwheeler.com",
  "totalComments": 143
}
```
