Skip to content

Commit 99b5bb4

Browse files
committed
[Browser Rendering] Improve tutorial
1 parent dff72d6 commit 99b5bb4

File tree

1 file changed

+46
-26
lines changed
  • src/content/docs/browser-rendering/how-to

1 file changed

+46
-26
lines changed

src/content/docs/browser-rendering/how-to/ai.mdx

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,15 @@ npm i zod
3030
npm i zod-to-json-schema
3131
```
3232

33-
3. Add your Browser Rendering binding to your new `wrangler.toml` configuration:
33+
3. Enable the `nodejs_compat` compatibility flag and add your Browser Rendering binding to your `wrangler.toml` configuration:
3434

3535
```toml
36-
browser = { binding = "BROWSER" }
36+
compatibility_flags = [ "nodejs_compat" ]
37+
```
38+
39+
```toml
40+
[browser]
41+
binding = "MY_BROWSER"
3742
```
3843

3944
4. To use [Workers AI](/workers-ai/), you need your [Account ID and API token](/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id).
@@ -54,7 +59,6 @@ Then, with the user prompt, the desired output schema and the rendered text, pre
5459
Replace the contents of `src/index.ts` with the following skeleton script:
5560

5661
```ts
57-
// src/index.ts
5862
import { z } from "zod";
5963
import puppeteer from "@cloudflare/puppeteer";
6064
import zodToJsonSchema from "zod-to-json-schema";
@@ -67,16 +71,17 @@ export default {
6771
}
6872

6973
// Your prompt and site to scrape
70-
const userPrompt = "Extract the first post";
71-
const targetUrl = "https://news.ycombinator.com/";
74+
const userPrompt = "Extract the first post only.";
75+
const targetUrl = "https://labs.apnic.net/";
7276

7377
// Launch browser
74-
const browser = await puppeteer.launch(env.BROWSER);
78+
const browser = await puppeteer.launch(env.MY_BROWSER);
7579
const page = await browser.newPage();
7680
await page.goto(targetUrl);
7781

7882
// Get website text
7983
const renderedText = await page.evaluate(() => {
84+
// @ts-ignore js code to run in the browser context
8085
const body = document.querySelector("body");
8186
return body ? body.innerText : "";
8287
});
@@ -85,22 +90,29 @@ export default {
8590

8691
// define your desired json schema
8792
const outputSchema = zodToJsonSchema(
88-
z.object({ title: z.string(), url: z.string(), totalComments: z.number() })
93+
z.object({ title: z.string(), url: z.string(), date: z.string() })
8994
);
9095

9196
// Example prompt
9297
const prompt = `
9398
You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
9499
Your task is to extract the requested information from the text and output it in the specified JSON schema format:
100+
95101
${JSON.stringify(outputSchema)}
102+
103+
DO NOT include anything else besides the JSON output, no markdown, no plaintext, just JSON.
104+
96105
User Data Extraction Goal: ${userPrompt}
106+
97107
Text extracted from the webpage: ${renderedText}`;
98108

99109
// TODO call llm
100-
//const result = await this.getLLMResult(env, prompt, outputSchema);
110+
//const result = await getLLMResult(env, prompt, outputSchema);
101111
//return Response.json(result);
102112
}
103-
};
113+
114+
} satisfies ExportedHandler<Env>;
115+
104116
```
105117

106118
## Call an LLM
@@ -164,16 +176,17 @@ export default {
164176
}
165177

166178
// Your prompt and site to scrape
167-
const userPrompt = "Extract the first post";
168-
const targetUrl = "https://news.ycombinator.com/";
179+
const userPrompt = "Extract the first post only.";
180+
const targetUrl = "https://labs.apnic.net/";
169181

170182
// Launch browser
171-
const browser = await puppeteer.launch(env.BROWSER);
183+
const browser = await puppeteer.launch(env.MY_BROWSER);
172184
const page = await browser.newPage();
173185
await page.goto(targetUrl);
174186

175187
// Get website text
176188
const renderedText = await page.evaluate(() => {
189+
// @ts-ignore js code to run in the browser context
177190
const body = document.querySelector("body");
178191
return body ? body.innerText : "";
179192
});
@@ -182,23 +195,31 @@ export default {
182195

183196
// define your desired json schema
184197
const outputSchema = zodToJsonSchema(
185-
z.object({ title: z.string(), url: z.string(), totalComments: z.number() })
198+
z.object({ title: z.string(), url: z.string(), date: z.string() })
186199
);
187200

188201
// Example prompt
189202
const prompt = `
190203
You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
191204
Your task is to extract the requested information from the text and output it in the specified JSON schema format:
205+
192206
${JSON.stringify(outputSchema)}
207+
208+
DO NOT include anything else besides the JSON output, no markdown, no plaintext, just JSON.
209+
193210
User Data Extraction Goal: ${userPrompt}
211+
194212
Text extracted from the webpage: ${renderedText}`;
195213

196214
// call llm
197-
const result = await this.getLLMResult(env, prompt, outputSchema);
215+
const result = await getLLMResult(env, prompt, outputSchema);
198216
return Response.json(result);
199-
},
217+
}
218+
219+
} satisfies ExportedHandler<Env>;
200220

201-
async getLLMResult(env, prompt: string, schema?: any) {
221+
222+
async function getLLMResult(env, prompt: string, schema?: any) {
202223
const model = "@hf/thebloke/deepseek-coder-6.7b-instruct-awq"
203224
const requestBody = {
204225
messages: [{
@@ -213,7 +234,7 @@ export default {
213234
method: "POST",
214235
headers: {
215236
"Content-Type": "application/json",
216-
Authorization: `Bearer ${env.LLM_API_KEY}`,
237+
Authorization: `Bearer ${env.API_TOKEN}`,
217238
},
218239
body: JSON.stringify(requestBody),
219240
});
@@ -223,18 +244,15 @@ export default {
223244
}
224245

225246
// process response
226-
const data = await response.json();
247+
const data = await response.json() as { result: { response: string }};
227248
const text = data.result.response || '';
228249
const value = (text.match(/```(?:json)?\s*([\s\S]*?)\s*```/) || [null, text])[1];
229250
try {
230251
return JSON.parse(value);
231252
} catch(e) {
232253
console.error(`${e} . Response: ${value}`)
233254
}
234-
},
235-
};
236-
237-
255+
}
238256
```
239257

240258
You can run this script to test it using Wrangler's `--remote` flag:
@@ -247,8 +265,10 @@ With your script now running, you can go to `http://localhost:8787/` and should
247265

248266
```json
249267
{
250-
"title": "Debugging: Indispensable rules for finding even the most elusive problems",
251-
"url": "dwheeler.com",
252-
"totalComments": 143
268+
"title": "IP Addresses in 2024",
269+
"url": "http://example.com/ip-addresses-in-2024",
270+
"date": "11 Jan 2025"
253271
}
254-
```
272+
```
273+
274+
For more complex websites or prompts, you might need a more capable model. Refer to the [Workers AI models catalog](/workers-ai/models/) for the latest available models.

0 commit comments

Comments
 (0)