@@ -30,10 +30,15 @@ npm i zod
 npm i zod-to-json-schema
 ```

-3. Add your Browser Rendering binding to your new `wrangler.toml` configuration:
+3. Activate the nodejs compatibility flag and add your Browser Rendering binding to your new `wrangler.toml` configuration:

 ```toml
-browser = { binding = "BROWSER" }
+compatibility_flags = ["nodejs_compat"]
+```
+
+```toml
+[browser]
+binding = "MY_BROWSER"
 ```

 4. In order to use [Workers AI](/workers-ai/), you need to get your [Account ID and API token](/workers-ai/get-started/rest-api/#1-get-api-token-and-account-id).
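The Worker needs both values at runtime; the script below reads the token from `env.API_TOKEN`. A minimal sketch, assuming you store them as Worker secrets (the `ACCOUNT_ID` name is illustrative, match it to whatever your request URL uses):

```sh
# Store the Workers AI credentials as secrets on the Worker.
# API_TOKEN matches env.API_TOKEN in the script below; ACCOUNT_ID is illustrative.
npx wrangler secret put ACCOUNT_ID
npx wrangler secret put API_TOKEN
```

For local testing with `wrangler dev`, the same names can also go in a `.dev.vars` file.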
@@ -54,7 +59,6 @@ Then, with the user prompt, the desired output schema and the rendered text, pre
 Replace the contents of `src/index.ts` with the following skeleton script:

 ```ts
-// src/index.ts
 import { z } from "zod";
 import puppeteer from "@cloudflare/puppeteer";
 import zodToJsonSchema from "zod-to-json-schema";
@@ -67,16 +71,17 @@ export default {
     }

     // Your prompt and site to scrape
-    const userPrompt = "Extract the first post";
-    const targetUrl = "https://news.ycombinator.com/";
+    const userPrompt = "Extract the first post only.";
+    const targetUrl = "https://labs.apnic.net/";

     // Launch browser
-    const browser = await puppeteer.launch(env.BROWSER);
+    const browser = await puppeteer.launch(env.MY_BROWSER);
     const page = await browser.newPage();
     await page.goto(targetUrl);

     // Get website text
     const renderedText = await page.evaluate(() => {
+      // @ts-ignore js code to run in the browser context
       const body = document.querySelector("body");
       return body ? body.innerText : "";
     });
@@ -85,22 +90,29 @@ export default {

     // define your desired json schema
     const outputSchema = zodToJsonSchema(
-      z.object({ title: z.string(), url: z.string(), totalComments: z.number() })
+      z.object({ title: z.string(), url: z.string(), date: z.string() })
     );

     // Example prompt
     const prompt = `
     You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
     Your task is to extract the requested information from the text and output it in the specified JSON schema format:
+
     ${JSON.stringify(outputSchema)}
+
+    DO NOT include anything else besides the JSON output, no markdown, no plaintext, just JSON.
+
     User Data Extraction Goal: ${userPrompt}
+
     Text extracted from the webpage: ${renderedText}`;

     // TODO call llm
-    // const result = await this.getLLMResult(env, prompt, outputSchema);
+    // const result = await getLLMResult(env, prompt, outputSchema);
     // return Response.json(result);
   }
-};
+
+} satisfies ExportedHandler<Env>;
+
 ```
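The `${JSON.stringify(outputSchema)}` interpolation embeds the JSON Schema that `zod-to-json-schema` derives from the Zod object, so the model knows exactly which keys to return. For the three-field object above, that schema is roughly (illustrative and abridged):

```json
{
  "type": "object",
  "properties": {
    "title": { "type": "string" },
    "url": { "type": "string" },
    "date": { "type": "string" }
  },
  "required": ["title", "url", "date"],
  "additionalProperties": false
}
```

Sending the schema in the prompt, together with the explicit "JSON only" instruction, is what keeps the reply parseable later.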

 ## Call an LLM
@@ -164,16 +176,17 @@ export default {
     }

     // Your prompt and site to scrape
-    const userPrompt = "Extract the first post";
-    const targetUrl = "https://news.ycombinator.com/";
+    const userPrompt = "Extract the first post only.";
+    const targetUrl = "https://labs.apnic.net/";

     // Launch browser
-    const browser = await puppeteer.launch(env.BROWSER);
+    const browser = await puppeteer.launch(env.MY_BROWSER);
     const page = await browser.newPage();
     await page.goto(targetUrl);

     // Get website text
     const renderedText = await page.evaluate(() => {
+      // @ts-ignore js code to run in the browser context
       const body = document.querySelector("body");
       return body ? body.innerText : "";
     });
@@ -182,23 +195,31 @@ export default {

     // define your desired json schema
     const outputSchema = zodToJsonSchema(
-      z.object({ title: z.string(), url: z.string(), totalComments: z.number() })
+      z.object({ title: z.string(), url: z.string(), date: z.string() })
     );

     // Example prompt
     const prompt = `
     You are a sophisticated web scraper. You are given the user data extraction goal and the JSON schema for the output data format.
     Your task is to extract the requested information from the text and output it in the specified JSON schema format:
+
     ${JSON.stringify(outputSchema)}
+
+    DO NOT include anything else besides the JSON output, no markdown, no plaintext, just JSON.
+
     User Data Extraction Goal: ${userPrompt}
+
     Text extracted from the webpage: ${renderedText}`;

     // call llm
-    const result = await this.getLLMResult(env, prompt, outputSchema);
+    const result = await getLLMResult(env, prompt, outputSchema);
     return Response.json(result);
-  },
+  }
+
+} satisfies ExportedHandler<Env>;

-  async getLLMResult(env, prompt: string, schema?: any) {
+
+async function getLLMResult(env, prompt: string, schema?: any) {
   const model = "@hf/thebloke/deepseek-coder-6.7b-instruct-awq"
   const requestBody = {
     messages: [{
@@ -213,7 +234,7 @@ export default {
     method: "POST",
     headers: {
       "Content-Type": "application/json",
-      Authorization: `Bearer ${env.LLM_API_KEY}`,
+      Authorization: `Bearer ${env.API_TOKEN}`,
     },
     body: JSON.stringify(requestBody),
   });
@@ -223,18 +244,15 @@ export default {
   }

   // process response
-  const data = await response.json();
+  const data = await response.json() as { result: { response: string } };
   const text = data.result.response || '';
   const value = (text.match(/```(?:json)?\s*([\s\S]*?)\s*```/) || [null, text])[1];
   try {
     return JSON.parse(value);
   } catch (e) {
     console.error(`${e}. Response: ${value}`)
   }
-  },
-};
-
-
+}
 ```
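For reference, the body that `getLLMResult` parses is the usual Cloudflare REST API envelope, with the generated text under `result.response`. An abridged, illustrative response (the model often wraps the JSON in a Markdown fence, which the regex above strips before `JSON.parse`):

```json
{
  "result": {
    "response": "```json\n{ \"title\": \"...\", \"url\": \"...\", \"date\": \"...\" }\n```"
  },
  "success": true,
  "errors": [],
  "messages": []
}
```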

 You can run this script to test it using Wrangler's `--remote` flag:
@@ -247,8 +265,10 @@ With your script now running, you can go to `http://localhost:8787/` and should

 ```json
 {
-  "title": "Debugging: Indispensable rules for finding even the most elusive problems",
-  "url": "dwheeler.com",
-  "totalComments": 143
+  "title": "IP Addresses in 2024",
+  "url": "http://example.com/ip-addresses-in-2024",
+  "date": "11 Jan 2025"
 }
-```
+```
+
+For more complex websites or prompts, you might need a better model. Check out the latest models in [Workers AI](/workers-ai/models/).
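Since the model is a single constant inside `getLLMResult`, trying a different one is a one-line change; a sketch, assuming the chosen name exists in the current Workers AI catalog:

```ts
// Illustrative: swap in any text-generation model from the Workers AI catalog.
const model = "@cf/meta/llama-3.1-8b-instruct";
```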