@@ -118,22 +118,92 @@ This task initialises a Lightpanda CDP server to allow you to scrape directly vi
118118### Task
119119Your task must launch Lightpanda as a child process so that its CDP websocket is available for Puppeteer to connect to and scrape.
120120
121- ``` ts trigger/lightpandaLaunch.ts
122- import puppeteer from " puppeteer"
123-
124- export const lightpandaLaunch = task ({
125- id: " lightpanda-launch" ,
126- run : async (payload : { url: string }) => {
121+ ``` ts trigger/lightpandaCDP.ts
122+ import { logger , task } from ' @trigger.dev/sdk/v3'
123+ import { spawn , type ChildProcessWithoutNullStreams } from ' node:child_process'
124+ import puppeteer from ' puppeteer'
125+
/**
 * Starts Lightpanda's CDP server as a child process listening on
 * 127.0.0.1:9222 and resolves with the child once it has stayed alive for a
 * short warm-up period.
 *
 * @param log - Logger used to report the child's pid once it has spawned.
 * @returns The running child process; the caller is responsible for killing it.
 * @throws Error when `$LIGHTPANDA_BROWSER_PATH` is unset or empty, when the
 *   binary cannot be spawned, or when it exits during the warm-up period.
 */
const spawnLightpanda = async (
  log: typeof logger,
): Promise<ChildProcessWithoutNullStreams> => {
  const browserPath = process.env.LIGHTPANDA_BROWSER_PATH
  if (!browserPath) {
    // Fail with an actionable message instead of the opaque TypeError that
    // `spawn(undefined, …)` would raise.
    throw new Error('$LIGHTPANDA_BROWSER_PATH is undefined or empty')
  }

  return new Promise<ChildProcessWithoutNullStreams>((resolve, reject) => {
    const child = spawn(browserPath, [
      'serve',
      '--host',
      '127.0.0.1',
      '--port',
      '9222',
      '--log_level',
      'info',
    ])

    let warmup: ReturnType<typeof setTimeout> | undefined

    // If the binary dies during the warm-up (bad flag, port already taken, …)
    // reject instead of handing a dead process back to the caller.
    const onEarlyExit = (code: number | null, signal: string | null) => {
      clearTimeout(warmup)
      reject(
        new Error(
          `Lightpanda exited before its CDP server was ready (code: ${code}, signal: ${signal})`,
        ),
      )
    }
    child.once('exit', onEarlyExit)

    // Kept as a persistent listener on purpose: an 'error' event emitted after
    // the promise has settled must not crash the host process.
    child.on('error', error => {
      clearTimeout(warmup)
      reject(error)
    })

    child.once('spawn', () => {
      log.info("Running Lightpanda's CDP server…", {
        pid: child.pid,
      })

      // Give the server a moment to start listening before the caller connects.
      warmup = setTimeout(() => {
        child.off('exit', onEarlyExit)
        resolve(child)
      }, 250)
    })
  })
}
132148
133- const page = await browser .newPage ()
/**
 * Trigger.dev task that starts a local Lightpanda CDP server, connects
 * Puppeteer to it, opens `payload.url` and returns the `href` attribute of
 * every `<a>` element on the page.
 *
 * The Puppeteer connection and the Lightpanda child process are always torn
 * down, including when navigation or evaluation fails.
 *
 * @throws Error when `payload.url` is missing or `$LIGHTPANDA_BROWSER_PATH` is
 *   not defined; any failure from spawning, connecting or scraping is
 *   propagated unchanged so its original stack is preserved.
 */
export const lightpandaCDP = task({
  id: 'lightpanda-cdp',
  machine: {
    preset: 'micro',
  },
  run: async (payload: { url: string }, { ctx }) => {
    logger.log("Lets get a page's links with Lightpanda!", { payload, ctx })

    if (!payload.url) {
      logger.warn('Please define the payload url')
      throw new Error('payload.url is undefined')
    }

    if (typeof process.env.LIGHTPANDA_BROWSER_PATH === 'undefined') {
      // The full process.env is deliberately not attached to this log entry:
      // it may contain secrets that should not end up in task logs.
      logger.warn('Please define the env variable $LIGHTPANDA_BROWSER_PATH')

      throw new Error('$LIGHTPANDA_BROWSER_PATH is undefined')
    }

    let links: (string | null)[]

    // Launch Lightpanda's CDP server
    const lpProcess = await spawnLightpanda(logger)

    try {
      const browser = await puppeteer.connect({
        browserWSEndpoint: 'ws://127.0.0.1:9222',
      })

      try {
        const context = await browser.createBrowserContext()
        const page = await context.newPage()

        // Dump all the links from the page.
        await page.goto(payload.url)

        links = await page.evaluate(() => {
          return Array.from(document.querySelectorAll('a')).map(row => {
            return row.getAttribute('href')
          })
        })

        logger.info('Processing done')
      } finally {
        logger.info('Shutting down…')

        // Close Puppeteer instance
        await browser.close()
      }
    } finally {
      // Stop Lightpanda's CDP Server, even if connecting or scraping failed,
      // so no orphaned browser process is left behind.
      lpProcess.stdout.destroy()
      lpProcess.stderr.destroy()
      lpProcess.kill()
    }

    logger.info('✅ Completed')

    return {
      links,
    }
  },
})
0 commit comments