Skip to content

Commit 11efe5c

Browse files
committed
fix: lightpanda 3rd example
1 parent f43f8af commit 11efe5c

File tree

1 file changed

+82
-12
lines changed

1 file changed

+82
-12
lines changed

docs/guides/examples/lightpanda.mdx

Lines changed: 82 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -118,22 +118,92 @@ This task initialises a Lightpanda CDP server to allow you to scrape directly vi
118118
### Task
119119
Your task has to launch Lightpanda as a child process so that its CDP websocket is available for Puppeteer to connect to and scrape through.
120120

121-
```ts trigger/lightpandaLaunch.ts
122-
import puppeteer from "puppeteer"
123-
124-
export const lightpandaLaunch = task({
125-
id: "lightpanda-launch",
126-
run: async (payload: { url: string }) => {
121+
```ts trigger/lightpandaCDP.ts
122+
import { logger, task } from '@trigger.dev/sdk/v3'
123+
import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process'
124+
import puppeteer from 'puppeteer'
125+
126+
/**
 * Starts Lightpanda's CDP server (`serve` on 127.0.0.1:9222) as a child process.
 *
 * The promise resolves with the child process once it has been spawned and has
 * stayed alive for a short grace period; Lightpanda gives no explicit "ready"
 * signal here, so the grace period is a best-effort wait before connecting.
 *
 * @param log - Logger used to report the spawned process id.
 * @returns The running child process; the caller is responsible for killing it.
 * @throws Rejects if `$LIGHTPANDA_BROWSER_PATH` is unset, if the binary cannot
 *   be spawned, or if the process exits during the grace period.
 */
const spawnLightpanda = (log: typeof logger): Promise<ChildProcessWithoutNullStreams> =>
  new Promise<ChildProcessWithoutNullStreams>((resolve, reject) => {
    const startupGraceMs = 250

    // Fail with a clear message instead of asserting the env var is a string.
    const browserPath = process.env.LIGHTPANDA_BROWSER_PATH
    if (!browserPath) {
      reject(new Error('$LIGHTPANDA_BROWSER_PATH is undefined'))
      return
    }

    const child = spawn(browserPath, [
      'serve',
      '--host',
      '127.0.0.1',
      '--port',
      '9222',
      '--log_level',
      'info',
    ])

    let startupTimer: ReturnType<typeof setTimeout> | undefined

    // A server that dies while starting (bad flag, port already bound, …) must
    // not be handed back to the caller as if it were running.
    const onEarlyExit = (code: number | null, signal: string | null) => {
      if (startupTimer !== undefined) clearTimeout(startupTimer)
      reject(
        new Error(`Lightpanda exited during startup (code: ${code}, signal: ${signal})`)
      )
    }

    // Kept attached for the lifetime of the child so a late 'error' event never
    // becomes an unhandled one; rejecting an already-settled promise is a no-op.
    child.on('error', e => {
      if (startupTimer !== undefined) clearTimeout(startupTimer)
      reject(e)
    })
    child.once('exit', onEarlyExit)

    child.once('spawn', () => {
      log.info("Running Lightpanda's CDP server…", {
        pid: child.pid,
      })

      startupTimer = setTimeout(() => {
        child.off('exit', onEarlyExit)
        resolve(child)
      }, startupGraceMs)
    })
  })
132148

133-
const page = await browser.newPage()
149+
/**
 * Trigger.dev task that launches Lightpanda's CDP server, connects Puppeteer to
 * it, and returns the `href` attribute of every `<a>` element on `payload.url`.
 *
 * Payload: `{ url: string }`.
 * Returns: `{ links }`, where each entry is the anchor's `href` attribute or
 * `null` when the anchor has none.
 * Throws: an `Error` if `payload.url` or `$LIGHTPANDA_BROWSER_PATH` is missing,
 * or if launching, connecting or scraping fails.
 */
export const lightpandaCDP = task({
  id: 'lightpanda-cdp',
  machine: {
    preset: 'micro',
  },
  run: async (payload: { url: string }, { ctx }) => {
    logger.log("Lets get a page's links with Lightpanda!", { payload, ctx })

    if (!payload.url) {
      logger.warn('Please define the payload url')
      throw new Error('payload.url is undefined')
    }

    if (typeof process.env.LIGHTPANDA_BROWSER_PATH === 'undefined') {
      // Deliberately not attaching process.env: it would write every
      // environment variable (API keys, tokens, …) into the task logs.
      logger.warn('Please define the env variable $LIGHTPANDA_BROWSER_PATH')

      throw new Error('$LIGHTPANDA_BROWSER_PATH is undefined')
    }

    try {
      // Launch Lightpanda's CDP server
      const lpProcess = await spawnLightpanda(logger)

      let links: (string | null)[]
      try {
        const browser = await puppeteer.connect({
          browserWSEndpoint: 'ws://127.0.0.1:9222',
        })

        try {
          const context = await browser.createBrowserContext()
          const page = await context.newPage()

          // Dump all the links from the page.
          await page.goto(payload.url)

          links = await page.evaluate(() =>
            Array.from(document.querySelectorAll('a')).map(row => row.getAttribute('href'))
          )

          logger.info('Processing done')
        } finally {
          // Close Puppeteer instance, even when navigation or evaluation failed
          logger.info('Shutting down…')
          await browser.close()
        }
      } finally {
        // Stop Lightpanda's CDP server on every path so a failed run does not
        // leave an orphaned process behind.
        lpProcess.stdout.destroy()
        lpProcess.stderr.destroy()
        lpProcess.kill()
      }

      logger.info('✅ Completed')

      return {
        links,
      }
    } catch (e: unknown) {
      // Rethrow real errors untouched (keeps message, stack and type); only
      // wrap non-Error values so callers always receive an Error.
      throw e instanceof Error ? e : new Error(String(e))
    }
  },
})

0 commit comments

Comments
 (0)