|
| 1 | +--- |
| 2 | +title: "Lightpanda" |
| 3 | +sidebarTitle: "Lightpanda" |
| 4 | +description: "These examples demonstrate how to use Lightpanda with Trigger.dev." |
| 5 | +tag: "v4" |
| 6 | +--- |
| 7 | + |
| 8 | +import ScrapingWarning from "/snippets/web-scraping-warning.mdx"; |
| 9 | +import UpgradeToV4Note from "/snippets/upgrade-to-v4-note.mdx"; |
| 10 | + |
| 11 | +<UpgradeToV4Note /> |
| 12 | + |
| 13 | +## Overview |
| 14 | + |
| 15 | +Lightpanda is a purpose-built browser for AI and automation workflows. It is 10x faster and uses 10x less RAM than headless Chrome. |
| 16 | + |
| 17 | +Here are a few examples of how to use Lightpanda with Trigger.dev. |
| 18 | + |
| 19 | +<ScrapingWarning /> |
| 20 | + |
| 21 | +## Limitations |
| 22 | + |
| 23 | +- Lightpanda does not support the `puppeteer` screenshot feature. |
| 24 | + |
| 25 | +## Using Lightpanda Cloud |
| 26 | + |
| 27 | +### Prerequisites |
| 28 | + |
| 29 | +- A [Lightpanda](https://lightpanda.io/) cloud token |
| 30 | + |
| 31 | +### Get links from a website |
| 32 | +In this task we use Lightpanda browser to get links from a provided URL. You will have to pass the URL as a payload when triggering the task. |
| 33 | + |
| 34 | +Make sure to add `LIGHTPANDA_TOKEN` to your Trigger.dev dashboard on the Environment Variables page: |
| 35 | +```bash |
| 36 | +LIGHTPANDA_TOKEN="<your-token>" |
| 37 | +``` |
| 38 | + |
| 39 | +```ts trigger/lightpanda-cloud-puppeteer.ts |
| 40 | +import { logger, task } from "@trigger.dev/sdk"; |
| 41 | +import puppeteer from "puppeteer-core"; |
| 42 | + |
| 43 | +/** Connects to Lightpanda's cloud browser and returns the `href` of every `<a>` on `payload.url`; throws if the URL or `LIGHTPANDA_TOKEN` is missing. */ |
| 44 | +export const lightpandaCloudPuppeteer = task({ |
| 45 | +  id: "lightpanda-cloud-puppeteer", |
| 46 | +  machine: { |
| 47 | +    preset: "micro", |
| 48 | +  }, |
| 49 | +  run: async (payload: { url: string }, { ctx }) => { |
| 50 | +    logger.log("Lets get a page's links with Lightpanda!", { payload, ctx }); |
| 51 | + |
| 52 | +    if (!payload.url) { |
| 53 | +      logger.warn("Please define the payload url"); |
| 54 | +      throw new Error("payload.url is undefined"); |
| 55 | +    } |
| 56 | + |
| 57 | +    const token = process.env.LIGHTPANDA_TOKEN; |
| 58 | +    if (!token) { |
| 59 | +      logger.warn("Please define the env variable LIGHTPANDA_TOKEN"); |
| 60 | +      throw new Error("LIGHTPANDA_TOKEN is undefined"); |
| 61 | +    } |
| 62 | + |
| 63 | +    // Connect to Lightpanda's cloud |
| 64 | +    const browser = await puppeteer.connect({ |
| 65 | +      browserWSEndpoint: `wss://cloud.lightpanda.io/ws?browser=lightpanda&token=${token}`, |
| 66 | +    }); |
| 67 | + |
| 68 | +    // try/finally guarantees the connection is dropped even if navigation or evaluation throws. |
| 69 | +    try { |
| 70 | +      const context = await browser.createBrowserContext(); |
| 71 | +      const page = await context.newPage(); |
| 72 | + |
| 73 | +      // Dump all the links from the page. |
| 74 | +      await page.goto(payload.url); |
| 75 | +      const links = await page.evaluate(() => |
| 76 | +        Array.from(document.querySelectorAll("a")).map((row) => row.getAttribute("href")) |
| 77 | +      ); |
| 78 | + |
| 79 | +      logger.info("Processing done, shutting down…"); |
| 80 | +      await page.close(); |
| 81 | +      await context.close(); |
| 82 | + |
| 83 | +      logger.info("✅ Completed"); |
| 84 | +      return { links }; |
| 85 | +    } finally { |
| 86 | +      // Runs on success and on failure alike, so the cloud connection is never left open. |
| 87 | +      await browser.disconnect(); |
| 88 | +    } |
| 89 | +  }, |
| 90 | +}); |
| 91 | +``` |
| 92 | + |
| 93 | +### Proxies |
| 94 | + |
| 95 | +Proxies can be used with your browser via the `proxy` query string parameter. By default, the proxy used is `datacenter`, which is a pool of shared datacenter IPs. |
| 96 | +`datacenter` accepts an optional `country` query string parameter which is an [ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) country code. |
| 97 | + |
| 98 | +```bash |
| 99 | +# This example will use a German IP |
| 100 | +wss://cloud.lightpanda.io/ws?proxy=datacenter&country=de&token=${token} |
| 101 | +``` |
| 102 | + |
| 103 | +### Session |
| 104 | + |
| 105 | +A session is alive until you close it or the connection is closed. The max duration of a session is 15 minutes. |
| 106 | + |
| 107 | +## Using Lightpanda browser directly |
| 108 | + |
| 109 | +### Prerequisites |
| 110 | + |
| 111 | +- Set up the [Lightpanda build extension](/config/extensions/lightpanda) |
| 112 | + |
| 113 | +### Get the HTML of a webpage |
| 114 | + |
| 115 | +This task will dump the HTML of a provided URL using the Lightpanda browser binary. You will have to pass the URL as a payload when triggering the task. |
| 116 | + |
| 117 | +```ts trigger/lightpanda-fetch.ts |
| 118 | +import { logger, task } from "@trigger.dev/sdk"; |
| 119 | +import { execFileSync } from "node:child_process"; |
| 120 | + |
| 121 | +/** Runs `lightpanda fetch --dump` on `payload.url` and returns the command's output as `message`; throws if the URL is missing. */ |
| 122 | +export const lightpandaFetch = task({ |
| 123 | +  id: "lightpanda-fetch", |
| 124 | +  machine: { |
| 125 | +    preset: "micro", |
| 126 | +  }, |
| 127 | +  run: async (payload: { url: string }, { ctx }) => { |
| 128 | +    logger.log("Lets get a page's content with Lightpanda!", { payload, ctx }); |
| 129 | + |
| 130 | +    if (!payload.url) { |
| 131 | +      logger.warn("Please define the payload url"); |
| 132 | +      throw new Error("payload.url is undefined"); |
| 133 | +    } |
| 134 | + |
| 135 | +    // Pass the URL as an argument vector (no shell) so metacharacters in the payload cannot inject commands. |
| 136 | +    const buffer = execFileSync("lightpanda", ["fetch", "--dump", payload.url]); |
| 137 | + |
| 138 | +    logger.info("✅ Completed"); |
| 139 | + |
| 140 | +    return { message: buffer.toString() }; |
| 141 | +  }, |
| 142 | +}); |
| 143 | +``` |
| 144 | + |
| 145 | +### Lightpanda CDP with Puppeteer |
| 146 | + |
| 147 | +This task initializes a Lightpanda CDP server and uses it with `puppeteer-core` to scrape a provided URL. |
| 148 | + |
| 149 | +```ts trigger/lightpanda-cdp.ts |
| 150 | +import { logger, task } from "@trigger.dev/sdk"; |
| 151 | +import { spawn, type ChildProcessWithoutNullStreams } from "node:child_process"; |
| 152 | +import puppeteer from "puppeteer-core"; |
| 153 | + |
| 154 | +/** |
| 155 | + * Starts `lightpanda serve` with the given host and port. |
| 156 | + * Resolves with the child process 250 ms after its "spawn" event; |
| 157 | + * rejects if the child emits "error". |
| 158 | + */ |
| 159 | +const spawnLightpanda = async (host: string, port: string) => |
| 160 | +  new Promise<ChildProcessWithoutNullStreams>((resolve, reject) => { |
| 161 | +    const serveArgs = ["serve", "--host", host, "--port", port, "--log_level", "info"]; |
| 162 | +    const lpServer = spawn("lightpanda", serveArgs); |
| 163 | + |
| 164 | +    // An "error" event from the child is forwarded as the promise rejection. |
| 165 | +    lpServer.on("error", reject); |
| 166 | + |
| 167 | +    lpServer.on("spawn", () => { |
| 168 | +      logger.info("Running Lightpanda's CDP server…", { |
| 169 | +        pid: lpServer.pid, |
| 170 | +      }); |
| 171 | + |
| 172 | +      // Hand the process back after a fixed 250 ms pause; readiness of the port is not probed. |
| 173 | +      setTimeout(() => resolve(lpServer), 250); |
| 174 | +    }); |
| 175 | +  }); |
| 176 | + |
| 177 | +/** Spawns a local Lightpanda CDP server, connects Puppeteer to it and returns the `href` of every `<a>` on `payload.url`; throws if the URL is missing. */ |
| 178 | +export const lightpandaCDP = task({ |
| 179 | +  id: "lightpanda-cdp", |
| 180 | +  machine: { |
| 181 | +    preset: "micro", |
| 182 | +  }, |
| 183 | +  run: async (payload: { url: string }, { ctx }) => { |
| 184 | +    logger.log("Lets get a page's links with Lightpanda!", { payload, ctx }); |
| 185 | + |
| 186 | +    if (!payload.url) { |
| 187 | +      logger.warn("Please define the payload url"); |
| 188 | +      throw new Error("payload.url is undefined"); |
| 189 | +    } |
| 190 | + |
| 191 | +    const host = process.env.LIGHTPANDA_CDP_HOST ?? "127.0.0.1"; |
| 192 | +    const port = process.env.LIGHTPANDA_CDP_PORT ?? "9222"; |
| 193 | + |
| 194 | +    // Launch Lightpanda's CDP server |
| 195 | +    const lpProcess = await spawnLightpanda(host, port); |
| 196 | + |
| 197 | +    // try/finally guarantees the spawned server is signalled to stop even if connecting or scraping throws. |
| 198 | +    try { |
| 199 | +      const browser = await puppeteer.connect({ |
| 200 | +        browserWSEndpoint: `ws://${host}:${port}`, |
| 201 | +      }); |
| 202 | +      const context = await browser.createBrowserContext(); |
| 203 | +      const page = await context.newPage(); |
| 204 | + |
| 205 | +      // Dump all the links from the page. |
| 206 | +      await page.goto(payload.url); |
| 207 | + |
| 208 | +      const links = await page.evaluate(() => |
| 209 | +        Array.from(document.querySelectorAll("a")).map((row) => row.getAttribute("href")) |
| 210 | +      ); |
| 211 | + |
| 212 | +      logger.info("Processing done"); |
| 213 | +      logger.info("Shutting down…"); |
| 214 | + |
| 215 | +      // Close Puppeteer instance |
| 216 | +      await browser.close(); |
| 217 | + |
| 218 | +      logger.info("✅ Completed"); |
| 219 | + |
| 220 | +      return { links }; |
| 221 | +    } finally { |
| 222 | +      // Stop Lightpanda's CDP server on success and on failure alike, so the child process is not left running. |
| 223 | +      lpProcess.kill(); |
| 224 | +    } |
| 225 | +  }, |
| 226 | +}); |
| 227 | +``` |
0 commit comments