diff --git a/docs/guides/python/python-crawl4ai.mdx b/docs/guides/python/python-crawl4ai.mdx
index b233506177..51d23fe75d 100644
--- a/docs/guides/python/python-crawl4ai.mdx
+++ b/docs/guides/python/python-crawl4ai.mdx
@@ -22,9 +22,26 @@ This demo showcases how to use Trigger.dev with Python to build a web crawler th
- Our [Python build extension](/config/extensions/pythonExtension) to install the dependencies and run the Python script
- [Crawl4AI](https://github.com/unclecode/crawl4ai), an open source LLM friendly web crawler
- A custom [Playwright extension](https://playwright.dev/) to create a headless chromium browser
+- Proxy support
+
+## Using proxies
+
+Proxies route your crawler's requests through a different IP address, which can help you avoid rate limits and IP blocks. Some popular proxy services are:
+
+- [Smartproxy](https://smartproxy.com/)
+- [Bright Data](https://brightdata.com/)
+- [Browserbase](https://browserbase.com/)
+- [Oxylabs](https://oxylabs.io/)
+- [ScrapingBee](https://scrapingbee.com/)
+
+Once you have chosen a proxy service, set the following environment variables in your project's `.env` file, and add them as environment variables in the Trigger.dev dashboard:
+
+- `PROXY_URL`: The URL of your proxy server (e.g., `http://proxy.example.com:8080`)
+- `PROXY_USERNAME`: Username for authenticated proxies (optional)
+- `PROXY_PASSWORD`: Password for authenticated proxies (optional)
+
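+For example, a `.env` file for an authenticated proxy might look like this (the host, port, and credentials are placeholders; use the values from your proxy provider):
+
+```bash .env
+PROXY_URL=http://proxy.example.com:8080
+PROXY_USERNAME=your-proxy-username
+PROXY_PASSWORD=your-proxy-password
+```
+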
## GitHub repo
{
-    const result = await python.runScript("./src/python/crawl-url.py", [payload.url]);
+    // Pass through any proxy environment variables
+    const env = {
+      PROXY_URL: process.env.PROXY_URL,
+      PROXY_USERNAME: process.env.PROXY_USERNAME,
+      PROXY_PASSWORD: process.env.PROXY_PASSWORD,
+    };
+
+    const result = await python.runScript("./src/python/crawl-url.py", [payload.url], { env });
    logger.debug("convert-url-to-markdown", {
      url: payload.url,
@@ -142,10 +166,34 @@ The Python script is a simple script using Crawl4AI that takes a URL and returns
```python src/python/crawl-url.py
import asyncio
import sys
+import os
from crawl4ai import *
+from crawl4ai.async_configs import BrowserConfig

async def main(url: str):
-    async with AsyncWebCrawler() as crawler:
+    # Get proxy configuration from environment variables
+    proxy_url = os.environ.get("PROXY_URL")
+    proxy_username = os.environ.get("PROXY_USERNAME")
+    proxy_password = os.environ.get("PROXY_PASSWORD")
+
+    # Configure the proxy
+    if proxy_url:
+        if proxy_username and proxy_password:
+            # Use authenticated proxy
+            proxy_config = {
+                "server": proxy_url,
+                "username": proxy_username,
+                "password": proxy_password,
+            }
+            browser_config = BrowserConfig(proxy_config=proxy_config)
+        else:
+            # Use simple proxy
+            browser_config = BrowserConfig(proxy=proxy_url)
+    else:
+        browser_config = BrowserConfig()
+
+    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url=url,
        )