Skip to content

Commit 31d6db1

Browse files
committed
Add a crawler task using crawl4ai
1 parent 5ee83d2 commit 31d6db1

File tree

11 files changed

+492
-116
lines changed

11 files changed

+492
-116
lines changed

apps/webapp/app/components/runs/v3/RunIcon.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ export function RunIcon({ name, className, spanName }: TaskIconProps) {
7373
case "trigger":
7474
return <TriggerIcon className={cn(className, "text-orange-500")} />;
7575
case "python":
76-
return <PythonLogoIcon />;
76+
return <PythonLogoIcon className={className} />;
7777
//log levels
7878
case "debug":
7979
case "log":

pnpm-lock.yaml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

references/d3-chat/package.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,14 @@
88
"start": "next start",
99
"lint": "next lint",
1010
"dev:trigger": "trigger dev",
11-
"tunnel": "ngrok http --url=d3-demo.ngrok.dev 3000"
11+
"tunnel": "ngrok http --url=d3-demo.ngrok.dev 3000",
12+
"python:install-requirements": "uv pip sync requirements.txt",
13+
"python:compile-requirements": "uv pip compile requirements.in -o requirements.txt",
14+
"python:install-browsers": "./.venv/bin/playwright install",
15+
"python:install": "pnpm run python:compile-requirements && pnpm run python:install-requirements",
16+
"python:create-env": "uv venv .venv",
17+
"db:migrate": "tsx -r dotenv/config src/lib/migrate.ts up",
18+
"db:migrate:down": "tsx -r dotenv/config src/lib/migrate.ts down"
1219
},
1320
"dependencies": {
1421
"@radix-ui/react-avatar": "^1.1.3",
@@ -23,6 +30,7 @@
2330
"zod": "3.23.8",
2431
"@trigger.dev/react-hooks": "workspace:*",
2532
"@trigger.dev/sdk": "workspace:*",
33+
"@trigger.dev/python": "workspace:*",
2634
"nanoid": "^5.1.5",
2735
"ai": "4.2.5",
2836
"@ai-sdk/openai": "1.3.3",

references/d3-chat/requirements.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
crawl4ai
2+
playwright
references/d3-chat/requirements.txt

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
# This file was autogenerated by uv via the following command:
2+
# uv pip compile requirements.in -o requirements.txt
3+
aiofiles==24.1.0
4+
# via crawl4ai
5+
aiohappyeyeballs==2.6.1
6+
# via aiohttp
7+
aiohttp==3.11.15
8+
# via
9+
# crawl4ai
10+
# litellm
11+
aiosignal==1.3.2
12+
# via aiohttp
13+
aiosqlite==0.21.0
14+
# via crawl4ai
15+
annotated-types==0.7.0
16+
# via pydantic
17+
anyio==4.9.0
18+
# via
19+
# httpx
20+
# openai
21+
attrs==25.3.0
22+
# via
23+
# aiohttp
24+
# jsonschema
25+
# referencing
26+
beautifulsoup4==4.13.3
27+
# via crawl4ai
28+
certifi==2025.1.31
29+
# via
30+
# httpcore
31+
# httpx
32+
# requests
33+
cffi==1.17.1
34+
# via cryptography
35+
charset-normalizer==3.4.1
36+
# via requests
37+
click==8.1.8
38+
# via
39+
# crawl4ai
40+
# litellm
41+
# nltk
42+
colorama==0.4.6
43+
# via crawl4ai
44+
crawl4ai==0.5.0.post8
45+
# via -r requirements.in
46+
cryptography==44.0.2
47+
# via pyopenssl
48+
cssselect==1.3.0
49+
# via crawl4ai
50+
distro==1.9.0
51+
# via openai
52+
fake-http-header==0.3.5
53+
# via tf-playwright-stealth
54+
fake-useragent==2.1.0
55+
# via crawl4ai
56+
faust-cchardet==2.1.19
57+
# via crawl4ai
58+
filelock==3.18.0
59+
# via huggingface-hub
60+
frozenlist==1.5.0
61+
# via
62+
# aiohttp
63+
# aiosignal
64+
fsspec==2025.3.2
65+
# via huggingface-hub
66+
greenlet==3.1.1
67+
# via playwright
68+
h11==0.14.0
69+
# via httpcore
70+
httpcore==1.0.7
71+
# via httpx
72+
httpx==0.28.1
73+
# via
74+
# crawl4ai
75+
# litellm
76+
# openai
77+
huggingface-hub==0.30.1
78+
# via tokenizers
79+
humanize==4.12.2
80+
# via crawl4ai
81+
idna==3.10
82+
# via
83+
# anyio
84+
# httpx
85+
# requests
86+
# yarl
87+
importlib-metadata==8.6.1
88+
# via litellm
89+
jinja2==3.1.6
90+
# via litellm
91+
jiter==0.9.0
92+
# via openai
93+
joblib==1.4.2
94+
# via nltk
95+
jsonschema==4.23.0
96+
# via litellm
97+
jsonschema-specifications==2024.10.1
98+
# via jsonschema
99+
litellm==1.65.1
100+
# via crawl4ai
101+
lxml==5.3.1
102+
# via crawl4ai
103+
markdown-it-py==3.0.0
104+
# via rich
105+
markupsafe==3.0.2
106+
# via jinja2
107+
mdurl==0.1.2
108+
# via markdown-it-py
109+
multidict==6.3.0
110+
# via
111+
# aiohttp
112+
# yarl
113+
nltk==3.9.1
114+
# via crawl4ai
115+
numpy==2.2.4
116+
# via
117+
# crawl4ai
118+
# rank-bm25
119+
openai==1.70.0
120+
# via litellm
121+
packaging==24.2
122+
# via huggingface-hub
123+
pillow==10.4.0
124+
# via crawl4ai
125+
playwright==1.51.0
126+
# via
127+
# -r requirements.in
128+
# crawl4ai
129+
# tf-playwright-stealth
130+
propcache==0.3.1
131+
# via
132+
# aiohttp
133+
# yarl
134+
psutil==7.0.0
135+
# via crawl4ai
136+
pycparser==2.22
137+
# via cffi
138+
pydantic==2.11.1
139+
# via
140+
# crawl4ai
141+
# litellm
142+
# openai
143+
pydantic-core==2.33.0
144+
# via pydantic
145+
pyee==12.1.1
146+
# via playwright
147+
pygments==2.19.1
148+
# via rich
149+
pyopenssl==25.0.0
150+
# via crawl4ai
151+
pyperclip==1.9.0
152+
# via crawl4ai
153+
python-dotenv==1.1.0
154+
# via
155+
# crawl4ai
156+
# litellm
157+
pyyaml==6.0.2
158+
# via huggingface-hub
159+
rank-bm25==0.2.2
160+
# via crawl4ai
161+
referencing==0.36.2
162+
# via
163+
# jsonschema
164+
# jsonschema-specifications
165+
regex==2024.11.6
166+
# via
167+
# nltk
168+
# tiktoken
169+
requests==2.32.3
170+
# via
171+
# crawl4ai
172+
# huggingface-hub
173+
# tiktoken
174+
rich==14.0.0
175+
# via crawl4ai
176+
rpds-py==0.24.0
177+
# via
178+
# jsonschema
179+
# referencing
180+
sniffio==1.3.1
181+
# via
182+
# anyio
183+
# openai
184+
snowballstemmer==2.2.0
185+
# via crawl4ai
186+
soupsieve==2.6
187+
# via beautifulsoup4
188+
tf-playwright-stealth==1.1.2
189+
# via crawl4ai
190+
tiktoken==0.9.0
191+
# via litellm
192+
tokenizers==0.21.1
193+
# via litellm
194+
tqdm==4.67.1
195+
# via
196+
# huggingface-hub
197+
# nltk
198+
# openai
199+
typing-extensions==4.13.0
200+
# via
201+
# aiosqlite
202+
# beautifulsoup4
203+
# huggingface-hub
204+
# openai
205+
# pydantic
206+
# pydantic-core
207+
# pyee
208+
# typing-inspection
209+
typing-inspection==0.4.0
210+
# via pydantic
211+
urllib3==2.3.0
212+
# via requests
213+
xxhash==3.5.0
214+
# via crawl4ai
215+
yarl==1.18.3
216+
# via aiohttp
217+
zipp==3.21.0
218+
# via importlib-metadata
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import type { BuildContext, BuildExtension } from "@trigger.dev/build";
2+
3+
// This is a custom build extension to install Playwright and Chromium
4+
export function installPlaywrightChromium(): BuildExtension {
5+
return {
6+
name: "InstallPlaywrightChromium",
7+
onBuildComplete(context: BuildContext) {
8+
const instructions = [
9+
// Base and Chromium dependencies
10+
`RUN apt-get update && apt-get install -y --no-install-recommends \
11+
curl unzip npm libnspr4 libatk1.0-0 libatk-bridge2.0-0 libatspi2.0-0 \
12+
libasound2 libnss3 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 \
13+
libgbm1 libxkbcommon0 \
14+
&& apt-get clean && rm -rf /var/lib/apt/lists/*`,
15+
16+
// Install Playwright and Chromium
17+
`RUN npm install -g playwright`,
18+
`RUN mkdir -p /ms-playwright`,
19+
`RUN PLAYWRIGHT_BROWSERS_PATH=/ms-playwright python -m playwright install --with-deps chromium`,
20+
];
21+
22+
context.addLayer({
23+
id: "playwright",
24+
image: { instructions },
25+
deploy: {
26+
env: {
27+
PLAYWRIGHT_BROWSERS_PATH: "/ms-playwright",
28+
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: "1",
29+
PLAYWRIGHT_SKIP_BROWSER_VALIDATION: "1",
30+
},
31+
override: true,
32+
},
33+
});
34+
},
35+
};
36+
}
references/d3-chat/src/lib/migrate.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { sql } from "@vercel/postgres";
2+
3+
export async function migrate(direction: "up" | "down") {
4+
if (direction === "up") {
5+
await migrateUp();
6+
} else {
7+
await migrateDown();
8+
}
9+
}
10+
11+
/**
 * Apply the migration: create the "todos" table used by the d3-chat demo.
 *
 * Idempotent — `CREATE TABLE IF NOT EXISTS` makes re-runs safe.
 *
 * @returns the raw query result under `createTable` so the caller can inspect it.
 */
export async function migrateUp() {
  const createTable = await sql`
    CREATE TABLE IF NOT EXISTS todos (
      id VARCHAR(255) PRIMARY KEY,
      user_id VARCHAR(255) NOT NULL,
      title VARCHAR(255) NOT NULL,
      description TEXT,
      status VARCHAR(50) NOT NULL DEFAULT 'pending',
      priority INTEGER NOT NULL DEFAULT 3,
      due_date TIMESTAMP WITH TIME ZONE,
      created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
      updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
      completed_at TIMESTAMP WITH TIME ZONE,
      tags TEXT[], -- Array of tags
      assigned_to VARCHAR(255)
    );
  `;

  console.log(`Created "todos" table`);

  return {
    createTable,
  };
}
35+
36+
export async function migrateDown() {
37+
const dropTable = await sql`
38+
DROP TABLE IF EXISTS todos;
39+
`;
40+
41+
console.log(`Dropped "todos" table`);
42+
}
43+
44+
async function main() {
45+
const direction = process.argv[2];
46+
await migrate(direction as "up" | "down");
47+
}
48+
49+
main().catch(console.error);

0 commit comments

Comments
 (0)