Skip to content

Commit 234379d

Browse files
authored
feat!: add cloudflare queue & convex http actions for workers (#11)
1 parent ba0a4c4 commit 234379d

File tree

23 files changed

+756
-134
lines changed

23 files changed

+756
-134
lines changed

.github/workflows/scraper.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ jobs:
2525
- name: 📦 Install dependencies
2626
run: bun install
2727

28+
- name: 📦 Install Doppler CLI
29+
uses: dopplerhq/cli-action@v3
30+
31+
- name: 🔑 Sync secrets with Cloudflare Worker
32+
run: doppler secrets --json | jq -c 'with_entries(.value = .value.computed)' | wrangler secret bulk
33+
2834
- name: 🚀 Deploy to Cloudflare Workers
2935
run: bun run deploy
3036
env:

apps/scraper/drizzle-dev.config.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import { config } from "dotenv";
2+
import { defineConfig } from "drizzle-kit";
3+
4+
config({ path: "./.env" });
5+
6+
export default defineConfig({
7+
out: "./src/drizzle/migrations",
8+
schema: "./src/drizzle/schema.ts",
9+
dialect: "sqlite",
10+
dbCredentials: {
11+
// biome-ignore lint/style/noNonNullAssertion: env variables must exist
12+
url: process.env.DEV_DATABASE_URL!,
13+
},
14+
});
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/** biome-ignore-all lint/style/noNonNullAssertion: env variables must exist */
2+
import { config } from "dotenv";
3+
import { defineConfig } from "drizzle-kit";
4+
5+
config({ path: "./.env" });
6+
7+
const accountId = process.env.CLOUDFLARE_ACCOUNT_ID!;
8+
const databaseId = process.env.CLOUDFLARE_DATABASE_ID!;
9+
const token = process.env.CLOUDFLARE_D1_TOKEN!;
10+
11+
export default defineConfig({
12+
out: "./src/drizzle/migrations",
13+
schema: "./src/drizzle/schema.ts",
14+
dialect: "sqlite",
15+
driver: "d1-http",
16+
dbCredentials: {
17+
accountId,
18+
databaseId,
19+
token,
20+
},
21+
});

apps/scraper/package.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,22 @@
77
"deploy": "wrangler deploy --minify",
88
"check": "biome check",
99
"check:types": "tsc --noEmit",
10+
"db:studio:local": "drizzle-kit studio --config=drizzle-dev.config.ts",
11+
"db:studio:remote": "drizzle-kit studio --config=drizzle-prod.config.ts",
12+
"db:push:local": "drizzle-kit push --config=drizzle-dev.config.ts",
13+
"db:push:remote": "drizzle-kit push --config=drizzle-prod.config.ts",
1014
"cf-typegen": "wrangler types --env-interface CloudflareBindings"
1115
},
1216
"dependencies": {
13-
"hono": "^4.9.9"
17+
"dotenv": "^17.2.3",
18+
"drizzle-orm": "^0.44.5",
19+
"hono": "^4.9.9",
20+
"zod": "^4.1.11",
21+
"@dev-team-fall-25/server": "workspace:*"
1422
},
1523
"devDependencies": {
1624
"@biomejs/biome": "2.2.4",
25+
"drizzle-kit": "^0.31.5",
1726
"wrangler": "^4.40.3"
1827
}
1928
}

apps/scraper/src/drizzle/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { drizzle } from "drizzle-orm/d1";
2+
3+
const createDB = async (env: CloudflareBindings) => {
4+
return drizzle(env.DB);
5+
};
6+
7+
export default createDB;

apps/scraper/src/drizzle/schema.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
2+
3+
export const jobs = sqliteTable("jobs", {
4+
id: integer("id").primaryKey({ autoIncrement: true }),
5+
url: text("url").notNull(),
6+
status: text("status", {
7+
enum: ["pending", "processing", "completed", "failed"],
8+
}).notNull(),
9+
jobType: text("job_type", {
10+
enum: ["discovery", "program", "course"],
11+
}).notNull(),
12+
createdAt: integer("created_at", { mode: "timestamp" })
13+
.notNull()
14+
.$defaultFn(() => new Date()),
15+
startedAt: integer("started_at", { mode: "timestamp" }),
16+
completedAt: integer("completed_at", { mode: "timestamp" }),
17+
});
18+
19+
export const errorLogs = sqliteTable("error_logs", {
20+
id: integer("id").primaryKey({ autoIncrement: true }),
21+
jobId: integer("job_id").references(() => jobs.id),
22+
errorType: text("error_type", {
23+
enum: ["network", "parsing", "validation", "timeout"],
24+
}).notNull(),
25+
errorMessage: text("error_message").notNull(),
26+
stackTrace: text("stack_trace"),
27+
retryCount: integer("retry_count").notNull(),
28+
timestamp: integer("timestamp", { mode: "timestamp" }).notNull(),
29+
});

apps/scraper/src/index.ts

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,31 @@
1+
/** biome-ignore-all lint/correctness/noUnusedFunctionParameters: disable for now as they haven't been implemented yet */
12
import { Hono } from "hono";
23

3-
const app = new Hono();
4+
const app = new Hono<{ Bindings: CloudflareBindings }>();
45

5-
app.get("/", (c) => {
6-
return c.text("Hello Hono!");
6+
app.get("/", async (c) => {
7+
// const db = await createDB(c.env);
8+
// TODO: use hono to render a dashboard to monitor the scraping status
79
});
810

9-
export default app;
11+
export default {
12+
fetch: app.fetch,
13+
14+
async scheduled(event: ScheduledEvent, env: CloudflareBindings) {
15+
// const db = await createDB(env);
16+
// const api = new ConvexApi({
17+
// baseUrl: env.CONVEX_SITE_URL,
18+
// apiKey: env.CONVEX_API_KEY,
19+
// });
20+
// TODO: build the list of URLs that need to be scraped and add them to the queue as "discovery" jobs
21+
},
22+
23+
async queue(batch: MessageBatch<Error>, env: CloudflareBindings) {
24+
// const db = await createDB(env);
25+
// const api = new ConvexApi({
26+
// baseUrl: env.CONVEX_SITE_URL,
27+
// apiKey: env.CONVEX_API_KEY,
28+
// });
29+
// TODO: set up jobs for scraping the given URL and saving the structured data to the Convex database
30+
},
31+
};

apps/scraper/src/lib/convex.ts

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import {
2+
ZCreatePrerequisite,
3+
ZCreateRequirement,
4+
ZDeleteCourse,
5+
ZDeletePrerequisites,
6+
ZDeleteProgram,
7+
ZDeleteRequirements,
8+
ZUpsertCourse,
9+
ZUpsertProgram,
10+
} from "@dev-team-fall-25/server/convex/http";
11+
import type * as z from "zod/mini";
12+
13+
type ConvexApiConfig = {
14+
baseUrl: string;
15+
apiKey: string;
16+
};
17+
18+
export class ConvexApi {
19+
private config: ConvexApiConfig;
20+
21+
constructor(config: ConvexApiConfig) {
22+
this.config = config;
23+
}
24+
25+
private async request<T extends z.ZodMiniType>(
26+
path: string,
27+
schema: T,
28+
data: z.infer<T>,
29+
): Promise<{ success: boolean; id?: string }> {
30+
const validated = schema.parse(data);
31+
32+
const response = await fetch(`${this.config.baseUrl}/${path}`, {
33+
method: "POST",
34+
headers: {
35+
"Content-Type": "application/json",
36+
"X-API-KEY": this.config.apiKey,
37+
},
38+
body: JSON.stringify(validated),
39+
});
40+
41+
if (!response.ok) {
42+
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
43+
}
44+
45+
return response.json();
46+
}
47+
48+
async upsertCourse(data: z.infer<typeof ZUpsertCourse>) {
49+
const result = await this.request(
50+
"/api/courses/upsert",
51+
ZUpsertCourse,
52+
data,
53+
);
54+
return result.id;
55+
}
56+
57+
async deleteCourse(data: z.infer<typeof ZDeleteCourse>) {
58+
await this.request("/api/courses/delete", ZDeleteCourse, data);
59+
}
60+
61+
async upsertProgram(data: z.infer<typeof ZUpsertProgram>) {
62+
const result = await this.request(
63+
"/api/programs/upsert",
64+
ZUpsertProgram,
65+
data,
66+
);
67+
return result.id;
68+
}
69+
70+
async deleteProgram(data: z.infer<typeof ZDeleteProgram>) {
71+
await this.request("/api/programs/delete", ZDeleteProgram, data);
72+
}
73+
74+
async createRequirement(data: z.infer<typeof ZCreateRequirement>) {
75+
const result = await this.request(
76+
"/api/requirements/create",
77+
ZCreateRequirement,
78+
data,
79+
);
80+
return result.id;
81+
}
82+
83+
async deleteRequirements(data: z.infer<typeof ZDeleteRequirements>) {
84+
await this.request("/api/requirements/delete", ZDeleteRequirements, data);
85+
}
86+
87+
async createPrerequisite(data: z.infer<typeof ZCreatePrerequisite>) {
88+
const result = await this.request(
89+
"/api/prerequisites/create",
90+
ZCreatePrerequisite,
91+
data,
92+
);
93+
return result.id;
94+
}
95+
96+
async deletePrerequisites(data: z.infer<typeof ZDeletePrerequisites>) {
97+
await this.request("/api/prerequisites/delete", ZDeletePrerequisites, data);
98+
}
99+
}

apps/scraper/tsconfig.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
"lib": ["ESNext"],
99
"jsx": "react-jsx",
1010
"jsxImportSource": "hono/jsx"
11-
}
11+
},
12+
"include": ["src/**/*", "worker-configuration.d.ts"]
1213
}

apps/scraper/worker-configuration.d.ts

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
/* eslint-disable */
2-
// Generated by Wrangler by running `wrangler types --env-interface CloudflareBindings` (hash: 502e2034a04451831617b45e933366c8)
3-
// Runtime types generated with workerd@1.20250924.0 2025-09-26
2+
// Generated by Wrangler by running `wrangler types --env-interface CloudflareBindings` (hash: e48c63bb4c89a59e02be6825aa1e7c60)
3+
// Runtime types generated with workerd@1.20250927.0 2025-09-26
44
declare namespace Cloudflare {
55
interface GlobalProps {
66
mainModule: typeof import("./src/index");
77
}
88
interface Env {
9+
SCRAPING_BASE_URL: "https://bulletins.nyu.edu/";
10+
CONVEX_SITE_URL: string;
11+
CONVEX_API_KEY: string;
12+
DB: D1Database;
13+
SCRAPING_QUEUE: Queue;
914
}
1015
}
1116
interface CloudflareBindings extends Cloudflare.Env {}
@@ -7463,6 +7468,19 @@ interface MediaError extends Error {
74637468
readonly message: string;
74647469
readonly stack?: string;
74657470
}
7471+
declare module 'cloudflare:node' {
7472+
interface NodeStyleServer {
7473+
listen(...args: unknown[]): this;
7474+
address(): {
7475+
port?: number | null | undefined;
7476+
};
7477+
}
7478+
export function httpServerHandler(port: number): ExportedHandler;
7479+
export function httpServerHandler(options: {
7480+
port: number;
7481+
}): ExportedHandler;
7482+
export function httpServerHandler(server: NodeStyleServer): ExportedHandler;
7483+
}
74667484
type Params<P extends string = any> = Record<P, string | string[]>;
74677485
type EventContext<Env, P extends string, Data> = {
74687486
request: Request<unknown, IncomingRequestCfProperties<unknown>>;
@@ -7720,19 +7738,6 @@ declare namespace Cloudflare {
77207738
& (K extends GlobalProp<"durableNamespaces", never> ? MainModule[K] extends new (...args: any[]) => infer DoInstance ? DoInstance extends Rpc.DurableObjectBranded ? DurableObjectNamespace<DoInstance> : DurableObjectNamespace<undefined> : DurableObjectNamespace<undefined> : {});
77217739
};
77227740
}
7723-
declare module 'cloudflare:node' {
7724-
export interface DefaultHandler {
7725-
fetch?(request: Request): Response | Promise<Response>;
7726-
tail?(events: TraceItem[]): void | Promise<void>;
7727-
trace?(traces: TraceItem[]): void | Promise<void>;
7728-
scheduled?(controller: ScheduledController): void | Promise<void>;
7729-
queue?(batch: MessageBatch<unknown>): void | Promise<void>;
7730-
test?(controller: TestController): void | Promise<void>;
7731-
}
7732-
export function httpServerHandler(options: {
7733-
port: number;
7734-
}, handlers?: Omit<DefaultHandler, 'fetch'>): DefaultHandler;
7735-
}
77367741
declare namespace CloudflareWorkersModule {
77377742
export type RpcStub<T extends Rpc.Stubable> = Rpc.Stub<T>;
77387743
export const RpcStub: {

0 commit comments

Comments
 (0)