Skip to content

Commit ab57ed0

Browse files
committed
Initial SDK release
0 parents  commit ab57ed0

File tree

13 files changed

+703
-0
lines changed

13 files changed

+703
-0
lines changed

.github/workflows/publish-npm.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
# Publishes the SDK to npm whenever a version tag (v*) is pushed.
name: Publish npm
on:
  push:
    tags:
      - 'v*'
jobs:
  build-publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # setup-node writes an .npmrc pointing at registry-url and wires
      # NODE_AUTH_TOKEN into it for authentication at publish time.
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          registry-url: 'https://registry.npmjs.org'
      - name: Build package
        run: |
          npm ci
          npm run build
      - name: Publish to npm
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        # --access public is required for scoped packages (package is @supacrawler/js).
        run: npm publish --access public

.gitignore

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Dependencies
2+
node_modules/
3+
4+
# Builds
5+
dist/
6+
*.tsbuildinfo
7+
8+
# Logs
9+
npm-debug.log*
10+
yarn-debug.log*
11+
yarn-error.log*
12+
13+
# OS
14+
.DS_Store
15+
16+
# Env
17+
.env

README.md

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# supacrawler-js
2+
3+
Typed TypeScript/JavaScript SDK for Supacrawler API.
4+
5+
## Install
6+
7+
- From npm:
8+
```bash
npm install @supacrawler/js
# or
yarn add @supacrawler/js
# or
pnpm add @supacrawler/js
# or
bun add @supacrawler/js
```
17+
18+
- From GitHub (direct):
19+
```bash
npm install github:Supacrawler/supacrawler-js
```
22+
23+
- From local path (development):
24+
```bash
25+
npm install
26+
npm run build
27+
```
28+
29+
## Usage
30+
31+
```ts
32+
import { SupacrawlerClient } from '@supacrawler/js'
33+
34+
const client = new SupacrawlerClient({ apiKey: process.env.SUPACRAWLER_API_KEY! })
35+
36+
// Scrape (markdown)
37+
await client.scrape({ url: 'https://example.com', format: 'markdown' })
38+
39+
// Scrape with rendering
40+
await client.scrape({ url: 'https://spa-example.com', format: 'html', render_js: true, wait: 3000, device: 'desktop' })
41+
42+
// Map links
43+
await client.scrape({ url: 'https://example.com', format: 'links', depth: 2, max_links: 100 })
44+
45+
// Create crawl job and wait
46+
const job = await client.createJob({ url: 'https://supabase.com/docs', type: 'crawl', depth: 2, link_limit: 50, format: 'markdown' })
47+
const status = await client.waitForJob(job.job_id)
48+
49+
// Screenshot job
50+
const sJob = await client.createScreenshotJob({ url: 'https://example.com', device: 'desktop', full_page: true })
51+
52+
// Watch create/pause/resume/check/delete
53+
const watch = await client.watchCreate({ url: 'https://example.com/pricing', frequency: 'daily', notify_email: '[email protected]' })
54+
await client.watchPause(watch.watch_id)
55+
await client.watchResume(watch.watch_id)
56+
await client.watchCheck(watch.watch_id)
57+
await client.watchDelete(watch.watch_id)
58+
```
59+
60+
## API coverage
61+
- GET `/v1/scrape` (all params)
62+
- POST `/v1/jobs`, GET `/v1/jobs/{id}`
63+
- POST `/v1/screenshots`
64+
- POST `/v1/watch`, GET `/v1/watch`, GET `/v1/watch/{id}`, DELETE `/v1/watch/{id}`, PATCH `/v1/watch/{id}/pause`, PATCH `/v1/watch/{id}/resume`, POST `/v1/watch/{id}/check`
65+
66+
## Development
67+
- Env var: `SUPACRAWLER_API_KEY`.
68+
- Example runners: `npm run example:scrape|jobs|screenshots|watch`.

package-lock.json

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
  "name": "@supacrawler/js",
  "version": "0.1.0",
  "description": "Typed TypeScript/JavaScript SDK for Supacrawler API (scrape, jobs, screenshots, watch)",
  "type": "module",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js"
    }
  },
  "files": [
    "dist"
  ],
  "keywords": ["scrape", "crawler", "screenshots", "watch", "sdk", "supacrawler"],
  "repository": {
    "type": "git",
    "url": "git+https://github.com/Supacrawler/supacrawler-js.git",
    "directory": "sdk/supacrawler-js"
  },
  "homepage": "https://supacrawler.com",
  "bugs": {
    "url": "https://github.com/Supacrawler/supacrawler-js/issues"
  },
  "author": "Supacrawler",
  "engines": {
    "node": ">=18"
  },
  "publishConfig": { "access": "public" },
  "scripts": {
    "build": "tsc -p tsconfig.json",
    "prepublishOnly": "npm run build",
    "example:scrape": "npm run build && node dist/examples/scrape.js",
    "example:jobs": "npm run build && node dist/examples/jobs.js",
    "example:screenshots": "npm run build && node dist/examples/screenshots.js",
    "example:watch": "npm run build && node dist/examples/watch.js"
  },
  "devDependencies": {
    "@types/node": "^20.12.7",
    "typescript": "^5.4.0"
  },
  "license": "MIT"
}

src/client.ts

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import type {
2+
ScrapeParams,
3+
ScrapeResponse,
4+
JobCreateRequest,
5+
JobCreateResponse,
6+
JobStatusResponse,
7+
ScreenshotRequest,
8+
ScreenshotCreateResponse,
9+
WatchCreateRequest,
10+
WatchCreateResponse,
11+
WatchGetResponse,
12+
WatchListResponse,
13+
WatchDeleteResponse,
14+
WatchActionResponse,
15+
} from './types'
16+
17+
/** Configuration for `SupacrawlerClient`. */
export interface ClientOptions {
  /** API key sent as a `Bearer` token on every request. */
  apiKey: string
  /** Override the API base URL; a trailing slash is stripped. Defaults to the hosted endpoint. */
  baseUrl?: string
  /** Custom fetch implementation (useful for tests/polyfills); defaults to the global `fetch`. */
  fetchFn?: typeof fetch
  /** Per-request timeout in milliseconds; defaults to 30000. */
  timeoutMs?: number
}
23+
24+
export class SupacrawlerError extends Error {
25+
status?: number
26+
body?: unknown
27+
constructor(message: string, status?: number, body?: unknown) {
28+
super(message)
29+
this.status = status
30+
this.body = body
31+
}
32+
}
33+
34+
export class SupacrawlerClient {
35+
private apiKey: string
36+
private baseUrl: string
37+
private fetchFn: typeof fetch
38+
private timeoutMs: number
39+
40+
constructor(options: ClientOptions) {
41+
this.apiKey = options.apiKey
42+
this.baseUrl = (options.baseUrl ?? 'https://api.supacrawler.com/api/v1').replace(/\/$/, '')
43+
this.fetchFn = options.fetchFn ?? fetch
44+
this.timeoutMs = options.timeoutMs ?? 30000
45+
}
46+
47+
private headers(): HeadersInit {
48+
return { Authorization: `Bearer ${this.apiKey}`, 'Content-Type': 'application/json' }
49+
}
50+
51+
private async request<T>(path: string, init?: RequestInit & { timeoutMs?: number }): Promise<T> {
52+
const controller = new AbortController()
53+
const timeout = init?.timeoutMs ?? this.timeoutMs
54+
const id = setTimeout(() => controller.abort(), timeout)
55+
56+
try {
57+
const res = await this.fetchFn(`${this.baseUrl}${path}`, { ...init, signal: controller.signal })
58+
return await this.handle<T>(res)
59+
} catch (e: any) {
60+
if (e?.name === 'AbortError') {
61+
throw new SupacrawlerError(`Request timed out after ${timeout}ms`)
62+
}
63+
throw e
64+
} finally {
65+
clearTimeout(id)
66+
}
67+
}
68+
69+
private async handle<T>(res: Response): Promise<T> {
70+
if (!res.ok) {
71+
let body: unknown
72+
try { body = await res.json() } catch { body = await res.text() }
73+
throw new SupacrawlerError(`HTTP ${res.status}`, res.status, body)
74+
}
75+
return res.json() as Promise<T>
76+
}
77+
78+
// ------------- Scrape -------------
79+
async scrape(params: ScrapeParams): Promise<ScrapeResponse> {
80+
const qs = new URLSearchParams()
81+
Object.entries(params).forEach(([k, v]) => {
82+
if (v !== undefined && v !== null) qs.append(k, String(v))
83+
})
84+
return this.request<ScrapeResponse>(`/scrape?${qs.toString()}`, { headers: { Authorization: `Bearer ${this.apiKey}` } })
85+
}
86+
87+
// ------------- Jobs (crawl + status) -------------
88+
async createJob(req: JobCreateRequest): Promise<JobCreateResponse> {
89+
return this.request<JobCreateResponse>(`/jobs`, {
90+
method: 'POST',
91+
headers: this.headers(),
92+
body: JSON.stringify(req),
93+
})
94+
}
95+
96+
async getJob(jobId: string): Promise<JobStatusResponse> {
97+
return this.request<JobStatusResponse>(`/jobs/${jobId}`, { headers: { Authorization: `Bearer ${this.apiKey}` } })
98+
}
99+
100+
async waitForJob(jobId: string, opts: { intervalMs?: number; timeoutMs?: number } = {}): Promise<JobStatusResponse> {
101+
const interval = opts.intervalMs ?? 3000
102+
const timeout = opts.timeoutMs ?? 300000
103+
const start = Date.now()
104+
while (true) {
105+
const status = await this.getJob(jobId)
106+
if (status.status === 'completed' || status.status === 'failed') return status
107+
if (Date.now() - start > timeout) throw new SupacrawlerError(`Timeout waiting for job ${jobId}`)
108+
await new Promise(r => setTimeout(r, interval))
109+
}
110+
}
111+
112+
// ------------- Screenshots -------------
113+
async createScreenshotJob(req: ScreenshotRequest): Promise<ScreenshotCreateResponse> {
114+
return this.request<ScreenshotCreateResponse>(`/screenshots`, {
115+
method: 'POST',
116+
headers: this.headers(),
117+
body: JSON.stringify(req),
118+
})
119+
}
120+
121+
// ------------- Watch -------------
122+
async watchCreate(req: WatchCreateRequest): Promise<WatchCreateResponse> {
123+
return this.request<WatchCreateResponse>(`/watch`, {
124+
method: 'POST',
125+
headers: this.headers(),
126+
body: JSON.stringify(req),
127+
})
128+
}
129+
130+
async watchGet(watchId: string): Promise<WatchGetResponse> {
131+
return this.request<WatchGetResponse>(`/watch/${watchId}`, { headers: { Authorization: `Bearer ${this.apiKey}` } })
132+
}
133+
134+
async watchList(): Promise<WatchListResponse> {
135+
return this.request<WatchListResponse>(`/watch`, { headers: { Authorization: `Bearer ${this.apiKey}` } })
136+
}
137+
138+
async watchDelete(watchId: string): Promise<WatchDeleteResponse> {
139+
return this.request<WatchDeleteResponse>(`/watch/${watchId}`, { method: 'DELETE', headers: { Authorization: `Bearer ${this.apiKey}` } })
140+
}
141+
142+
async watchPause(watchId: string): Promise<WatchActionResponse> {
143+
return this.request<WatchActionResponse>(`/watch/${watchId}/pause`, { method: 'PATCH', headers: this.headers() })
144+
}
145+
146+
async watchResume(watchId: string): Promise<WatchActionResponse> {
147+
return this.request<WatchActionResponse>(`/watch/${watchId}/resume`, { method: 'PATCH', headers: this.headers() })
148+
}
149+
150+
async watchCheck(watchId: string): Promise<WatchActionResponse> {
151+
return this.request<WatchActionResponse>(`/watch/${watchId}/check`, { method: 'POST', headers: this.headers() })
152+
}
153+
}

src/examples/jobs.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import { SupacrawlerClient } from '../index'
2+
3+
// Example: create a crawl job, wait for it to finish, and report how many
// pages were crawled. Requires SUPACRAWLER_API_KEY in the environment.
async function main() {
  const client = new SupacrawlerClient({ apiKey: process.env.SUPACRAWLER_API_KEY || 'YOUR_API_KEY' })

  // Crawl the Supabase docs up to depth 2, capped at 50 links, restricted to
  // the /blog/* and /docs/* path patterns; pages returned as markdown.
  const job = await client.createJob({
    url: 'https://supabase.com/docs',
    type: 'crawl',
    format: 'markdown',
    link_limit: 50,
    depth: 2,
    include_subdomains: false,
    render_js: false,
    patterns: ['/blog/*', '/docs/*']
  })

  console.log('Job created:', job)

  // Poll every 3 seconds, giving up after 10 minutes.
  const status = await client.waitForJob(job.job_id, { intervalMs: 3000, timeoutMs: 600000 })
  console.log('Final status:', status.status)
  // crawl_data (when present) presumably maps page URL -> page content — the
  // key count is used as the number of crawled pages.
  if (status.status === 'completed' && status.data && 'crawl_data' in status.data) {
    const crawlData = (status.data as any).crawl_data
    console.log('Crawled pages:', Object.keys(crawlData).length)
  }
}

// Exit non-zero on any failure so CI/example runners surface errors.
main().catch((e) => {
  console.error(e)
  process.exit(1)
})

0 commit comments

Comments
 (0)