Skip to content

Commit c732c7e

Browse files
skeptrunedev authored and cdxker committed
feature: add route to listen for firecrawl webhooks on page scrape events
1 parent 5a8f9a7 commit c732c7e

File tree

10 files changed

+680
-38
lines changed

10 files changed

+680
-38
lines changed

.vscode/launch.json

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
"name": "Debug executable 'trieve-server'",
88
"cargo": {
99
"args": [
10-
"+default",
10+
"+nightly",
1111
"build",
1212
"--manifest-path=./server/Cargo.toml",
1313
"--bin=trieve-server",
@@ -35,6 +35,20 @@
3535
"args": [],
3636
"cwd": "${workspaceFolder}/server"
3737
},
38+
{
39+
"type": "lldb",
40+
"request": "launch",
41+
"name": "Debug executable 'crawl-worker'",
42+
"cargo": {
43+
"args": [
44+
"build",
45+
"--manifest-path=./server/Cargo.toml",
46+
"--bin=crawl-worker"
47+
]
48+
},
49+
"args": [],
50+
"cwd": "${workspaceFolder}/server"
51+
},
3852
{
3953
"type": "lldb",
4054
"request": "launch",

clients/ts-sdk/openapi.json

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4387,6 +4387,42 @@
43874387
]
43884388
}
43894389
},
4390+
"/api/file/html_page": {
4391+
"post": {
4392+
"tags": [
4393+
"File"
4394+
],
4395+
"summary": "Upload HTML Page",
4396+
"description": "Chunk HTML by headings and queue for indexing into the specified dataset.",
4397+
"operationId": "upload_html_page",
4398+
"requestBody": {
4399+
"description": "JSON request payload to upload an HTML page",
4400+
"content": {
4401+
"application/json": {
4402+
"schema": {
4403+
"$ref": "#/components/schemas/UploadHtmlPageReqPayload"
4404+
}
4405+
}
4406+
},
4407+
"required": true
4408+
},
4409+
"responses": {
4410+
"204": {
4411+
"description": "Confirmation that html is being processed"
4412+
},
4413+
"400": {
4414+
"description": "Service error relating to processing the file",
4415+
"content": {
4416+
"application/json": {
4417+
"schema": {
4418+
"$ref": "#/components/schemas/ErrorResponseBody"
4419+
}
4420+
}
4421+
}
4422+
}
4423+
}
4424+
}
4425+
},
43904426
"/api/file/{file_id}": {
43914427
"get": {
43924428
"tags": [
@@ -8460,6 +8496,15 @@
84608496
"type": "string",
84618497
"description": "The URL to crawl",
84628498
"nullable": true
8499+
},
8500+
"webhook_metadata": {
8501+
"description": "Metadata to send back with the webhook call for each successful page scrape",
8502+
"nullable": true
8503+
},
8504+
"webhook_url": {
8505+
"type": "string",
8506+
"description": "URL to call back on the webhook for each successful page scrape",
8507+
"nullable": true
84638508
}
84648509
},
84658510
"example": {
@@ -9642,6 +9687,44 @@
96429687
"dot"
96439688
]
96449689
},
9690+
"Document": {
9691+
"type": "object",
9692+
"required": [
9693+
"metadata"
9694+
],
9695+
"properties": {
9696+
"extract": {
9697+
"type": "string",
9698+
"nullable": true
9699+
},
9700+
"html": {
9701+
"type": "string",
9702+
"nullable": true
9703+
},
9704+
"links": {
9705+
"type": "array",
9706+
"items": {
9707+
"type": "string"
9708+
},
9709+
"nullable": true
9710+
},
9711+
"markdown": {
9712+
"type": "string",
9713+
"nullable": true
9714+
},
9715+
"metadata": {
9716+
"$ref": "#/components/schemas/Metadata"
9717+
},
9718+
"rawHtml": {
9719+
"type": "string",
9720+
"nullable": true
9721+
},
9722+
"screenshot": {
9723+
"type": "string",
9724+
"nullable": true
9725+
}
9726+
}
9727+
},
96459728
"EditMessageReqPayload": {
96469729
"type": "object",
96479730
"required": [
@@ -11680,6 +11763,152 @@
1168011763
"updated_at": "2021-01-01 00:00:00.000"
1168111764
}
1168211765
},
11766+
"Metadata": {
11767+
"type": "object",
11768+
"properties": {
11769+
"articleSection": {
11770+
"type": "string",
11771+
"nullable": true
11772+
},
11773+
"articleTag": {
11774+
"type": "string",
11775+
"nullable": true
11776+
},
11777+
"dcDate": {
11778+
"type": "string",
11779+
"nullable": true
11780+
},
11781+
"dcDateCreated": {
11782+
"type": "string",
11783+
"nullable": true
11784+
},
11785+
"dcDescription": {
11786+
"type": "string",
11787+
"nullable": true
11788+
},
11789+
"dcSubject": {
11790+
"type": "string",
11791+
"nullable": true
11792+
},
11793+
"dcTermsAudience": {
11794+
"type": "string",
11795+
"nullable": true
11796+
},
11797+
"dcTermsCreated": {
11798+
"type": "string",
11799+
"nullable": true
11800+
},
11801+
"dcTermsKeywords": {
11802+
"type": "string",
11803+
"nullable": true
11804+
},
11805+
"dcTermsSubject": {
11806+
"type": "string",
11807+
"nullable": true
11808+
},
11809+
"dcTermsType": {
11810+
"type": "string",
11811+
"nullable": true
11812+
},
11813+
"dcType": {
11814+
"type": "string",
11815+
"nullable": true
11816+
},
11817+
"description": {
11818+
"type": "string",
11819+
"nullable": true
11820+
},
11821+
"error": {
11822+
"type": "string",
11823+
"nullable": true
11824+
},
11825+
"keywords": {
11826+
"type": "string",
11827+
"nullable": true
11828+
},
11829+
"language": {
11830+
"type": "string",
11831+
"nullable": true
11832+
},
11833+
"modifiedTime": {
11834+
"type": "string",
11835+
"nullable": true
11836+
},
11837+
"ogAudio": {
11838+
"type": "string",
11839+
"nullable": true
11840+
},
11841+
"ogDescription": {
11842+
"type": "string",
11843+
"nullable": true
11844+
},
11845+
"ogDeterminer": {
11846+
"type": "string",
11847+
"nullable": true
11848+
},
11849+
"ogImage": {
11850+
"type": "string",
11851+
"nullable": true
11852+
},
11853+
"ogLocale": {
11854+
"type": "string",
11855+
"nullable": true
11856+
},
11857+
"ogLocaleAlternate": {
11858+
"type": "array",
11859+
"items": {
11860+
"type": "string"
11861+
},
11862+
"nullable": true
11863+
},
11864+
"ogSiteName": {
11865+
"type": "string",
11866+
"nullable": true
11867+
},
11868+
"ogTitle": {
11869+
"type": "string",
11870+
"nullable": true
11871+
},
11872+
"ogUrl": {
11873+
"type": "string",
11874+
"nullable": true
11875+
},
11876+
"ogVideo": {
11877+
"type": "string",
11878+
"nullable": true
11879+
},
11880+
"publishedTime": {
11881+
"type": "string",
11882+
"nullable": true
11883+
},
11884+
"robots": {
11885+
"type": "string",
11886+
"nullable": true
11887+
},
11888+
"site_map": {
11889+
"allOf": [
11890+
{
11891+
"$ref": "#/components/schemas/Sitemap"
11892+
}
11893+
],
11894+
"nullable": true
11895+
},
11896+
"sourceURL": {
11897+
"type": "string",
11898+
"nullable": true
11899+
},
11900+
"statusCode": {
11901+
"type": "integer",
11902+
"format": "int32",
11903+
"nullable": true,
11904+
"minimum": 0
11905+
},
11906+
"title": {
11907+
"type": "string",
11908+
"nullable": true
11909+
}
11910+
}
11911+
},
1168311912
"MmrOptions": {
1168411913
"type": "object",
1168511914
"description": "MMR Options lets you specify different methods to rerank the chunks in the result set using Maximal Marginal Relevance. If not specified, this defaults to the score of the chunks.",
@@ -15025,6 +15254,17 @@
1502515254
"pos_in_queue": 1
1502615255
}
1502715256
},
15257+
"Sitemap": {
15258+
"type": "object",
15259+
"required": [
15260+
"changefreq"
15261+
],
15262+
"properties": {
15263+
"changefreq": {
15264+
"type": "string"
15265+
}
15266+
}
15267+
},
1502815268
"SlimChunkMetadata": {
1502915269
"type": "object",
1503015270
"required": [
@@ -16378,6 +16618,24 @@
1637816618
}
1637916619
}
1638016620
},
16621+
"UploadHtmlPageReqPayload": {
16622+
"type": "object",
16623+
"required": [
16624+
"data",
16625+
"metadata",
16626+
"scrapeId"
16627+
],
16628+
"properties": {
16629+
"data": {
16630+
"$ref": "#/components/schemas/Document"
16631+
},
16632+
"metadata": {},
16633+
"scrapeId": {
16634+
"type": "string",
16635+
"format": "uuid"
16636+
}
16637+
}
16638+
},
1638116639
"UsageGraphPoint": {
1638216640
"type": "object",
1638316641
"required": [

0 commit comments

Comments
 (0)