Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/firecrawl/actions/crawl-url/crawl-url.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export default {
key: "firecrawl-crawl-url",
name: "Crawl URL",
description: "Crawls a given URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post)",
version: "1.0.0",
version: "1.0.1",
type: "action",
props: {
firecrawl,
Expand Down
96 changes: 96 additions & 0 deletions components/firecrawl/actions/extract-data/extract-data.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import firecrawl from "../../firecrawl.app.mjs";
import { ConfigurationError } from "@pipedream/platform";
import { parseObjectEntries } from "../../common/utils.mjs";

/**
 * Pipedream action: submit a Firecrawl extract job for one or more URLs and,
 * optionally, poll until the job leaves the "processing" state.
 *
 * At least one of `prompt` or `schema` must be provided; otherwise a
 * ConfigurationError is thrown before any request is made.
 *
 * Returns the last response received: the job-creation response, or the
 * final status response when `waitForCompletion` is enabled.
 */
export default {
  key: "firecrawl-extract-data",
  name: "Extract Data",
  description: "Extract structured data from one or multiple URLs. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract)",
  version: "0.0.1",
  type: "action",
  props: {
    firecrawl,
    urls: {
      type: "string[]",
      label: "URLs",
      description: "An array of one or more URLs. Supports wildcards (/*) for broader crawling.",
    },
    prompt: {
      type: "string",
      label: "Prompt",
      description: "(Optional unless no schema): A natural language prompt describing the data you want or specifying how you want that data structured.",
      optional: true,
    },
    schema: {
      type: "object",
      label: "Schema",
      description: "(Optional unless no prompt): A more rigid structure if you already know the JSON layout.",
      optional: true,
    },
    enableWebSearch: {
      type: "boolean",
      label: "Enable Web Search",
      description: "When `true`, the extraction will use web search to find additional data",
      optional: true,
    },
    // The prop key is kept as `importSitemap` so existing configurations keep
    // working; the label now matches what the flag does (see description).
    importSitemap: {
      type: "boolean",
      label: "Ignore Sitemap",
      description: "When true, sitemap.xml files will be ignored during website scanning",
      optional: true,
    },
    includeSubdomains: {
      type: "boolean",
      label: "Include Subdomains",
      description: "When true, subdomains of the provided URLs will also be scanned",
      optional: true,
    },
    showSources: {
      type: "boolean",
      label: "Show Sources",
      description: "When true, the sources used to extract the data will be included in the response",
      optional: true,
    },
    waitForCompletion: {
      type: "boolean",
      label: "Wait For Completion",
      description: "Set to `true` to poll the API in 3-second intervals until the job is completed",
      optional: true,
    },
  },
  /**
   * Creates the extract job and optionally waits for it to finish.
   *
   * @param {object} ctx - Pipedream run context
   * @param {object} ctx.$ - step helper, forwarded to the app's request
   *   methods and used to export the `$summary`
   * @returns {Promise<object>} the last API response received
   * @throws {ConfigurationError} when neither `prompt` nor `schema` is set
   */
  async run({ $ }) {
    if (!this.prompt && !this.schema) {
      throw new ConfigurationError("Must enter one of Prompt or Schema");
    }

    // Interval between status polls, as promised by the prop description.
    const POLL_INTERVAL_MS = 3000;

    let response = await this.firecrawl.extract({
      $,
      data: {
        urls: this.urls,
        prompt: this.prompt,
        // Only parse the schema when one was supplied.
        schema: this.schema && parseObjectEntries(this.schema),
        enableWebSearch: this.enableWebSearch,
        // Firecrawl's extract endpoint documents this flag as `ignoreSitemap`;
        // sending it under the prop's own key would not match that field.
        ignoreSitemap: this.importSitemap,
        includeSubdomains: this.includeSubdomains,
        showSources: this.showSources,
      },
    });

    // Poll only when a job id came back; without one there is nothing to
    // query and the creation response is returned as-is.
    if (this.waitForCompletion && response.id) {
      const { id } = response;
      const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
      do {
        // Wait before each poll so no delay is spent after the job is done.
        await sleep(POLL_INTERVAL_MS);
        response = await this.firecrawl.getExtractStatus({
          $,
          id,
        });
      } while (response.status === "processing");
    }

    if (response.success) {
      $.export("$summary", "Successfully extracted data.");
    }
    return response;
  },
};
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export default {
key: "firecrawl-get-crawl-status",
name: "Get Crawl Data",
description: "Obtains the status and data from a previous crawl operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-get)",
version: "0.0.2",
version: "0.0.3",
type: "action",
props: {
firecrawl,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import firecrawl from "../../firecrawl.app.mjs";

/**
 * Pipedream action: look up a previously created Firecrawl extract job by id
 * and return whatever the status endpoint responds with.
 */
export default {
  key: "firecrawl-get-extract-status",
  name: "Get Extract Data",
  description: "Obtains the status and data from a previous extract operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/extract-get)",
  version: "0.0.1",
  type: "action",
  props: {
    firecrawl,
    extractId: {
      type: "string",
      label: "Extract Job ID",
      description: "The ID of the extract job",
    },
  },
  /**
   * Fetches the extract job's status and exports a summary naming the job id.
   *
   * @param {object} ctx - Pipedream run context
   * @param {object} ctx.$ - step helper, forwarded to the request and used
   *   to export the `$summary`
   * @returns {Promise<object>} the status response from the app client
   */
  async run({ $ }) {
    const { extractId: id } = this;

    const status = await this.firecrawl.getExtractStatus({
      $,
      id,
    });

    const summary = `Successfully retrieved status for extract (ID: ${id})`;
    $.export("$summary", summary);
    return status;
  },
};
2 changes: 1 addition & 1 deletion components/firecrawl/actions/scrape-page/scrape-page.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ export default {
name: "Scrape Page",
description:
"Scrapes a URL and returns content from that page. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/scrape)",
version: "1.0.0",
version: "1.0.1",
type: "action",
props: {
firecrawl,
Expand Down
15 changes: 15 additions & 0 deletions components/firecrawl/firecrawl.app.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,20 @@ export default {
...opts,
});
},
extract(opts = {}) {
return this._makeRequest({
method: "POST",
path: "/extract",
...opts,
});
},
getExtractStatus({
id, ...opts
}) {
return this._makeRequest({
path: `/extract/${id}`,
...opts,
});
},
},
};
2 changes: 1 addition & 1 deletion components/firecrawl/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/firecrawl",
"version": "1.0.0",
"version": "1.1.0",
"description": "Pipedream FireCrawl Components",
"main": "firecrawl.app.mjs",
"keywords": [
Expand Down
Loading