Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 43 additions & 147 deletions components/firecrawl/actions/crawl-url/crawl-url.mjs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import { parseObjectEntries } from "../../common/utils.mjs";
import firecrawl from "../../firecrawl.app.mjs";

export default {
key: "firecrawl-crawl-url",
name: "Crawl URL",
description: "Crawls a given input URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl)",
version: "0.0.1",
description: "Crawls a given URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post)",
version: "1.0.0",
type: "action",
props: {
firecrawl,
Expand All @@ -14,180 +15,75 @@ export default {
"url",
],
},
includes: {
propDefinition: [
firecrawl,
"includes",
],
excludePaths: {
type: "string[]",
label: "Exclude Paths",
description: "URL pathname regex patterns that exclude matching URLs from the crawl. For example, a value of `blog/.*` for the URL `firecrawl.dev` will exclude any results matching that pattern, such as `https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap`",
optional: true,
},
excludes: {
propDefinition: [
firecrawl,
"excludes",
],
optional: true,
},
generateImgAltText: {
propDefinition: [
firecrawl,
"generateImgAltText",
],
optional: true,
},
returnOnlyUrls: {
propDefinition: [
firecrawl,
"returnOnlyUrls",
],
includePaths: {
type: "string[]",
label: "Include Paths",
description: "Similar to `Exclude Paths`, but if set, only the paths matching the specified patterns will be included",
optional: true,
},
maxDepth: {
propDefinition: [
firecrawl,
"maxDepth",
],
optional: true,
},
mode: {
propDefinition: [
firecrawl,
"mode",
],
type: "integer",
label: "Max Depth",
description: "Maximum depth to crawl relative to the entered URL",
optional: true,
},
ignoreSitemap: {
propDefinition: [
firecrawl,
"ignoreSitemap",
],
optional: true,
},
limit: {
propDefinition: [
firecrawl,
"limit",
],
optional: true,
},
allowBackwardCrawling: {
propDefinition: [
firecrawl,
"allowBackwardCrawling",
],
optional: true,
},
allowExternalContentLinks: {
propDefinition: [
firecrawl,
"allowExternalContentLinks",
],
type: "boolean",
label: "Ignore Sitemap",
description: "Ignore the website sitemap when crawling",
optional: true,
},
headers: {
propDefinition: [
firecrawl,
"headers",
],
ignoreQueryParameters: {
type: "boolean",
label: "Ignore Query Parameters",
description: "Do not re-scrape the same path with different (or no) query parameters",
optional: true,
},
includeHtml: {
propDefinition: [
firecrawl,
"includeHtml",
],
optional: true,
},
includeRawHtml: {
propDefinition: [
firecrawl,
"includeRawHtml",
],
optional: true,
},
onlyIncludeTags: {
propDefinition: [
firecrawl,
"onlyIncludeTags",
],
optional: true,
},
onlyMainContent: {
propDefinition: [
firecrawl,
"onlyMainContent",
],
optional: true,
},
removeTags: {
propDefinition: [
firecrawl,
"removeTags",
],
optional: true,
},
replaceAllPathsWithAbsolutePaths: {
propDefinition: [
firecrawl,
"replaceAllPathsWithAbsolutePaths",
],
limit: {
type: "integer",
label: "Limit",
description: "Maximum number of pages to crawl",
optional: true,
},
screenshot: {
propDefinition: [
firecrawl,
"screenshot",
],
allowBackwardLinks: {
type: "boolean",
label: "Allow Backward Links",
description: "Enables the crawler to navigate from a specific URL to previously linked pages",
optional: true,
},
fullPageScreenshot: {
propDefinition: [
firecrawl,
"fullPageScreenshot",
],
allowExternalLinks: {
type: "boolean",
label: "Allow External Links",
description: "Allows the crawler to follow links to external websites",
optional: true,
},
waitFor: {
additionalOptions: {
propDefinition: [
firecrawl,
"waitFor",
"additionalOptions",
],
optional: true,
description: "Additional parameters to send in the request. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post) for available parameters. Values will be parsed as JSON where applicable.",
},
},
async run({ $ }) {
const response = await this.firecrawl.crawl({
const {
firecrawl, additionalOptions, ...data
} = this;
const response = await firecrawl.crawl({
$,
data: {
url: this.url,
crawlerOptions: {
includes: this.includes,
excludes: this.excludes,
generateImgAltText: this.generateImgAltText,
returnOnlyUrls: this.returnOnlyUrls,
maxDepth: parseInt(this.maxDepth),
mode: this.mode,
ignoreSitemap: this.ignoreSitemap,
limit: this.limit,
allowBackwardCrawling: this.allowBackwardCrawling,
allowExternalContentLinks: this.allowExternalContentLinks,
},
pageOptions: {
headers: this.headers,
includeHtml: this.includeHtml,
includeRawHtml: this.includeRawHtml,
onlyIncludeTags: this.onlyIncludeTags,
onlyMainContent: this.onlyMainContent,
removeTags: this.removeTags,
replaceAllPathsWithAbsolutePaths: this.replaceAllPathsWithAbsolutePaths,
screenshot: this.screenshot,
fullPageScreenshot: this.fullPageScreenshot,
waitFor: parseInt(this.waitFor),
},
...data,
...(additionalOptions && parseObjectEntries(additionalOptions)),
},
});

$.export("$summary", `Crawl job started with jobId: ${response.jobId}`);
$.export("$summary", `Crawl job started (ID: ${response.id})`);
return response;
},
};
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ import firecrawl from "../../firecrawl.app.mjs";

export default {
key: "firecrawl-get-crawl-status",
name: "Get Crawl Status",
description: "Obtains the status and data from a previous crawl operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/status)",
version: "0.0.1",
name: "Get Crawl Data",
description: "Obtains the status and data from a previous crawl operation. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-get)",
version: "0.0.2",
type: "action",
props: {
firecrawl,
Expand All @@ -21,7 +21,7 @@ export default {
crawlId: this.crawlId,
});

$.export("$summary", `Successfully retrieved status for crawl ID: ${this.crawlId}`);
$.export("$summary", `Successfully retrieved status for crawl (ID: ${this.crawlId})`);
return response;
},
};
Loading
Loading